[27190] | 1 | package URI;
|
---|
| 2 |
|
---|
| 3 | use strict;
|
---|
| 4 | use vars qw($VERSION);
|
---|
| 5 | $VERSION = "1.60";
|
---|
| 6 |
|
---|
| 7 | use vars qw($ABS_REMOTE_LEADING_DOTS $ABS_ALLOW_RELATIVE_SCHEME $DEFAULT_QUERY_FORM_DELIMITER);
|
---|
| 8 |
|
---|
| 9 | my %implements; # mapping from scheme to implementor class
|
---|
| 10 |
|
---|
# Character classes used to build the escaping and scheme-matching
# patterns in this file.  They are package variables so that they can be
# interpolated into regular expressions.
use vars qw($reserved $mark $unreserved $uric $scheme_re);

$reserved   = q(;/?:@&=+$,[]);
$mark       = q(-_.!~*'());

# Both of these are meant to be dropped inside a [...] character class,
# which is why the punctuation is backslash-quoted.
$unreserved = 'A-Za-z0-9' . quotemeta($mark);
$uric       = join '', quotemeta($reserved), $unreserved, '%';

# A scheme name: a letter followed by letters, digits, ".", "+" or "-".
$scheme_re  = '[a-zA-Z][a-zA-Z0-9.+\-]*';
| 20 |
|
---|
| 21 | use Carp ();
|
---|
| 22 | use URI::Escape ();
|
---|
| 23 |
|
---|
| 24 | use overload ('""' => sub { ${$_[0]} },
|
---|
| 25 | '==' => sub { _obj_eq(@_) },
|
---|
| 26 | '!=' => sub { !_obj_eq(@_) },
|
---|
| 27 | fallback => 1,
|
---|
| 28 | );
|
---|
| 29 |
|
---|
# Identity test behind the overloaded '==' and '!=' operators: the two
# operands are "equal" only when their raw, un-overloaded string forms
# (as reported by overload::StrVal) are the same.
sub _obj_eq {
    my ($left, $right) = @_;
    my $left_id  = overload::StrVal($left);
    my $right_id = overload::StrVal($right);
    return $left_id eq $right_id;
}
| 34 |
|
---|
# Constructor.
#
#   $class  - invocant (not used to pick the resulting class)
#   $uri    - URI text, or anything that stringifies to it; undef means ""
#   $scheme - optional fallback used only when $uri carries no scheme of
#             its own: a scheme name, an absolute URI string, or an object
#             that provides a scheme() method
#
# Returns whatever the selected implementor class' _init() returns.
sub new
{
    my ($class, $uri, $scheme) = @_;

    # Force stringification; a missing argument becomes the empty string.
    $uri = defined $uri ? "$uri" : "";

    # Peel off <...>, <URL:...> and "..." wrappers, then trim white space.
    $uri =~ s/^<(?:URL:)?(.*)>$/$1/;
    $uri =~ s/^"(.*)"$/$1/;
    $uri =~ s/^\s+//;
    $uri =~ s/\s+$//;

    my $impclass;
    if ($uri =~ m/^($scheme_re):/so) {
        # A scheme found in the URI itself always wins.
        $scheme = $1;
    }
    elsif ($impclass = ref $scheme) {
        # An object was passed: reuse its class and ask it for the scheme.
        $scheme = $scheme->scheme;
    }
    elsif ($scheme && $scheme =~ m/^($scheme_re)(?::|$)/o) {
        # A bare scheme name or an absolute URI string.
        $scheme = $1;
    }

    unless ($impclass) {
        $impclass = implementor($scheme);
        unless ($impclass) {
            # No implementor could be found for this scheme.
            require URI::_foreign;
            $impclass = 'URI::_foreign';
        }
    }

    return $impclass->_init($uri, $scheme);
}
| 66 |
|
---|
| 67 |
|
---|
# Construct a URI from $uri (with $base passed as the scheme hint) and
# return the result of calling abs($base) on it.
sub new_abs
{
    my ($class, $uri, $base) = @_;
    my $relative = $class->new($uri, $base);
    return $relative->abs($base);
}
| 74 |
|
---|
| 75 |
|
---|
# Build the object: a blessed reference to the (escaped) URI string.
#
#   $class  - class to bless into
#   $str    - URI text
#   $scheme - scheme to prepend when $str has none
sub _init
{
    my ($class, $str, $scheme) = @_;

    # Percent-encode everything that is not a legal URI character.
    $str = $class->_uric_escape($str);

    # Prepend the scheme unless the string already starts with one or the
    # class accepts scheme-less URIs.
    if ($str !~ /^$scheme_re:/o && !$class->_no_scheme_ok) {
        $str = "$scheme:$str";
    }

    return bless \$str, $class;
}
| 87 |
|
---|
| 88 |
|
---|
# Return a copy of $str in which every character outside the $uric set
# (and other than "#") has been replaced by URI::Escape::escape_char's
# rendering of it.  The result is passed through utf8::downgrade.
sub _uric_escape
{
    my ($class, $str) = @_;

    $str =~ s{([^$uric\#])}{ URI::Escape::escape_char($1) }ego;
    utf8::downgrade($str);

    return $str;
}
| 96 |
|
---|
| 97 |
|
---|
# Look up, or register, the class that implements a URI scheme.
#
#   implementor($scheme)            - return the implementing class name,
#                                     loading "URI::<scheme>" on demand;
#                                     returns nothing if there is none
#   implementor($scheme, $impclass) - register $impclass for $scheme and
#                                     return the previously registered class
#
# A missing or syntactically invalid $scheme yields "URI::_generic".
# Dies if the scheme module exists but fails to load for any reason other
# than not being found in @INC.
sub implementor
{
    my($scheme, $impclass) = @_;
    if (!$scheme || $scheme !~ /\A$scheme_re\z/o) {
        require URI::_generic;
        return "URI::_generic";
    }

    $scheme = lc($scheme);

    if ($impclass) {
        # Set the implementor class for a given scheme
        my $old = $implements{$scheme};
        $impclass->_init_implementor($scheme);
        $implements{$scheme} = $impclass;
        return $old;
    }

    my $ic = $implements{$scheme};
    return $ic if $ic;

    # scheme not yet known, look for internal or
    # preloaded (with 'use') implementation
    $ic = "URI::$scheme";  # default location

    # turn scheme into a valid perl identifier by a simple transformation...
    $ic =~ s/\+/_P/g;
    $ic =~ s/\./_O/g;
    $ic =~ s/\-/_/g;

    no strict 'refs';
    # check we actually have one for the scheme:
    unless (@{"${ic}::ISA"}) {
        # Try to load it.  A scheme without a module is an expected,
        # non-error outcome, so the caller's $@ is saved and put back
        # afterwards instead of being left holding "Can't locate ...".
        my $saved_error = $@;
        eval "require $ic";
        die $@ if $@ && $@ !~ /Can\'t locate.*in \@INC/;
        $@ = $saved_error;
        return unless @{"${ic}::ISA"};
    }

    $ic->_init_implementor($scheme);
    $implements{$scheme} = $ic;
    $ic;
}
| 141 |
|
---|
| 142 |
|
---|
# Hook called by implementor() when a class is registered for a scheme.
# This base version does nothing.  Note that one implementor class may
# serve several URI schemes, so it can be called with different names.
sub _init_implementor
{
    my ($class, $scheme) = @_;
}
| 149 |
|
---|
| 150 |
|
---|
# Return an independent copy of the object: a new scalar holding the same
# string, blessed into the same class.
sub clone
{
    my ($self) = @_;
    my $copy = $$self;
    return bless \$copy, ref $self;
}
| 157 |
|
---|
| 158 |
|
---|
# False here: _init() prepends a scheme when the string has none.
sub _no_scheme_ok { return 0; }
| 160 |
|
---|
# Get or set the scheme exactly as written (no case folding).
#
#   $uri->_scheme          - return the scheme, or nothing if there is none
#   $uri->_scheme($new)    - replace the scheme and return the old one; the
#                            object is re-blessed into whatever class
#                            URI->new picks for the rebuilt string
#   $uri->_scheme(undef)   - (or "") strip the scheme if the class allows
#                            scheme-less URIs, otherwise leave it in place;
#                            the old scheme is returned either way
#
# Croaks with "Bad scheme '...'" when $new is not a valid scheme name.
sub _scheme
{
    my $self = shift;

    unless (@_) {
        return unless $$self =~ /^($scheme_re):/o;
        return $1;
    }

    my $old;
    my $new = shift;
    if (defined($new) && length($new)) {
        # \A...\z rather than ^...$: "$" would also accept a trailing
        # newline, letting e.g. "http\n" through validation.
        Carp::croak("Bad scheme '$new'") unless $new =~ /\A$scheme_re\z/o;
        $old = $1 if $$self =~ s/^($scheme_re)://o;
        my $newself = URI->new("$new:$$self");
        $$self = $$newself;
        bless $self, ref($newself);
    }
    else {
        if ($self->_no_scheme_ok) {
            $old = $1 if $$self =~ s/^($scheme_re)://o;
            # With the scheme gone, what is left may itself start with
            # something that parses as a scheme.
            Carp::carp("Oops, opaque part now look like scheme")
                if $^W && $$self =~ m/^$scheme_re:/o
        }
        else {
            $old = $1 if $$self =~ m/^($scheme_re):/o;
        }
    }

    return $old;
}
| 192 |
|
---|
# Same as _scheme(), except that the scheme handed back is lowercased.
# Returns nothing when _scheme() yields no defined value.
sub scheme
{
    my $self = shift;
    my $raw = $self->_scheme(@_);
    return unless defined $raw;
    return lc $raw;
}
| 199 |
|
---|
| 200 |
|
---|
# Get or set the part between the optional "scheme:" prefix and the
# optional "#fragment" suffix.
#
#   $uri->opaque          - return that part
#   $uri->opaque($new)    - replace it (undef is treated as ""), escaping
#                           characters outside $uric, and return the old value
sub opaque
{
    my ($self, @args) = @_;

    if (!@args) {
        $$self =~ /^(?:$scheme_re:)?([^\#]*)/o or die;
        return $1;
    }

    # Split into: optional scheme (colon included), opaque part, and
    # optional fragment ("#" included).
    $$self =~ /^ ($scheme_re:)? ([^\#]*) (\#.*)? $/sx or die;
    my ($scheme_part, $previous, $frag_part) = ($1, $2, $3);

    my $replacement = defined $args[0] ? $args[0] : "";
    $replacement =~ s/([^$uric])/ URI::Escape::escape_char($1)/ego;
    utf8::downgrade($replacement);

    # Put the pieces back together around the new opaque part.
    my $rebuilt = defined $scheme_part ? $scheme_part : "";
    $rebuilt .= $replacement;
    $rebuilt .= $frag_part if defined $frag_part;
    $$self = $rebuilt;

    return $previous;
}
| 230 |
|
---|
| 231 | *path = \&opaque; # alias
|
---|
| 232 |
|
---|
| 233 |
|
---|
# Get or set the fragment (the text after the first "#").
#
#   $uri->fragment          - return it, or nothing if there is no "#"
#   $uri->fragment($new)    - replace it, escaping characters outside $uric,
#                             and return the old value
#   $uri->fragment(undef)   - remove it and return the old value
sub fragment
{
    my ($self, @args) = @_;

    unless (@args) {
        return $1 if $$self =~ /\#(.*)/s;
        return;
    }

    # Cut the existing fragment (if any) off the end, remembering it.
    my $previous;
    if ($$self =~ s/\#(.*)//s) {
        $previous = $1;
    }

    my $new_frag = $args[0];
    if (defined $new_frag) {
        $new_frag =~ s/([^$uric])/ URI::Escape::escape_char($1) /ego;
        utf8::downgrade($new_frag);
        $$self .= "#$new_frag";
    }

    return $previous;
}
| 253 |
|
---|
| 254 |
|
---|
# Return the URI as a plain string (the scalar the object refers to).
sub as_string
{
    my ($self) = @_;
    return $$self;
}
| 260 |
|
---|
| 261 |
|
---|
# Return the URI with percent-escaped UTF-8 sequences turned into the
# characters they encode.  The object itself is not modified.
sub as_iri
{
    my ($self) = @_;
    my $octets = $$self;

    # Unescape every %80-%FF octet; if there were none, we are done.
    return $octets
        unless $octets =~ s/%([89a-fA-F][0-9a-fA-F])/chr(hex($1))/eg;

    # Decode piecewise rather than with a single
    #   Encode::decode("UTF-8", $str, sub { sprintf "%%%02X", shift })
    # call, because that form doesn't work before Encode 2.39.
    require Encode;
    my $utf8 = Encode::find_encoding("UTF-8");
    my $iri = "";
    while (length $octets) {
        # decode() consumes what it could decode from the front of $octets.
        $iri .= $utf8->decode($octets, Encode::FB_QUIET());
        next unless length $octets;

        # Something is left over: escape the next char and carry on.
        $iri .= URI::Escape::escape_char(substr($octets, 0, 1, ""));
    }

    return $iri;
}
| 288 |
|
---|
| 289 |
|
---|
# Return a normalized URI: lowercase scheme, unreserved characters left
# unescaped, and remaining escape sequences in upper case.  If there is
# neither an uppercase letter in the scheme nor any escape sequence, the
# object itself is returned; otherwise a modified clone is.
sub canonical
{
    my ($self) = @_;

    my $scheme      = $self->_scheme || "";
    my $has_upper   = $scheme =~ /[A-Z]/;
    my $has_escapes = $$self =~ /%[a-fA-F0-9]{2}/;
    if (!$has_upper && !$has_escapes) {
        return $self;
    }

    my $canon = $self->clone;
    $canon->_scheme(lc $scheme) if $has_upper;

    if ($has_escapes) {
        $$canon =~ s{%([0-9a-fA-F]{2})}{
            my $hex  = $1;
            my $char = chr(hex($hex));
            $char =~ /^[$unreserved]\z/o ? $char : "%\U$hex";
        }ge;
    }

    return $canon;
}
| 313 |
|
---|
# Compare two URIs: they are equal when they are of the same class and
# their canonical forms stringify identically.  Either argument may be a
# plain string, in which case it is first turned into a URI object using
# the other argument as scheme hint.  Subclasses will provide a more
# correct implementation.
sub eq {
    my ($lhs, $rhs) = @_;

    $lhs = URI->new($lhs, $rhs) if !ref $lhs;
    $rhs = URI->new($rhs, $lhs) if !ref $rhs;

    my $same_class = ref($lhs) eq ref($rhs);
    return $same_class
        && $lhs->canonical->as_string eq $rhs->canonical->as_string;
}
| 322 |
|
---|
| 323 | # generic-URI transformation methods
|
---|
# Base implementation: returns the invocant unchanged, ignoring any base.
sub abs { my ($self) = @_; return $self; }
# Base implementation: returns the invocant unchanged, ignoring any base.
sub rel { my ($self) = @_; return $self; }
| 326 |
|
---|
# False in this base class.
sub secure { return 0; }
| 328 |
|
---|
# Storable hook: serialize the object as just its URI string.
sub STORABLE_freeze {
    my $self = shift;
    return ${$self};
}
| 334 |
|
---|
# Storable hook: restore the object from the string produced by
# STORABLE_freeze by storing it in the scalar the object refers to.
sub STORABLE_thaw {
    my ($self, undef, $serialized) = @_;
    ${$self} = $serialized;
}
| 339 |
|
---|
| 340 | 1;
|
---|
| 341 |
|
---|
| 342 | __END__
|
---|
| 343 |
|
---|
| 344 | =head1 NAME
|
---|
| 345 |
|
---|
| 346 | URI - Uniform Resource Identifiers (absolute and relative)
|
---|
| 347 |
|
---|
| 348 | =head1 SYNOPSIS
|
---|
| 349 |
|
---|
| 350 | $u1 = URI->new("http://www.perl.com");
|
---|
| 351 | $u2 = URI->new("foo", "http");
|
---|
| 352 | $u3 = $u2->abs($u1);
|
---|
| 353 | $u4 = $u3->clone;
|
---|
| 354 | $u5 = URI->new("HTTP://WWW.perl.com:80")->canonical;
|
---|
| 355 |
|
---|
| 356 | $str = $u->as_string;
|
---|
| 357 | $str = "$u";
|
---|
| 358 |
|
---|
| 359 | $scheme = $u->scheme;
|
---|
| 360 | $opaque = $u->opaque;
|
---|
| 361 | $path = $u->path;
|
---|
| 362 | $frag = $u->fragment;
|
---|
| 363 |
|
---|
| 364 | $u->scheme("ftp");
|
---|
| 365 | $u->host("ftp.perl.com");
|
---|
| 366 | $u->path("cpan/");
|
---|
| 367 |
|
---|
| 368 | =head1 DESCRIPTION
|
---|
| 369 |
|
---|
| 370 | This module implements the C<URI> class. Objects of this class
|
---|
| 371 | represent "Uniform Resource Identifier references" as specified in RFC
|
---|
| 372 | 2396 (and updated by RFC 2732).
|
---|
| 373 |
|
---|
| 374 | A Uniform Resource Identifier is a compact string of characters that
|
---|
| 375 | identifies an abstract or physical resource. A Uniform Resource
|
---|
| 376 | Identifier can be further classified as either a Uniform Resource Locator
|
---|
| 377 | (URL) or a Uniform Resource Name (URN). The distinction between URL
|
---|
| 378 | and URN does not matter to the C<URI> class interface. A
|
---|
| 379 | "URI-reference" is a URI that may have additional information attached
|
---|
| 380 | in the form of a fragment identifier.
|
---|
| 381 |
|
---|
| 382 | An absolute URI reference consists of three parts: a I<scheme>, a
|
---|
| 383 | I<scheme-specific part> and a I<fragment> identifier. A subset of URI
|
---|
| 384 | references share a common syntax for hierarchical namespaces. For
|
---|
| 385 | these, the scheme-specific part is further broken down into
|
---|
| 386 | I<authority>, I<path> and I<query> components. These URIs can also
|
---|
| 387 | take the form of relative URI references, where the scheme (and
|
---|
| 388 | usually also the authority) component is missing, but implied by the
|
---|
| 389 | context of the URI reference. The three forms of URI reference
|
---|
| 390 | syntax are summarized as follows:
|
---|
| 391 |
|
---|
| 392 | <scheme>:<scheme-specific-part>#<fragment>
|
---|
| 393 | <scheme>://<authority><path>?<query>#<fragment>
|
---|
| 394 | <path>?<query>#<fragment>
|
---|
| 395 |
|
---|
| 396 | The components into which a URI reference can be divided depend on the
|
---|
| 397 | I<scheme>. The C<URI> class provides methods to get and set the
|
---|
| 398 | individual components. The methods available for a specific
|
---|
| 399 | C<URI> object depend on the scheme.
|
---|
| 400 |
|
---|
| 401 | =head1 CONSTRUCTORS
|
---|
| 402 |
|
---|
| 403 | The following methods construct new C<URI> objects:
|
---|
| 404 |
|
---|
| 405 | =over 4
|
---|
| 406 |
|
---|
| 407 | =item $uri = URI->new( $str )
|
---|
| 408 |
|
---|
| 409 | =item $uri = URI->new( $str, $scheme )
|
---|
| 410 |
|
---|
| 411 | Constructs a new URI object. The string
|
---|
| 412 | representation of a URI is given as argument, together with an optional
|
---|
| 413 | scheme specification. Common URI wrappers like "" and <>, as well as
|
---|
| 414 | leading and trailing white space, are automatically removed from
|
---|
| 415 | the $str argument before it is processed further.
|
---|
| 416 |
|
---|
| 417 | The constructor determines the scheme, maps this to an appropriate
|
---|
| 418 | URI subclass, constructs a new object of that class and returns it.
|
---|
| 419 |
|
---|
| 420 | The $scheme argument is only used when $str is a
|
---|
| 421 | relative URI. It can be either a simple string that
|
---|
| 422 | denotes the scheme, a string containing an absolute URI reference, or
|
---|
| 423 | an absolute C<URI> object. If no $scheme is specified for a relative
|
---|
| 424 | URI $str, then $str is simply treated as a generic URI (no scheme-specific
|
---|
| 425 | methods available).
|
---|
| 426 |
|
---|
| 427 | The set of characters available for building URI references is
|
---|
| 428 | restricted (see L<URI::Escape>). Characters outside this set are
|
---|
| 429 | automatically escaped by the URI constructor.
|
---|
| 430 |
|
---|
| 431 | =item $uri = URI->new_abs( $str, $base_uri )
|
---|
| 432 |
|
---|
| 433 | Constructs a new absolute URI object. The $str argument can
|
---|
| 434 | denote a relative or absolute URI. If relative, then it is
|
---|
| 435 | absolutized using $base_uri as base. The $base_uri must be an absolute
|
---|
| 436 | URI.
|
---|
| 437 |
|
---|
| 438 | =item $uri = URI::file->new( $filename )
|
---|
| 439 |
|
---|
| 440 | =item $uri = URI::file->new( $filename, $os )
|
---|
| 441 |
|
---|
| 442 | Constructs a new I<file> URI from a file name. See L<URI::file>.
|
---|
| 443 |
|
---|
| 444 | =item $uri = URI::file->new_abs( $filename )
|
---|
| 445 |
|
---|
| 446 | =item $uri = URI::file->new_abs( $filename, $os )
|
---|
| 447 |
|
---|
| 448 | Constructs a new absolute I<file> URI from a file name. See
|
---|
| 449 | L<URI::file>.
|
---|
| 450 |
|
---|
| 451 | =item $uri = URI::file->cwd
|
---|
| 452 |
|
---|
| 453 | Returns the current working directory as a I<file> URI. See
|
---|
| 454 | L<URI::file>.
|
---|
| 455 |
|
---|
| 456 | =item $uri->clone
|
---|
| 457 |
|
---|
| 458 | Returns a copy of the $uri.
|
---|
| 459 |
|
---|
| 460 | =back
|
---|
| 461 |
|
---|
| 462 | =head1 COMMON METHODS
|
---|
| 463 |
|
---|
| 464 | The methods described in this section are available for all C<URI>
|
---|
| 465 | objects.
|
---|
| 466 |
|
---|
| 467 | Methods that give access to components of a URI always return the
|
---|
| 468 | old value of the component. The value returned is C<undef> if the
|
---|
| 469 | component was not present. There is generally a difference between a
|
---|
| 470 | component that is empty (represented as C<"">) and a component that is
|
---|
| 471 | missing (represented as C<undef>). If an accessor method is given an
|
---|
| 472 | argument, it updates the corresponding component in addition to
|
---|
| 473 | returning the old value of the component. Passing an undefined
|
---|
| 474 | argument removes the component (if possible). The description of
|
---|
| 475 | each accessor method indicates whether the component is passed as
|
---|
| 476 | an escaped (percent-encoded) or an unescaped string. A component that can be further
|
---|
| 477 | divided into sub-parts is usually passed escaped, as unescaping might
|
---|
| 478 | change its semantics.
|
---|
| 479 |
|
---|
| 480 | The common methods available for all URI are:
|
---|
| 481 |
|
---|
| 482 | =over 4
|
---|
| 483 |
|
---|
| 484 | =item $uri->scheme
|
---|
| 485 |
|
---|
| 486 | =item $uri->scheme( $new_scheme )
|
---|
| 487 |
|
---|
| 488 | Sets and returns the scheme part of the $uri. If the $uri is
|
---|
| 489 | relative, then $uri->scheme returns C<undef>. If called with an
|
---|
| 490 | argument, it updates the scheme of $uri, possibly changing the
|
---|
| 491 | class of $uri, and returns the old scheme value. The method croaks
|
---|
| 492 | if the new scheme name is illegal; a scheme name must begin with a
|
---|
| 493 | letter and must consist of only US-ASCII letters, numbers, and a few
|
---|
| 494 | special marks: ".", "+", "-". This restriction effectively means
|
---|
| 495 | that the scheme must be passed unescaped. Passing an undefined
|
---|
| 496 | argument to the scheme method makes the URI relative (if possible).
|
---|
| 497 |
|
---|
| 498 | Letter case does not matter for scheme names. The string
|
---|
| 499 | returned by $uri->scheme is always lowercase. If you want the scheme
|
---|
| 500 | just as it was written in the URI in its original case,
|
---|
| 501 | you can use the $uri->_scheme method instead.
|
---|
| 502 |
|
---|
| 503 | =item $uri->opaque
|
---|
| 504 |
|
---|
| 505 | =item $uri->opaque( $new_opaque )
|
---|
| 506 |
|
---|
| 507 | Sets and returns the scheme-specific part of the $uri
|
---|
| 508 | (everything between the scheme and the fragment)
|
---|
| 509 | as an escaped string.
|
---|
| 510 |
|
---|
| 511 | =item $uri->path
|
---|
| 512 |
|
---|
| 513 | =item $uri->path( $new_path )
|
---|
| 514 |
|
---|
| 515 | Sets and returns the same value as $uri->opaque unless the URI
|
---|
| 516 | supports the generic syntax for hierarchical namespaces.
|
---|
| 517 | In that case the generic method is overridden to set and return
|
---|
| 518 | the part of the URI between the I<host name> and the I<fragment>.
|
---|
| 519 |
|
---|
| 520 | =item $uri->fragment
|
---|
| 521 |
|
---|
| 522 | =item $uri->fragment( $new_frag )
|
---|
| 523 |
|
---|
| 524 | Sets and returns the fragment identifier of a URI reference
|
---|
| 525 | as an escaped string.
|
---|
| 526 |
|
---|
| 527 | =item $uri->as_string
|
---|
| 528 |
|
---|
| 529 | Returns the URI object as a plain ASCII string. URI objects are
|
---|
| 530 | also converted to plain strings automatically by overloading. This
|
---|
| 531 | means that $uri objects can be used as plain strings in most Perl
|
---|
| 532 | constructs.
|
---|
| 533 |
|
---|
| 534 | =item $uri->as_iri
|
---|
| 535 |
|
---|
| 536 | Returns a Unicode string representing the URI. Escaped UTF-8 sequences
|
---|
| 537 | representing non-ASCII characters are turned into their corresponding Unicode
|
---|
| 538 | code point.
|
---|
| 539 |
|
---|
| 540 | =item $uri->canonical
|
---|
| 541 |
|
---|
| 542 | Returns a normalized version of the URI. The rules
|
---|
| 543 | for normalization are scheme-dependent. They usually involve
|
---|
| 544 | lowercasing the scheme and Internet host name components,
|
---|
| 545 | removing the explicit port specification if it matches the default port,
|
---|
| 546 | uppercasing all escape sequences, and unescaping octets that can be
|
---|
| 547 | better represented as plain characters.
|
---|
| 548 |
|
---|
| 549 | For efficiency reasons, if the $uri is already in normalized form,
|
---|
| 550 | then a reference to it is returned instead of a copy.
|
---|
| 551 |
|
---|
| 552 | =item $uri->eq( $other_uri )
|
---|
| 553 |
|
---|
| 554 | =item URI::eq( $first_uri, $other_uri )
|
---|
| 555 |
|
---|
| 556 | Tests whether two URI references are equal. URI references
|
---|
| 557 | that normalize to the same string are considered equal. The method
|
---|
| 558 | can also be used as a plain function which can also test two string
|
---|
| 559 | arguments.
|
---|
| 560 |
|
---|
| 561 | If you need to test whether two C<URI> object references denote the
|
---|
| 562 | same object, use the '==' operator.
|
---|
| 563 |
|
---|
| 564 | =item $uri->abs( $base_uri )
|
---|
| 565 |
|
---|
| 566 | Returns an absolute URI reference. If $uri is already
|
---|
| 567 | absolute, then a reference to it is simply returned. If the $uri
|
---|
| 568 | is relative, then a new absolute URI is constructed by combining the
|
---|
| 569 | $uri and the $base_uri, and returned.
|
---|
| 570 |
|
---|
| 571 | =item $uri->rel( $base_uri )
|
---|
| 572 |
|
---|
| 573 | Returns a relative URI reference if it is possible to
|
---|
| 574 | make one that denotes the same resource relative to $base_uri.
|
---|
| 575 | If not, then $uri is simply returned.
|
---|
| 576 |
|
---|
| 577 | =item $uri->secure
|
---|
| 578 |
|
---|
| 579 | Returns a TRUE value if the URI is considered to point to a resource on
|
---|
| 580 | a secure channel, such as an SSL or TLS encrypted one.
|
---|
| 581 |
|
---|
| 582 | =back
|
---|
| 583 |
|
---|
| 584 | =head1 GENERIC METHODS
|
---|
| 585 |
|
---|
| 586 | The following methods are available to schemes that use the
|
---|
| 587 | common/generic syntax for hierarchical namespaces. The descriptions of
|
---|
| 588 | schemes below indicate which these are. Unknown schemes are
|
---|
| 589 | assumed to support the generic syntax, and therefore the following
|
---|
| 590 | methods:
|
---|
| 591 |
|
---|
| 592 | =over 4
|
---|
| 593 |
|
---|
| 594 | =item $uri->authority
|
---|
| 595 |
|
---|
| 596 | =item $uri->authority( $new_authority )
|
---|
| 597 |
|
---|
| 598 | Sets and returns the escaped authority component
|
---|
| 599 | of the $uri.
|
---|
| 600 |
|
---|
| 601 | =item $uri->path
|
---|
| 602 |
|
---|
| 603 | =item $uri->path( $new_path )
|
---|
| 604 |
|
---|
| 605 | Sets and returns the escaped path component of
|
---|
| 606 | the $uri (the part between the host name and the query or fragment).
|
---|
| 607 | The path can never be undefined, but it can be the empty string.
|
---|
| 608 |
|
---|
| 609 | =item $uri->path_query
|
---|
| 610 |
|
---|
| 611 | =item $uri->path_query( $new_path_query )
|
---|
| 612 |
|
---|
| 613 | Sets and returns the escaped path and query
|
---|
| 614 | components as a single entity. The path and the query are
|
---|
| 615 | separated by a "?" character, but the query can itself contain "?".
|
---|
| 616 |
|
---|
| 617 | =item $uri->path_segments
|
---|
| 618 |
|
---|
| 619 | =item $uri->path_segments( $segment, ... )
|
---|
| 620 |
|
---|
| 621 | Sets and returns the path. In a scalar context, it returns
|
---|
| 622 | the same value as $uri->path. In a list context, it returns the
|
---|
| 623 | unescaped path segments that make up the path. Path segments that
|
---|
| 624 | have parameters are returned as an anonymous array. The first element
|
---|
| 625 | is the unescaped path segment proper; subsequent elements are escaped
|
---|
| 626 | parameter strings. Such an anonymous array uses overloading so it can
|
---|
| 627 | be treated as a string too, but this string does not include the
|
---|
| 628 | parameters.
|
---|
| 629 |
|
---|
| 630 | Note that absolute paths have the empty string as their first
|
---|
| 631 | I<path_segment>, i.e. the I<path> C</foo/bar> has 3
|
---|
| 632 | I<path_segments>; "", "foo" and "bar".
|
---|
| 633 |
|
---|
| 634 | =item $uri->query
|
---|
| 635 |
|
---|
| 636 | =item $uri->query( $new_query )
|
---|
| 637 |
|
---|
| 638 | Sets and returns the escaped query component of
|
---|
| 639 | the $uri.
|
---|
| 640 |
|
---|
| 641 | =item $uri->query_form
|
---|
| 642 |
|
---|
| 643 | =item $uri->query_form( $key1 => $val1, $key2 => $val2, ... )
|
---|
| 644 |
|
---|
| 645 | =item $uri->query_form( $key1 => $val1, $key2 => $val2, ..., $delim )
|
---|
| 646 |
|
---|
| 647 | =item $uri->query_form( \@key_value_pairs )
|
---|
| 648 |
|
---|
| 649 | =item $uri->query_form( \@key_value_pairs, $delim )
|
---|
| 650 |
|
---|
| 651 | =item $uri->query_form( \%hash )
|
---|
| 652 |
|
---|
| 653 | =item $uri->query_form( \%hash, $delim )
|
---|
| 654 |
|
---|
| 655 | Sets and returns query components that use the
|
---|
| 656 | I<application/x-www-form-urlencoded> format. Key/value pairs are
|
---|
| 657 | separated by "&", and the key is separated from the value by a "="
|
---|
| 658 | character.
|
---|
| 659 |
|
---|
| 660 | The form can be set either by passing separate key/value pairs, or via
|
---|
| 661 | an array or hash reference. Passing an empty array or an empty hash
|
---|
| 662 | removes the query component, whereas passing no arguments at all leaves
|
---|
| 663 | the component unchanged. The order of keys is undefined if a hash
|
---|
| 664 | reference is passed. The old value is always returned as a list of
|
---|
| 665 | separate key/value pairs. Assigning this list to a hash is unwise as
|
---|
| 666 | the keys returned might repeat.
|
---|
| 667 |
|
---|
| 668 | The values passed when setting the form can be plain strings or
|
---|
| 669 | references to arrays of strings. Passing an array of values has the
|
---|
| 670 | same effect as passing the key repeatedly with one value at a time.
|
---|
| 671 | All the following statements have the same effect:
|
---|
| 672 |
|
---|
| 673 | $uri->query_form(foo => 1, foo => 2);
|
---|
| 674 | $uri->query_form(foo => [1, 2]);
|
---|
| 675 | $uri->query_form([ foo => 1, foo => 2 ]);
|
---|
| 676 | $uri->query_form([ foo => [1, 2] ]);
|
---|
| 677 | $uri->query_form({ foo => [1, 2] });
|
---|
| 678 |
|
---|
| 679 | The $delim parameter can be passed as ";" to force the key/value pairs
|
---|
| 680 | to be delimited by ";" instead of "&" in the query string. This
|
---|
| 681 | practice is often recommended for URLs embedded in HTML or XML
|
---|
| 682 | documents as this avoids the trouble of escaping the "&" character.
|
---|
| 683 | You might also set the $URI::DEFAULT_QUERY_FORM_DELIMITER variable to
|
---|
| 684 | ";" for the same global effect.
|
---|
| 685 |
|
---|
| 686 | The C<URI::QueryParam> module can be loaded to add further methods to
|
---|
| 687 | manipulate the form of a URI. See L<URI::QueryParam> for details.
|
---|
| 688 |
|
---|
| 689 | =item $uri->query_keywords
|
---|
| 690 |
|
---|
| 691 | =item $uri->query_keywords( $keywords, ... )
|
---|
| 692 |
|
---|
| 693 | =item $uri->query_keywords( \@keywords )
|
---|
| 694 |
|
---|
| 695 | Sets and returns query components that use the
|
---|
| 696 | keywords separated by "+" format.
|
---|
| 697 |
|
---|
| 698 | The keywords can be set either by passing separate keywords directly
|
---|
| 699 | or by passing a reference to an array of keywords. Passing an empty
|
---|
| 700 | array removes the query component, whereas passing no arguments at
|
---|
| 701 | all leaves the component unchanged. The old value is always returned
|
---|
| 702 | as a list of separate words.
|
---|
| 703 |
|
---|
| 704 | =back
|
---|
| 705 |
|
---|
| 706 | =head1 SERVER METHODS
|
---|
| 707 |
|
---|
| 708 | For schemes where the I<authority> component denotes an Internet host,
|
---|
| 709 | the following methods are available in addition to the generic
|
---|
| 710 | methods.
|
---|
| 711 |
|
---|
| 712 | =over 4
|
---|
| 713 |
|
---|
| 714 | =item $uri->userinfo
|
---|
| 715 |
|
---|
| 716 | =item $uri->userinfo( $new_userinfo )
|
---|
| 717 |
|
---|
| 718 | Sets and returns the escaped userinfo part of the
|
---|
| 719 | authority component.
|
---|
| 720 |
|
---|
| 721 | For some schemes this is a user name and a password separated by
|
---|
| 722 | a colon. This practice is not recommended. Embedding passwords in
|
---|
| 723 | clear text (such as URI) has proven to be a security risk in almost
|
---|
| 724 | every case where it has been used.
|
---|
| 725 |
|
---|
| 726 | =item $uri->host
|
---|
| 727 |
|
---|
| 728 | =item $uri->host( $new_host )
|
---|
| 729 |
|
---|
| 730 | Sets and returns the unescaped hostname.
|
---|
| 731 |
|
---|
| 732 | If the $new_host string ends with a colon and a number, then this
|
---|
| 733 | number also sets the port.
|
---|
| 734 |
|
---|
| 735 | For IPv6 addresses the brackets around the raw address are removed in the return
|
---|
| 736 | value from $uri->host. When setting the host attribute to an IPv6 address you
|
---|
| 737 | can use a raw address or one enclosed in brackets. The address needs to be
|
---|
| 738 | enclosed in brackets if you want to pass in a new port value as well.
|
---|
| 739 |
|
---|
| 740 | =item $uri->ihost
|
---|
| 741 |
|
---|
| 742 | Returns the host in Unicode form. Any IDNA A-labels are turned into U-labels.
|
---|
| 743 |
|
---|
| 744 | =item $uri->port
|
---|
| 745 |
|
---|
| 746 | =item $uri->port( $new_port )
|
---|
| 747 |
|
---|
| 748 | Sets and returns the port. The port is a simple integer
|
---|
| 749 | that should be greater than 0.
|
---|
| 750 |
|
---|
| 751 | If a port is not specified explicitly in the URI, then the URI scheme's default port
|
---|
| 752 | is returned. If you don't want the default port
|
---|
| 753 | substituted, then you can use the $uri->_port method instead.
|
---|
| 754 |
|
---|
| 755 | =item $uri->host_port
|
---|
| 756 |
|
---|
| 757 | =item $uri->host_port( $new_host_port )
|
---|
| 758 |
|
---|
| 759 | Sets and returns the host and port as a single
|
---|
| 760 | unit. The returned value includes a port, even if it matches the
|
---|
| 761 | default port. The host part and the port part are separated by a
|
---|
| 762 | colon: ":".
|
---|
| 763 |
|
---|
| 764 | For IPv6 addresses the bracketing is preserved; thus
|
---|
| 765 | URI->new("http://[::1]/")->host_port returns "[::1]:80". Contrast this with
|
---|
| 766 | $uri->host which will remove the brackets.
|
---|
| 767 |
|
---|
| 768 | =item $uri->default_port
|
---|
| 769 |
|
---|
| 770 | Returns the default port of the URI scheme to which $uri
|
---|
| 771 | belongs. For I<http> this is the number 80, for I<ftp> this
|
---|
| 772 | is the number 21, etc. The default port for a scheme can not be
|
---|
| 773 | changed.
|
---|
| 774 |
|
---|
| 775 | =back
|
---|
| 776 |
|
---|
| 777 | =head1 SCHEME-SPECIFIC SUPPORT
|
---|
| 778 |
|
---|
| 779 | Scheme-specific support is provided for the following URI schemes. For C<URI>
|
---|
| 780 | objects that do not belong to one of these, you can only use the common and
|
---|
| 781 | generic methods.
|
---|
| 782 |
|
---|
| 783 | =over 4
|
---|
| 784 |
|
---|
| 785 | =item B<data>:
|
---|
| 786 |
|
---|
| 787 | The I<data> URI scheme is specified in RFC 2397. It allows inclusion
|
---|
| 788 | of small data items as "immediate" data, as if it had been included
|
---|
| 789 | externally.
|
---|
| 790 |
|
---|
| 791 | C<URI> objects belonging to the data scheme support the common methods
|
---|
| 792 | and two new methods to access their scheme-specific components:
|
---|
| 793 | $uri->media_type and $uri->data. See L<URI::data> for details.
|
---|
| 794 |
|
---|
| 795 | =item B<file>:
|
---|
| 796 |
|
---|
| 797 | An old specification of the I<file> URI scheme is found in RFC 1738.
|
---|
| 798 | A new RFC 2396 based specification is not available yet, but file URI
|
---|
| 799 | references are in common use.
|
---|
| 800 |
|
---|
| 801 | C<URI> objects belonging to the file scheme support the common and
|
---|
| 802 | generic methods. In addition, they provide two methods for mapping file URIs
|
---|
| 803 | back to local file names; $uri->file and $uri->dir. See L<URI::file>
|
---|
| 804 | for details.
|
---|
| 805 |
|
---|
| 806 | =item B<ftp>:
|
---|
| 807 |
|
---|
| 808 | An old specification of the I<ftp> URI scheme is found in RFC 1738. A
|
---|
| 809 | new RFC 2396 based specification is not available yet, but ftp URI
|
---|
| 810 | references are in common use.
|
---|
| 811 |
|
---|
| 812 | C<URI> objects belonging to the ftp scheme support the common,
|
---|
| 813 | generic and server methods. In addition, they provide two methods for
|
---|
| 814 | accessing the userinfo sub-components: $uri->user and $uri->password.
|
---|
| 815 |
|
---|
| 816 | =item B<gopher>:
|
---|
| 817 |
|
---|
| 818 | The I<gopher> URI scheme is specified in
|
---|
| 819 | <draft-murali-url-gopher-1996-12-04> and will hopefully be available
|
---|
| 820 | as an RFC 2396 based specification.
|
---|
| 821 |
|
---|
| 822 | C<URI> objects belonging to the gopher scheme support the common,
|
---|
| 823 | generic and server methods. In addition, they support some methods for
|
---|
| 824 | accessing gopher-specific path components: $uri->gopher_type,
|
---|
| 825 | $uri->selector, $uri->search, $uri->string.
|
---|
| 826 |
|
---|
| 827 | =item B<http>:
|
---|
| 828 |
|
---|
| 829 | The I<http> URI scheme is specified in RFC 2616.
|
---|
| 830 | The scheme is used to reference resources hosted by HTTP servers.
|
---|
| 831 |
|
---|
| 832 | C<URI> objects belonging to the http scheme support the common,
|
---|
| 833 | generic and server methods.
|
---|
| 834 |
|
---|
| 835 | =item B<https>:
|
---|
| 836 |
|
---|
| 837 | The I<https> URI scheme is a Netscape invention which is commonly
|
---|
| 838 | implemented. The scheme is used to reference HTTP servers through SSL
|
---|
| 839 | connections. Its syntax is the same as http, but the default
|
---|
| 840 | port is different.
|
---|
| 841 |
|
---|
| 842 | =item B<ldap>:
|
---|
| 843 |
|
---|
| 844 | The I<ldap> URI scheme is specified in RFC 2255. LDAP is the
|
---|
| 845 | Lightweight Directory Access Protocol. An ldap URI describes an LDAP
|
---|
| 846 | search operation to perform to retrieve information from an LDAP
|
---|
| 847 | directory.
|
---|
| 848 |
|
---|
| 849 | C<URI> objects belonging to the ldap scheme support the common,
|
---|
| 850 | generic and server methods as well as ldap-specific methods: $uri->dn,
|
---|
| 851 | $uri->attributes, $uri->scope, $uri->filter, $uri->extensions. See
|
---|
| 852 | L<URI::ldap> for details.
|
---|
| 853 |
|
---|
| 854 | =item B<ldapi>:
|
---|
| 855 |
|
---|
| 856 | Like the I<ldap> URI scheme, but uses a UNIX domain socket. The
|
---|
| 857 | server methods are not supported, and the local socket path is
|
---|
| 858 | available as $uri->un_path. The I<ldapi> scheme is used by the
|
---|
| 859 | OpenLDAP package. There is no real specification for it, but it is
|
---|
| 860 | mentioned in various OpenLDAP manual pages.
|
---|
| 861 |
|
---|
| 862 | =item B<ldaps>:
|
---|
| 863 |
|
---|
| 864 | Like the I<ldap> URI scheme, but uses an SSL connection. This
|
---|
| 865 | scheme is deprecated, as the preferred way is to use the I<start_tls>
|
---|
| 866 | mechanism.
|
---|
| 867 |
|
---|
| 868 | =item B<mailto>:
|
---|
| 869 |
|
---|
| 870 | The I<mailto> URI scheme is specified in RFC 2368. The scheme was
|
---|
| 871 | originally used to designate the Internet mailing address of an
|
---|
| 872 | individual or service. It has (in RFC 2368) been extended to allow
|
---|
| 873 | setting of other mail header fields and the message body.
|
---|
| 874 |
|
---|
| 875 | C<URI> objects belonging to the mailto scheme support the common
|
---|
| 876 | methods and the generic query methods. In addition, they support the
|
---|
| 877 | following mailto-specific methods: $uri->to, $uri->headers.
|
---|
| 878 |
|
---|
| 879 | Note that the "foo@example.com" part of a mailto is I<not> the
|
---|
| 880 | C<userinfo> and C<host> but instead the C<path>. This allows a
|
---|
| 881 | mailto URI to contain multiple comma separated email addresses.
|
---|
| 882 |
|
---|
| 883 | =item B<mms>:
|
---|
| 884 |
|
---|
| 885 | The I<mms> URL specification can be found at L<http://sdp.ppona.com/>.
|
---|
| 886 | C<URI> objects belonging to the mms scheme support the common,
|
---|
| 887 | generic, and server methods, with the exception of userinfo and
|
---|
| 888 | query-related sub-components.
|
---|
| 889 |
|
---|
| 890 | =item B<news>:
|
---|
| 891 |
|
---|
| 892 | The I<news>, I<nntp> and I<snews> URI schemes are specified in
|
---|
| 893 | <draft-gilman-news-url-01> and will hopefully be available as an RFC
|
---|
| 894 | 2396 based specification soon.
|
---|
| 895 |
|
---|
| 896 | C<URI> objects belonging to the news scheme support the common,
|
---|
| 897 | generic and server methods. In addition, they provide some methods to
|
---|
| 898 | access the path: $uri->group and $uri->message.
|
---|
| 899 |
|
---|
| 900 | =item B<nntp>:
|
---|
| 901 |
|
---|
| 902 | See I<news> scheme.
|
---|
| 903 |
|
---|
| 904 | =item B<pop>:
|
---|
| 905 |
|
---|
| 906 | The I<pop> URI scheme is specified in RFC 2384. The scheme is used to
|
---|
| 907 | reference a POP3 mailbox.
|
---|
| 908 |
|
---|
| 909 | C<URI> objects belonging to the pop scheme support the common, generic
|
---|
| 910 | and server methods. In addition, they provide two methods to access the
|
---|
| 911 | userinfo components: $uri->user and $uri->auth.
|
---|
| 912 |
|
---|
| 913 | =item B<rlogin>:
|
---|
| 914 |
|
---|
| 915 | An old specification of the I<rlogin> URI scheme is found in RFC
|
---|
| 916 | 1738. C<URI> objects belonging to the rlogin scheme support the
|
---|
| 917 | common, generic and server methods.
|
---|
| 918 |
|
---|
| 919 | =item B<rtsp>:
|
---|
| 920 |
|
---|
| 921 | The I<rtsp> URL specification can be found in section 3.2 of RFC 2326.
|
---|
| 922 | C<URI> objects belonging to the rtsp scheme support the common,
|
---|
| 923 | generic, and server methods, with the exception of userinfo and
|
---|
| 924 | query-related sub-components.
|
---|
| 925 |
|
---|
| 926 | =item B<rtspu>:
|
---|
| 927 |
|
---|
| 928 | The I<rtspu> URI scheme is used to talk to RTSP servers over UDP
|
---|
| 929 | instead of TCP. The syntax is the same as rtsp.
|
---|
| 930 |
|
---|
| 931 | =item B<rsync>:
|
---|
| 932 |
|
---|
| 933 | Information about rsync is available from L<http://rsync.samba.org/>.
|
---|
| 934 | C<URI> objects belonging to the rsync scheme support the common,
|
---|
| 935 | generic and server methods. In addition, they provide methods to
|
---|
| 936 | access the userinfo sub-components: $uri->user and $uri->password.
|
---|
| 937 |
|
---|
| 938 | =item B<sip>:
|
---|
| 939 |
|
---|
| 940 | The I<sip> URI specification is described in sections 19.1 and 25
|
---|
| 941 | of RFC 3261. C<URI> objects belonging to the sip scheme support the
|
---|
| 942 | common, generic, and server methods with the exception of path related
|
---|
| 943 | sub-components. In addition, they provide two methods to get and set
|
---|
| 944 | I<sip> parameters: $uri->params_form and $uri->params.
|
---|
| 945 |
|
---|
| 946 | =item B<sips>:
|
---|
| 947 |
|
---|
| 948 | See I<sip> scheme. Its syntax is the same as sip, but the default
|
---|
| 949 | port is different.
|
---|
| 950 |
|
---|
| 951 | =item B<snews>:
|
---|
| 952 |
|
---|
| 953 | See I<news> scheme. Its syntax is the same as news, but the default
|
---|
| 954 | port is different.
|
---|
| 955 |
|
---|
| 956 | =item B<telnet>:
|
---|
| 957 |
|
---|
| 958 | An old specification of the I<telnet> URI scheme is found in RFC
|
---|
| 959 | 1738. C<URI> objects belonging to the telnet scheme support the
|
---|
| 960 | common, generic and server methods.
|
---|
| 961 |
|
---|
| 962 | =item B<tn3270>:
|
---|
| 963 |
|
---|
| 964 | These URIs are used like I<telnet> URIs but for connections to IBM
|
---|
| 965 | mainframes. C<URI> objects belonging to the tn3270 scheme support the
|
---|
| 966 | common, generic and server methods.
|
---|
| 967 |
|
---|
| 968 | =item B<ssh>:
|
---|
| 969 |
|
---|
| 970 | Information about ssh is available at L<http://www.openssh.com/>.
|
---|
| 971 | C<URI> objects belonging to the ssh scheme support the common,
|
---|
| 972 | generic and server methods. In addition, they provide methods to
|
---|
| 973 | access the userinfo sub-components: $uri->user and $uri->password.
|
---|
| 974 |
|
---|
| 975 | =item B<urn>:
|
---|
| 976 |
|
---|
| 977 | The syntax of Uniform Resource Names is specified in RFC 2141. C<URI>
|
---|
| 978 | objects belonging to the urn scheme provide the common methods, and also the
|
---|
| 979 | methods $uri->nid and $uri->nss, which return the Namespace Identifier
|
---|
| 980 | and the Namespace-Specific String respectively.
|
---|
| 981 |
|
---|
| 982 | The Namespace Identifier basically works like the Scheme identifier of
|
---|
| 983 | URIs, and further divides the URN namespace. Namespace Identifier
|
---|
| 984 | assignments are maintained at
|
---|
| 985 | L<http://www.iana.org/assignments/urn-namespaces>.
|
---|
| 986 |
|
---|
| 987 | Letter case is not significant for the Namespace Identifier. It is
|
---|
| 988 | always returned in lower case by the $uri->nid method. The $uri->_nid
|
---|
| 989 | method can be used if you want it in its original case.
|
---|
| 990 |
|
---|
| 991 | =item B<urn>:B<isbn>:
|
---|
| 992 |
|
---|
| 993 | The C<urn:isbn:> namespace contains International Standard Book
|
---|
| 994 | Numbers (ISBNs) and is described in RFC 3187. A C<URI> object belonging
|
---|
| 995 | to this namespace has the following extra methods (if the
|
---|
| 996 | Business::ISBN module is available): $uri->isbn,
|
---|
| 997 | $uri->isbn_publisher_code, $uri->isbn_group_code (formerly isbn_country_code,
|
---|
| 998 | which is still supported but issues a deprecation warning), $uri->isbn_as_ean.
|
---|
| 999 |
|
---|
| 1000 | =item B<urn>:B<oid>:
|
---|
| 1001 |
|
---|
| 1002 | The C<urn:oid:> namespace contains Object Identifiers (OIDs) and is
|
---|
| 1003 | described in RFC 3061. An object identifier consists of sequences of digits
|
---|
| 1004 | separated by dots. A C<URI> object belonging to this namespace has an
|
---|
| 1005 | additional method called $uri->oid that can be used to get/set the oid
|
---|
| 1006 | value. In a list context, oid numbers are returned as separate elements.
|
---|
| 1007 |
|
---|
| 1008 | =back
|
---|
| 1009 |
|
---|
| 1010 | =head1 CONFIGURATION VARIABLES
|
---|
| 1011 |
|
---|
| 1012 | The following configuration variables influence how the class and its
|
---|
| 1013 | methods behave:
|
---|
| 1014 |
|
---|
| 1015 | =over 4
|
---|
| 1016 |
|
---|
| 1017 | =item $URI::ABS_ALLOW_RELATIVE_SCHEME
|
---|
| 1018 |
|
---|
| 1019 | Some older parsers used to allow the scheme name to be present in the
|
---|
| 1020 | relative URL if it was the same as the base URL scheme. RFC 2396 says
|
---|
| 1021 | that this should be avoided, but you can enable this old behaviour by
|
---|
| 1022 | setting the $URI::ABS_ALLOW_RELATIVE_SCHEME variable to a TRUE value.
|
---|
| 1023 | The difference is demonstrated by the following examples:
|
---|
| 1024 |
|
---|
| 1025 | URI->new("http:foo")->abs("http://host/a/b")
|
---|
| 1026 | ==> "http:foo"
|
---|
| 1027 |
|
---|
| 1028 | local $URI::ABS_ALLOW_RELATIVE_SCHEME = 1;
|
---|
| 1029 | URI->new("http:foo")->abs("http://host/a/b")
|
---|
| 1030 | ==> "http://host/a/foo"
|
---|
| 1031 |
|
---|
| 1032 |
|
---|
| 1033 | =item $URI::ABS_REMOTE_LEADING_DOTS
|
---|
| 1034 |
|
---|
| 1035 | You can also have the abs() method ignore excess ".."
|
---|
| 1036 | segments in the relative URI by setting $URI::ABS_REMOTE_LEADING_DOTS
|
---|
| 1037 | to a TRUE value. The difference is demonstrated by the following
|
---|
| 1038 | examples:
|
---|
| 1039 |
|
---|
| 1040 | URI->new("../../../foo")->abs("http://host/a/b")
|
---|
| 1041 | ==> "http://host/../../foo"
|
---|
| 1042 |
|
---|
| 1043 | local $URI::ABS_REMOTE_LEADING_DOTS = 1;
|
---|
| 1044 | URI->new("../../../foo")->abs("http://host/a/b")
|
---|
| 1045 | ==> "http://host/foo"
|
---|
| 1046 |
|
---|
| 1047 | =item $URI::DEFAULT_QUERY_FORM_DELIMITER
|
---|
| 1048 |
|
---|
| 1049 | This value can be set to ";" to have the query form C<key=value> pairs
|
---|
| 1050 | delimited by ";" instead of "&" which is the default.
|
---|
| 1051 |
|
---|
| 1052 | =back
|
---|
| 1053 |
|
---|
| 1054 | =head1 BUGS
|
---|
| 1055 |
|
---|
| 1056 | There are some things that are not quite right:
|
---|
| 1057 |
|
---|
| 1058 | =over
|
---|
| 1059 |
|
---|
| 1060 | =item *
|
---|
| 1061 |
|
---|
| 1062 | Using regexp variables like $1 directly as arguments to the URI accessor methods
|
---|
| 1063 | does not work too well with current perl implementations. I would argue
|
---|
| 1064 | that this is actually a bug in perl. The workaround is to quote
|
---|
| 1065 | them. Example:
|
---|
| 1066 |
|
---|
| 1067 | /(...)/ || die;
|
---|
| 1068 | $u->query("$1");
|
---|
| 1069 |
|
---|
| 1070 |
|
---|
| 1071 | =item *
|
---|
| 1072 |
|
---|
| 1073 | The escaping (percent encoding) of chars in the 128 .. 255 range passed to the
|
---|
| 1074 | URI constructor or when setting URI parts using the accessor methods depends on
|
---|
| 1075 | the state of the internal UTF8 flag (see utf8::is_utf8) of the string passed.
|
---|
| 1076 | If the UTF8 flag is set the UTF-8 encoded version of the character is percent
|
---|
| 1077 | encoded. If the UTF8 flag isn't set the Latin-1 version (byte) of the
|
---|
| 1078 | character is percent encoded. This basically exposes the internal encoding of
|
---|
| 1079 | Perl strings.
|
---|
| 1080 |
|
---|
| 1081 | =back
|
---|
| 1082 |
|
---|
| 1083 | =head1 PARSING URIs WITH REGEXP
|
---|
| 1084 |
|
---|
| 1085 | As an alternative to this module, the following (official) regular
|
---|
| 1086 | expression can be used to decode a URI:
|
---|
| 1087 |
|
---|
| 1088 | my($scheme, $authority, $path, $query, $fragment) =
|
---|
| 1089 | $uri =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
|
---|
| 1090 |
|
---|
| 1091 | The C<URI::Split> module provides the function uri_split() as a
|
---|
| 1092 | readable alternative.
|
---|
| 1093 |
|
---|
| 1094 | =head1 SEE ALSO
|
---|
| 1095 |
|
---|
| 1096 | L<URI::file>, L<URI::WithBase>, L<URI::QueryParam>, L<URI::Escape>,
|
---|
| 1097 | L<URI::Split>, L<URI::Heuristic>
|
---|
| 1098 |
|
---|
| 1099 | RFC 2396: "Uniform Resource Identifiers (URI): Generic Syntax",
|
---|
| 1100 | Berners-Lee, Fielding, Masinter, August 1998.
|
---|
| 1101 |
|
---|
| 1102 | L<http://www.iana.org/assignments/uri-schemes>
|
---|
| 1103 |
|
---|
| 1104 | L<http://www.iana.org/assignments/urn-namespaces>
|
---|
| 1105 |
|
---|
| 1106 | L<http://www.w3.org/Addressing/>
|
---|
| 1107 |
|
---|
| 1108 | =head1 COPYRIGHT
|
---|
| 1109 |
|
---|
| 1110 | Copyright 1995-2009 Gisle Aas.
|
---|
| 1111 |
|
---|
| 1112 | Copyright 1995 Martijn Koster.
|
---|
| 1113 |
|
---|
| 1114 | This program is free software; you can redistribute it and/or modify
|
---|
| 1115 | it under the same terms as Perl itself.
|
---|
| 1116 |
|
---|
| 1117 | =head1 AUTHORS / ACKNOWLEDGMENTS
|
---|
| 1118 |
|
---|
| 1119 | This module is based on the C<URI::URL> module, which in turn was
|
---|
| 1120 | (distantly) based on the C<wwwurl.pl> code in the libwww-perl for
|
---|
| 1121 | perl4 developed by Roy Fielding, as part of the Arcadia project at the
|
---|
| 1122 | University of California, Irvine, with contributions from Brooks
|
---|
| 1123 | Cutter.
|
---|
| 1124 |
|
---|
| 1125 | C<URI::URL> was developed by Gisle Aas, Tim Bunce, Roy Fielding and
|
---|
| 1126 | Martijn Koster with input from other people on the libwww-perl mailing
|
---|
| 1127 | list.
|
---|
| 1128 |
|
---|
| 1129 | C<URI> and related subclasses were developed by Gisle Aas.
|
---|
| 1130 |
|
---|
| 1131 | =cut
|
---|