source: extensions/gsdl-video/trunk/installed/cmdline/lib/ruby/1.8/uri/common.rb@ 18425

Last change on this file since 18425 was 18425, checked in by davidb, 15 years ago

Video extension to Greenstone

File size: 17.2 KB
Line 
1# = uri/common.rb
2#
3# Author:: Akira Yamada <akira@ruby-lang.org>
4# Revision:: $Id: common.rb 11747 2007-02-15 02:41:45Z knu $
5# License::
6# You can redistribute it and/or modify it under the same term as Ruby.
7#
8
9module URI
10 module REGEXP
11 #
12 # Patterns used to parse URI's
13 #
module PATTERN
  # :stopdoc:

  # Every constant in this module is a String holding regular-expression
  # source text, not a compiled Regexp.  The fragments are composed with
  # string interpolation, following the grammar productions quoted in the
  # comment above each one, and are compiled by the enclosing module.
  #
  # RFC 2396 (URI Generic Syntax)
  # RFC 2732 (IPv6 Literal Addresses in URL's)
  # RFC 2373 (IPv6 Addressing Architecture)

  # alpha         = lowalpha | upalpha
  ALPHA = "a-zA-Z"
  # alphanum      = alpha | digit
  ALNUM = "#{ALPHA}\\d"

  # hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
  #                         "a" | "b" | "c" | "d" | "e" | "f"
  HEX     = "a-fA-F\\d"
  # escaped       = "%" hex hex
  ESCAPED = "%[#{HEX}]{2}"
  # mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
  #                 "(" | ")"
  # unreserved    = alphanum | mark
  # (the leading "-" is literal because it opens the character class)
  UNRESERVED = "-_.!~*'()#{ALNUM}"
  # reserved      = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
  #                 "$" | ","
  # reserved      = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
  #                 "$" | "," | "[" | "]" (RFC 2732)
  RESERVED = ";/?:@&=+$,\\[\\]"

  # uric          = reserved | unreserved | escaped
  URIC = "(?:[#{UNRESERVED}#{RESERVED}]|#{ESCAPED})"
  # uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
  #                 "&" | "=" | "+" | "$" | ","
  URIC_NO_SLASH = "(?:[#{UNRESERVED};?:@&=+$,]|#{ESCAPED})"
  # query         = *uric
  QUERY = "#{URIC}*"
  # fragment      = *uric
  FRAGMENT = "#{URIC}*"

  # domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
  DOMLABEL = "(?:[#{ALNUM}](?:[-#{ALNUM}]*[#{ALNUM}])?)"
  # toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
  TOPLABEL = "(?:[#{ALPHA}](?:[-#{ALNUM}]*[#{ALNUM}])?)"
  # hostname      = *( domainlabel "." ) toplabel [ "." ]
  HOSTNAME = "(?:#{DOMLABEL}\\.)*#{TOPLABEL}\\.?"

  # RFC 2373, APPENDIX B:
  # IPv6address = hexpart [ ":" IPv4address ]
  # IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
  # hexpart     = hexseq | hexseq "::" [ hexseq ] | "::" [ hexseq ]
  # hexseq      = hex4 *( ":" hex4)
  # hex4        = 1*4HEXDIG
  #
  # XXX: This definition has a flaw. "::" + IPv4address must be
  # allowed too.  Here is a replacement.
  #
  # IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
  IPV4ADDR = "\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}"
  # hex4     = 1*4HEXDIG
  HEX4 = "[#{HEX}]{1,4}"
  # lastpart = hex4 | IPv4address
  LASTPART = "(?:#{HEX4}|#{IPV4ADDR})"
  # hexseq1  = *( hex4 ":" ) hex4
  HEXSEQ1 = "(?:#{HEX4}:)*#{HEX4}"
  # hexseq2  = *( hex4 ":" ) lastpart
  HEXSEQ2 = "(?:#{HEX4}:)*#{LASTPART}"
  # IPv6address = hexseq2 | [ hexseq1 ] "::" [ hexseq2 ]
  IPV6ADDR = "(?:#{HEXSEQ2}|(?:#{HEXSEQ1})?::(?:#{HEXSEQ2})?)"

  # IPv6prefix  = ( hexseq1 | [ hexseq1 ] "::" [ hexseq1 ] ) "/" 1*2DIGIT
  # unused

  # ipv6reference = "[" IPv6address "]" (RFC 2732)
  IPV6REF = "\\[#{IPV6ADDR}\\]"

  # host          = hostname | IPv4address
  # host          = hostname | IPv4address | IPv6reference (RFC 2732)
  HOST = "(?:#{HOSTNAME}|#{IPV4ADDR}|#{IPV6REF})"
  # port          = *digit
  PORT = '\d*'
  # hostport      = host [ ":" port ]
  HOSTPORT = "#{HOST}(?::#{PORT})?"

  # userinfo      = *( unreserved | escaped |
  #                    ";" | ":" | "&" | "=" | "+" | "$" | "," )
  USERINFO = "(?:[#{UNRESERVED};:&=+$,]|#{ESCAPED})*"

  # pchar         = unreserved | escaped |
  #                 ":" | "@" | "&" | "=" | "+" | "$" | ","
  PCHAR = "(?:[#{UNRESERVED}:@&=+$,]|#{ESCAPED})"
  # param         = *pchar
  PARAM = "#{PCHAR}*"
  # segment       = *pchar *( ";" param )
  SEGMENT = "#{PCHAR}*(?:;#{PARAM})*"
  # path_segments = segment *( "/" segment )
  PATH_SEGMENTS = "#{SEGMENT}(?:/#{SEGMENT})*"

  # server        = [ [ userinfo "@" ] hostport ]
  SERVER = "(?:#{USERINFO}@)?#{HOSTPORT}"
  # reg_name      = 1*( unreserved | escaped | "$" | "," |
  #                     ";" | ":" | "@" | "&" | "=" | "+" )
  # The class must contain ":" as the grammar above requires; it previously
  # listed "+" twice and omitted ":".
  REG_NAME = "(?:[#{UNRESERVED}$,;:@&=+]|#{ESCAPED})+"
  # authority     = server | reg_name
  AUTHORITY = "(?:#{SERVER}|#{REG_NAME})"

  # rel_segment   = 1*( unreserved | escaped |
  #                     ";" | "@" | "&" | "=" | "+" | "$" | "," )
  REL_SEGMENT = "(?:[#{UNRESERVED};@&=+$,]|#{ESCAPED})+"

  # scheme        = alpha *( alpha | digit | "+" | "-" | "." )
  SCHEME = "[#{ALPHA}][-+.#{ALPHA}\\d]*"

  # abs_path      = "/"  path_segments
  ABS_PATH = "/#{PATH_SEGMENTS}"
  # rel_path      = rel_segment [ abs_path ]
  REL_PATH = "#{REL_SEGMENT}(?:#{ABS_PATH})?"
  # net_path      = "//" authority [ abs_path ]
  NET_PATH = "//#{AUTHORITY}(?:#{ABS_PATH})?"

  # hier_part     = ( net_path | abs_path ) [ "?" query ]
  HIER_PART = "(?:#{NET_PATH}|#{ABS_PATH})(?:\\?(?:#{QUERY}))?"
  # opaque_part   = uric_no_slash *uric
  OPAQUE_PART = "#{URIC_NO_SLASH}#{URIC}*"

  # absoluteURI   = scheme ":" ( hier_part | opaque_part )
  ABS_URI = "#{SCHEME}:(?:#{HIER_PART}|#{OPAQUE_PART})"
  # relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
  REL_URI = "(?:#{NET_PATH}|#{ABS_PATH}|#{REL_PATH})(?:\\?#{QUERY})?"

  # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
  URI_REF = "(?:#{ABS_URI}|#{REL_URI})?(?:##{FRAGMENT})?"

  # XXX:
  # The two X_* patterns are written for Regexp::EXTENDED: whitespace and
  # the (?# ...) groups are ignored by the regexp engine, and "#" must be
  # escaped.  The numbers in the (?# ...) notes are the capture-group indexes.
  X_ABS_URI = "
    (#{PATTERN::SCHEME}):                     (?# 1: scheme)
    (?:
       (#{PATTERN::OPAQUE_PART})              (?# 2: opaque)
    |
       (?:(?:
         //(?:
             (?:(?:(#{PATTERN::USERINFO})@)?  (?# 3: userinfo)
               (?:(#{PATTERN::HOST})(?::(\\d*))?))?(?# 4: host, 5: port)
           |
             (#{PATTERN::REG_NAME})           (?# 6: registry)
           )
         |
         (?!//))                              (?# XXX: '//' is the mark for hostport)
         (#{PATTERN::ABS_PATH})?              (?# 7: path)
       )(?:\\?(#{PATTERN::QUERY}))?           (?# 8: query)
    )
    (?:\\#(#{PATTERN::FRAGMENT}))?            (?# 9: fragment)
  "
  X_REL_URI = "
    (?:
      (?:
        //
        (?:
          (?:(#{PATTERN::USERINFO})@)?       (?# 1: userinfo)
            (#{PATTERN::HOST})?(?::(\\d*))?  (?# 2: host, 3: port)
        |
          (#{PATTERN::REG_NAME})             (?# 4: registry)
        )
      )
    |
      (#{PATTERN::REL_SEGMENT})              (?# 5: rel_segment)
    )?
    (#{PATTERN::ABS_PATH})?                  (?# 6: abs_path)
    (?:\\?(#{PATTERN::QUERY}))?              (?# 7: query)
    (?:\\#(#{PATTERN::FRAGMENT}))?           (?# 8: fragment)
  "
  # :startdoc:
end # PATTERN
184
185 # :stopdoc:
186
# for URI::split
#
# Anchored with \A ... \Z instead of ^ ... $.  ^ and $ match at every line
# boundary, so a multi-line string was accepted as soon as any one of its
# lines looked like a URI.  \Z still tolerates a single trailing newline,
# which $ also allowed.
ABS_URI = Regexp.new('\A' + PATTERN::X_ABS_URI + '\Z',
                     Regexp::EXTENDED, 'N').freeze
REL_URI = Regexp.new('\A' + PATTERN::X_REL_URI + '\Z',
                     Regexp::EXTENDED, 'N').freeze

# for URI::extract
# (deliberately unanchored: these are used to find URIs inside larger text)
URI_REF     = Regexp.new(PATTERN::URI_REF, false, 'N').freeze
ABS_URI_REF = Regexp.new(PATTERN::X_ABS_URI, Regexp::EXTENDED, 'N').freeze
REL_URI_REF = Regexp.new(PATTERN::X_REL_URI, Regexp::EXTENDED, 'N').freeze

# for URI::escape/unescape
ESCAPED = Regexp.new(PATTERN::ESCAPED, false, 'N').freeze
UNSAFE  = Regexp.new("[^#{PATTERN::UNRESERVED}#{PATTERN::RESERVED}]",
                     false, 'N').freeze

# for Generic#initialize
#
# Whole-string validators for single components.  They use \A ... \z so a
# value containing a newline (for example "example.com\nanything") is
# rejected rather than accepted because one of its lines is well-formed.
SCHEME   = Regexp.new("\\A#{PATTERN::SCHEME}\\z", false, 'N').freeze
USERINFO = Regexp.new("\\A#{PATTERN::USERINFO}\\z", false, 'N').freeze
HOST     = Regexp.new("\\A#{PATTERN::HOST}\\z", false, 'N').freeze
PORT     = Regexp.new("\\A#{PATTERN::PORT}\\z", false, 'N').freeze
OPAQUE   = Regexp.new("\\A#{PATTERN::OPAQUE_PART}\\z", false, 'N').freeze
REGISTRY = Regexp.new("\\A#{PATTERN::REG_NAME}\\z", false, 'N').freeze
ABS_PATH = Regexp.new("\\A#{PATTERN::ABS_PATH}\\z", false, 'N').freeze
REL_PATH = Regexp.new("\\A#{PATTERN::REL_PATH}\\z", false, 'N').freeze
QUERY    = Regexp.new("\\A#{PATTERN::QUERY}\\z", false, 'N').freeze
FRAGMENT = Regexp.new("\\A#{PATTERN::FRAGMENT}\\z", false, 'N').freeze
214 # :startdoc:
215 end # REGEXP
216
217 module Util # :nodoc:
def make_components_hash(klass, array_hash)
  # Builds a component-name => value Hash for +klass+.
  #
  # +array_hash+ is either an Array holding one value for every entry of
  # klass.component except the first, or a Hash that is already keyed by
  # component.  Values are cloned when possible.  The :scheme entry is
  # always derived from the class name.  Anything else raises ArgumentError.
  components = {}

  if array_hash.kind_of?(Hash)
    array_hash.each_pair do |name, val|
      # Values that refuse to be cloned are stored as they are.
      components[name] = begin
        val.clone
      rescue TypeError
        val
      end
    end
  elsif array_hash.kind_of?(Array) &&
      array_hash.size == klass.component.size - 1
    # Position i in the Array corresponds to component i + 1.
    array_hash.each_with_index do |val, idx|
      name = klass.component[idx + 1]
      components[name] = begin
        val.clone
      rescue TypeError
        val
      end
    end
  else
    raise ArgumentError,
      "expected Array of or Hash of components of #{klass.to_s} (#{klass.component[1..-1].join(', ')})"
  end

  # e.g. a class named "URI::HTTP" yields the scheme "http".
  components[:scheme] = klass.to_s.sub(/\A.*::/, '').downcase
  components
end
246 module_function :make_components_hash
247 end
248
249 module Escape
250 include REGEXP
251
252 #
253 # == Synopsis
254 #
255 # URI.escape(str [, unsafe])
256 #
257 # == Args
258 #
259 # +str+::
260 # String to escape.
261 # +unsafe+::
262 # Regexp that matches all symbols that must be replaced with codes.
263 # By default uses <tt>REGEXP::UNSAFE</tt>.
264 # When this argument is a String, it represents a character set.
265 #
266 # == Description
267 #
268 # Escapes the string, replacing all unsafe characters with codes.
269 #
270 # == Usage
271 #
272 # require 'uri'
273 #
274 # enc_uri = URI.escape("http://example.com/?a=\11\15")
275 # p enc_uri
276 # # => "http://example.com/?a=%09%0D"
277 #
278 # p URI.unescape(enc_uri)
279 # # => "http://example.com/?a=\t\r"
280 #
281 # p URI.escape("@?@!", "!?")
282 # # => "@%3F@%21"
283 #
def escape(str, unsafe = UNSAFE)
  # A non-Regexp +unsafe+ is treated as a String naming the characters to
  # escape, and is turned into a character class matching any one of them.
  pattern =
    if unsafe.kind_of?(Regexp)
      unsafe
    else
      Regexp.new("[#{Regexp.quote(unsafe)}]", false, 'N')
    end

  # Each byte of a matched run becomes "%XX" in upper-case hexadecimal.
  str.gsub(pattern) do |matched|
    matched.unpack('C*').map { |byte| sprintf('%%%02X', byte) }.join
  end
end
297 alias encode escape
298 #
299 # == Synopsis
300 #
301 # URI.unescape(str)
302 #
303 # == Args
304 #
305 # +str+::
306 # Unescapes the string.
307 #
308 # == Usage
309 #
310 # require 'uri'
311 #
312 # enc_uri = URI.escape("http://example.com/?a=\11\15")
313 # p enc_uri
314 # # => "http://example.com/?a=%09%0D"
315 #
316 # p URI.unescape(enc_uri)
317 # # => "http://example.com/?a=\t\r"
318 #
def unescape(str)
  # Each match of ESCAPED is a "%" followed by two hex digits; drop the "%",
  # read the digits as a number and emit the character with that code.
  str.gsub(ESCAPED) { |sequence| sequence[1, 2].hex.chr }
end
324 alias decode unescape
325 end
326
327 include REGEXP
328 extend Escape
329
330 @@schemes = {}
331
#
# Common ancestor of every URI exception.
#
class Error < StandardError
end

#
# The given string is not a URI.
#
class InvalidURIError < Error
end

#
# The given value is not a URI component.
#
class InvalidComponentError < Error
end

#
# The URI itself is valid, but it was used in an invalid way.
#
class BadURIError < Error
end
348
349 #
350 # == Synopsis
351 #
352 # URI::split(uri)
353 #
354 # == Args
355 #
356 # +uri+::
357 # String with URI.
358 #
359 # == Description
360 #
361 # Splits the string on following parts and returns array with result:
362 #
363 # * Scheme
364 # * Userinfo
365 # * Host
366 # * Port
367 # * Registry
368 # * Path
369 # * Opaque
370 # * Query
371 # * Fragment
372 #
373 # == Usage
374 #
375 # require 'uri'
376 #
377 # p URI.split("http://www.ruby-lang.org/")
378 # # => ["http", nil, "www.ruby-lang.org", nil, nil, "/", nil, nil, nil]
379 #
def self.split(uri)
  # Every component starts out as nil; the branch that matches fills in
  # whatever the URI actually contains.
  scheme = userinfo = host = port = registry = nil
  path = opaque = query = fragment = nil

  case uri
  when ''
    # null uri: nothing to extract

  when ABS_URI
    # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
    #
    # absoluteURI   = scheme ":" ( hier_part | opaque_part )
    # hier_part     = ( net_path | abs_path ) [ "?" query ]
    # opaque_part   = uric_no_slash *uric
    #
    # abs_path      = "/"  path_segments
    # net_path      = "//" authority [ abs_path ]
    #
    # authority     = server | reg_name
    # server        = [ [ userinfo "@" ] hostport ]
    scheme, opaque, userinfo, host, port,
      registry, path, query, fragment = Regexp.last_match.captures

    unless scheme
      raise InvalidURIError,
        "bad URI(absolute but no scheme): #{uri}"
    end
    # An absolute URI needs an opaque part, a path, or some authority.
    unless opaque || path || host || registry
      raise InvalidURIError,
        "bad URI(absolute but no path): #{uri}"
    end

  when REL_URI
    # relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
    #
    # net_path      = "//" authority [ abs_path ]
    # abs_path      = "/"  path_segments
    # rel_path      = rel_segment [ abs_path ]
    userinfo, host, port, registry,
      rel_segment, abs_path, query, fragment = Regexp.last_match.captures

    # The path is whichever of the two pieces is present, or both joined.
    path =
      if rel_segment && abs_path
        rel_segment + abs_path
      else
        rel_segment || abs_path
      end

  else
    raise InvalidURIError, "bad URI(is not URI?): #{uri}"
  end

  # (see RFC2396 Section 5.2)
  path = '' unless path || opaque

  [
    scheme,
    userinfo, host, port,
    registry,
    path,
    opaque,
    query,
    fragment
  ]
end
451
452 #
453 # == Synopsis
454 #
455 # URI::parse(uri_str)
456 #
457 # == Args
458 #
459 # +uri_str+::
460 # String with URI.
461 #
462 # == Description
463 #
464 # Creates one of the URI's subclasses instance from the string.
465 #
466 # == Raises
467 #
468 # URI::InvalidURIError
469 # Raised if URI given is not a correct one.
470 #
471 # == Usage
472 #
473 # require 'uri'
474 #
475 # uri = URI.parse("http://www.ruby-lang.org/")
476 # p uri
477 # # => #<URI::HTTP:0x202281be URL:http://www.ruby-lang.org/>
478 # p uri.scheme
479 # # => "http"
480 # p uri.host
481 # # => "www.ruby-lang.org"
482 #
def self.parse(uri)
  # split returns the nine components in exactly the order the URI class
  # constructors take them, so they can be forwarded as they are.
  parts = self.split(uri)
  scheme = parts[0]

  # Use the class registered for this scheme (looked up by its upper-cased
  # name); fall back to Generic when there is no scheme or no such entry.
  klass =
    if scheme && @@schemes.include?(scheme.upcase)
      @@schemes[scheme.upcase]
    else
      Generic
    end

  klass.new(*parts)
end
497
498 #
499 # == Synopsis
500 #
501 # URI::join(str[, str, ...])
502 #
503 # == Args
504 #
505 # +str+::
506 # String(s) to work with
507 #
508 # == Description
509 #
510 # Joins URIs.
511 #
512 # == Usage
513 #
514 # require 'uri'
515 #
516 # p URI.join("http://localhost/","main.rbx")
517 # # => #<URI::HTTP:0x2022ac02 URL:http://localhost/main.rbx>
518 #
def self.join(*str)
  # The first string is parsed into a URI object; every following string is
  # merged into the running result, in order.
  first, *others = str
  others.inject(self.parse(first)) { |joined, ref| joined.merge(ref) }
end
526
527 #
528 # == Synopsis
529 #
530 # URI::extract(str[, schemes][,&blk])
531 #
532 # == Args
533 #
534 # +str+::
535 # String to extract URIs from.
536 # +schemes+::
537 # Limit URI matching to a specific schemes.
538 #
539 # == Description
540 #
541 # Extracts URIs from a string. If block given, iterates through all matched URIs.
542 # Returns nil if block given or array with matches.
543 #
544 # == Usage
545 #
546 # require "uri"
547 #
548 # URI.extract("text here http://foo.example.org/bla and here mailto:test@example.com and here also.")
549 # # => ["http://foo.example.org/bla", "mailto:test@example.com"]
550 #
def self.extract(str, schemes = nil, &block)
  pattern = regexp(schemes)

  # Without a block, collect every match and return the list.
  unless block_given?
    found = []
    str.scan(pattern) { found.push $& }
    return found
  end

  # With a block, hand each match to the caller and return nil.
  # $& is used because the pattern has capture groups, so the value scan
  # passes to its own block is the list of groups, not the whole match.
  str.scan(pattern) { yield $& }
  nil
end
561
562 #
563 # == Synopsis
564 #
565 # URI::regexp([match_schemes])
566 #
567 # == Args
568 #
569 # +match_schemes+::
570 # Array of schemes. If given, resulting regexp matches to URIs
571 # whose scheme is one of the match_schemes.
572 #
573 # == Description
574 # Returns a Regexp object which matches to URI-like strings.
575 # The Regexp object returned by this method includes an arbitrary
576 # number of capture groups (parentheses). Never rely on their number.
577 #
578 # == Usage
579 #
580 # require 'uri'
581 #
582 # # extract first URI from html_string
583 # html_string.slice(URI.regexp)
584 #
585 # # remove ftp URIs
586 # html_string.sub(URI.regexp(['ftp']), '')
587 #
588 # # You should not rely on the number of parentheses
589 # html_string.scan(URI.regexp) do |*matches|
590 # p $&
591 # end
592 #
def self.regexp(schemes = nil)
  # No restriction requested: reuse the precompiled absolute-URI pattern.
  return ABS_URI_REF unless schemes

  # Otherwise put a look-ahead in front of the absolute-URI pattern so a
  # match must begin with one of the given schemes followed by ":".
  /(?=#{Regexp.union(*schemes)}:)#{PATTERN::X_ABS_URI}/xn
end
600
601end
602
module Kernel
  # Converts +uri_str+ to a URI object.
  #
  # An argument that already is a URI object (an instance of URI::Generic
  # or of one of its subclasses) is returned unchanged; anything else is
  # handed to URI.parse, so a String is parsed exactly as before.
  #
  # This method is introduced at 1.8.2.
  def URI(uri_str) # :doc:
    return uri_str if uri_str.kind_of?(URI::Generic)

    URI.parse(uri_str)
  end
  module_function :URI
end
Note: See TracBrowser for help on using the repository browser.