Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: extensions/gsdl-video/trunk/installed/cmdline/lib/ruby/1.8/uri/common.rb@ 18425

Last change on this file since 18425 was 18425, checked in by davidb, 15 years ago
Video extension to Greenstone
File size: 17.2 KB

Line
1	# = uri/common.rb
2	#
3	# Author:: Akira Yamada <akira@ruby-lang.org>
4	# Revision:: $Id: common.rb 11747 2007-02-15 02:41:45Z knu $
5	# License::
6	# You can redistribute it and/or modify it under the same term as Ruby.
7	#
8
module URI
  module REGEXP
    #
    # Patterns used to parse URI's
    #
    # Every constant in PATTERN is a String holding regular-expression
    # source; the compiled Regexp objects live in the enclosing REGEXP module.
    #
    module PATTERN
      # :stopdoc:

      # RFC 2396 (URI Generic Syntax)
      # RFC 2732 (IPv6 Literal Addresses in URL's)
      # RFC 2373 (IPv6 Addressing Architecture)

      # alpha         = lowalpha | upalpha
      ALPHA = "a-zA-Z"
      # alphanum      = alpha | digit
      ALNUM = "#{ALPHA}\\d"

      # hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
      #                         "a" | "b" | "c" | "d" | "e" | "f"
      HEX     = "a-fA-F\\d"
      # escaped       = "%" hex hex
      ESCAPED = "%[#{HEX}]{2}"
      # mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
      #                 "(" | ")"
      # unreserved    = alphanum | mark
      # NOTE: the leading "-" keeps the hyphen literal when this string is
      # placed at the start of a character class.
      UNRESERVED = "-_.!~*'()#{ALNUM}"
      # reserved      = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
      #                 "$" | ","
      # reserved      = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
      #                 "$" | "," | "[" | "]" (RFC 2732)
      RESERVED = ";/?:@&=+$,\\[\\]"

      # uric          = reserved | unreserved | escaped
      URIC = "(?:[#{UNRESERVED}#{RESERVED}]|#{ESCAPED})"
      # uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
      #                 "&" | "=" | "+" | "$" | ","
      URIC_NO_SLASH = "(?:[#{UNRESERVED};?:@&=+$,]|#{ESCAPED})"
      # query         = *uric
      QUERY = "#{URIC}*"
      # fragment      = *uric
      FRAGMENT = "#{URIC}*"

      # domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
      DOMLABEL = "(?:[#{ALNUM}](?:[-#{ALNUM}]*[#{ALNUM}])?)"
      # toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
      TOPLABEL = "(?:[#{ALPHA}](?:[-#{ALNUM}]*[#{ALNUM}])?)"
      # hostname      = *( domainlabel "." ) toplabel [ "." ]
      HOSTNAME = "(?:#{DOMLABEL}\\.)*#{TOPLABEL}\\.?"

      # RFC 2373, APPENDIX B:
      # IPv6address = hexpart [ ":" IPv4address ]
      # IPv4address   = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
      # hexpart = hexseq | hexseq "::" [ hexseq ] | "::" [ hexseq ]
      # hexseq  = hex4 *( ":" hex4)
      # hex4    = 1*4HEXDIG
      #
      # XXX: This definition has a flaw. "::" + IPv4address must be
      # allowed too.  Here is a replacement.
      #
      # IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
      IPV4ADDR = "\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}"
      # hex4     = 1*4HEXDIG
      HEX4 = "[#{HEX}]{1,4}"
      # lastpart = hex4 | IPv4address
      LASTPART = "(?:#{HEX4}|#{IPV4ADDR})"
      # hexseq1  = *( hex4 ":" ) hex4
      HEXSEQ1 = "(?:#{HEX4}:)*#{HEX4}"
      # hexseq2  = *( hex4 ":" ) lastpart
      HEXSEQ2 = "(?:#{HEX4}:)*#{LASTPART}"
      # IPv6address = hexseq2 | [ hexseq1 ] "::" [ hexseq2 ]
      IPV6ADDR = "(?:#{HEXSEQ2}|(?:#{HEXSEQ1})?::(?:#{HEXSEQ2})?)"

      # IPv6prefix  = ( hexseq1 | [ hexseq1 ] "::" [ hexseq1 ] ) "/" 1*2DIGIT
      # unused

      # ipv6reference = "[" IPv6address "]" (RFC 2732)
      IPV6REF = "\\[#{IPV6ADDR}\\]"

      # host          = hostname | IPv4address
      # host          = hostname | IPv4address | IPv6reference (RFC 2732)
      HOST = "(?:#{HOSTNAME}|#{IPV4ADDR}|#{IPV6REF})"
      # port          = *digit
      PORT = '\d*'
      # hostport      = host [ ":" port ]
      HOSTPORT = "#{HOST}(?::#{PORT})?"

      # userinfo      = *( unreserved | escaped |
      #                    ";" | ":" | "&" | "=" | "+" | "$" | "," )
      USERINFO = "(?:[#{UNRESERVED};:&=+$,]|#{ESCAPED})*"

      # pchar         = unreserved | escaped |
      #                 ":" | "@" | "&" | "=" | "+" | "$" | ","
      PCHAR = "(?:[#{UNRESERVED}:@&=+$,]|#{ESCAPED})"
      # param         = *pchar
      PARAM = "#{PCHAR}*"
      # segment       = *pchar *( ";" param )
      # Both repetitions are required: without them an empty segment (as in
      # the path "/") or a multi-character segment could never match.
      SEGMENT = "#{PCHAR}*(?:;#{PARAM})*"
      # path_segments = segment *( "/" segment )
      PATH_SEGMENTS = "#{SEGMENT}(?:/#{SEGMENT})*"

      # server        = [ [ userinfo "@" ] hostport ]
      SERVER = "(?:#{USERINFO}@)?#{HOSTPORT}"
      # reg_name      = 1*( unreserved | escaped | "$" | "," |
      #                     ";" | ":" | "@" | "&" | "=" | "+" )
      # The character class lists each symbol of the rule above exactly
      # once, including ":".
      REG_NAME = "(?:[#{UNRESERVED}$,;:@&=+]|#{ESCAPED})+"
      # authority     = server | reg_name
      AUTHORITY = "(?:#{SERVER}|#{REG_NAME})"

      # rel_segment   = 1*( unreserved | escaped |
      #                     ";" | "@" | "&" | "=" | "+" | "$" | "," )
      REL_SEGMENT = "(?:[#{UNRESERVED};@&=+$,]|#{ESCAPED})+"

      # scheme        = alpha *( alpha | digit | "+" | "-" | "." )
      SCHEME = "[#{ALPHA}][-+.#{ALPHA}\\d]*"

      # abs_path      = "/"  path_segments
      ABS_PATH = "/#{PATH_SEGMENTS}"
      # rel_path      = rel_segment [ abs_path ]
      REL_PATH = "#{REL_SEGMENT}(?:#{ABS_PATH})?"
      # net_path      = "//" authority [ abs_path ]
      NET_PATH = "//#{AUTHORITY}(?:#{ABS_PATH})?"

      # hier_part     = ( net_path | abs_path ) [ "?" query ]
      HIER_PART = "(?:#{NET_PATH}|#{ABS_PATH})(?:\\?(?:#{QUERY}))?"
      # opaque_part   = uric_no_slash *uric
      OPAQUE_PART = "#{URIC_NO_SLASH}#{URIC}*"

      # absoluteURI   = scheme ":" ( hier_part | opaque_part )
      ABS_URI = "#{SCHEME}:(?:#{HIER_PART}|#{OPAQUE_PART})"
      # relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
      REL_URI = "(?:#{NET_PATH}|#{ABS_PATH}|#{REL_PATH})(?:\\?#{QUERY})?"

      # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
      URI_REF = "(?:#{ABS_URI}|#{REL_URI})?(?:##{FRAGMENT})?"

      # XXX:
      # The X_* sources are written for Regexp::EXTENDED: whitespace is
      # ignored, "(?# ...)" groups are comments naming the capture groups,
      # and a literal "#" has to be written as "\#".
      X_ABS_URI = "
        (#{PATTERN::SCHEME}):                     (?# 1: scheme)
        (?:
           (#{PATTERN::OPAQUE_PART})              (?# 2: opaque)
        |
           (?:(?:
             //(?:
                 (?:(?:(#{PATTERN::USERINFO})@)?  (?# 3: userinfo)
                   (?:(#{PATTERN::HOST})(?::(\\d*))?))?(?# 4: host, 5: port)
               |
                 (#{PATTERN::REG_NAME})           (?# 6: registry)
               )
             |
             (?!//))                              (?# XXX: '//' is the mark for hostport)
             (#{PATTERN::ABS_PATH})?              (?# 7: path)
           )(?:\\?(#{PATTERN::QUERY}))?           (?# 8: query)
        )
        (?:\\#(#{PATTERN::FRAGMENT}))?            (?# 9: fragment)
      "
      X_REL_URI = "
        (?:
          (?:
            //
            (?:
              (?:(#{PATTERN::USERINFO})@)?        (?# 1: userinfo)
                (#{PATTERN::HOST})?(?::(\\d*))?   (?# 2: host, 3: port)
            |
              (#{PATTERN::REG_NAME})              (?# 4: registry)
            )
          )
        |
          (#{PATTERN::REL_SEGMENT})               (?# 5: rel_segment)
        )?
        (#{PATTERN::ABS_PATH})?                   (?# 6: abs_path)
        (?:\\?(#{PATTERN::QUERY}))?               (?# 7: query)
        (?:\\#(#{PATTERN::FRAGMENT}))?            (?# 8: fragment)
      "
      # :startdoc:
    end # PATTERN

    # :stopdoc:

    # All expressions below are compiled with the "n" option (no multibyte
    # handling) via regexp literals.  Whole-string matchers are anchored with
    # \A and \z rather than ^ and $, because ^/$ match at every line boundary
    # and would accept a valid URI on one line of otherwise invalid
    # multi-line input.

    # for URI::split
    ABS_URI = /\A#{PATTERN::X_ABS_URI}\z/xn.freeze
    REL_URI = /\A#{PATTERN::X_REL_URI}\z/xn.freeze

    # for URI::extract
    URI_REF     = /#{PATTERN::URI_REF}/n.freeze
    ABS_URI_REF = /#{PATTERN::X_ABS_URI}/xn.freeze
    REL_URI_REF = /#{PATTERN::X_REL_URI}/xn.freeze

    # for URI::escape/unescape
    ESCAPED = /#{PATTERN::ESCAPED}/n.freeze
    UNSAFE  = /[^#{PATTERN::UNRESERVED}#{PATTERN::RESERVED}]/n.freeze

    # for Generic#initialize
    SCHEME   = /\A#{PATTERN::SCHEME}\z/n.freeze
    USERINFO = /\A#{PATTERN::USERINFO}\z/n.freeze
    HOST     = /\A#{PATTERN::HOST}\z/n.freeze
    PORT     = /\A#{PATTERN::PORT}\z/n.freeze
    OPAQUE   = /\A#{PATTERN::OPAQUE_PART}\z/n.freeze
    REGISTRY = /\A#{PATTERN::REG_NAME}\z/n.freeze
    ABS_PATH = /\A#{PATTERN::ABS_PATH}\z/n.freeze
    REL_PATH = /\A#{PATTERN::REL_PATH}\z/n.freeze
    QUERY    = /\A#{PATTERN::QUERY}\z/n.freeze
    FRAGMENT = /\A#{PATTERN::FRAGMENT}\z/n.freeze
    # :startdoc:
  end # REGEXP

  module Util # :nodoc:
    # Builds a component Hash for +klass+ from +array_hash+.
    #
    # +klass+ must respond to +component+ (an Array whose first entry is the
    # scheme key) and +to_s+.  +array_hash+ is either
    #
    # * an Array holding one value per component after the first, in the
    #   order given by <tt>klass.component</tt>, or
    # * a Hash, whose pairs are copied as they are.
    #
    # Values are cloned where possible; a value whose +clone+ raises
    # TypeError is stored uncloned.  <tt>:scheme</tt> is always set to the
    # lower-cased last segment of <tt>klass.to_s</tt>.
    #
    # Raises ArgumentError for any other argument, including an Array of the
    # wrong length.
    def make_components_hash(klass, array_hash)
      tmp = {}
      if array_hash.kind_of?(Array) &&
          array_hash.size == klass.component.size - 1
        # component[0] is the scheme, so values line up with component[1..-1]
        klass.component[1..-1].each_with_index do |name, i|
          begin
            tmp[name] = array_hash[i].clone
          rescue TypeError
            tmp[name] = array_hash[i]
          end
        end

      elsif array_hash.kind_of?(Hash)
        array_hash.each do |key, value|
          begin
            tmp[key] = value.clone
          rescue TypeError
            tmp[key] = value
          end
        end
      else
        raise ArgumentError,
          "expected Array of or Hash of components of #{klass.to_s} (#{klass.component[1..-1].join(', ')})"
      end
      tmp[:scheme] = klass.to_s.sub(/\A.*::/, '').downcase

      return tmp
    end
    module_function :make_components_hash
  end

  module Escape
    include REGEXP

    #
    # == Synopsis
    #
    #   URI.escape(str [, unsafe])
    #
    # == Args
    #
    # +str+::
    #   String to replace characters in.
    # +unsafe+::
    #   Regexp that matches all symbols that must be replaced with codes.
    #   By default uses <tt>REGEXP::UNSAFE</tt>.
    #   When this argument is a String, it represents a character set.
    #
    # == Description
    #
    # Escapes the string, replacing all unsafe characters with codes.
    # Every byte of a matched substring becomes "%" followed by two
    # upper-case hexadecimal digits.
    #
    # == Usage
    #
    #   require 'uri'
    #
    #   enc_uri = URI.escape("http://example.com/?a=\11\15")
    #   p enc_uri
    #   # => "http://example.com/?a=%09%0D"
    #
    #   p URI.unescape(enc_uri)
    #   # => "http://example.com/?a=\t\r"
    #
    #   p URI.escape("@?@!", "!?")
    #   # => "@%3F@%21"
    #
    def escape(str, unsafe = UNSAFE)
      unless unsafe.kind_of?(Regexp)
        # perhaps unsafe is String object: treat it as a set of characters
        unsafe = /[#{Regexp.quote(unsafe)}]/n
      end
      str.gsub(unsafe) do |us|
        tmp = ''
        us.each_byte do |uc|
          tmp << sprintf('%%%02X', uc)
        end
        tmp
      end
    end
    alias encode escape
    #
    # == Synopsis
    #
    #   URI.unescape(str)
    #
    # == Args
    #
    # +str+::
    #   String to unescape.
    #
    # == Description
    #
    # Replaces every "%" + two hex digits sequence in +str+ with the
    # character that has that code.
    #
    # == Usage
    #
    #   require 'uri'
    #
    #   enc_uri = URI.escape("http://example.com/?a=\11\15")
    #   p enc_uri
    #   # => "http://example.com/?a=%09%0D"
    #
    #   p URI.unescape(enc_uri)
    #   # => "http://example.com/?a=\t\r"
    #
    def unescape(str)
      str.gsub(ESCAPED) do
        # $& is "%XX"; take the two hex digits after the percent sign
        $&[1,2].hex.chr
      end
    end
    alias decode unescape
  end

  include REGEXP
  extend Escape

  # Registry of scheme-specific classes, keyed by upper-case scheme name
  # (see URI.parse).
  @@schemes = {}

  #
  # Base class for all URI exceptions.
  #
  class Error < StandardError; end
  #
  # Not a URI.
  #
  class InvalidURIError < Error; end
  #
  # Not a URI component.
  #
  class InvalidComponentError < Error; end
  #
  # URI is valid, bad usage is not.
  #
  class BadURIError < Error; end

  #
  # == Synopsis
  #
  #   URI::split(uri)
  #
  # == Args
  #
  # +uri+::
  #   String with URI.
  #
  # == Description
  #
  # Splits the string on following parts and returns array with result:
  #
  #   * Scheme
  #   * Userinfo
  #   * Host
  #   * Port
  #   * Registry
  #   * Path
  #   * Opaque
  #   * Query
  #   * Fragment
  #
  # Parts that are absent are +nil+, except that Path is "" when neither a
  # path nor an opaque part is present.  The whole string has to be a URI
  # reference: input containing any character outside the grammar (for
  # instance a newline) is rejected.
  #
  # == Raises
  #
  # URI::InvalidURIError
  #   Raised if +uri+ is neither an absolute nor a relative URI.
  #
  # == Usage
  #
  #   require 'uri'
  #
  #   p URI.split("http://www.ruby-lang.org/")
  #   # => ["http", nil, "www.ruby-lang.org", nil, nil, "/", nil, nil, nil]
  #
  def self.split(uri)
    case uri
    when ''
      # null uri

    when ABS_URI
      scheme, opaque, userinfo, host, port,
        registry, path, query, fragment = $~[1..-1]

      # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]

      # absoluteURI   = scheme ":" ( hier_part | opaque_part )
      # hier_part     = ( net_path | abs_path ) [ "?" query ]
      # opaque_part   = uric_no_slash *uric

      # abs_path      = "/"  path_segments
      # net_path      = "//" authority [ abs_path ]

      # authority     = server | reg_name
      # server        = [ [ userinfo "@" ] hostport ]

      if !scheme
        raise InvalidURIError,
          "bad URI(absolute but no scheme): #{uri}"
      end
      if !opaque && (!path && (!host && !registry))
        raise InvalidURIError,
          "bad URI(absolute but no path): #{uri}"
      end

    when REL_URI
      scheme = nil
      opaque = nil

      userinfo, host, port, registry,
        rel_segment, abs_path, query, fragment = $~[1..-1]
      # rel_path = rel_segment [ abs_path ]: glue the two captures together
      if rel_segment && abs_path
        path = rel_segment + abs_path
      elsif rel_segment
        path = rel_segment
      elsif abs_path
        path = abs_path
      end

      # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]

      # relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]

      # net_path      = "//" authority [ abs_path ]
      # abs_path      = "/"  path_segments
      # rel_path      = rel_segment [ abs_path ]

      # authority     = server | reg_name
      # server        = [ [ userinfo "@" ] hostport ]

    else
      raise InvalidURIError, "bad URI(is not URI?): #{uri}"
    end

    path = '' if !path && !opaque # (see RFC2396 Section 5.2)
    ret = [
      scheme,
      userinfo, host, port,         # X
      registry,                     # X
      path,                         # Y
      opaque,                       # Y
      query,
      fragment
    ]
    return ret
  end

  #
  # == Synopsis
  #
  #   URI::parse(uri_str)
  #
  # == Args
  #
  # +uri_str+::
  #   String with URI.
  #
  # == Description
  #
  # Creates one of the URI's subclasses instance from the string.
  # The class registered for the upper-cased scheme is used when there is
  # one; otherwise a URI::Generic is created.
  #
  # == Raises
  #
  # URI::InvalidURIError
  #   Raised if URI given is not a correct one.
  #
  # == Usage
  #
  #   require 'uri'
  #
  #   uri = URI.parse("http://www.ruby-lang.org/")
  #   p uri
  #   # => #<URI::HTTP:0x202281be URL:http://www.ruby-lang.org/>
  #   p uri.scheme
  #   # => "http"
  #   p uri.host
  #   # => "www.ruby-lang.org"
  #
  def self.parse(uri)
    scheme, userinfo, host, port,
      registry, path, opaque, query, fragment = self.split(uri)

    if scheme && @@schemes.include?(scheme.upcase)
      @@schemes[scheme.upcase].new(scheme, userinfo, host, port,
                                   registry, path, opaque, query,
                                   fragment)
    else
      Generic.new(scheme, userinfo, host, port,
                  registry, path, opaque, query,
                  fragment)
    end
  end

  #
  # == Synopsis
  #
  #   URI::join(str[, str, ...])
  #
  # == Args
  #
  # +str+::
  #   String(s) to work with
  #
  # == Description
  #
  # Joins URIs: parses the first string and merges each following string
  # into the result, in order.
  #
  # == Usage
  #
  #   require 'uri'
  #
  #   p URI.join("http://localhost/","main.rbx")
  #   # => #<URI::HTTP:0x2022ac02 URL:http://localhost/main.rbx>
  #
  def self.join(*str)
    u = self.parse(str[0])
    str[1 .. -1].each do |x|
      u = u.merge(x)
    end
    u
  end

  #
  # == Synopsis
  #
  #   URI::extract(str[, schemes][,&blk])
  #
  # == Args
  #
  # +str+::
  #   String to extract URIs from.
  # +schemes+::
  #   Limit URI matching to a specific schemes.
  #
  # == Description
  #
  # Extracts URIs from a string. If block given, iterates through all matched URIs.
  # Returns nil if block given or array with matches.
  #
  # == Usage
  #
  #   require "uri"
  #
  #   URI.extract("text here http://foo.example.org/bla and here mailto:test@example.com and here also.")
  #   # => ["http://foo.example.org/bla", "mailto:test@example.com"]
  #
  def self.extract(str, schemes = nil, &block)
    if block_given?
      str.scan(regexp(schemes)) { yield $& }
      nil
    else
      result = []
      str.scan(regexp(schemes)) { result.push $& }
      result
    end
  end

  #
  # == Synopsis
  #
  #   URI::regexp([match_schemes])
  #
  # == Args
  #
  # +match_schemes+::
  #   Array of schemes. If given, resulting regexp matches to URIs
  #   whose scheme is one of the match_schemes.
  #
  # == Description
  # Returns a Regexp object which matches to URI-like strings.
  # The Regexp object returned by this method includes arbitrary
  # number of capture group (parentheses).  Never rely on its number.
  #
  # == Usage
  #
  #   require 'uri'
  #
  #   # extract first URI from html_string
  #   html_string.slice(URI.regexp)
  #
  #   # remove ftp URIs
  #   html_string.sub(URI.regexp(['ftp']), '')
  #
  #   # You should not rely on the number of parentheses
  #   html_string.scan(URI.regexp) do |*matches|
  #     p $&
  #   end
  #
  def self.regexp(schemes = nil)
    unless schemes
      ABS_URI_REF
    else
      # the look-ahead restricts the scheme without adding a capture group
      /(?=#{Regexp.union(*schemes)}:)#{PATTERN::X_ABS_URI}/xn
    end
  end

end
602
module Kernel
  # alias for URI.parse.
  #
  # Takes the URI string +uri_str+ and returns whatever URI.parse returns
  # for it; any exception raised by URI.parse propagates to the caller.
  #
  # This method is introduced at 1.8.2.
  def URI(uri_str) # :doc:
    URI.parse(uri_str)
  end
  # callable both as a bare URI("...") and as Kernel.URI("...")
  module_function :URI
end

Note: See TracBrowser for help on using the repository browser.

Download in other formats: