source: extensions/gsdl-video/trunk/installed/cmdline/lib/ruby/1.8/open-uri.rb@ 18425

Last change on this file since 18425 was 18425, checked in by davidb, 15 years ago

Video extension to Greenstone

File size: 20.8 KB
Line 
1require 'uri'
2require 'stringio'
3require 'time'
4
module Kernel
  private
  alias open_uri_original_open open # :nodoc:

  # Opens various kinds of resources, including URIs.
  #
  # Dispatch order:
  #
  # 1. If +name+ responds to `open', that method is called with the
  #    remaining arguments (and the block).
  # 2. If +name+ is string-like and begins with xxx://, it is parsed by
  #    URI.parse.  If the parsed object responds to `open', that method is
  #    called with the remaining arguments (and the block).
  # 3. Otherwise the original Kernel#open is called.
  #
  # Since open-uri.rb provides URI::HTTP#open, URI::HTTPS#open and
  # URI::FTP#open, Kernel[#.]open accepts such URIs and strings which begin
  # with http://, https:// and ftp://.
  # In these cases, the opened file object is extended by OpenURI::Meta.
  def open(name, *rest, &block) # :doc:
    return name.open(*rest, &block) if name.respond_to?(:open)

    if name.respond_to?(:to_str) &&
       %r{\A[A-Za-z][A-Za-z0-9+\-\.]*://} =~ name
      uri = URI.parse(name)
      return uri.open(*rest, &block) if uri.respond_to?(:open)
    end

    open_uri_original_open(name, *rest, &block)
  end
  module_function :open
end
37
38# OpenURI is an easy-to-use wrapper for net/http, net/https and net/ftp.
39#
40#== Example
41#
42# It is possible to open http/https/ftp URL as usual like opening a file:
43#
44# open("http://www.ruby-lang.org/") {|f|
45# f.each_line {|line| p line}
46# }
47#
48# The opened file has several methods for meta information as follows since
49# it is extended by OpenURI::Meta.
50#
51# open("http://www.ruby-lang.org/en") {|f|
52# f.each_line {|line| p line}
53# p f.base_uri # <URI::HTTP:0x40e6ef2 URL:http://www.ruby-lang.org/en/>
54# p f.content_type # "text/html"
55# p f.charset # "iso-8859-1"
56# p f.content_encoding # []
57# p f.last_modified # Thu Dec 05 02:45:02 UTC 2002
58# }
59#
60# Additional header fields can be specified by an optional hash argument.
61#
62# open("http://www.ruby-lang.org/en/",
63# "User-Agent" => "Ruby/#{RUBY_VERSION}",
64# "From" => "foo@bar.invalid",
65# "Referer" => "http://www.ruby-lang.org/") {|f|
66# # ...
67# }
68#
69# The environment variables such as http_proxy, https_proxy and ftp_proxy
70# are in effect by default. :proxy => nil disables proxy.
71#
72# open("http://www.ruby-lang.org/en/raa.html", :proxy => nil) {|f|
73# # ...
74# }
75#
76# URI objects can be opened in a similar way.
77#
78# uri = URI.parse("http://www.ruby-lang.org/en/")
79# uri.open {|f|
80# # ...
81# }
82#
83# URI objects can be read directly. The returned string is also extended by
84# OpenURI::Meta.
85#
86# str = uri.read
87# p str.base_uri
88#
89# Author:: Tanaka Akira <akr@m17n.org>
90
91module OpenURI
# Symbol keys that OpenURI.check_options accepts in an options hash.
# check_options only tests key membership (Options.include?), so within this
# file the +true+ values serve as placeholders.
Options = {
  :proxy => true,
  :progress_proc => true,
  :content_length_proc => true,
  :http_basic_authentication => true,
}
98
# Validates an options hash: every Symbol key must be listed in Options.
# Non-Symbol keys (e.g. String header names) are not checked.
# Raises ArgumentError for the first unrecognized Symbol key.
def OpenURI.check_options(options) # :nodoc:
  options.each do |key, _value|
    if Symbol === key && !Options.include?(key)
      raise ArgumentError, "unrecognized option: #{key}"
    end
  end
end
107
# Splits the leading Kernel#open style arguments off +rest+.
# A leading String or Integer is taken as the mode; an Integer directly
# after it is taken as the permission.  Returns [mode, perm, remaining],
# where mode and perm are nil when absent.
def OpenURI.scan_open_optional_arguments(*rest) # :nodoc:
  mode = perm = nil
  head = rest.first
  if String === head || Integer === head
    mode = rest.shift
    perm = rest.shift if Integer === rest.first
  end
  return mode, perm, rest
end
117
# Opens +name+ (a URI object or something URI.parse accepts) for reading.
#
# +rest+ may hold an optional mode, an optional perm and an optional options
# hash, in that order; anything else raises ArgumentError.  Only read modes
# (nil, 'r', 'rb', File::RDONLY) are accepted.
#
# With a block, the IO is yielded and closed afterwards, and the value of the
# block is returned.  Without a block the IO itself is returned.
def OpenURI.open_uri(name, *rest) # :nodoc:
  uri = (URI::Generic === name) ? name : URI.parse(name)
  mode, perm, rest = OpenURI.scan_open_optional_arguments(*rest)
  options = (Hash === rest.first) ? rest.shift : nil
  raise ArgumentError.new("extra arguments") unless rest.empty?
  options ||= {}
  OpenURI.check_options(options)

  read_only = mode == nil ||
              mode == 'r' || mode == 'rb' ||
              mode == File::RDONLY
  unless read_only
    raise ArgumentError.new("invalid access mode #{mode} (#{uri.class} resource is read only.)")
  end

  io = open_loop(uri, options)
  return io unless block_given?

  begin
    yield io
  ensure
    io.close
  end
end
143
# Fetches +uri+ into a fresh Buffer, following redirections signalled by
# throw :open_uri_redirect, and returns the resulting IO with base_uri set
# to the URI finally fetched.
#
# The :proxy option selects how the proxy is found: true (the default) asks
# the URI itself via find_proxy, nil/false disables the proxy, and a String
# or URI is used as the proxy directly.  Any other value raises
# ArgumentError.
def OpenURI.open_loop(uri, options) # :nodoc:
  opt_proxy = options.fetch(:proxy, true)
  find_proxy =
    case opt_proxy
    when true
      lambda {|u| u.find_proxy}
    when nil, false
      lambda {|u| nil}
    when String
      opt_proxy = URI.parse(opt_proxy)
      lambda {|u| opt_proxy}
    when URI::Generic
      lambda {|u| opt_proxy}
    else
      raise ArgumentError.new("Invalid proxy option: #{opt_proxy}")
    end

  visited = {}
  buf = nil
  while true
    redirect = catch(:open_uri_redirect) {
      buf = Buffer.new
      uri.buffer_open(buf, find_proxy.call(uri), options)
      nil
    }
    break unless redirect

    # Although it violates RFC2616, Location: field may have relative
    # URI.  It is converted to absolute URI using uri as a base URI.
    redirect = uri + redirect if redirect.relative?

    unless OpenURI.redirectable?(uri, redirect)
      raise "redirection forbidden: #{uri} -> #{redirect}"
    end

    if options.include? :http_basic_authentication
      # send authentication only for the URI directly specified.
      options = options.dup
      options.delete :http_basic_authentication
    end

    uri = redirect
    target = uri.to_s
    raise "HTTP redirection loop: #{uri}" if visited.include? target
    visited[target] = true
  end

  io = buf.io
  io.base_uri = uri
  io
end
192
# Tells whether a redirection from +uri1+ to +uri2+ is permitted: the
# schemes must be equal ignoring case, or both must be http or ftp.
#
# This test is intended to forbid a redirection from http://... to
# file:///etc/passwd.
# However this is ad hoc.  It should be extensible/configurable.
def OpenURI.redirectable?(uri1, uri2) # :nodoc:
  from = uri1.scheme
  to = uri2.scheme
  return true if from.downcase == to.downcase

  http_or_ftp = /\A(?:http|ftp)\z/i
  http_or_ftp =~ from && http_or_ftp =~ to
end
200
# Issues one HTTP GET for +target+ and streams the response body into +buf+.
#
# buf::     the buffer receiving the body; its io also receives the status
#           and the response header fields.
# target::  the URI to fetch.  When it is not a URI::HTTP, the request is
#           sent to +proxy+ with the full URI as the request target.
# proxy::   nil, or a proxy URI whose class must be exactly URI::HTTP.
# options:: String keys become request header fields;
#           :http_basic_authentication, :content_length_proc and
#           :progress_proc are honoured as well.
#
# Throws :open_uri_redirect with the parsed Location URI for 301, 302, 303
# and 307 responses, and raises OpenURI::HTTPError for any other
# non-success response.
def OpenURI.open_http(buf, target, proxy, options) # :nodoc:
  if proxy
    raise "Non-HTTP proxy URI: #{proxy}" if proxy.class != URI::HTTP
  end

  if target.userinfo && "1.9.0" <= RUBY_VERSION
    # don't raise for 1.8 because compatibility.
    raise ArgumentError, "userinfo not supported. [RFC3986]"
  end

  require 'net/http'
  klass = Net::HTTP
  if URI::HTTP === target
    # HTTP or HTTPS
    if proxy
      klass = Net::HTTP::Proxy(proxy.host, proxy.port)
    end
    target_host = target.host
    target_port = target.port
    request_uri = target.request_uri
  else
    # FTP over HTTP proxy
    # The connection goes to the proxy itself and the whole target URI is
    # sent as the request URI.
    target_host = proxy.host
    target_port = proxy.port
    request_uri = target.to_s
  end

  http = klass.new(target_host, target_port)
  if target.class == URI::HTTPS
    # Verify the peer certificate against the default certificate store.
    require 'net/https'
    http.use_ssl = true
    http.verify_mode = OpenSSL::SSL::VERIFY_PEER
    store = OpenSSL::X509::Store.new
    store.set_default_paths
    http.cert_store = store
  end

  # Only String keys of the options hash are sent as request header fields.
  header = {}
  options.each {|k, v| header[k] = v if String === k }

  resp = nil
  http.start {
    if target.class == URI::HTTPS
      # xxx: information hiding violation
      # Digs the underlying SSL socket out of Net::HTTP in order to run
      # post_connection_check against the target host.
      sock = http.instance_variable_get(:@socket)
      if sock.respond_to?(:io)
        sock = sock.io # 1.9
      else
        sock = sock.instance_variable_get(:@socket) # 1.8
      end
      sock.post_connection_check(target_host)
    end
    req = Net::HTTP::Get.new(request_uri, header)
    if options.include? :http_basic_authentication
      user, pass = options[:http_basic_authentication]
      req.basic_auth user, pass
    end
    http.request(req) {|response|
      resp = response
      # The callbacks are invoked for successful responses only.
      if options[:content_length_proc] && Net::HTTPSuccess === resp
        if resp.key?('Content-Length')
          options[:content_length_proc].call(resp['Content-Length'].to_i)
        else
          options[:content_length_proc].call(nil)
        end
      end
      resp.read_body {|str|
        buf << str
        if options[:progress_proc] && Net::HTTPSuccess === resp
          options[:progress_proc].call(buf.size)
        end
      }
    }
  }
  io = buf.io
  io.rewind
  io.status = [resp.code, resp.message]
  resp.each {|name,value| buf.io.meta_add_field name, value }
  case resp
  when Net::HTTPSuccess
    # Success: nothing more to do, the caller reads the result from buf.
  when Net::HTTPMovedPermanently, # 301
       Net::HTTPFound, # 302
       Net::HTTPSeeOther, # 303
       Net::HTTPTemporaryRedirect # 307
    throw :open_uri_redirect, URI.parse(resp['location'])
  else
    raise OpenURI::HTTPError.new(io.status.join(' '), io)
  end
end
290
# Error raised with an IO attached; OpenURI.open_http raises it for HTTP
# responses that are neither a success nor a followed redirection.
class HTTPError < StandardError
  # the IO object passed to the constructor.
  attr_reader :io

  # message:: the error message.
  # io::      the IO object to expose through #io.
  def initialize(message, io)
    super(message)
    @io = io
  end
end
298
# Accumulates fetched data.  Data is kept in a StringIO until more than
# StringMax bytes have been appended, then it is moved to a Tempfile.
class Buffer # :nodoc:
  # Size limit for the in-memory StringIO.
  StringMax = 10240

  # number of bytes appended so far.
  attr_reader :size

  def initialize
    @size = 0
    @io = StringIO.new
  end

  # Appends +str+.  Once the in-memory buffer outgrows StringMax, its
  # content (and meta-information, if any) is moved to a binary Tempfile.
  def <<(str)
    @io << str
    @size += str.length
    return unless StringIO === @io && StringMax < @size

    require 'tempfile'
    spill = Tempfile.new('open-uri')
    spill.binmode
    Meta.init spill, @io if @io.respond_to? :meta
    spill << @io.string
    @io = spill
  end

  # Returns the underlying IO, extended with Meta if it is not yet.
  def io
    Meta.init @io unless @io.respond_to? :meta
    @io
  end
end
325
326 # Mixin for holding meta-information.
module Meta
  # Extends +obj+ with Meta and resets its meta-information.
  # When +src+ is given, its status, base URI and header fields are copied
  # to +obj+.
  def Meta.init(obj, src=nil) # :nodoc:
    obj.extend Meta
    obj.instance_eval {
      @base_uri = nil
      @meta = {}
    }
    if src
      obj.status = src.status
      obj.base_uri = src.base_uri
      src.meta.each {|name, value|
        obj.meta_add_field(name, value)
      }
    end
  end

  # returns an Array which consists status code and message.
  attr_accessor :status

  # returns a URI which is base of relative URIs in the data.
  # It may differ from the URI supplied by a user because redirection.
  attr_accessor :base_uri

  # returns a Hash which represents header fields.
  # The Hash keys are downcased for canonicalization.
  attr_reader :meta

  # Stores a header field; the field name is downcased.
  def meta_add_field(name, value) # :nodoc:
    @meta[name.downcase] = value
  end

  # returns a Time which represents Last-Modified field,
  # or nil if the field is absent.
  def last_modified
    if v = @meta['last-modified']
      Time.httpdate(v)
    else
      nil
    end
  end

  RE_LWS = /[\r\n\t ]+/n
  RE_TOKEN = %r{[^\x00- ()<>@,;:\\"/\[\]?={}\x7f]+}n
  RE_QUOTED_STRING = %r{"(?:[\r\n\t !#-\[\]-~\x80-\xff]|\\[\x00-\x7f])*"}n
  RE_PARAMETERS = %r{(?:;#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?=#{RE_LWS}?(?:#{RE_TOKEN}|#{RE_QUOTED_STRING})#{RE_LWS}?)*}n

  # Parses the Content-Type field.
  #
  # Returns ["type/subtype", [attribute, value], ...] with the type, subtype
  # and attribute names downcased, or nil when the field is absent or
  # malformed.  Values given as quoted-strings are returned without the
  # surrounding double quotes and with backslash escapes resolved.
  def content_type_parse # :nodoc:
    v = @meta['content-type']
    # The last (?:;#{RE_LWS}?)? matches extra ";" which violates RFC2045.
    if v && %r{\A#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?/(#{RE_TOKEN})#{RE_LWS}?(#{RE_PARAMETERS})(?:;#{RE_LWS}?)?\z}no =~ v
      type = $1.downcase
      subtype = $2.downcase
      parameters = []
      $3.scan(/;#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?=#{RE_LWS}?(?:(#{RE_TOKEN})|(#{RE_QUOTED_STRING}))/no) {|att, val, qval|
        if qval
          # qval still carries its delimiting double quotes: drop them, then
          # replace each quoted-pair (backslash + char) by the bare char.
          val = qval[1...-1].gsub(/\\([\x00-\x7f])/n) { $1 }
        end
        parameters << [att.downcase, val]
      }
      ["#{type}/#{subtype}", *parameters]
    else
      nil
    end
  end

  # returns "type/subtype" which is MIME Content-Type.
  # It is downcased for canonicalization.
  # Content-Type parameters are stripped.
  # "application/octet-stream" is returned when the field is absent or
  # cannot be parsed.
  def content_type
    parsed = content_type_parse
    parsed ? parsed.first : 'application/octet-stream'
  end

  # returns a charset parameter in Content-Type field.
  # It is downcased for canonicalization.
  #
  # If charset parameter is not given but a block is given,
  # the block is called and its result is returned.
  # It can be used to guess charset.
  #
  # If charset parameter and block is not given,
  # nil is returned except text type in HTTP.
  # In that case, "iso-8859-1" is returned as defined by RFC2616 3.7.1.
  def charset
    type, *parameters = content_type_parse
    if pair = parameters.assoc('charset')
      pair.last.downcase
    elsif block_given?
      yield
    elsif type && %r{\Atext/} =~ type &&
          @base_uri && /\Ahttp\z/i =~ @base_uri.scheme
      "iso-8859-1" # RFC2616 3.7.1
    else
      nil
    end
  end

  # returns a list of encodings in Content-Encoding field
  # as an Array of String.
  # The encodings are downcased for canonicalization.
  def content_encoding
    v = @meta['content-encoding']
    if v && %r{\A#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?(?:,#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?)*}o =~ v
      v.scan(RE_TOKEN).map {|content_coding| content_coding.downcase}
    else
      []
    end
  end
end
433
434 # Mixin for HTTP and FTP URIs.
435 module OpenRead
436 # OpenURI::OpenRead#open provides `open' for URI::HTTP and URI::FTP.
437 #
438 # OpenURI::OpenRead#open takes optional 3 arguments as:
439 # OpenURI::OpenRead#open([mode [, perm]] [, options]) [{|io| ... }]
440 #
441 # `mode', `perm' is same as Kernel#open.
442 #
443 # However, `mode' must be read mode because OpenURI::OpenRead#open doesn't
444 # support write mode (yet).
445 # Also `perm' is just ignored because it is meaningful only for file
446 # creation.
447 #
448 # `options' must be a hash.
449 #
450 # Each pairs which key is a string in the hash specify a extra header
451 # field for HTTP.
452 # I.e. it is ignored for FTP without HTTP proxy.
453 #
454 # The hash may include other options which key is a symbol:
455 #
456 # [:proxy]
457 # Synopsis:
458 # :proxy => "http://proxy.foo.com:8000/"
459 # :proxy => URI.parse("http://proxy.foo.com:8000/")
460 # :proxy => true
461 # :proxy => false
462 # :proxy => nil
463 #
464 # If :proxy option is specified, the value should be String, URI,
465 # boolean or nil.
466 # When String or URI is given, it is treated as proxy URI.
467 # When true is given or the option itself is not specified,
468 # environment variable `scheme_proxy' is examined.
469 # `scheme' is replaced by `http', `https' or `ftp'.
470 # When false or nil is given, the environment variables are ignored and
471 # connection will be made to a server directly.
472 #
473 # [:http_basic_authentication]
474 # Synopsis:
475 # :http_basic_authentication=>[user, password]
476 #
477 # If :http_basic_authentication is specified,
478 # the value should be an array which contains 2 strings:
479 # username and password.
480 # It is used for HTTP Basic authentication defined by RFC 2617.
481 #
482 # [:content_length_proc]
483 # Synopsis:
484 # :content_length_proc => lambda {|content_length| ... }
485 #
486 # If :content_length_proc option is specified, the option value procedure
487 # is called before actual transfer is started.
488 # It takes one argument which is expected content length in bytes.
489 #
490 # If two or more transfers are done by HTTP redirection, the procedure
491 # is called only once, for the last transfer.
492 #
493 # When expected content length is unknown, the procedure is called with
494 # nil.
495 # This happens when the HTTP response has no Content-Length header.
496 #
497 # [:progress_proc]
498 # Synopsis:
499 # :progress_proc => lambda {|size| ...}
500 #
501 # If :progress_proc option is specified, the proc is called with one
502 # argument each time when `open' gets content fragment from network.
503 # The argument `size' is the accumulated transferred size in bytes.
504 #
505 # If two or more transfers are done by HTTP redirection, the procedure
506 # is called only once, for the last transfer.
507 #
508 # :progress_proc and :content_length_proc are intended to be used for
509 # progress bar.
510 # For example, it can be implemented as follows using Ruby/ProgressBar.
511 #
512 # pbar = nil
513 # open("http://...",
514 # :content_length_proc => lambda {|t|
515 # if t && 0 < t
516 # pbar = ProgressBar.new("...", t)
517 # pbar.file_transfer_mode
518 # end
519 # },
520 # :progress_proc => lambda {|s|
521 # pbar.set s if pbar
522 # }) {|f| ... }
523 #
524 # OpenURI::OpenRead#open returns an IO like object if block is not given.
525 # Otherwise it yields the IO object and return the value of the block.
526 # The IO object is extended with OpenURI::Meta.
def open(*rest, &block)
  # Delegates to OpenURI.open_uri with self as the resource to open; the
  # arguments and the block are passed through unchanged.
  OpenURI.open_uri(self, *rest, &block)
end
530
531 # OpenURI::OpenRead#read([options]) reads a content referenced by self and
532 # returns the content as string.
533 # The string is extended with OpenURI::Meta.
534 # The argument `options' is same as OpenURI::OpenRead#open.
def read(options={})
  self.open(options) do |stream|
    content = stream.read
    # carry the meta-information of the stream over to the returned string.
    Meta.init(content, stream)
    content
  end
end
542 end
543end
544
545module URI
class Generic
  # returns a proxy URI, or nil if there is no proper proxy.
  #
  # The proxy is looked up in the environment variable `scheme_proxy'
  # (http_proxy, ftp_proxy, ...); the capitalized variant (HTTP_PROXY,
  # FTP_PROXY, ...) is examined too.  no_proxy / NO_PROXY lists hosts
  # (optionally with a port) for which no proxy is used.  No proxy is used
  # either when the host resolves to a loopback address.
  #
  # http_proxy and HTTP_PROXY are treated specially under CGI environment
  # (i.e. when REQUEST_METHOD is set), because HTTP_PROXY may be set by a
  # Proxy: request header.
  # So HTTP_PROXY is not used.
  # http_proxy is not used either if the variable is case insensitive.
  # CGI_HTTP_PROXY can be used instead.
  def find_proxy
    env_name = self.scheme.downcase + '_proxy'

    proxy_uri =
      if env_name != 'http_proxy'
        ENV[env_name] || ENV[env_name.upcase]
      elsif ENV.include?('REQUEST_METHOD') # CGI?
        # HTTP_PROXY conflicts with *_proxy for proxy settings and
        # HTTP_* for header information in CGI.
        # So it should be careful to use it.
        candidates = ENV.reject {|k, v| /\Ahttp_proxy\z/i !~ k }
        cgi_safe =
          case candidates.length
          when 0 # no proxy setting anyway.
            nil
          when 1
            k, v = candidates.shift
            if k == 'http_proxy' && ENV[k.upcase] == nil
              # http_proxy is safe to use because ENV is case sensitive.
              ENV[env_name]
            else
              nil
            end
          else # http_proxy is safe to use because ENV is case sensitive.
            ENV[env_name]
          end
        # Use CGI_HTTP_PROXY.  cf. libwww-perl.
        cgi_safe || ENV["CGI_#{env_name.upcase}"]
      else
        lowercase = ENV[env_name]
        if lowercase
          lowercase
        else
          uppercase = ENV[env_name.upcase]
          warn 'The environment variable HTTP_PROXY is discouraged. Use http_proxy.' if uppercase
          uppercase
        end
      end

    # never go through a proxy for a loopback address.
    if proxy_uri && self.host
      require 'socket'
      begin
        addr = IPSocket.getaddress(self.host)
        proxy_uri = nil if /\A127\.|\A::1\z/ =~ addr
      rescue SocketError
      end
    end

    return nil unless proxy_uri

    proxy_uri = URI.parse(proxy_uri)
    exclusions = ENV['no_proxy'] || ENV['NO_PROXY']
    if exclusions
      exclusions.scan(/([^:,]*)(?::(\d+))?/) {|host, port|
        if /(\A|\.)#{Regexp.quote host}\z/i =~ self.host &&
           (!port || self.port == port.to_i)
          proxy_uri = nil
          break
        end
      }
    end
    proxy_uri
  end
end
623
class HTTP
  # provides `open' and `read'.
  include OpenURI::OpenRead

  # Fetches the resource referenced by self into +buf+ by handing the
  # request over to OpenURI.open_http.
  def buffer_open(buf, proxy, options) # :nodoc:
    OpenURI.open_http(buf, self, proxy, options)
  end
end
631
class FTP
  # Fetches the file referenced by self into +buf+.
  #
  # buf::     the buffer receiving the file content.
  # proxy::   nil for a direct FTP transfer; otherwise the request is handed
  #           over to OpenURI.open_http (FTP over HTTP proxy).
  # options:: :content_length_proc and :progress_proc are honoured.
  #
  # Raises ArgumentError when the path has no filename, when a path
  # component contains CR or LF, or when the typecode is not a, i or d.
  def buffer_open(buf, proxy, options) # :nodoc:
    if proxy
      OpenURI.open_http(buf, self, proxy, options)
      return
    end
    require 'net/ftp'

    directories = self.path.split(%r{/}, -1)
    directories.shift if directories[0] == '' # strip a field before leading slash
    # decode %XX escapes in every path component.
    directories.each {|d|
      d.gsub!(/%([0-9A-Fa-f][0-9A-Fa-f])/) { [$1].pack("H2") }
    }
    unless filename = directories.pop
      raise ArgumentError, "no filename: #{self.inspect}"
    end
    # CR/LF would allow smuggling extra commands into the control connection.
    directories.each {|d|
      if /[\r\n]/ =~ d
        raise ArgumentError, "invalid directory: #{d.inspect}"
      end
    }
    if /[\r\n]/ =~ filename
      raise ArgumentError, "invalid filename: #{filename.inspect}"
    end
    typecode = self.typecode
    if typecode && /\A[aid]\z/ !~ typecode
      raise ArgumentError, "invalid typecode: #{typecode.inspect}"
    end

    # The access sequence is defined by RFC 1738
    ftp = Net::FTP.new
    begin
      # connect explicitly so that a port given in the URI is honoured.
      ftp.connect(self.host, self.port || Net::FTP::FTP_PORT)
      # todo: extract user/passwd from .netrc.
      user = 'anonymous'
      passwd = nil
      user, passwd = self.userinfo.split(/:/) if self.userinfo
      ftp.login(user, passwd)
      directories.each {|cwd|
        ftp.voidcmd("CWD #{cwd}")
      }
      if typecode
        # xxx: typecode D is not handled.
        ftp.voidcmd("TYPE #{typecode.upcase}")
      end
      if options[:content_length_proc]
        options[:content_length_proc].call(ftp.size(filename))
      end
      ftp.retrbinary("RETR #{filename}", 4096) { |str|
        buf << str
        options[:progress_proc].call(buf.size) if options[:progress_proc]
      }
    ensure
      # always release the control connection, even when a command failed.
      ftp.close
    end
    buf.io.rewind
  end

  include OpenURI::OpenRead
end
688end
Note: See TracBrowser for help on using the repository browser.