1 | require 'uri'
|
---|
2 | require 'stringio'
|
---|
3 | require 'time'
|
---|
4 |
|
---|
module Kernel
  private

  # Keep the built-in Kernel#open reachable so that anything which is not a
  # URI can still be handed to it.
  alias open_uri_original_open open # :nodoc:

  # Makes it possible to open various resources, including URIs.
  #
  # The first rule that applies wins:
  #
  # 1. If +name+ responds to `open', that method is called with the
  #    remaining arguments (and the block).
  # 2. If +name+ is string-like (responds to `to_str') and begins with
  #    xxx://, it is parsed by URI.parse. If the parsed object responds to
  #    `open', that method is called with the remaining arguments.
  # 3. Otherwise the original open is called.
  #
  # Since open-uri.rb provides URI::HTTP#open, URI::HTTPS#open and
  # URI::FTP#open, Kernel[#.]open accepts such URIs and strings which begin
  # with http://, https:// and ftp://.
  # In these cases, the opened file object is extended by OpenURI::Meta.
  def open(name, *rest, &block) # :doc:
    return name.open(*rest, &block) if name.respond_to?(:open)

    if name.respond_to?(:to_str) && %r{\A[A-Za-z][A-Za-z0-9+\-\.]*://} =~ name
      uri = URI.parse(name)
      return uri.open(*rest, &block) if uri.respond_to?(:open)
    end

    open_uri_original_open(name, *rest, &block)
  end
  module_function :open
end
37 |
|
---|
38 | # OpenURI is an easy-to-use wrapper for net/http, net/https and net/ftp.
|
---|
39 | #
|
---|
40 | #== Example
|
---|
41 | #
|
---|
42 | # It is possible to open http/https/ftp URL as usual like opening a file:
|
---|
43 | #
|
---|
44 | # open("http://www.ruby-lang.org/") {|f|
|
---|
45 | # f.each_line {|line| p line}
|
---|
46 | # }
|
---|
47 | #
|
---|
48 | # The opened file has several methods for meta information as follows since
|
---|
49 | # it is extended by OpenURI::Meta.
|
---|
50 | #
|
---|
51 | # open("http://www.ruby-lang.org/en") {|f|
|
---|
52 | # f.each_line {|line| p line}
|
---|
53 | # p f.base_uri # <URI::HTTP:0x40e6ef2 URL:http://www.ruby-lang.org/en/>
|
---|
54 | # p f.content_type # "text/html"
|
---|
55 | # p f.charset # "iso-8859-1"
|
---|
56 | # p f.content_encoding # []
|
---|
57 | # p f.last_modified # Thu Dec 05 02:45:02 UTC 2002
|
---|
58 | # }
|
---|
59 | #
|
---|
60 | # Additional header fields can be specified by an optional hash argument.
|
---|
61 | #
|
---|
62 | # open("http://www.ruby-lang.org/en/",
|
---|
63 | # "User-Agent" => "Ruby/#{RUBY_VERSION}",
|
---|
64 | # "From" => "[email protected]",
|
---|
65 | # "Referer" => "http://www.ruby-lang.org/") {|f|
|
---|
66 | # # ...
|
---|
67 | # }
|
---|
68 | #
|
---|
69 | # The environment variables such as http_proxy, https_proxy and ftp_proxy
|
---|
70 | # are in effect by default. :proxy => nil disables proxy.
|
---|
71 | #
|
---|
72 | # open("http://www.ruby-lang.org/en/raa.html", :proxy => nil) {|f|
|
---|
73 | # # ...
|
---|
74 | # }
|
---|
75 | #
|
---|
76 | # URI objects can be opened in a similar way.
|
---|
77 | #
|
---|
78 | # uri = URI.parse("http://www.ruby-lang.org/en/")
|
---|
79 | # uri.open {|f|
|
---|
80 | # # ...
|
---|
81 | # }
|
---|
82 | #
|
---|
83 | # URI objects can be read directly. The returned string is also extended by
|
---|
84 | # OpenURI::Meta.
|
---|
85 | #
|
---|
86 | # str = uri.read
|
---|
87 | # p str.base_uri
|
---|
88 | #
|
---|
89 | # Author:: Tanaka Akira <[email protected]>
|
---|
90 |
|
---|
91 | module OpenURI
|
---|
# Symbol-keyed options recognized by OpenURI.check_options. Only the keys
# are consulted there; the values are placeholders.
Options = {
  :proxy                     => true,
  :progress_proc             => true,
  :content_length_proc       => true,
  :http_basic_authentication => true,
}

# Verifies that every Symbol key of +options+ is listed in Options.
# Keys that are not Symbols are not checked.
#
# Raises ArgumentError for the first unrecognized Symbol key; otherwise
# returns +options+.
def OpenURI.check_options(options) # :nodoc:
  options.each do |key, _value|
    if Symbol === key && !Options.include?(key)
      raise ArgumentError, "unrecognized option: #{key}"
    end
  end
end
107 |
|
---|
# Splits the leading, Kernel#open-style positional arguments off +rest+.
#
# A leading String or Integer is taken as the mode; an Integer directly
# after it is taken as the permission. Returns [mode, perm, remaining],
# where mode and perm are nil when absent.
def OpenURI.scan_open_optional_arguments(*rest) # :nodoc:
  mode = perm = nil
  case rest.first
  when String, Integer
    mode = rest.shift
    perm = rest.shift if Integer === rest.first
  end
  [mode, perm, rest]
end
117 |
|
---|
# Opens +name+ (a URI object, or something URI.parse accepts) for reading.
#
# +rest+ may hold an optional mode, an optional perm and an optional
# options Hash, in that order; anything beyond that is an error. perm is
# accepted but not used. Only read modes (nil, 'r', 'rb', File::RDONLY)
# are allowed.
#
# With a block, the opened io is yielded, closed afterwards, and the
# block's value is returned. Without a block the io itself is returned.
#
# Raises ArgumentError for extra arguments, unrecognized options or a
# non-read mode.
def OpenURI.open_uri(name, *rest) # :nodoc:
  uri = if URI::Generic === name then name else URI.parse(name) end
  mode, _perm, rest = OpenURI.scan_open_optional_arguments(*rest)
  options = rest.shift if Hash === rest.first
  raise ArgumentError.new("extra arguments") unless rest.empty?
  options ||= {}
  OpenURI.check_options(options)

  read_mode = mode.nil? || ['r', 'rb', File::RDONLY].include?(mode)
  unless read_mode
    raise ArgumentError.new("invalid access mode #{mode} (#{uri.class} resource is read only.)")
  end

  io = open_loop(uri, options)
  return io unless block_given?

  begin
    yield io
  ensure
    io.close
  end
end
143 |
|
---|
# Fetches +uri+ into a fresh Buffer, following redirections, and returns the
# resulting io with base_uri set to the URI that was finally read.
#
# The :proxy option selects how a proxy is found for each URI visited:
# true (the default) asks the URI itself via find_proxy, nil/false disables
# proxying, and a String or URI is used as the proxy for every request.
# Any other value raises ArgumentError.
#
# A redirection is signalled by uri.buffer_open throwing
# :open_uri_redirect with the new location. Forbidden redirections and
# redirection loops raise RuntimeError.
def OpenURI.open_loop(uri, options) # :nodoc:
  opt_proxy = options.fetch(:proxy, true)
  find_proxy =
    case opt_proxy
    when true
      lambda {|u| u.find_proxy }
    when nil, false
      lambda {|u| nil }
    when String
      opt_proxy = URI.parse(opt_proxy)
      lambda {|u| opt_proxy }
    when URI::Generic
      lambda {|u| opt_proxy }
    else
      raise ArgumentError.new("Invalid proxy option: #{opt_proxy}")
    end

  visited = {}
  buf = nil
  while true
    redirect = catch(:open_uri_redirect) do
      buf = Buffer.new
      uri.buffer_open(buf, find_proxy.call(uri), options)
      nil
    end
    break unless redirect

    # Although it violates RFC2616, Location: field may have relative
    # URI. It is converted to absolute URI using uri as a base URI.
    redirect = uri + redirect if redirect.relative?

    unless OpenURI.redirectable?(uri, redirect)
      raise "redirection forbidden: #{uri} -> #{redirect}"
    end

    if options.include? :http_basic_authentication
      # send authentication only for the URI directly specified.
      options = options.dup
      options.delete :http_basic_authentication
    end

    uri = redirect
    seen_key = uri.to_s
    raise "HTTP redirection loop: #{uri}" if visited.include? seen_key
    visited[seen_key] = true
  end

  io = buf.io
  io.base_uri = uri
  io
end
192 |
|
---|
# Tells whether a redirection from +uri1+ to +uri2+ may be followed.
#
# It is allowed when both URIs have the same scheme (ignoring case) or
# when both schemes are http or ftp. The point is to forbid a redirection
# such as http://... to file:///etc/passwd.
# This is ad hoc; it should be extensible/configurable.
#
# The result is only meaningful as a truth value.
def OpenURI.redirectable?(uri1, uri2) # :nodoc:
  http_or_ftp = /\A(?:http|ftp)\z/i
  from = uri1.scheme
  to = uri2.scheme
  from.downcase == to.downcase || (http_or_ftp =~ from && http_or_ftp =~ to)
end
200 |
|
---|
# Performs a single HTTP GET for +target+ and appends the body to +buf+.
#
# buf::     receives the body via <<; buf.io and buf.size are also used.
# target::  the URI to fetch. A URI::HTTP (including HTTPS) is requested
#           from its own host; anything else is requested through +proxy+
#           using the full URI as the request URI (FTP over HTTP proxy).
# proxy::   nil, or a proxy URI whose class is exactly URI::HTTP.
# options:: String keys become request header fields. The Symbol keys
#           :http_basic_authentication, :content_length_proc and
#           :progress_proc are honoured; both procs are only called for a
#           successful (2xx) response.
#
# After the exchange buf.io is rewound and carries the status and all
# response header fields. Then:
# * a 2xx response returns normally;
# * 301, 302, 303 and 307 throw :open_uri_redirect with the Location URI;
#   when the Location field is missing or cannot be parsed,
#   OpenURI::HTTPError is raised instead so the caller still gets the
#   response;
# * every other response raises OpenURI::HTTPError.
#
# Raises RuntimeError for a non-HTTP proxy and, on Ruby 1.9.0 or later,
# ArgumentError when +target+ carries userinfo.
def OpenURI.open_http(buf, target, proxy, options) # :nodoc:
  if proxy
    raise "Non-HTTP proxy URI: #{proxy}" if proxy.class != URI::HTTP
  end

  if target.userinfo && "1.9.0" <= RUBY_VERSION
    # don't raise for 1.8 because compatibility.
    raise ArgumentError, "userinfo not supported. [RFC3986]"
  end

  require 'net/http'
  klass = Net::HTTP
  if URI::HTTP === target
    # HTTP or HTTPS
    if proxy
      klass = Net::HTTP::Proxy(proxy.host, proxy.port)
    end
    target_host = target.host
    target_port = target.port
    request_uri = target.request_uri
  else
    # FTP over HTTP proxy
    target_host = proxy.host
    target_port = proxy.port
    request_uri = target.to_s
  end

  http = klass.new(target_host, target_port)
  if target.class == URI::HTTPS
    require 'net/https'
    http.use_ssl = true
    http.verify_mode = OpenSSL::SSL::VERIFY_PEER
    store = OpenSSL::X509::Store.new
    store.set_default_paths
    http.cert_store = store
  end

  # Only String keys are header fields; Symbol keys are open-uri options.
  header = {}
  options.each {|k, v| header[k] = v if String === k }

  resp = nil
  http.start {
    if target.class == URI::HTTPS
      # xxx: information hiding violation
      sock = http.instance_variable_get(:@socket)
      if sock.respond_to?(:io)
        sock = sock.io # 1.9
      else
        sock = sock.instance_variable_get(:@socket) # 1.8
      end
      sock.post_connection_check(target_host)
    end
    req = Net::HTTP::Get.new(request_uri, header)
    if options.include? :http_basic_authentication
      user, pass = options[:http_basic_authentication]
      req.basic_auth user, pass
    end
    http.request(req) {|response|
      resp = response
      if options[:content_length_proc] && Net::HTTPSuccess === resp
        if resp.key?('Content-Length')
          options[:content_length_proc].call(resp['Content-Length'].to_i)
        else
          options[:content_length_proc].call(nil)
        end
      end
      resp.read_body {|str|
        buf << str
        if options[:progress_proc] && Net::HTTPSuccess === resp
          options[:progress_proc].call(buf.size)
        end
      }
    }
  }
  io = buf.io
  io.rewind
  io.status = [resp.code, resp.message]
  resp.each {|name,value| buf.io.meta_add_field name, value }
  case resp
  when Net::HTTPSuccess
  when Net::HTTPMovedPermanently, # 301
       Net::HTTPFound, # 302
       Net::HTTPSeeOther, # 303
       Net::HTTPTemporaryRedirect # 307
    # A redirect without a usable Location cannot be followed. Report it as
    # an HTTP error (with the response attached) rather than letting a URI
    # parsing error escape.
    location = resp['location']
    loc_uri = begin
      location && URI.parse(location)
    rescue URI::InvalidURIError
      nil
    end
    unless loc_uri
      raise OpenURI::HTTPError.new(io.status.join(' ') + ' (Invalid Location URI)', io)
    end
    throw :open_uri_redirect, loc_uri
  else
    raise OpenURI::HTTPError.new(io.status.join(' '), io)
  end
end
290 |
|
---|
# Error carrying an HTTP status message together with the io object that
# was supplied when the error was created.
class HTTPError < StandardError
  # The io given to the constructor.
  attr_reader :io

  # message:: the exception message.
  # io::      object made available through #io.
  def initialize(message, io)
    @io = io
    super(message)
  end
end
298 |
|
---|
# Accumulates downloaded data. Data is kept in a StringIO at first and is
# moved to a binary-mode Tempfile once more than StringMax bytes have been
# appended.
class Buffer # :nodoc:
  # Largest number of bytes kept in memory before spilling to a Tempfile.
  StringMax = 10240

  # Total number of bytes appended so far.
  attr_reader :size

  def initialize
    @io = StringIO.new
    @size = 0
  end

  # Appends +str+ and returns self.
  #
  # The size is tracked in bytes (not characters) so that it stays correct
  # for multibyte data on Ruby 1.9 and later.
  def <<(str)
    @io << str
    @size += str.bytesize
    if StringIO === @io && StringMax < @size
      require 'tempfile'
      io = Tempfile.new('open-uri')
      io.binmode
      # Carry over meta information if the StringIO already had it attached.
      Meta.init io, @io if @io.respond_to? :meta
      io << @io.string
      @io = io
    end
    self
  end

  # Returns the underlying io (StringIO or Tempfile), extending it with
  # Meta on first use.
  def io
    Meta.init @io unless @io.respond_to? :meta
    @io
  end
end
325 |
|
---|
# Mixin for holding meta-information.
module Meta
  # Extends +obj+ with Meta and resets its meta information. When +src+ is
  # given, its status, base_uri and header fields are copied to +obj+.
  def Meta.init(obj, src=nil) # :nodoc:
    obj.extend Meta
    obj.instance_eval {
      @base_uri = nil
      @meta = {}
    }
    if src
      obj.status = src.status
      obj.base_uri = src.base_uri
      src.meta.each {|name, value|
        obj.meta_add_field(name, value)
      }
    end
  end

  # returns an Array which consists of status code and message.
  attr_accessor :status

  # returns a URI which is the base of relative URIs in the data.
  # It may differ from the URI supplied by a user because of redirection.
  attr_accessor :base_uri

  # returns a Hash which represents header fields.
  # The Hash keys are downcased for canonicalization.
  attr_reader :meta

  # Stores the header field +name+ (downcased) with +value+.
  def meta_add_field(name, value) # :nodoc:
    @meta[name.downcase] = value
  end

  # returns a Time which represents the Last-Modified field,
  # or nil when the field is absent.
  def last_modified
    if v = @meta['last-modified']
      Time.httpdate(v)
    else
      nil
    end
  end

  RE_LWS = /[\r\n\t ]+/n
  RE_TOKEN = %r{[^\x00- ()<>@,;:\\"/\[\]?={}\x7f]+}n
  RE_QUOTED_STRING = %r{"(?:[\r\n\t !#-\[\]-~\x80-\xff]|\\[\x00-\x7f])*"}n
  RE_PARAMETERS = %r{(?:;#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?=#{RE_LWS}?(?:#{RE_TOKEN}|#{RE_QUOTED_STRING})#{RE_LWS}?)*}n

  # Parses the Content-Type field.
  #
  # Returns ["type/subtype", [attribute, value], ...] with type, subtype
  # and attribute names downcased, or nil when the field is absent or does
  # not match the expected syntax. Quoted values are returned without
  # their surrounding double quotes and with backslash escapes resolved.
  def content_type_parse # :nodoc:
    v = @meta['content-type']
    # The last (?:;#{RE_LWS}?)? matches extra ";" which violates RFC2045.
    if v && %r{\A#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?/(#{RE_TOKEN})#{RE_LWS}?(#{RE_PARAMETERS})(?:;#{RE_LWS}?)?\z}no =~ v
      type = $1.downcase
      subtype = $2.downcase
      parameters = []
      $3.scan(/;#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?=#{RE_LWS}?(?:(#{RE_TOKEN})|(#{RE_QUOTED_STRING}))/no) {|att, val, qval|
        if qval
          # Drop the enclosing quotes, then unescape quoted pairs. The /n
          # flag is required because the pattern contains \x80-\xff.
          val = qval[1...-1].gsub(/[\r\n\t !#-\[\]-~\x80-\xff]+|(\\[\x00-\x7f])/n) { $1 ? $1[1,1] : $& }
        end
        parameters << [att.downcase, val]
      }
      ["#{type}/#{subtype}", *parameters]
    else
      nil
    end
  end

  # returns "type/subtype" which is MIME Content-Type.
  # It is downcased for canonicalization.
  # Content-Type parameters are stripped.
  # "application/octet-stream" is returned when the field cannot be parsed.
  def content_type
    parsed = content_type_parse
    parsed ? parsed.first : 'application/octet-stream'
  end

  # returns the charset parameter of the Content-Type field.
  # It is downcased for canonicalization.
  #
  # If the charset parameter is not given but a block is given,
  # the block is called and its result is returned.
  # It can be used to guess the charset.
  #
  # If neither the charset parameter nor a block is given,
  # nil is returned except for a text type in HTTP.
  # In that case, "iso-8859-1" is returned as defined by RFC2616 3.7.1.
  def charset
    type, *parameters = content_type_parse
    if pair = parameters.assoc('charset')
      pair.last.downcase
    elsif block_given?
      yield
    elsif type && %r{\Atext/} =~ type &&
          @base_uri && /\Ahttp\z/i =~ @base_uri.scheme
      "iso-8859-1" # RFC2616 3.7.1
    else
      nil
    end
  end

  # returns a list of encodings in the Content-Encoding field
  # as an Array of String.
  # The encodings are downcased for canonicalization.
  def content_encoding
    v = @meta['content-encoding']
    if v && %r{\A#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?(?:,#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?)*}o =~ v
      v.scan(RE_TOKEN).map {|content_coding| content_coding.downcase}
    else
      []
    end
  end
end
433 |
|
---|
# Mixin for HTTP and FTP URIs.
module OpenRead
  # OpenURI::OpenRead#open provides `open' for URI::HTTP and URI::FTP.
  #
  # It takes up to three optional arguments:
  #   OpenURI::OpenRead#open([mode [, perm]] [, options]) [{|io| ... }]
  #
  # `mode' and `perm' are the same as for Kernel#open.
  #
  # However, `mode' must be a read mode because OpenURI::OpenRead#open
  # doesn't support write mode (yet).
  # `perm' is simply ignored because it is meaningful only for file
  # creation.
  #
  # `options' must be a hash.
  #
  # Each pair whose key is a string specifies an extra header field for
  # HTTP, i.e. it is ignored for FTP without an HTTP proxy.
  #
  # The hash may also include the following options, whose keys are
  # symbols:
  #
  # [:proxy]
  #  Synopsis:
  #    :proxy => "http://proxy.foo.com:8000/"
  #    :proxy => URI.parse("http://proxy.foo.com:8000/")
  #    :proxy => true
  #    :proxy => false
  #    :proxy => nil
  #
  #  If the :proxy option is specified, the value should be a String, a
  #  URI, a boolean or nil.
  #  When a String or URI is given, it is treated as the proxy URI.
  #  When true is given or the option itself is not specified,
  #  the environment variable `scheme_proxy' is examined, where
  #  `scheme' is replaced by `http', `https' or `ftp'.
  #  When false or nil is given, the environment variables are ignored and
  #  the connection is made to the server directly.
  #
  # [:http_basic_authentication]
  #  Synopsis:
  #    :http_basic_authentication=>[user, password]
  #
  #  If :http_basic_authentication is specified,
  #  the value should be an array which contains 2 strings:
  #  username and password.
  #  It is used for HTTP Basic authentication defined by RFC 2617.
  #
  # [:content_length_proc]
  #  Synopsis:
  #    :content_length_proc => lambda {|content_length| ... }
  #
  #  If the :content_length_proc option is specified, the procedure is
  #  called before the actual transfer is started.
  #  It takes one argument, which is the expected content length in bytes.
  #
  #  If two or more transfers are done because of HTTP redirection, the
  #  procedure is called only once, for the last transfer.
  #
  #  When the expected content length is unknown, the procedure is called
  #  with nil.
  #  This happens when the HTTP response has no Content-Length header.
  #
  # [:progress_proc]
  #  Synopsis:
  #    :progress_proc => lambda {|size| ...}
  #
  #  If the :progress_proc option is specified, the proc is called with one
  #  argument each time `open' gets a content fragment from the network.
  #  The argument `size' is the accumulated transferred size in bytes.
  #
  #  If two or more transfers are done because of HTTP redirection, the
  #  procedure is called only once, for the last transfer.
  #
  #  :progress_proc and :content_length_proc are intended to be used for a
  #  progress bar.
  #  For example, it can be implemented as follows using Ruby/ProgressBar.
  #
  #    pbar = nil
  #    open("http://...",
  #      :content_length_proc => lambda {|t|
  #        if t && 0 < t
  #          pbar = ProgressBar.new("...", t)
  #          pbar.file_transfer_mode
  #        end
  #      },
  #      :progress_proc => lambda {|s|
  #        pbar.set s if pbar
  #      }) {|f| ... }
  #
  # OpenURI::OpenRead#open returns an IO-like object if no block is given.
  # Otherwise it yields the IO object and returns the value of the block.
  # The IO object is extended with OpenURI::Meta.
  def open(*rest, &block)
    OpenURI.open_uri(self, *rest, &block)
  end

  # OpenURI::OpenRead#read([options]) reads the content referenced by self
  # and returns it as a string.
  # The string is extended with OpenURI::Meta.
  # The argument `options' is the same as for OpenURI::OpenRead#open.
  def read(options={})
    self.open(options) do |f|
      content = f.read
      Meta.init(content, f)
      content
    end
  end
end
543 | end
|
---|
544 |
|
---|
545 | module URI
|
---|
class Generic
  # returns a proxy URI.
  # The proxy URI is obtained from environment variables such as http_proxy,
  # ftp_proxy, no_proxy, etc.
  # If there is no proper proxy, nil is returned. An environment variable
  # that is set to an empty string is treated as not set.
  #
  # Note that capitalized variables (HTTP_PROXY, FTP_PROXY, NO_PROXY, etc.)
  # are examined too.
  #
  # But http_proxy and HTTP_PROXY are treated specially under a CGI
  # environment, because HTTP_PROXY may be set by a Proxy: header.
  # So HTTP_PROXY is not used.
  # http_proxy is not used either if the variable is case insensitive.
  # CGI_HTTP_PROXY can be used instead.
  #
  # No proxy is used when the host resolves to a loopback address, or when
  # it matches an entry of no_proxy. no_proxy is a list of host[:port]
  # entries separated by commas and/or whitespace; an entry matches the
  # host itself and any subdomain of it, and a leading dot is ignored.
  def find_proxy
    name = self.scheme.downcase + '_proxy'
    proxy_uri = nil
    if name == 'http_proxy' && ENV.include?('REQUEST_METHOD') # CGI?
      # HTTP_PROXY conflicts with *_proxy for proxy settings and
      # HTTP_* for header information in CGI.
      # So it should be careful to use it.
      pairs = ENV.reject {|k, v| /\Ahttp_proxy\z/i !~ k }
      case pairs.length
      when 0 # no proxy setting anyway.
        proxy_uri = nil
      when 1
        k, _ = pairs.shift
        if k == 'http_proxy' && ENV[k.upcase] == nil
          # http_proxy is safe to use because ENV is case sensitive.
          proxy_uri = ENV[name]
        else
          proxy_uri = nil
        end
      else # http_proxy is safe to use because ENV is case sensitive.
        proxy_uri = ENV[name]
      end
      if !proxy_uri
        # Use CGI_HTTP_PROXY.  cf. libwww-perl.
        proxy_uri = ENV["CGI_#{name.upcase}"]
      end
    elsif name == 'http_proxy'
      unless proxy_uri = ENV[name]
        if proxy_uri = ENV[name.upcase]
          warn 'The environment variable HTTP_PROXY is discouraged.  Use http_proxy.'
        end
      end
    else
      proxy_uri = ENV[name] || ENV[name.upcase]
    end

    # An empty value would otherwise be parsed into a URI without a host.
    proxy_uri = nil if proxy_uri && proxy_uri.empty?

    if proxy_uri && self.host
      require 'socket'
      begin
        addr = IPSocket.getaddress(self.host)
        proxy_uri = nil if /\A127\.|\A::1\z/ =~ addr
      rescue SocketError
        # Name resolution failed; leave the decision to the proxy.
      end
    end

    if proxy_uri
      proxy_uri = URI.parse(proxy_uri)
      name = 'no_proxy'
      if no_proxy = ENV[name] || ENV[name.upcase]
        # (?!\.) skips a leading dot and \s is excluded from the host so
        # that ".example.com" and "a.com, b.com" are understood.
        no_proxy.scan(/(?!\.)([^:,\s]+)(?::(\d+))?/) {|host, port|
          if /(\A|\.)#{Regexp.quote host}\z/i =~ self.host &&
             (!port || self.port == port.to_i)
            proxy_uri = nil
            break
          end
        }
      end
      proxy_uri
    else
      nil
    end
  end
end
623 |
|
---|
class HTTP
  include OpenURI::OpenRead

  # Fetches the content of this URI into +buf+ by delegating to
  # OpenURI.open_http with self as the target.
  def buffer_open(buf, proxy, options) # :nodoc:
    OpenURI.open_http(buf, self, proxy, options)
  end
end
631 |
|
---|
class FTP
  # Retrieves the file referenced by this URI into +buf+.
  #
  # When +proxy+ is given the request is delegated to OpenURI.open_http
  # (FTP over an HTTP proxy). Otherwise the file is fetched with Net::FTP
  # from the host and port of this URI, logging in with the userinfo of
  # the URI or as 'anonymous'.
  #
  # The options :content_length_proc and :progress_proc are honoured.
  #
  # Raises ArgumentError when the path has no filename, when a path
  # component contains CR or LF after percent-decoding, or when the
  # typecode is not a, i or d. The control connection is closed even when
  # the transfer fails.
  def buffer_open(buf, proxy, options) # :nodoc:
    if proxy
      OpenURI.open_http(buf, self, proxy, options)
      return
    end

    directories = self.path.split(%r{/}, -1)
    directories.shift if directories[0] == '' # strip a field before leading slash
    directories.each {|d|
      d.gsub!(/%([0-9A-Fa-f][0-9A-Fa-f])/) { [$1].pack("H2") }
    }
    unless filename = directories.pop
      raise ArgumentError, "no filename: #{self.inspect}"
    end
    # CR/LF would allow extra commands to be injected into the FTP session.
    directories.each {|d|
      if /[\r\n]/ =~ d
        raise ArgumentError, "invalid directory: #{d.inspect}"
      end
    }
    if /[\r\n]/ =~ filename
      raise ArgumentError, "invalid filename: #{filename.inspect}"
    end
    typecode = self.typecode
    if typecode && /\A[aid]\z/ !~ typecode
      raise ArgumentError, "invalid typecode: #{typecode.inspect}"
    end

    # Loaded lazily, only once a connection is really needed.
    require 'net/ftp'

    # The access sequence is defined by RFC 1738
    ftp = Net::FTP.new
    begin
      # Connect explicitly so that a non-default port in the URI is used.
      ftp.connect(self.host, self.port)
      # todo: extract user/passwd from .netrc.
      user = 'anonymous'
      passwd = nil
      user, passwd = self.userinfo.split(/:/) if self.userinfo
      ftp.login(user, passwd)
      directories.each {|cwd|
        ftp.voidcmd("CWD #{cwd}")
      }
      if typecode
        # xxx: typecode D is not handled.
        ftp.voidcmd("TYPE #{typecode.upcase}")
      end
      if options[:content_length_proc]
        options[:content_length_proc].call(ftp.size(filename))
      end
      ftp.retrbinary("RETR #{filename}", 4096) { |str|
        buf << str
        options[:progress_proc].call(buf.size) if options[:progress_proc]
      }
    ensure
      ftp.close
    end
    buf.io.rewind
  end

  include OpenURI::OpenRead
end
688 | end
|
---|