[18425] | 1 | require 'uri'
|
---|
| 2 | require 'stringio'
|
---|
| 3 | require 'time'
|
---|
| 4 |
|
---|
module Kernel
  private

  alias open_uri_original_open open # :nodoc:

  # Opens various kinds of resources, including URIs.
  #
  # Dispatch order:
  #
  # 1. If +name+ responds to `open', that method is called with the
  #    remaining arguments and the block.
  # 2. If +name+ is string-like and starts with "scheme://", it is parsed
  #    with URI.parse; when the parsed object responds to `open', that
  #    method is called with the remaining arguments and the block.
  # 3. Otherwise the original Kernel#open is called.
  #
  # Since open-uri.rb provides URI::HTTP#open, URI::HTTPS#open and
  # URI::FTP#open, Kernel[#.]open accepts such URIs as well as strings that
  # begin with http://, https:// and ftp://.
  # In these cases the opened file object is extended by OpenURI::Meta.
  def open(name, *rest, &block) # :doc:
    return name.open(*rest, &block) if name.respond_to?(:open)

    if name.respond_to?(:to_str) && %r{\A[A-Za-z][A-Za-z0-9+\-\.]*://} =~ name
      parsed = URI.parse(name)
      return parsed.open(*rest, &block) if parsed.respond_to?(:open)
    end

    # Not something we know how to open: defer to the aliased built-in.
    open_uri_original_open(name, *rest, &block)
  end
  module_function :open
end
|
---|
| 37 |
|
---|
| 38 | # OpenURI is an easy-to-use wrapper for net/http, net/https and net/ftp.
|
---|
| 39 | #
|
---|
| 40 | #== Example
|
---|
| 41 | #
|
---|
| 42 | # It is possible to open http/https/ftp URL as usual like opening a file:
|
---|
| 43 | #
|
---|
| 44 | # open("http://www.ruby-lang.org/") {|f|
|
---|
| 45 | # f.each_line {|line| p line}
|
---|
| 46 | # }
|
---|
| 47 | #
|
---|
| 48 | # The opened file has several methods for meta information as follows since
|
---|
| 49 | # it is extended by OpenURI::Meta.
|
---|
| 50 | #
|
---|
| 51 | # open("http://www.ruby-lang.org/en") {|f|
|
---|
| 52 | # f.each_line {|line| p line}
|
---|
| 53 | # p f.base_uri # <URI::HTTP:0x40e6ef2 URL:http://www.ruby-lang.org/en/>
|
---|
| 54 | # p f.content_type # "text/html"
|
---|
| 55 | # p f.charset # "iso-8859-1"
|
---|
| 56 | # p f.content_encoding # []
|
---|
| 57 | # p f.last_modified # Thu Dec 05 02:45:02 UTC 2002
|
---|
| 58 | # }
|
---|
| 59 | #
|
---|
| 60 | # Additional header fields can be specified by an optional hash argument.
|
---|
| 61 | #
|
---|
| 62 | # open("http://www.ruby-lang.org/en/",
|
---|
| 63 | # "User-Agent" => "Ruby/#{RUBY_VERSION}",
|
---|
| 64 | # "From" => "[email protected]",
|
---|
| 65 | # "Referer" => "http://www.ruby-lang.org/") {|f|
|
---|
| 66 | # # ...
|
---|
| 67 | # }
|
---|
| 68 | #
|
---|
| 69 | # The environment variables such as http_proxy, https_proxy and ftp_proxy
|
---|
| 70 | # are in effect by default. :proxy => nil disables proxy.
|
---|
| 71 | #
|
---|
| 72 | # open("http://www.ruby-lang.org/en/raa.html", :proxy => nil) {|f|
|
---|
| 73 | # # ...
|
---|
| 74 | # }
|
---|
| 75 | #
|
---|
| 76 | # URI objects can be opened in a similar way.
|
---|
| 77 | #
|
---|
| 78 | # uri = URI.parse("http://www.ruby-lang.org/en/")
|
---|
| 79 | # uri.open {|f|
|
---|
| 80 | # # ...
|
---|
| 81 | # }
|
---|
| 82 | #
|
---|
| 83 | # URI objects can be read directly. The returned string is also extended by
|
---|
| 84 | # OpenURI::Meta.
|
---|
| 85 | #
|
---|
| 86 | # str = uri.read
|
---|
| 87 | # p str.base_uri
|
---|
| 88 | #
|
---|
| 89 | # Author:: Tanaka Akira <[email protected]>
|
---|
| 90 |
|
---|
| 91 | module OpenURI
|
---|
# Symbol option keys recognised by OpenURI.check_options; any other Symbol
# key in an options hash is rejected there with ArgumentError.
# Only key membership is tested by check_options, so the +true+ values act
# as placeholders.
Options = {
  :proxy => true,
  :progress_proc => true,
  :content_length_proc => true,
  :http_basic_authentication => true,
}
|
---|
| 98 |
|
---|
# Validates the Symbol keys of +options+ against OpenURI::Options.
# Non-Symbol keys (String keys are extra HTTP header fields) are not
# checked. Raises ArgumentError for the first unknown Symbol key.
def OpenURI.check_options(options) # :nodoc:
  options.each do |key, _value|
    if Symbol === key && !Options.include?(key)
      raise ArgumentError, "unrecognized option: #{key}"
    end
  end
end
|
---|
| 107 |
|
---|
# Splits the leading Kernel#open-style positional arguments off +rest+.
#
# A leading String or Integer is taken as the mode; an Integer directly
# after it is taken as the permission. Returns [mode, perm, remaining],
# where mode and perm are nil when absent.
def OpenURI.scan_open_optional_arguments(*rest) # :nodoc:
  mode = perm = nil
  head = rest.first
  if String === head || Integer === head
    mode = rest.shift
    perm = rest.shift if Integer === rest.first
  end
  [mode, perm, rest]
end
|
---|
| 117 |
|
---|
# Opens +name+ (a URI::Generic, or anything URI.parse accepts) for reading.
#
# +rest+ may hold, in order: a mode, a permission and an options Hash
# (see OpenURI.scan_open_optional_arguments). Only read modes are accepted:
# nil, 'r', 'rb' or File::RDONLY. The permission is parsed but not used.
#
# Without a block the opened IO-like object is returned. With a block the
# object is yielded, the block's value is returned, and the object is
# released afterwards.
#
# Raises ArgumentError for surplus arguments, unknown Symbol options or a
# non-read mode.
def OpenURI.open_uri(name, *rest) # :nodoc:
  uri = URI::Generic === name ? name : URI.parse(name)
  mode, _perm, rest = OpenURI.scan_open_optional_arguments(*rest)
  options = rest.shift if !rest.empty? && Hash === rest.first
  raise ArgumentError.new("extra arguments") if !rest.empty?
  options ||= {}
  OpenURI.check_options(options)

  unless mode == nil ||
         mode == 'r' || mode == 'rb' ||
         mode == File::RDONLY
    raise ArgumentError.new("invalid access mode #{mode} (#{uri.class} resource is read only.)")
  end

  io = open_loop(uri, options)
  if block_given?
    begin
      yield io
    ensure
      if io.respond_to? :close!
        # Tempfile-backed body: close AND unlink now instead of leaving the
        # file on disk until the object is garbage collected.
        io.close!
      else
        # The block may already have closed the stream itself.
        io.close if !io.closed?
      end
    end
  else
    io
  end
end
|
---|
| 143 |
|
---|
# Fetches +uri+ into a fresh Buffer, following redirects, and returns the
# buffer's IO with base_uri set to the URI finally retrieved.
#
# The :proxy option selects the proxy lookup: true (default) asks each URI
# for its own proxy via find_proxy, nil/false disables proxying, and a
# String or URI::Generic is used as a fixed proxy. Any other value raises
# ArgumentError.
#
# A redirect is signalled by uri.buffer_open throwing :open_uri_redirect
# with the target URI. RuntimeError is raised when a redirect is not
# permitted by OpenURI.redirectable? or when a redirect target repeats.
def OpenURI.open_loop(uri, options) # :nodoc:
  proxy_setting = options.fetch(:proxy, true)
  find_proxy =
    case proxy_setting
    when true
      lambda {|u| u.find_proxy }
    when nil, false
      lambda {|u| nil }
    when String
      fixed_proxy = URI.parse(proxy_setting)
      lambda {|u| fixed_proxy }
    when URI::Generic
      lambda {|u| proxy_setting }
    else
      raise ArgumentError.new("Invalid proxy option: #{proxy_setting}")
    end

  visited = {}
  buf = nil
  while true
    redirect = catch(:open_uri_redirect) {
      buf = Buffer.new
      uri.buffer_open(buf, find_proxy.call(uri), options)
      nil
    }
    break unless redirect

    # Although it violates RFC2616, the Location: field may hold a relative
    # URI. It is resolved against the URI that produced the redirect.
    redirect = uri + redirect if redirect.relative?

    unless OpenURI.redirectable?(uri, redirect)
      raise "redirection forbidden: #{uri} -> #{redirect}"
    end

    if options.include? :http_basic_authentication
      # send authentication only for the URI directly specified.
      options = options.dup
      options.delete :http_basic_authentication
    end

    uri = redirect
    target_key = uri.to_s
    raise "HTTP redirection loop: #{uri}" if visited.include? target_key
    visited[target_key] = true
  end

  result = buf.io
  result.base_uri = uri
  result
end
|
---|
| 192 |
|
---|
# Decides whether a redirect from +uri1+ to +uri2+ may be followed.
#
# Allowed when both schemes are equal ignoring case, or when both are one
# of http/ftp. The intent is to forbid a redirection such as
# http://... -> file:///etc/passwd. The rule is ad hoc; it should become
# extensible/configurable.
#
# Returns a truthy value when allowed and nil otherwise.
def OpenURI.redirectable?(uri1, uri2) # :nodoc:
  from_scheme = uri1.scheme
  to_scheme = uri2.scheme
  return true if from_scheme.downcase == to_scheme.downcase

  http_or_ftp = /\A(?:http|ftp)\z/i
  http_or_ftp =~ from_scheme && http_or_ftp =~ to_scheme
end
|
---|
| 200 |
|
---|
# Performs one HTTP GET for +target+ and stores the body in +buf+.
#
# buf::     receives body fragments via << and supplies the result IO.
# target::  the URI to fetch. A URI::HTTP (or HTTPS) is requested from its
#           own host; anything else is requested through +proxy+ using the
#           full URI as the request target (FTP over an HTTP proxy).
# proxy::   nil, or a URI whose class is exactly URI::HTTP.
# options:: String keys become request header fields; the Symbol keys
#           :http_basic_authentication, :content_length_proc and
#           :progress_proc are honoured here.
#
# On a successful response the method returns normally with the status and
# header fields recorded on buf.io. On 301/302/303/307 it throws
# :open_uri_redirect with the parsed Location URI. Any other response, or a
# redirect whose Location header is missing or unparsable, raises
# OpenURI::HTTPError carrying buf.io.
#
# Raises RuntimeError for a non-HTTP proxy and ArgumentError when +target+
# carries userinfo (on Ruby 1.9.0 and later).
def OpenURI.open_http(buf, target, proxy, options) # :nodoc:
  if proxy
    raise "Non-HTTP proxy URI: #{proxy}" if proxy.class != URI::HTTP
  end

  if target.userinfo && "1.9.0" <= RUBY_VERSION
    # don't raise for 1.8 because compatibility.
    raise ArgumentError, "userinfo not supported. [RFC3986]"
  end

  require 'net/http'
  klass = Net::HTTP
  if URI::HTTP === target
    # HTTP or HTTPS
    if proxy
      klass = Net::HTTP::Proxy(proxy.host, proxy.port)
    end
    target_host = target.host
    target_port = target.port
    request_uri = target.request_uri
  else
    # FTP over HTTP proxy
    target_host = proxy.host
    target_port = proxy.port
    request_uri = target.to_s
  end

  http = klass.new(target_host, target_port)
  if target.class == URI::HTTPS
    require 'net/https'
    http.use_ssl = true
    http.verify_mode = OpenSSL::SSL::VERIFY_PEER
    store = OpenSSL::X509::Store.new
    store.set_default_paths
    http.cert_store = store
  end

  # Only String keys are header fields; Symbol keys are open-uri options.
  header = {}
  options.each {|k, v| header[k] = v if String === k }

  resp = nil
  http.start {
    if target.class == URI::HTTPS
      # xxx: information hiding violation
      sock = http.instance_variable_get(:@socket)
      if sock.respond_to?(:io)
        sock = sock.io # 1.9
      else
        sock = sock.instance_variable_get(:@socket) # 1.8
      end
      sock.post_connection_check(target_host)
    end
    req = Net::HTTP::Get.new(request_uri, header)
    if options.include? :http_basic_authentication
      user, pass = options[:http_basic_authentication]
      req.basic_auth user, pass
    end
    http.request(req) {|response|
      resp = response
      # The callbacks fire only for successful responses, so intermediate
      # redirect responses do not trigger them.
      if options[:content_length_proc] && Net::HTTPSuccess === resp
        if resp.key?('Content-Length')
          options[:content_length_proc].call(resp['Content-Length'].to_i)
        else
          options[:content_length_proc].call(nil)
        end
      end
      resp.read_body {|str|
        buf << str
        if options[:progress_proc] && Net::HTTPSuccess === resp
          options[:progress_proc].call(buf.size)
        end
      }
    }
  }
  io = buf.io
  io.rewind
  io.status = [resp.code, resp.message]
  resp.each {|name,value| buf.io.meta_add_field name, value }
  case resp
  when Net::HTTPSuccess
  when Net::HTTPMovedPermanently, # 301
       Net::HTTPFound, # 302
       Net::HTTPSeeOther, # 303
       Net::HTTPTemporaryRedirect # 307
    # A redirect without a usable Location header is a server error; report
    # it as an HTTPError with the response attached instead of letting
    # URI.parse fail on nil or garbage.
    loc_uri = begin
      location = resp['location']
      location && URI.parse(location)
    rescue URI::InvalidURIError
      nil
    end
    unless loc_uri
      raise OpenURI::HTTPError.new(io.status.join(' ') + ' (Invalid Location URI)', io)
    end
    throw :open_uri_redirect, loc_uri
  else
    raise OpenURI::HTTPError.new(io.status.join(' '), io)
  end
end
|
---|
| 290 |
|
---|
# Raised for HTTP responses that are neither successful nor a followed
# redirect. The response body and meta-information stay reachable via #io.
class HTTPError < StandardError
  # The IO-like object passed to the constructor.
  attr_reader :io

  # message:: the exception message.
  # io::      the object to expose through #io.
  def initialize(message, io)
    super(message)
    @io = io
  end
end
|
---|
| 298 |
|
---|
# Accumulates a response body. Data is kept in a StringIO until more than
# StringMax units (as counted by String#length) have been appended, after
# which it is moved to a Tempfile.
class Buffer # :nodoc:
  # Threshold above which the in-memory body is moved to a Tempfile.
  StringMax = 10240

  # Running total of String#length over everything appended so far.
  attr_reader :size

  def initialize
    @io = StringIO.new
    @size = 0
  end

  # Appends +str+. When the running size first exceeds StringMax the
  # content gathered so far is copied into a binary-mode Tempfile, which
  # then replaces the StringIO (meta-information is carried over when the
  # StringIO already had it).
  def <<(str)
    @io << str
    @size += str.length
    if StringMax < @size && StringIO === @io
      require 'tempfile'
      spill = Tempfile.new('open-uri')
      spill.binmode
      Meta.init spill, @io if @io.respond_to? :meta
      spill << @io.string
      @io = spill
    end
  end

  # Returns the underlying IO, extending it with Meta on first access.
  def io
    Meta.init @io unless @io.respond_to? :meta
    @io
  end
end
|
---|
| 325 |
|
---|
| 326 | # Mixin for holding meta-information.
|
---|
module Meta
  # Extends +obj+ with Meta and resets its base URI and header table.
  # When +src+ is given, its status, base_uri and header fields are copied
  # onto +obj+.
  def Meta.init(obj, src=nil) # :nodoc:
    obj.extend Meta
    obj.instance_eval {
      @base_uri = nil
      @meta = {}
    }
    if src
      obj.status = src.status
      obj.base_uri = src.base_uri
      src.meta.each {|name, value|
        obj.meta_add_field(name, value)
      }
    end
  end

  # returns an Array which consists of status code and message.
  attr_accessor :status

  # returns a URI which is the base of relative URIs in the data.
  # It may differ from the URI supplied by a user because of redirection.
  attr_accessor :base_uri

  # returns a Hash which represents header fields.
  # The Hash keys are downcased for canonicalization.
  attr_reader :meta

  # Stores header field +name+ (downcased) with +value+, replacing any
  # earlier value stored under the same name.
  def meta_add_field(name, value) # :nodoc:
    @meta[name.downcase] = value
  end

  # returns a Time which represents the Last-Modified field,
  # or nil when the field is absent.
  def last_modified
    if v = @meta['last-modified']
      Time.httpdate(v)
    else
      nil
    end
  end

  RE_LWS = /[\r\n\t ]+/n
  RE_TOKEN = %r{[^\x00- ()<>@,;:\\"/\[\]?={}\x7f]+}n
  RE_QUOTED_STRING = %r{"(?:[\r\n\t !#-\[\]-~\x80-\xff]|\\[\x00-\x7f])*"}n
  RE_PARAMETERS = %r{(?:;#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?=#{RE_LWS}?(?:#{RE_TOKEN}|#{RE_QUOTED_STRING})#{RE_LWS}?)*}n

  # Parses the Content-Type field.
  #
  # Returns ["type/subtype", [attribute, value], ...] with the type,
  # subtype and attribute names downcased, or nil when the field is absent
  # or does not match the expected syntax. Quoted-string values are
  # returned unquoted: the surrounding double quotes are removed and
  # backslash escapes are resolved.
  def content_type_parse # :nodoc:
    v = @meta['content-type']
    # The last (?:;#{RE_LWS}?)? matches extra ";" which violates RFC2045.
    if v && %r{\A#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?/(#{RE_TOKEN})#{RE_LWS}?(#{RE_PARAMETERS})(?:;#{RE_LWS}?)?\z}no =~ v
      type = $1.downcase
      subtype = $2.downcase
      parameters = []
      $3.scan(/;#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?=#{RE_LWS}?(?:(#{RE_TOKEN})|(#{RE_QUOTED_STRING}))/no) {|att, val, qval|
        if qval
          # qval still carries its delimiting quotes; drop them before
          # resolving escapes. The regexp needs the /n flag because it
          # names raw bytes \x80-\xff.
          val = qval[1...-1].gsub(/[\r\n\t !#-\[\]-~\x80-\xff]+|(\\[\x00-\x7f])/n) { $1 ? $1[1,1] : $& }
        end
        parameters << [att.downcase, val]
      }
      ["#{type}/#{subtype}", *parameters]
    else
      nil
    end
  end

  # returns "type/subtype" which is the MIME Content-Type.
  # It is downcased for canonicalization.
  # Content-Type parameters are stripped.
  # 'application/octet-stream' is returned when the field is absent or
  # cannot be parsed.
  def content_type
    type, *parameters = content_type_parse
    type || 'application/octet-stream'
  end

  # returns the charset parameter of the Content-Type field.
  # It is downcased for canonicalization.
  #
  # If the charset parameter is not given but a block is given,
  # the block is called and its result is returned.
  # It can be used to guess the charset.
  #
  # If neither the charset parameter nor a block is given,
  # nil is returned except for a text type in HTTP.
  # In that case, "iso-8859-1" is returned as defined by RFC2616 3.7.1.
  def charset
    type, *parameters = content_type_parse
    if pair = parameters.assoc('charset')
      pair.last.downcase
    elsif block_given?
      yield
    elsif type && %r{\Atext/} =~ type &&
          @base_uri && /\Ahttp\z/i =~ @base_uri.scheme
      "iso-8859-1" # RFC2616 3.7.1
    else
      nil
    end
  end

  # returns the list of encodings in the Content-Encoding field
  # as an Array of String.
  # The encodings are downcased for canonicalization.
  # An empty Array is returned when the field is absent.
  def content_encoding
    v = @meta['content-encoding']
    if v && %r{\A#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?(?:,#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?)*}o =~ v
      v.scan(RE_TOKEN).map {|content_coding| content_coding.downcase}
    else
      []
    end
  end
end
|
---|
| 433 |
|
---|
| 434 | # Mixin for HTTP and FTP URIs.
|
---|
| 435 | module OpenRead
|
---|
| 436 | # OpenURI::OpenRead#open provides `open' for URI::HTTP and URI::FTP.
|
---|
| 437 | #
|
---|
| 438 | # OpenURI::OpenRead#open takes optional 3 arguments as:
|
---|
| 439 | # OpenURI::OpenRead#open([mode [, perm]] [, options]) [{|io| ... }]
|
---|
| 440 | #
|
---|
| 441 | # `mode', `perm' is same as Kernel#open.
|
---|
| 442 | #
|
---|
| 443 | # However, `mode' must be read mode because OpenURI::OpenRead#open doesn't
|
---|
| 444 | # support write mode (yet).
|
---|
| 445 | # Also `perm' is just ignored because it is meaningful only for file
|
---|
| 446 | # creation.
|
---|
| 447 | #
|
---|
| 448 | # `options' must be a hash.
|
---|
| 449 | #
|
---|
| 450 | # Each pairs which key is a string in the hash specify a extra header
|
---|
| 451 | # field for HTTP.
|
---|
| 452 | # I.e. it is ignored for FTP without HTTP proxy.
|
---|
| 453 | #
|
---|
| 454 | # The hash may include other options which key is a symbol:
|
---|
| 455 | #
|
---|
| 456 | # [:proxy]
|
---|
| 457 | # Synopsis:
|
---|
| 458 | # :proxy => "http://proxy.foo.com:8000/"
|
---|
| 459 | # :proxy => URI.parse("http://proxy.foo.com:8000/")
|
---|
| 460 | # :proxy => true
|
---|
| 461 | # :proxy => false
|
---|
| 462 | # :proxy => nil
|
---|
| 463 | #
|
---|
| 464 | # If :proxy option is specified, the value should be String, URI,
|
---|
| 465 | # boolean or nil.
|
---|
| 466 | # When String or URI is given, it is treated as proxy URI.
|
---|
| 467 | # When true is given or the option itself is not specified,
|
---|
| 468 | # environment variable `scheme_proxy' is examined.
|
---|
| 469 | # `scheme' is replaced by `http', `https' or `ftp'.
|
---|
| 470 | # When false or nil is given, the environment variables are ignored and
|
---|
| 471 | # connection will be made to a server directly.
|
---|
| 472 | #
|
---|
| 473 | # [:http_basic_authentication]
|
---|
| 474 | # Synopsis:
|
---|
| 475 | # :http_basic_authentication=>[user, password]
|
---|
| 476 | #
|
---|
| 477 | # If :http_basic_authentication is specified,
|
---|
| 478 | # the value should be an array which contains 2 strings:
|
---|
| 479 | # username and password.
|
---|
| 480 | # It is used for HTTP Basic authentication defined by RFC 2617.
|
---|
| 481 | #
|
---|
| 482 | # [:content_length_proc]
|
---|
| 483 | # Synopsis:
|
---|
| 484 | # :content_length_proc => lambda {|content_length| ... }
|
---|
| 485 | #
|
---|
| 486 | # If :content_length_proc option is specified, the option value procedure
|
---|
| 487 | # is called before actual transfer is started.
|
---|
| 488 | # It takes one argument which is expected content length in bytes.
|
---|
| 489 | #
|
---|
| 490 | # If two or more transfer is done by HTTP redirection, the procedure
|
---|
| 491 | # is called only once, for the last transfer.
|
---|
| 492 | #
|
---|
| 493 | # When expected content length is unknown, the procedure is called with
|
---|
| 494 | # nil.
|
---|
| 495 | # This happens when the HTTP response has no Content-Length header.
|
---|
| 496 | #
|
---|
| 497 | # [:progress_proc]
|
---|
| 498 | # Synopsis:
|
---|
| 499 | # :progress_proc => lambda {|size| ...}
|
---|
| 500 | #
|
---|
| 501 | # If :progress_proc option is specified, the proc is called with one
|
---|
| 502 | # argument each time when `open' gets content fragment from network.
|
---|
| 503 | # The argument `size' is the accumulated transferred size in bytes.
|
---|
| 504 | #
|
---|
| 505 | # If two or more transfer is done by HTTP redirection, the procedure
|
---|
| 506 | # is called only once, for the last transfer.
|
---|
| 507 | #
|
---|
| 508 | # :progress_proc and :content_length_proc are intended to be used for
|
---|
| 509 | # progress bar.
|
---|
| 510 | # For example, it can be implemented as follows using Ruby/ProgressBar.
|
---|
| 511 | #
|
---|
| 512 | # pbar = nil
|
---|
| 513 | # open("http://...",
|
---|
| 514 | # :content_length_proc => lambda {|t|
|
---|
| 515 | # if t && 0 < t
|
---|
| 516 | # pbar = ProgressBar.new("...", t)
|
---|
| 517 | # pbar.file_transfer_mode
|
---|
| 518 | # end
|
---|
| 519 | # },
|
---|
| 520 | # :progress_proc => lambda {|s|
|
---|
| 521 | # pbar.set s if pbar
|
---|
| 522 | # }) {|f| ... }
|
---|
| 523 | #
|
---|
| 524 | # OpenURI::OpenRead#open returns an IO like object if block is not given.
|
---|
| 525 | # Otherwise it yields the IO object and return the value of the block.
|
---|
| 526 | # The IO object is extended with OpenURI::Meta.
|
---|
def open(*rest, &block)
  # Pure delegation: self is the resource to open; the optional
  # mode/perm/options arguments and the block are passed through unchanged.
  OpenURI.open_uri(self, *rest, &block)
end
|
---|
| 530 |
|
---|
| 531 | # OpenURI::OpenRead#read([options]) reads a content referenced by self and
|
---|
| 532 | # returns the content as string.
|
---|
| 533 | # The string is extended with OpenURI::Meta.
|
---|
| 534 | # The argument `options' is same as OpenURI::OpenRead#open.
|
---|
def read(options={})
  self.open(options) do |stream|
    content = stream.read
    # Carry the stream's meta-information over to the returned string.
    Meta.init content, stream
    content
  end
end
|
---|
| 542 | end
|
---|
| 543 | end
|
---|
| 544 |
|
---|
| 545 | module URI
|
---|
| 546 | class Generic
|
---|
| 547 | # returns a proxy URI.
|
---|
| 548 | # The proxy URI is obtained from environment variables such as http_proxy,
|
---|
| 549 | # ftp_proxy, no_proxy, etc.
|
---|
| 550 | # If there is no proper proxy, nil is returned.
|
---|
| 551 | #
|
---|
| 552 | # Note that capitalized variables (HTTP_PROXY, FTP_PROXY, NO_PROXY, etc.)
|
---|
| 553 | # are examined too.
|
---|
| 554 | #
|
---|
| 555 | # But http_proxy and HTTP_PROXY is treated specially under CGI environment.
|
---|
| 556 | # It's because HTTP_PROXY may be set by Proxy: header.
|
---|
| 557 | # So HTTP_PROXY is not used.
|
---|
| 558 | # http_proxy is not used too if the variable is case insensitive.
|
---|
| 559 | # CGI_HTTP_PROXY can be used instead.
|
---|
# Returns the proxy URI to use for this URI, or nil for a direct
# connection.
#
# The candidate is read from the environment variable "<scheme>_proxy"
# (for example http_proxy or ftp_proxy), falling back to the upper-case
# name. An unset or empty variable means "no proxy".
#
# When REQUEST_METHOD is set (CGI) and the scheme is http, HTTP_PROXY is
# never trusted, because it may have been derived from a request's Proxy:
# header; http_proxy is used only when it is distinguishable from
# HTTP_PROXY, and CGI_HTTP_PROXY is consulted as a fallback.
#
# nil is also returned when the host resolves to a loopback address
# (127.* or ::1), or when the host matches an entry of no_proxy/NO_PROXY.
# Entries are separated by commas and/or whitespace, may carry a ":port"
# suffix, and match the host itself or any subdomain of it; a leading dot
# on an entry is ignored.
def find_proxy
  name = self.scheme.downcase + '_proxy'
  proxy_uri = nil
  if name == 'http_proxy' && ENV.include?('REQUEST_METHOD') # CGI?
    # HTTP_PROXY conflicts with *_proxy for proxy settings and
    # HTTP_* for header information in CGI.
    # So it should be careful to use it.
    pairs = ENV.reject {|k, v| /\Ahttp_proxy\z/i !~ k }
    case pairs.length
    when 0 # no proxy setting anyway.
      proxy_uri = nil
    when 1
      k, _v = pairs.shift
      if k == 'http_proxy' && ENV[k.upcase] == nil
        # http_proxy is safe to use because ENV is case sensitive.
        proxy_uri = ENV[name]
      else
        proxy_uri = nil
      end
    else # http_proxy is safe to use because ENV is case sensitive.
      proxy_uri = ENV[name]
    end
    if !proxy_uri
      # Use CGI_HTTP_PROXY. cf. libwww-perl.
      proxy_uri = ENV["CGI_#{name.upcase}"]
    end
  elsif name == 'http_proxy'
    unless proxy_uri = ENV[name]
      if proxy_uri = ENV[name.upcase]
        warn 'The environment variable HTTP_PROXY is discouraged. Use http_proxy.'
      end
    end
  else
    proxy_uri = ENV[name] || ENV[name.upcase]
  end

  # An empty variable is a common way to switch the proxy off.
  return nil if proxy_uri.nil? || proxy_uri.empty?

  if self.host
    require 'socket'
    begin
      addr = IPSocket.getaddress(self.host)
      return nil if /\A127\.|\A::1\z/ =~ addr
    rescue SocketError
      # Unresolvable host: leave the decision to the proxy.
    end
  end

  proxy_uri = URI.parse(proxy_uri)
  name = 'no_proxy'
  if no_proxy = ENV[name] || ENV[name.upcase]
    # (?!\.) skips a leading dot and \s is excluded from the host part, so
    # ".example.com" and "a.com, b.com" style lists match as intended.
    no_proxy.scan(/(?!\.)([^:,\s]+)(?::(\d+))?/) {|host, port|
      if /(\A|\.)#{Regexp.quote host}\z/i =~ self.host &&
         (!port || self.port == port.to_i)
        return nil
      end
    }
  end
  proxy_uri
end
|
---|
| 622 | end
|
---|
| 623 |
|
---|
class HTTP
  include OpenURI::OpenRead

  # Fetches this URI into +buf+ by handing it, together with +proxy+ and
  # +options+, to OpenURI.open_http.
  def buffer_open(buf, proxy, options) # :nodoc:
    OpenURI.open_http(buf, self, proxy, options)
  end
end
|
---|
| 631 |
|
---|
class FTP
  # Retrieves the file named by this URI into +buf+.
  #
  # When +proxy+ is given the request is sent through it with
  # OpenURI.open_http and nothing else is done here. Otherwise the file is
  # fetched over FTP: connect to host and port, log in (anonymous unless
  # the URI carries userinfo), CWD through each path segment, optionally
  # set the transfer type, then RETR the last path segment.
  #
  # options[:content_length_proc] is called with the size reported by the
  # server before the transfer; options[:progress_proc] is called with the
  # accumulated size after each received fragment.
  #
  # Raises ArgumentError when the path has no filename, when a path segment
  # contains CR or LF after percent-decoding, or when the typecode is not
  # one of a, i, d.
  def buffer_open(buf, proxy, options) # :nodoc:
    if proxy
      OpenURI.open_http(buf, self, proxy, options)
      return
    end

    directories = self.path.split(%r{/}, -1)
    directories.shift if directories[0] == '' # strip a field before leading slash
    directories.each {|d|
      d.gsub!(/%([0-9A-Fa-f][0-9A-Fa-f])/) { [$1].pack("H2") }
    }
    unless filename = directories.pop
      raise ArgumentError, "no filename: #{self.inspect}"
    end
    # CR/LF inside a segment would let the URI smuggle extra FTP commands.
    directories.each {|d|
      if /[\r\n]/ =~ d
        raise ArgumentError, "invalid directory: #{d.inspect}"
      end
    }
    if /[\r\n]/ =~ filename
      raise ArgumentError, "invalid filename: #{filename.inspect}"
    end
    typecode = self.typecode
    if typecode && /\A[aid]\z/ !~ typecode
      raise ArgumentError, "invalid typecode: #{typecode.inspect}"
    end

    # Loaded only once the arguments are known to be valid.
    require 'net/ftp'

    # The access sequence is defined by RFC 1738
    ftp = Net::FTP.new
    begin
      # Honour a non-default port given in the URI.
      ftp.connect(self.host, self.port || Net::FTP::FTP_PORT)
      # todo: extract user/passwd from .netrc.
      user = 'anonymous'
      passwd = nil
      # Limit 2: everything after the first colon is the password.
      user, passwd = self.userinfo.split(/:/, 2) if self.userinfo
      ftp.login(user, passwd)
      directories.each {|cwd|
        ftp.voidcmd("CWD #{cwd}")
      }
      if typecode
        # xxx: typecode D is not handled.
        ftp.voidcmd("TYPE #{typecode.upcase}")
      end
      if options[:content_length_proc]
        options[:content_length_proc].call(ftp.size(filename))
      end
      ftp.retrbinary("RETR #{filename}", 4096) { |str|
        buf << str
        options[:progress_proc].call(buf.size) if options[:progress_proc]
      }
    ensure
      # Release the control connection even when a step above raised.
      ftp.close
    end
    buf.io.rewind
  end

  include OpenURI::OpenRead
end
|
---|
| 688 | end
|
---|