[18425] | 1 | require "forwardable"
|
---|
| 2 | require "open-uri"
|
---|
| 3 |
|
---|
| 4 | require "rss/rss"
|
---|
| 5 |
|
---|
| 6 | module RSS
|
---|
| 7 |
|
---|
# Raised when the XML being parsed is not well formed.
#
# +line+ and +element+ expose the location that was passed to the
# constructor (either may be nil when unknown).
class NotWellFormedError < Error
  attr_reader :line, :element

  # Create a new NotWellFormedError for an error at +line+
  # in +element+.  If a block is given the return value of
  # the block ends up in the error message.
  def initialize(line=nil, element=nil)
    # Store the location so the attr_readers declared above return what
    # the caller supplied instead of always being nil.
    @line = line
    @element = element

    message = "This is not well formed XML"
    if element or line
      message += "\nerror occurred"
      message += " in #{element}" if element
      message += " at about #{line} line" if line
    end
    message += "\n#{yield}" if block_given?
    super(message)
  end
end
|
---|
| 25 |
|
---|
# Raised when none of the libraries listed in
# AVAILABLE_PARSER_LIBRARIES could be used.
class XMLParserNotFound < Error
  # Build the message from the inspected list of candidate libraries.
  def initialize
    candidates = AVAILABLE_PARSER_LIBRARIES.inspect
    super("available XML parser was not found in #{candidates}.")
  end
end
|
---|
| 32 |
|
---|
# Raised when a parser that is not in AVAILABLE_PARSERS is requested.
class NotValidXMLParser < Error
  # +parser+ is the rejected value; the message also lists the parsers
  # that are available, with singular/plural wording chosen by count.
  def initialize(parser)
    wording = if AVAILABLE_PARSERS.size > 1 then "s are " else " is " end
    super("#{parser} is not an available XML parser. " \
          "Available XML parser#{wording}#{AVAILABLE_PARSERS.inspect}.")
  end
end
|
---|
| 41 |
|
---|
# Raised when the prefix of a tag is not bound to the URI that the tag
# requires.
class NSError < InvalidRSSError
  attr_reader :tag, :prefix, :uri

  # Remember the offending +tag+ and +prefix+ together with the URI
  # that was required (exposed through #uri), then build the message.
  def initialize(tag, prefix, require_uri)
    @tag = tag
    @prefix = prefix
    @uri = require_uri
    super("prefix <#{prefix}> doesn't associate uri <#{require_uri}> in tag <#{tag}>")
  end
end
|
---|
| 50 |
|
---|
# Front end that wraps one of the parser classes in AVAILABLE_PARSERS.
#
# The source handed to Parser.new / Parser.parse may be XML text, a URI
# (object or string) whose target can be read, or the path of a readable
# file; it is turned into XML text before the wrapped parser sees it.
class Parser

  extend Forwardable

  class << self

    @@default_parser = nil

    # Return the parser class used when none is given explicitly: the
    # value stored by default_parser=, or else the first entry of
    # AVAILABLE_PARSERS.
    def default_parser
      @@default_parser || AVAILABLE_PARSERS.first
    end

    # Set @@default_parser to new_value if it is one of the
    # available parsers. Else raise NotValidXMLParser error.
    def default_parser=(new_value)
      if AVAILABLE_PARSERS.include?(new_value)
        @@default_parser = new_value
      else
        raise NotValidXMLParser.new(new_value)
      end
    end

    # Convenience entry point: build a Parser for +rss+, apply the
    # +do_validate+ and +ignore_unknown_element+ settings, and return
    # the result of parsing.
    def parse(rss, do_validate=true, ignore_unknown_element=true,
              parser_class=default_parser)
      parser = new(rss, parser_class)
      parser.do_validate = do_validate
      parser.ignore_unknown_element = ignore_unknown_element
      parser.parse
    end
  end

  # Everything below is forwarded to the wrapped parser instance.
  def_delegators(:@parser, :parse, :rss,
                 :ignore_unknown_element,
                 :ignore_unknown_element=, :do_validate,
                 :do_validate=)

  # Wrap a new +parser_class+ instance around the XML obtained from
  # +rss+ (see normalize_rss).
  def initialize(rss, parser_class=self.class.default_parser)
    @parser = parser_class.new(normalize_rss(rss))
  end

  private

  # Try to get the XML associated with +rss+.
  # Return +rss+ if it already looks like XML, or treat it as a URI,
  # or a file to get the XML,
  def normalize_rss(rss)
    return rss if maybe_xml?(rss)

    uri = to_uri(rss)

    if uri.respond_to?(:read)
      uri.read
    elsif !tainted_source?(rss) and File.readable?(rss)
      File.open(rss) {|f| f.read}
    else
      rss
    end
  end

  # Tell whether +source+ is tainted.  Object#tainted? no longer exists
  # on Ruby 3.2 and later, so the check is skipped (and +source+ treated
  # as untainted) when the method is missing instead of raising
  # NoMethodError.
  def tainted_source?(source)
    source.respond_to?(:tainted?) and source.tainted?
  end

  # maybe_xml? tests if source is a string that looks like XML.
  def maybe_xml?(source)
    source.is_a?(String) and /</ =~ source
  end

  # Attempt to convert rss to a URI, but just return it if
  # there's a ::URI::Error
  def to_uri(rss)
    return rss if rss.is_a?(::URI::Generic)

    begin
      URI(rss)
    rescue ::URI::Error
      rss
    end
  end
end
|
---|
| 127 |
|
---|
# Common behaviour for the concrete parser classes.
#
# The code here relies on two things that are not defined in this class:
# a class-level +listener+ method returning the listener class to
# instantiate, and an instance-level +_parse+ method that performs the
# actual parse.
class BaseParser

  # Ask the listener class whether an undefined entity should raise.
  def self.raise_for_undefined_entity?
    listener.raise_for_undefined_entity?
  end

  # Keep the raw +rss+ source and create a fresh listener for it.
  def initialize(rss)
    @listener = self.class.listener.new
    @rss = rss
  end

  # Run the parse unless the listener already holds a result, then
  # return the listener's result.
  def parse
    _parse if @listener.rss.nil?
    @listener.rss
  end

  # The listener's current result.
  def rss
    @listener.rss
  end

  # Read the listener's do_validate setting.
  def do_validate
    @listener.do_validate
  end

  # Change the listener's do_validate setting.
  def do_validate=(new_value)
    @listener.do_validate = new_value
  end

  # Read the listener's ignore_unknown_element setting.
  def ignore_unknown_element
    @listener.ignore_unknown_element
  end

  # Change the listener's ignore_unknown_element setting.
  def ignore_unknown_element=(new_value)
    @listener.ignore_unknown_element = new_value
  end

end
|
---|
| 169 |
|
---|
# Base class for parser listeners.
#
# At class level it keeps three registries, stored in class variables:
# setters by (uri, tag name), registered URIs by element name, and
# class names by (uri, tag name).
class BaseListener

  extend Utils

  class << self

    @@setters = {}
    @@registered_uris = {}
    @@class_names = {}

    # return the setter for the uri, tag_name pair, or nil.
    def setter(uri, tag_name)
      (@@setters[uri] || {})[tag_name]
    end

    # return the tag_names for setters associated with uri
    # (an empty Array when nothing is installed for uri).
    def available_tags(uri)
      (@@setters[uri] || {}).keys
    end

    # register uri against this name.
    def register_uri(uri, name)
      @@registered_uris[name] ||= {}
      @@registered_uris[name][uri] = nil
    end

    # test if this uri is registered against this name.
    # A name that was never registered simply yields false.
    def uri_registered?(uri, name)
      (@@registered_uris[name] || {}).has_key?(uri)
    end

    # record class_name for the supplied uri and tag_name
    def install_class_name(uri, tag_name, class_name)
      @@class_names[uri] ||= {}
      @@class_names[uri][tag_name] = class_name
    end

    # retrieve class_name for the supplied uri and tag_name
    # If it doesn't exist, capitalize the tag_name
    def class_name(uri, tag_name)
      installed = (@@class_names[uri] || {})[tag_name]
      # Fall back both when the uri is unknown and when the uri is known
      # but has no entry for this tag_name.
      return installed if installed

      tag_name[0,1].upcase + tag_name[1..-1]
    end

    # Install +setter+ for the uri, name pair and make sure a private
    # start_<name> handler exists.  The caller's file and line
    # (obtained from get_file_and_line_from_caller, presumably provided
    # by Utils) are attached to the generated handler.
    def install_get_text_element(uri, name, setter)
      install_setter(uri, name, setter)
      def_get_text_element(uri, name, *get_file_and_line_from_caller(1))
    end

    # Listeners raise for undefined entities unless a subclass
    # overrides this.
    def raise_for_undefined_entity?
      true
    end

    private
    # set the setter for the uri, tag_name pair
    def install_setter(uri, tag_name, setter)
      @@setters[uri] ||= {}
      @@setters[uri][tag_name] = setter
    end

    # Register uri for name and, unless this class already defines it,
    # generate a private start_<name> method.  The generated method
    # collects the element text when the element's namespace URI is
    # registered for name, and otherwise falls back to
    # start_else_element.
    def def_get_text_element(uri, name, file, line)
      register_uri(uri, name)
      method_name = "start_#{name}"
      # private_instance_methods returns Symbols on Ruby 1.9 and later
      # (Strings before that); compare by name so the guard works on
      # both and the handler is only generated once per class.
      already_defined = private_instance_methods(false).map {|m| m.to_s }
      unless already_defined.include?(method_name)
        module_eval(<<-EOT, file, line)
        def #{method_name}(name, prefix, attrs, ns)
          uri = _ns(ns, prefix)
          if self.class.uri_registered?(uri, #{name.inspect})
            start_get_text_element(name, prefix, ns, uri)
          else
            start_else_element(name, prefix, attrs, ns)
          end
        end
        EOT
        __send__("private", method_name)
      end
    end

  end

end
|
---|
| 262 |
|
---|
# Event handlers shared by the listeners of the different XML parsers.
#
# The parser calls xmldecl, instruction, tag_start, text and tag_end;
# the mixin keeps stacks of namespace bindings, child tags, text and
# per-element callbacks, and builds the element objects reachable from
# @last_element.  The including class is expected to provide the class
# methods used below (class_name, setter, uri_registered?).
module ListenerMixin

  attr_reader :rss

  attr_accessor :ignore_unknown_element
  attr_accessor :do_validate

  def initialize
    @rss = nil
    @ignore_unknown_element = true
    @do_validate = true
    @ns_stack = [{}]
    @tag_stack = [[]]
    @text_stack = ['']
    @proc_stack = []
    @last_element = nil
    @version = @encoding = @standalone = nil
    @xml_stylesheets = []
  end

  # set instance vars for version, encoding, standalone
  def xmldecl(version, encoding, standalone)
    @version, @encoding, @standalone = version, encoding, standalone
  end

  # Handle a processing instruction.  Only "xml-stylesheet"
  # instructions that carry an href are recorded, as XMLStyleSheet
  # objects in @xml_stylesheets.
  def instruction(name, content)
    if name == "xml-stylesheet"
      params = parse_pi_content(content)
      if params.has_key?("href")
        @xml_stylesheets << XMLStyleSheet.new(*params)
      end
    end
  end

  # Handle an opening tag: split namespace declarations from ordinary
  # attributes, push the new namespace scope, record the tag on its
  # parent's child list and dispatch to start_<local name> when such a
  # method exists, otherwise to start_else_element.
  def tag_start(name, attributes)
    @text_stack.push('')

    ns = @ns_stack.last.dup
    attrs = {}
    attributes.each do |n, v|
      if /\Axmlns(?:\z|:)/ =~ n
        # The text after "xmlns" / "xmlns:" is the prefix being bound
        # ("" for the default namespace).  Read it from the match data
        # so this does not depend on the English library being loaded.
        ns[$~.post_match] = v
      else
        attrs[n] = v
      end
    end
    @ns_stack.push(ns)

    prefix, local = split_name(name)
    @tag_stack.last.push([_ns(ns, prefix), local])
    @tag_stack.push([])
    if respond_to?("start_#{local}", true)
      __send__("start_#{local}", local, prefix, attrs, ns.dup)
    else
      start_else_element(local, prefix, attrs, ns.dup)
    end
  end

  # Handle a closing tag: pop the state pushed by tag_start and run the
  # callback registered for the element, if any, with the collected
  # text and child tags.
  def tag_end(name)
    if DEBUG
      p "end tag #{name}"
      p @tag_stack
    end
    text = @text_stack.pop
    tags = @tag_stack.pop
    pr = @proc_stack.pop
    pr.call(text, tags) unless pr.nil?
    @ns_stack.pop
  end

  # Append character data to the text of the element being read.
  def text(data)
    @text_stack.last << data
  end

  private
  # URI bound to +prefix+ in +ns+, or "" when the prefix is unbound.
  def _ns(ns, prefix)
    ns.fetch(prefix, "")
  end

  CONTENT_PATTERN = /\s*([^=]+)=(["'])([^\2]+?)\2/
  # Extract every name="value" pair from content.
  # Values may be quoted with double or single quotes (see
  # CONTENT_PATTERN). Return a Hash of name => value.
  def parse_pi_content(content)
    params = {}
    content.scan(CONTENT_PATTERN) do |name, quote, value|
      params[name] = value
    end
    params
  end

  # Handle an element that has no dedicated start_<name> method.
  # If the class of the current element defines a constant named after
  # the element, start a child element of that class.  Otherwise the
  # element is skipped, unless validation is on and unknown elements
  # are not ignored, in which case NotExpectedTagError is raised.
  def start_else_element(local, prefix, attrs, ns)
    class_name = self.class.class_name(_ns(ns, prefix), local)
    current_class = @last_element.class
    if known_class?(current_class, class_name)
      next_class = current_class.const_get(class_name)
      start_have_something_element(local, prefix, attrs, ns, next_class)
    else
      if !@do_validate or @ignore_unknown_element
        @proc_stack.push(nil)
      else
        parent = "ROOT ELEMENT???"
        # Before any element has been started @last_element is nil and
        # its class has no tag_name; keep the placeholder in that case.
        if current_class.respond_to?(:tag_name) and current_class.tag_name
          parent = current_class.tag_name
        end
        raise NotExpectedTagError.new(local, _ns(ns, prefix), parent)
      end
    end
  end

  # Tell whether +target_class+ has a constant called +class_name+.
  # Module#constants returns Symbols on Ruby 1.9 and later (Strings
  # before that), so names are compared as Strings.
  def known_class?(target_class, class_name)
    return false if class_name.nil?
    wanted = class_name.to_s
    target_class.constants.any? {|const| const.to_s == wanted }
  end

  NAMESPLIT = /^(?:([\w:][-\w\d.]*):)?([\w:][-\w\d.]*)/
  # Split a qualified name into [prefix, local name]; the prefix is ""
  # when the name has none.
  def split_name(name)
    name =~ NAMESPLIT
    [$1 || '', $2]
  end

  # When validating, raise NSError unless +prefix+ is bound to
  # +require_uri+ in +ns+.
  def check_ns(tag_name, prefix, ns, require_uri)
    return unless @do_validate
    unless _ns(ns, prefix) == require_uri
      raise NSError.new(tag_name, prefix, require_uri)
    end
  end

  # Register a callback that, when the element ends, passes its text to
  # the setter installed for (required_uri, tag_name) on the current
  # element.  If the current element has no such setter,
  # NotExpectedTagError is raised when validating and not ignoring
  # unknown elements.
  def start_get_text_element(tag_name, prefix, ns, required_uri)
    @proc_stack.push Proc.new {|text, tags|
      setter = self.class.setter(required_uri, tag_name)
      if @last_element.respond_to?(setter)
        @last_element.__send__(setter, text.to_s)
      else
        if @do_validate and !@ignore_unknown_element
          raise NotExpectedTagError.new(tag_name, _ns(ns, prefix),
                                        @last_element.tag_name)
        end
      end
    }
  end

  # Start a child element of class +klass+: check its namespace,
  # collect the attributes declared by klass.get_attributes, create the
  # element, attach it to the current element and make it current until
  # its end tag is seen.
  def start_have_something_element(tag_name, prefix, attrs, ns, klass)

    check_ns(tag_name, prefix, ns, klass.required_uri)

    attributes = {}
    klass.get_attributes.each do |a_name, a_uri, required, _element_name|

      if a_uri.is_a?(String) or !a_uri.respond_to?(:include?)
        a_uri = [a_uri]
      end

      # Look for the attribute under every prefix bound to one of the
      # accepted URIs, stopping at the first value found.
      val = nil
      unless a_uri == [""]
        ns.each do |ns_prefix, ns_uri|
          next unless a_uri.include?(ns_uri)
          val = attrs["#{ns_prefix}:#{a_name}"]
          break if val
        end
      end
      if val.nil? and a_uri.include?("")
        val = attrs[a_name]
      end

      if @do_validate and required and val.nil?
        unless a_uri.include?("")
          # Report the name with the first prefix bound to an accepted
          # URI (only one, even if several prefixes qualify).
          bound = ns.find {|_ns_prefix, ns_uri| a_uri.include?(ns_uri) }
          a_name = "#{bound[0]}:#{a_name}" if bound
        end
        raise MissingAttributeError.new(tag_name, a_name)
      end

      attributes[a_name] = val
    end

    previous = @last_element
    next_element = klass.new(@do_validate, attributes)
    previous.instance_eval {set_next_element(tag_name, next_element)}
    @last_element = next_element
    # Runs at the matching end tag: store the text, validate, and make
    # the parent element current again.
    @proc_stack.push Proc.new { |text, tags|
      p(@last_element.class) if DEBUG
      @last_element.content = text if klass.have_content?
      if @do_validate
        @last_element.validate_for_stream(tags, @ignore_unknown_element)
      end
      @last_element = previous
    }
  end

end
|
---|
| 454 |
|
---|
# Candidate parser back ends as [library to require, parser constant]
# pairs, in order of preference.  Left alone if already defined.
unless const_defined?(:AVAILABLE_PARSER_LIBRARIES)
  AVAILABLE_PARSER_LIBRARIES = [
    ["rss/xmlparser", :XMLParserParser],
    ["rss/xmlscanner", :XMLScanParser],
    ["rss/rexmlparser", :REXMLParser],
  ]
end

# Parser classes whose library could actually be loaded.
AVAILABLE_PARSERS = []

AVAILABLE_PARSER_LIBRARIES.each do |library, parser_name|
  begin
    require library
    AVAILABLE_PARSERS << const_get(parser_name)
  rescue LoadError
    # This back end is not installed; try the next candidate.
  end
end

# At least one back end is required.
raise XMLParserNotFound if AVAILABLE_PARSERS.empty?
|
---|
| 476 | end
|
---|