source: extensions/gsdl-video/trunk/installed/cmdline/lib/ruby/1.8/rss/parser.rb@ 18425

Last change on this file since 18425 was 18425, checked in by davidb, 15 years ago

Video extension to Greenstone

File size: 12.0 KB
Line 
1require "forwardable"
2require "open-uri"
3
4require "rss/rss"
5
6module RSS
7
# Raised when the document handed to the parser is not well-formed XML.
class NotWellFormedError < Error
  # Line number and element name passed to the constructor (either may
  # be nil when the location is unknown).
  attr_reader :line, :element

  # Create a new NotWellFormedError for an error at +line+
  # in +element+. Both are optional; when at least one is given the
  # location is appended to the message. If a block is given, its
  # return value ends up on a separate line of the error message.
  def initialize(line=nil, element=nil)
    # Store the location so the attr_readers above actually report it;
    # previously they were declared but never assigned.
    @line = line
    @element = element

    message = "This is not well formed XML"
    if element or line
      message << "\nerror occurred"
      message << " in #{element}" if element
      message << " at about #{line} line" if line
    end
    message << "\n#{yield}" if block_given?
    super(message)
  end
end
25
# Raised when none of the libraries listed in
# AVAILABLE_PARSER_LIBRARIES could be loaded.
class XMLParserNotFound < Error
  # Build the error; the message lists the libraries that were tried.
  def initialize
    libraries = AVAILABLE_PARSER_LIBRARIES.inspect
    super("available XML parser was not found in #{libraries}.")
  end
end
32
# Raised when a parser that is not part of AVAILABLE_PARSERS is
# selected.
class NotValidXMLParser < Error
  # +parser+ is the rejected value; the message also lists the
  # parsers that are available, with matching singular/plural wording.
  def initialize(parser)
    verb = if AVAILABLE_PARSERS.size > 1 then "s are " else " is " end
    super("#{parser} is not an available XML parser. " \
          "Available XML parser#{verb}#{AVAILABLE_PARSERS.inspect}.")
  end
end
41
# Raised when the prefix used on a tag is not bound to the namespace
# URI that the tag requires.
class NSError < InvalidRSSError
  # tag:: name of the offending tag
  # prefix:: prefix that was used on it
  # uri:: namespace URI that was required
  attr_reader :tag, :prefix, :uri

  def initialize(tag, prefix, require_uri)
    @tag = tag
    @prefix = prefix
    @uri = require_uri
    super("prefix <#{prefix}> doesn't associate uri <#{require_uri}> in tag <#{tag}>")
  end
end
50
# Front end for parsing: picks a concrete parser class, gives it the
# XML obtained from the supplied source and forwards the parsing API
# (parse, rss, do_validate, ignore_unknown_element) to it.
class Parser

  extend Forwardable

  class << self

    # Parser class selected through default_parser=; nil means
    # "fall back to the first entry of AVAILABLE_PARSERS".
    @@default_parser = nil

    # Return the parser class used when none is given explicitly.
    def default_parser
      @@default_parser || AVAILABLE_PARSERS.first
    end

    # Set @@default_parser to new_value if it is one of the
    # available parsers. Else raise NotValidXMLParser error.
    def default_parser=(new_value)
      unless AVAILABLE_PARSERS.include?(new_value)
        raise NotValidXMLParser.new(new_value)
      end
      @@default_parser = new_value
    end

    # Convenience entry point: build a Parser for +rss+ using
    # +parser_class+, apply the two flags and return the result of
    # the underlying parser's #parse.
    def parse(rss, do_validate=true, ignore_unknown_element=true,
              parser_class=default_parser)
      parser = new(rss, parser_class)
      parser.do_validate = do_validate
      parser.ignore_unknown_element = ignore_unknown_element
      parser.parse
    end
  end

  def_delegators(:@parser, :parse, :rss,
                 :ignore_unknown_element,
                 :ignore_unknown_element=, :do_validate,
                 :do_validate=)

  # +rss+ may be XML text, a URI (object or string), a readable file
  # name, or an object responding to #read; +parser_class+ is
  # instantiated with the resulting XML.
  def initialize(rss, parser_class=self.class.default_parser)
    @parser = parser_class.new(normalize_rss(rss))
  end

  private

  # Try to get the XML associated with +rss+.
  # Return +rss+ if it already looks like XML, or treat it as a URI,
  # or a file to get the XML. Anything else is returned unchanged.
  def normalize_rss(rss)
    return rss if maybe_xml?(rss)

    uri = to_uri(rss)

    if uri.respond_to?(:read)
      uri.read
    elsif !tainted_source?(rss) and File.readable?(rss)
      File.open(rss) {|f| f.read}
    else
      rss
    end
  end

  # True only when the running Ruby still tracks taint and +rss+ is
  # tainted. Object#tainted? no longer exists on recent Rubies, so it
  # must not be called unconditionally.
  def tainted_source?(rss)
    rss.respond_to?(:tainted?) and rss.tainted?
  end

  # maybe_xml? tests if source is a string that looks like XML.
  def maybe_xml?(source)
    source.is_a?(String) and /</ =~ source
  end

  # Attempt to convert rss to a URI, but just return it if
  # there's a ::URI::Error. Non-String sources (for example IO-like
  # objects) are returned untouched: URI() rejects them with an error
  # that is not a ::URI::Error on current Rubies, which would prevent
  # normalize_rss from ever reaching their #read.
  def to_uri(rss)
    return rss if rss.is_a?(::URI::Generic)
    return rss unless rss.is_a?(String)

    begin
      URI(rss)
    rescue ::URI::Error
      rss
    end
  end
end
127
# Shared behaviour of the concrete parsers. Each instance owns one
# listener object and forwards the validation flags and the result to
# it. The class-level +listener+ and the instance-level +_parse+ are
# not defined here; they are expected to be supplied elsewhere
# (presumably by the concrete parser subclasses).
class BaseParser

  # Ask the listener class whether an undefined entity should raise.
  def self.raise_for_undefined_entity?
    listener.raise_for_undefined_entity?
  end

  # +rss+ is the source handed to _parse later on.
  def initialize(rss)
    @rss = rss
    @listener = self.class.listener.new
  end

  # The object built by the listener (nil until parsing produced one).
  def rss
    @listener.rss
  end

  def ignore_unknown_element
    @listener.ignore_unknown_element
  end

  def ignore_unknown_element=(new_value)
    @listener.ignore_unknown_element = new_value
  end

  def do_validate
    @listener.do_validate
  end

  def do_validate=(new_value)
    @listener.do_validate = new_value
  end

  # Run _parse unless the listener already holds a result, then
  # return that result.
  def parse
    _parse if @listener.rss.nil?
    @listener.rss
  end

end
169
# Class-level registry shared by the listeners: which setter handles a
# (uri, tag_name) pair, which uris are registered for a tag name, and
# which class name belongs to a (uri, tag_name) pair.
class BaseListener

  extend Utils

  class << self

    @@setters = {}
    @@registered_uris = {}
    @@class_names = {}

    # return the setter for the uri, tag_name pair, or nil.
    def setter(uri, tag_name)
      tags = @@setters[uri]
      tags && tags[tag_name]
    end


    # return the tag_names for setters associated with uri
    # (an empty Array when nothing is installed for uri).
    def available_tags(uri)
      (@@setters[uri] || {}).keys
    end

    # register uri against this name.
    def register_uri(uri, name)
      @@registered_uris[name] ||= {}
      @@registered_uris[name][uri] = nil
    end

    # test if this uri is registered against this name.
    # A name that was never registered simply answers false.
    def uri_registered?(uri, name)
      uris = @@registered_uris[name]
      !uris.nil? && uris.has_key?(uri)
    end

    # record class_name for the supplied uri and tag_name
    def install_class_name(uri, tag_name, class_name)
      @@class_names[uri] ||= {}
      @@class_names[uri][tag_name] = class_name
    end

    # retrieve class_name for the supplied uri and tag_name.
    # If nothing at all is recorded for uri, fall back to tag_name with
    # its first character upcased. If uri has entries but none for
    # tag_name, the result is nil.
    # NOTE(review): the nil case is kept as it was; confirm whether the
    # capitalized fallback was meant to apply there as well.
    def class_name(uri, tag_name)
      names = @@class_names[uri]
      if names.nil?
        tag_name[0,1].upcase + tag_name[1..-1]
      else
        names[tag_name]
      end
    end

    # Install +setter+ for the uri/name pair and make sure a
    # start_<name> handler exists; the handler is generated with the
    # file and line obtained from get_file_and_line_from_caller(1).
    def install_get_text_element(uri, name, setter)
      install_setter(uri, name, setter)
      def_get_text_element(uri, name, *get_file_and_line_from_caller(1))
    end

    def raise_for_undefined_entity?
      true
    end

    private
    # set the setter for the uri, tag_name pair
    def install_setter(uri, tag_name, setter)
      @@setters[uri] ||= {}
      @@setters[uri][tag_name] = setter
    end

    # Register uri for name and, unless this class already has a
    # private start_<name> method of its own, generate one that
    # dispatches to start_get_text_element when the tag's namespace is
    # registered for name and to start_else_element otherwise.
    def def_get_text_element(uri, name, file, line)
      register_uri(uri, name)
      # Method names come back as Strings or Symbols depending on the
      # Ruby version; compare on their String form so the check works
      # either way and the method is not regenerated needlessly.
      defined_methods = private_instance_methods(false).map {|m| m.to_s}
      unless defined_methods.include?("start_#{name}")
        module_eval(<<-EOT, file, line)
        def start_#{name}(name, prefix, attrs, ns)
          uri = _ns(ns, prefix)
          if self.class.uri_registered?(uri, #{name.inspect})
            start_get_text_element(name, prefix, ns, uri)
          else
            start_else_element(name, prefix, attrs, ns)
          end
        end
        EOT
        __send__("private", "start_#{name}")
      end
    end

  end

end
262
# Event handlers shared by the parser listeners. The including class
# receives xmldecl / instruction / tag_start / tag_end / text calls and
# this mixin maintains four parallel stacks (namespaces, child tags,
# text, completion procs) while elements are built.
module ListenerMixin

  # Result object; this module only initializes it to nil.
  attr_reader :rss

  attr_accessor :ignore_unknown_element
  attr_accessor :do_validate

  def initialize
    @rss = nil
    @ignore_unknown_element = true
    @do_validate = true
    @ns_stack = [{}]
    @tag_stack = [[]]
    @text_stack = ['']
    @proc_stack = []
    @last_element = nil
    @version = @encoding = @standalone = nil
    @xml_stylesheets = []
  end

  # set instance vars for version, encoding, standalone
  def xmldecl(version, encoding, standalone)
    @version, @encoding, @standalone = version, encoding, standalone
  end

  # Handle a processing instruction. Only "xml-stylesheet" with an
  # href pseudo-attribute is recorded (as an XMLStyleSheet).
  def instruction(name, content)
    if name == "xml-stylesheet"
      params = parse_pi_content(content)
      if params.has_key?("href")
        @xml_stylesheets << XMLStyleSheet.new(*params)
      end
    end
  end

  # Start of an element: split xmlns declarations from ordinary
  # attributes, push a fresh frame on every stack and dispatch to
  # start_<local name> when such a method exists, otherwise to
  # start_else_element.
  def tag_start(name, attributes)
    @text_stack.push('')

    ns = @ns_stack.last.dup
    attrs = {}
    attributes.each do |n, v|
      if /\Axmlns(?:\z|:)/ =~ n
        # What follows "xmlns:" is the declared prefix ("" for the
        # default namespace). Taken from the match data directly so
        # the English library does not have to be loaded.
        ns[Regexp.last_match.post_match] = v
      else
        attrs[n] = v
      end
    end
    @ns_stack.push(ns)

    prefix, local = split_name(name)
    @tag_stack.last.push([_ns(ns, prefix), local])
    @tag_stack.push([])
    if respond_to?("start_#{local}", true)
      __send__("start_#{local}", local, prefix, attrs, ns.dup)
    else
      start_else_element(local, prefix, attrs, ns.dup)
    end
  end

  # End of an element: pop one frame from every stack and run the
  # completion proc (if any) with the collected text and child tags.
  def tag_end(name)
    if DEBUG
      p "end tag #{name}"
      p @tag_stack
    end
    text = @text_stack.pop
    tags = @tag_stack.pop
    pr = @proc_stack.pop
    pr.call(text, tags) unless pr.nil?
    @ns_stack.pop
  end

  # Character data: append to the text of the innermost open element.
  def text(data)
    @text_stack.last << data
  end

  private
  # URI bound to prefix in ns, or "" when the prefix is unbound.
  def _ns(ns, prefix)
    ns.fetch(prefix, "")
  end

  # NOTE: inside the character class "\2" is not a back-reference, so
  # the value part accepts almost any character; the lazy quantifier
  # and the real back-reference after it are what stop the match at
  # the closing quote.
  CONTENT_PATTERN = /\s*([^=]+)=(["'])([^\2]+?)\2/
  # Extract every name="value" pair from content (single or double
  # quotes, as matched by CONTENT_PATTERN). Return a Hash mapping
  # names to values; a repeated name keeps its last value.
  def parse_pi_content(content)
    params = {}
    content.scan(CONTENT_PATTERN) do |name, quote, value|
      params[name] = value
    end
    params
  end

  # Handle an element without a dedicated start_<name> method: if the
  # class of the current element has a constant named after the tag,
  # build that element; otherwise ignore the tag or raise
  # NotExpectedTagError, depending on the validation flags.
  def start_else_element(local, prefix, attrs, ns)
    class_name = self.class.class_name(_ns(ns, prefix), local)
    current_class = @last_element.class
    # Constant names are Strings or Symbols depending on the Ruby
    # version; compare on their String form.
    known = current_class.constants.map {|c| c.to_s}
    if known.include?(class_name)
      next_class = current_class.const_get(class_name)
      start_have_something_element(local, prefix, attrs, ns, next_class)
    else
      if !@do_validate or @ignore_unknown_element
        @proc_stack.push(nil)
      else
        parent = "ROOT ELEMENT???"
        # At the document root @last_element is nil and NilClass has
        # no tag_name; keep the placeholder instead of failing with
        # NoMethodError.
        if current_class.respond_to?(:tag_name) and current_class.tag_name
          parent = current_class.tag_name
        end
        raise NotExpectedTagError.new(local, _ns(ns, prefix), parent)
      end
    end
  end

  NAMESPLIT = /^(?:([\w:][-\w\d.]*):)?([\w:][-\w\d.]*)/
  # Split a qualified name into [prefix, local name]; the prefix is ""
  # when the name has none.
  def split_name(name)
    name =~ NAMESPLIT
    [$1 || '', $2]
  end

  # When validating, raise NSError unless prefix is bound to
  # require_uri in ns.
  def check_ns(tag_name, prefix, ns, require_uri)
    if @do_validate
      if _ns(ns, prefix) == require_uri
        #ns.delete(prefix)
      else
        raise NSError.new(tag_name, prefix, require_uri)
      end
    end
  end

  # Push a completion proc that stores the element's text on the
  # current element through the setter registered for
  # (required_uri, tag_name). If the current element has no such
  # setter, raise NotExpectedTagError when validating strictly.
  def start_get_text_element(tag_name, prefix, ns, required_uri)
    @proc_stack.push Proc.new {|text, tags|
      setter = self.class.setter(required_uri, tag_name)
      if @last_element.respond_to?(setter)
        @last_element.__send__(setter, text.to_s)
      else
        if @do_validate and !@ignore_unknown_element
          raise NotExpectedTagError.new(tag_name, _ns(ns, prefix),
                                        @last_element.tag_name)
        end
      end
    }
  end

  # Build an instance of klass for the element being opened: check its
  # namespace, collect the attributes klass declares (raising
  # MissingAttributeError for a missing required one when validating),
  # attach the new element to the current one and make it current
  # until the element is closed.
  def start_have_something_element(tag_name, prefix, attrs, ns, klass)

    check_ns(tag_name, prefix, ns, klass.required_uri)

    attributes = {}
    klass.get_attributes.each do |a_name, a_uri, required, element_name|

      if a_uri.is_a?(String) or !a_uri.respond_to?(:include?)
        a_uri = [a_uri]
      end

      # Look for a qualified attribute under every prefix bound to one
      # of the accepted uris. Distinct variable names are used so the
      # method's own +prefix+ argument is not overwritten.
      val = nil
      unless a_uri == [""]
        ns.each do |ns_prefix, ns_uri|
          if a_uri.include?(ns_uri)
            val = attrs["#{ns_prefix}:#{a_name}"]
            break if val
          end
        end
      end
      # Fall back to the unqualified attribute when that is allowed.
      if val.nil? and a_uri.include?("")
        val = attrs[a_name]
      end

      if @do_validate and required and val.nil?
        unless a_uri.include?("")
          # Report the attribute under one matching prefix; stop at
          # the first so several matches cannot stack prefixes.
          ns.each do |ns_prefix, ns_uri|
            if a_uri.include?(ns_uri)
              a_name = "#{ns_prefix}:#{a_name}"
              break
            end
          end
        end
        raise MissingAttributeError.new(tag_name, a_name)
      end

      attributes[a_name] = val
    end

    previous = @last_element
    next_element = klass.new(@do_validate, attributes)
    # instance_eval so that set_next_element is reachable even if it
    # is not public.
    previous.instance_eval {set_next_element(tag_name, next_element)}
    @last_element = next_element
    @proc_stack.push Proc.new { |text, tags|
      p(@last_element.class) if DEBUG
      @last_element.content = text if klass.have_content?
      if @do_validate
        @last_element.validate_for_stream(tags, @ignore_unknown_element)
      end
      @last_element = previous
    }
  end

end
454
# Candidate parser back ends as [library to require, parser constant]
# pairs, in order of preference. A list defined beforehand is kept.
unless const_defined?(:AVAILABLE_PARSER_LIBRARIES)
  AVAILABLE_PARSER_LIBRARIES = [
    ["rss/xmlparser", :XMLParserParser],
    ["rss/xmlscanner", :XMLScanParser],
    ["rss/rexmlparser", :REXMLParser],
  ]
end

# Parser classes whose library could actually be loaded.
AVAILABLE_PARSERS = []

AVAILABLE_PARSER_LIBRARIES.each do |library, parser_name|
  begin
    require library
    AVAILABLE_PARSERS << const_get(parser_name)
  rescue LoadError
    # This back end is not installed; try the next candidate.
  end
end

# Without any back end the module cannot parse anything.
raise XMLParserNotFound if AVAILABLE_PARSERS.empty?
476end
Note: See TracBrowser for help on using the repository browser.