1 | require 'xmlscan/scanner'
|
---|
2 | require 'stringio'
|
---|
3 |
|
---|
4 | module RSS
|
---|
5 |
|
---|
# RSS parser backend that delegates scanning to the xmlscan library.
class XMLScanParser < BaseParser

  # Returns the listener class paired with this parser.
  def self.listener
    XMLScanListener
  end

  private

  # Scans @rss with an XMLScan::XMLScanner that reports to @listener.
  #
  # A String source is wrapped in a StringIO; any other object is handed to
  # the scanner unchanged.
  #
  # Raises NotWellFormedError when the scanner raises XMLScan::Error. The
  # reported line number is the first truthy value among the error's, the
  # scanner's and the input's +lineno+.
  def _parse
    source = @rss.is_a?(String) ? StringIO.new(@rss) : @rss
    xml_scanner = XMLScan::XMLScanner.new(@listener)
    xml_scanner.parse(source)
  rescue XMLScan::Error => error
    line = error.lineno || xml_scanner.lineno || source.lineno
    raise NotWellFormedError.new(line) { error.message }
  end

end
31 |
|
---|
# Listener that translates xmlscan scanner callbacks (on_*) into the
# xmldecl / instruction / text / tag_start / tag_end calls used by the rest
# of the RSS parser.
#
# NOTE(review): instruction, text, xmldecl, tag_start, tag_end and
# wellformed_error are not defined in this class; presumably they come from
# BaseListener, ListenerMixin or XMLScan::Visitor -- confirm.
class XMLScanListener < BaseListener

  include XMLScan::Visitor
  include ListenerMixin

  # Replacement text for the entity names this listener resolves itself.
  ENTITIES = {
    'lt' => '<',
    'gt' => '>',
    'amp' => '&',
    'quot' => '"',
    'apos' => '\''
  }

  # Remembers the version given in the XML declaration.
  def on_xmldecl_version(str)
    @version = str
  end

  # Remembers the encoding given in the XML declaration.
  def on_xmldecl_encoding(str)
    @encoding = str
  end

  # Remembers the raw standalone value given in the XML declaration.
  def on_xmldecl_standalone(str)
    @standalone = str
  end

  # Forwards the collected declaration; standalone is passed as a boolean
  # that is true only when the recorded value is exactly "yes".
  def on_xmldecl_end
    xmldecl(@version, @encoding, @standalone == "yes")
  end

  # Processing instructions and both kinds of character data map directly
  # onto existing handlers.
  alias_method(:on_pi, :instruction)
  alias_method(:on_chardata, :text)
  alias_method(:on_cdata, :text)

  # End tag.
  def on_etag(name)
    tag_end(name)
  end

  # Entity reference inside element content: emit its replacement as text.
  def on_entityref(ref)
    text(entity(ref))
  end

  # Character reference inside element content: +code+ is packed as a
  # UTF-8 character and emitted as text.
  def on_charref(code)
    text([code].pack('U'))
  end

  alias_method(:on_charref_hex, :on_charref)

  # Start tag opened: begin a fresh attribute table for this element.
  def on_stag(name)
    @attrs = {}
  end

  # Attribute name seen: register an empty value buffer for it.
  #
  # The buffer is appended to in place by the on_attr_* callbacks, so it must
  # be a mutable String. A bare '' literal is frozen when the interpreter
  # runs with frozen string literals enabled; ''.dup is always mutable and
  # keeps the literal's encoding.
  def on_attribute(name)
    @attrs[name] = @current_attr = ''.dup
  end

  # Literal piece of the current attribute value.
  def on_attr_value(str)
    @current_attr << str
  end

  # Entity reference inside the current attribute value.
  def on_attr_entityref(ref)
    @current_attr << entity(ref)
  end

  # Character reference inside the current attribute value.
  def on_attr_charref(code)
    @current_attr << [code].pack('U')
  end

  alias_method(:on_attr_charref_hex, :on_attr_charref)

  # Start tag closed with ">": report the element with its attributes.
  def on_stag_end(name)
    tag_start(name, @attrs)
  end

  # Start tag closed with "/>": report the element start and its end.
  def on_stag_end_empty(name)
    tag_start(name, @attrs)
    tag_end(name)
  end

  private

  # Returns the replacement text for +ref+ from ENTITIES. For a name that is
  # not in the table, wellformed_error is called and its result is returned.
  def entity(ref)
    ENTITIES[ref] || wellformed_error("undefined entity: #{ref}")
  end
end
120 |
|
---|
121 | end
|
---|