1 | require 'rexml/parsers/baseparser'
|
---|
2 | require 'rexml/parseexception'
|
---|
3 | require 'rexml/namespace'
|
---|
4 | require 'rexml/text'
|
---|
5 |
|
---|
6 | module REXML
|
---|
7 | module Parsers
|
---|
8 | # SAX2Parser
|
---|
9 | class SAX2Parser
|
---|
# Creates a SAX2 parser reading from +source+.
#
# +source+ is passed unchanged to BaseParser.new. The observer lists
# (@listeners, @procs), the namespace and tag stacks, and the entity table
# all start out empty, and @has_listeners starts out false.
def initialize(source)
  @parser = BaseParser.new(source)
  @listeners, @procs = [], []
  @namespace_stack, @tag_stack = [], []
  @entities = {}
  @has_listeners = false
end
|
---|
19 |
|
---|
# Returns the input source as reported by the wrapped parser; this is a
# plain delegation to @parser.source.
def source
  @parser.source
end
|
---|
23 |
|
---|
# Forwards +listener+ to @parser.add_listener and returns whatever that
# call returns. This parser's own @listeners list is not touched here;
# use #listen to register SAX2 observers.
def add_listener(listener)
  @parser.add_listener(listener)
end
|
---|
27 |
|
---|
# Registers an observer. Accepted argument shapes:
#
#   Symbol, Array, Block
#     Call Block for Symbol events on elements matching an Array entry
#   Symbol, Block
#     Call Block for every Symbol event
#   Array, Listener
#     Send all events on elements matching an Array entry to Listener
#   Array, Block
#     Call Block for :start_element events on elements matching an Array
#     entry
#   Listener
#     Send all events to Listener
#
# Symbol can be one of: :start_element, :end_element,
# :start_prefix_mapping, :end_prefix_mapping, :characters,
# :processing_instruction, :doctype, :attlistdecl, :elementdecl,
# :entitydecl, :notationdecl, :cdata, :xmldecl, :comment
#
# There is an additional symbol that can be listened for: :progress.
# It is raised after every event that is generated and is passed the
# current stream position.
#
# Array contains regular expressions or strings which are matched against
# fully qualified element names.
#
# Listener must implement the methods in SAX2Listener.
#
# Block is passed the same arguments as the SAX2Listener method whose name
# equals the matched Symbol. See SAX2Listener for more information.
#
# Every form registers through #add, so registering an identical
# (event, matcher, observer) triple a second time is a no-op.
def listen(*args, &blok)
  first, second = args
  with_second = args.size == 2

  if first.kind_of? Symbol
    if with_second
      # Goes through #add (rather than pushing onto @procs directly) so this
      # form de-duplicates exactly like all the others.
      second.each { |match| add([first, match, blok]) }
    else
      add([first, nil, blok])
    end
  elsif first.kind_of? Array
    if with_second
      # Explicit observer object: it receives every event on the matches.
      first.each { |match| add([nil, match, second]) }
    else
      # Bare block: only :start_element events on the matches.
      first.each { |match| add([:start_element, match, blok]) }
    end
  else
    add([nil, nil, first])
  end
end
|
---|
75 |
|
---|
# Removes previously registered observers.
#
# With a +listener+ argument, every registration whose observer equals
# +listener+ is dropped. Both @listeners and @procs are searched, because
# #listen files a positionally passed callable (e.g. listen(array, a_proc))
# under @procs. @has_listeners is cleared once @listeners becomes empty.
#
# Without +listener+, every @procs registration whose callable equals the
# given block is dropped.
def deafen(listener = nil, &blok)
  if listener
    @listeners.delete_if { |item| item[-1] == listener }
    @procs.delete_if { |item| item[-1] == listener }
    @has_listeners = false if @listeners.size == 0
  else
    @procs.delete_if { |item| item[-1] == blok }
  end
end
|
---|
84 |
|
---|
# Pulls events from the underlying parser until :end_document and turns
# each one into notifications for the registered procs and listeners.
#
# For an element, the prefix mappings declared on it are announced first
# (:start_prefix_mapping), then :start_element. On close, :end_element is
# sent before the element's mappings are ended (:end_prefix_mapping).
# Text has declared entities and numeric character references expanded
# before it is delivered as :characters. After every pulled event except
# :end_document, a :progress event carrying @parser.position is dispatched.
#
# Returns nil once :end_document has been handled.
def parse
  @procs.each { |sym, _match, block| block.call if sym == :start_document }
  @listeners.each do |sym, _match, block|
    block.start_document if sym == :start_document or sym.nil?
  end

  while true
    event = @parser.pull
    case event[0]
    when :end_document
      handle(:end_document)
      break
    when :end_doctype
      # Nothing is dispatched for the end of the DOCTYPE; only the
      # :progress notification below fires.
    when :start_element
      name = event[1]
      attributes = event[2]
      @tag_stack.push(name)

      # Observers interested in the namespaces declared on this element.
      procs = get_procs(:start_prefix_mapping, name)
      listeners = get_listeners(:start_prefix_mapping, name)

      # Record this element's namespace declarations. A frame is pushed for
      # every element, even when it declares nothing, so the pop performed
      # in the :end_element branch always removes the matching frame.
      attributes.each { |n, v| attributes[n] = @parser.normalize(v) }
      declarations = attributes.find_all { |n, _value| n =~ /^xmlns(:|$)/ }
      frame = {}
      @namespace_stack.push(frame)
      declarations.each do |attr_name, declared_uri|
        # "xmlns:foo"[6..-1] is "foo"; a bare "xmlns" yields nil, which is
        # the key used for the default namespace.
        declared_prefix = attr_name[6..-1]
        frame[declared_prefix] = declared_uri
        procs.each { |ob| ob.call(declared_prefix, declared_uri) } if procs
        listeners.each { |ob| ob.start_prefix_mapping(declared_prefix, declared_uri) } if listeners
      end

      name =~ Namespace::NAMESPLIT
      prefix = $1
      local = $2
      uri = get_namespace(prefix)

      # Observers for start_element.
      procs = get_procs(:start_element, name)
      listeners = get_listeners(:start_element, name)
      procs.each { |ob| ob.call(uri, local, name, attributes) } if procs
      listeners.each { |ob| ob.start_element(uri, local, name, attributes) } if listeners
    when :end_element
      name = event[1]
      @tag_stack.pop
      name =~ Namespace::NAMESPLIT
      prefix = $1
      local = $2
      # Resolved before the frame is popped so the element's own
      # declarations are still in scope.
      uri = get_namespace(prefix)

      # Observers for end_element.
      procs = get_procs(:end_element, name)
      listeners = get_listeners(:end_element, name)
      procs.each { |ob| ob.call(uri, local, name) } if procs
      listeners.each { |ob| ob.end_element(uri, local, name) } if listeners

      # Leave the element's namespace scope and report each mapping that
      # ends with it.
      ended_frame = @namespace_stack.pop || {}
      procs = get_procs(:end_prefix_mapping, name)
      listeners = get_listeners(:end_prefix_mapping, name)
      ended_frame.each_key do |ended_prefix|
        procs.each { |ob| ob.call(ended_prefix) } if procs
        listeners.each { |ob| ob.end_prefix_mapping(ended_prefix) } if listeners
      end
    when :text
      copy = event[1].clone
      # Block form of gsub: the entity value is inserted literally, so
      # backslash sequences inside it are not treated as backreferences.
      @entities.each { |key, value| copy = copy.gsub("&#{key};") { value } }
      copy.gsub!(Text::NUMERICENTITY) do
        digits = $1
        # "x41" becomes "0x41" so Integer() reads it as hexadecimal.
        digits = "0#{digits}" if digits.start_with?('x')
        [Integer(digits)].pack('U*')
      end
      handle(:characters, copy)
    when :entitydecl
      # Only three-element declarations (name plus value) are remembered
      # for later expansion in text.
      @entities[event[1]] = event[2] if event.size == 3
      handle(*event)
    when :processing_instruction, :comment, :doctype, :attlistdecl,
         :elementdecl, :cdata, :notationdecl, :xmldecl
      handle(*event)
    end
    handle(:progress, @parser.position)
  end
end
|
---|
177 |
|
---|
178 | private
|
---|
# Dispatches the event +symbol+ with +arguments+ to every observer that
# applies to the innermost open element (the top of @tag_stack).
#
# Matching procs are invoked with +arguments+; matching listeners receive
# the method named after +symbol+ with the same arguments.
def handle(symbol, *arguments)
  current_tag = @tag_stack[-1]
  callables = get_procs(symbol, current_tag)
  observers = get_listeners(symbol, current_tag)

  callables.each { |callable| callable.call(*arguments) } if callables

  method_name = symbol.to_s
  observers.each { |observer| observer.send(method_name, *arguments) } if observers
end
|
---|
189 |
|
---|
# Returns the callables registered in @procs that apply to the event
# +symbol+ on the element named +name+, or nil when @procs is empty.
#
# A registration applies when its event symbol is nil or equals +symbol+,
# and its matcher is nil, equals +name+, or is a Regexp matching +name+.
#
# The same selection logic is repeated in get_listeners on purpose: the
# duplication was found to be faster than going through a shared helper.
def get_procs(symbol, name)
  return nil if @procs.size == 0

  selected = []
  @procs.each do |entry|
    sym, match = entry
    next unless sym.nil? || symbol == sym
    next unless match.nil? || name == match || (match.kind_of?(Regexp) && name =~ match)
    selected << entry[-1]
  end
  selected
end
|
---|
# Returns the listener objects registered in @listeners that apply to the
# event +symbol+ on the element named +name+, or nil when @listeners is
# empty.
#
# A registration applies when its event symbol is nil or equals +symbol+,
# and its matcher is nil, equals +name+, or is a Regexp matching +name+.
def get_listeners(symbol, name)
  return nil if @listeners.size == 0

  applicable = @listeners.select do |entry|
    sym, match = entry
    event_matches = sym.nil? || symbol == sym
    event_matches &&
      (match.nil? || name == match || (match.kind_of?(Regexp) && name =~ match))
  end
  applicable.map { |entry| entry[-1] }
end
|
---|
219 |
|
---|
# Files the registration +pair+ (an [event, matcher, observer] array) under
# @procs when its last element responds to #call, otherwise under
# @listeners. A registration that is already present is not added again.
# @has_listeners is set whenever a non-callable observer is passed in.
def add(pair)
  unless pair[-1].respond_to?(:call)
    @listeners.push(pair) unless @listeners.include?(pair)
    return @has_listeners = true
  end

  @procs.push(pair) unless @procs.include?(pair)
end
|
---|
228 |
|
---|
# Looks up the namespace URI bound to +prefix+, searching @namespace_stack
# from the innermost frame outwards. A nil +prefix+ looks up the nil key,
# which is where the default namespace is stored.
#
# Returns nil when no frame binds +prefix+.
def get_namespace(prefix)
  scope = @namespace_stack.reverse.find { |frame| !frame[prefix].nil? }
  scope[prefix] if scope
end
|
---|
234 | end
|
---|
235 | end
|
---|
236 | end
|
---|