1 | require 'rexml/parseexception'
|
---|
2 | require 'rexml/source'
|
---|
3 |
|
---|
4 | module REXML
|
---|
5 | module Parsers
|
---|
6 | # = Using the Pull Parser
|
---|
7 | # <em>This API is experimental, and subject to change.</em>
|
---|
8 | # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
|
---|
9 | # while parser.has_next?
|
---|
10 | # res = parser.next
|
---|
11 | # puts res[1]['att'] if res.start_tag? and res[0] == 'b'
|
---|
12 | # end
|
---|
13 | # See the PullEvent class for information on the content of the results.
|
---|
14 | # The data is identical to the arguments passed for the various events to
|
---|
15 | # the StreamListener API.
|
---|
16 | #
|
---|
17 | # Notice that:
|
---|
18 | # parser = PullParser.new( "<a>BAD DOCUMENT" )
|
---|
19 | # while parser.has_next?
|
---|
20 | # res = parser.next
|
---|
21 | # raise res[1] if res.error?
|
---|
22 | # end
|
---|
23 | #
|
---|
24 | # Nat Price gave me some good ideas for the API.
|
---|
25 | class BaseParser
|
---|
# Building blocks for XML names.  These are plain strings so they can be
# interpolated into the larger patterns below.
NCNAME_STR= '[\w:][\-\w\d.]*'
NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"

NAMECHAR = '[\-\w\d\.:]'
NAME = "([\\w:]#{NAMECHAR}*)"
NMTOKEN = "(?:#{NAMECHAR})+"
NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
# A general entity reference (&name;) or a numeric character reference.
# Only the named form has a capture group (from NAME).
REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
REFERENCE_RE = /#{REFERENCE}/

# *_START patterns classify the upcoming markup; *_PATTERN patterns are
# the ones actually used to consume it from the source.
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
COMMENT_START = /\A<!--/u
COMMENT_PATTERN = /<!--(.*?)-->/um
CDATA_START = /\A<!\[CDATA\[/u
# Despite the name, this matches "]>" and is used by #pull to detect the
# end of the DOCTYPE internal subset.
CDATA_END = /^\s*\]\s*>/um
CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
XMLDECL_START = /\A<\?xml\s/u
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
INSTRUCTION_START = /\A<\?/u
INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um
CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um

# Pseudo-attributes of the XML declaration.
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
# Whitespace after "=" is optional, exactly as for VERSION and ENCODING
# (the pattern previously demanded exactly one whitespace character, so
# standalone="yes" was never recognised).
STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um

# Declarations that may appear inside the DOCTYPE internal subset.
ENTITY_START = /^\s*<!ENTITY/
IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u
ELEMENTDECL_START = /^\s*<!ELEMENT/um
ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
ATTDEF_RE = /#{ATTDEF}/
ATTLISTDECL_START = /^\s*<!ATTLIST/um
ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
NOTATIONDECL_START = /^\s*<!NOTATION/um
PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um

# Character data: everything up to (but not including) the next "<".
TEXT_PATTERN = /\A([^<]*)/um

# Entity constants
PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
NDATADECL = "\\s+NDATA\\s+#{NAME}"
PEREFERENCE = "%#{NAME};"
ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um

# An ampersand that does not start a named entity reference.
EREFERENCE = /&(?!#{NAME};)/

# The predefined entities handled here.  Each value is
#   [ pattern matching the escaped form, escaped form,
#     raw character, pattern matching the raw character ]
# #normalize uses elements 3 and 1 (raw -> escaped); #unnormalize and
# #entity use elements 0 and 2 (escaped -> raw).
DEFAULT_ENTITIES = {
  'gt' => [/&gt;/, '&gt;', '>', />/],
  'lt' => [/&lt;/, '&lt;', '<', /</],
  'quot' => [/&quot;/, '&quot;', '"', /"/],
  "apos" => [/&apos;/, "&apos;", "'", /'/]
}


######################################################################
# These are patterns to identify common markup errors, to make the
# error messages more informative.
######################################################################
MISSING_ATTRIBUTE_QUOTES = /^<#{NAME_STR}\s+#{NAME_STR}\s*=\s*[^"']/um
|
---|
105 |
|
---|
# Builds a parser for +source+.  All of the work is delegated to
# #stream=, so constructing a parser and re-targeting an existing one
# go through the same code path.
def initialize(source)
  self.stream = source
end
|
---|
109 |
|
---|
# Registers +listener+ so that it is handed every event returned by
# #pull, via listener.receive(event).
#
# The first registration wraps #pull on this one object: the existing
# method is kept as _old_pull and a replacement #pull is defined that
# calls it, notifies each listener in registration order, and returns the
# event unchanged.  Later registrations only append to the listener list.
def add_listener( listener )
  already_wrapped = defined?(@listeners) && @listeners
  unless already_wrapped
    @listeners = []
    instance_eval <<-EOL
      alias :_old_pull :pull
      def pull
        event = _old_pull
        @listeners.each do |listener|
          listener.receive event
        end
        event
      end
    EOL
  end
  @listeners << listener
end
|
---|
126 |
|
---|
127 | attr_reader :source
|
---|
128 |
|
---|
# Points the parser at a new input and resets all parsing state.
#
# +source+ is handed to SourceFactory.create_from and the result becomes
# @source.  The pending self-closed tag, the document status, the open
# tag list, the pushed-back event stack and the entity list are cleared.
def stream=( source )
  @source = SourceFactory.create_from( source )
  @document_status = @closed = nil
  @tags, @stack = [], []
  @entities = []
end
|
---|
137 |
|
---|
# Reports the current position in the input by asking the source for it.
# Sources that do not respond to +position+ yield 0.
def position
  # FIXME: 0 is only a placeholder for sources that cannot report a position.
  return 0 unless @source.respond_to?( :position )
  @source.position
end
|
---|
146 |
|
---|
# True when nothing is left to deliver: the source reports itself empty
# and no pushed-back or pre-parsed events remain on the stack.
def empty?
  @source.empty? && @stack.empty?
end
|
---|
151 |
|
---|
# True while there is still something to deliver, i.e. the source is not
# empty or events remain on the stack.  The logical opposite of #empty?.
def has_next?
  !@source.empty? || !@stack.empty?
end
|
---|
156 |
|
---|
# Puts +token+ back at the front of the event stack, so it is the next
# event taken from the stack by #pull.  Any number of events may be
# pushed back.
def unshift token
  @stack.insert( 0, token )
end
|
---|
162 |
|
---|
# Looks ahead in the event stream without discarding anything.
#
# +depth+ is a zero-based index into the pending events; events are
# pulled and stored on the stack until that index exists, then the event
# at that index is returned.  A +depth+ of -1 pulls until #empty? is true
# and returns the last event stored on the stack.  Values below -1 raise
# a RuntimeError.
#
# Every event pulled here stays on the stack, so a deep peek (or -1)
# holds that much of the document's events in memory.
def peek depth=0
  raise %Q[Illegal argument "#{depth}"] if depth < -1
  pulled = []
  if depth == -1
    pulled << pull() until empty?
  else
    wanted = depth + 1
    pulled << pull() while @stack.size + pulled.size < wanted
  end
  @stack += pulled unless pulled.empty?
  @stack[depth]
end
|
---|
182 |
|
---|
# Returns the next event in the stream as an Array whose first element is
# a Symbol naming the event and whose remaining elements are its data:
#
#   [ :xmldecl, version, encoding, standalone ]
#   [ :start_doctype, name, pub_sys, long_name, uri ]
#   [ :externalentity, text ]      [ :elementdecl, text ]
#   [ :entitydecl, name, ... ]     [ :attlistdecl, element, pairs, text ]
#   [ :notationdecl, name, kind, public_id, system_id ]
#   [ :end_doctype ]
#   [ :start_element, name, attributes ]   [ :end_element, name ]
#   [ :text, string ]   [ :cdata, string ]   [ :comment, string ]
#   [ :processing_instruction, target, content ]
#   [ :end_document ]
#
# Events previously pushed back with #unshift (or stored by #peek) are
# returned before any new input is parsed.
#
# Raises REXML::ParseException on malformed markup.  Any other
# StandardError raised while parsing document content is wrapped in a
# REXML::ParseException; non-standard exceptions (Interrupt, SystemExit,
# ...) are deliberately allowed to propagate untouched.
def pull
  # A self-closing tag (<a/>) was reported as :start_element by the
  # previous call; report its matching :end_element now.
  if @closed
    x, @closed = @closed, nil
    return [ :end_element, x ]
  end
  return [ :end_document ] if empty?
  return @stack.shift if @stack.size > 0
  @source.read if @source.buffer.size<2

  # ---- Prologue: nothing but whitespace, comments, the XML declaration,
  # ---- processing instructions or a DOCTYPE has been seen so far.
  if @document_status == nil
    word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um )
    word = word[1] unless word.nil?
    case word
    when COMMENT_START
      return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
    when XMLDECL_START
      results = @source.match( XMLDECL_PATTERN, true )[1]
      version = VERSION.match( results )
      version = version[1] unless version.nil?
      encoding = ENCODING.match(results)
      encoding = encoding[1] unless encoding.nil?
      # Tell the source which encoding was declared (nil if none).
      @source.encoding = encoding
      standalone = STANDALONE.match(results)
      standalone = standalone[1] unless standalone.nil?
      return [ :xmldecl, version, encoding, standalone ]
    when INSTRUCTION_START
      return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
    when DOCTYPE_START
      md = @source.match( DOCTYPE_PATTERN, true )
      identity = md[1]
      close = md[2]
      identity =~ IDENTITY
      name = $1
      # ParseException is a class: it must be instantiated with .new.
      # (Calling it like a method raised NoMethodError instead.)
      raise REXML::ParseException.new( "DOCTYPE is missing a name", @source ) if name.nil?
      pub_sys = $2.nil? ? nil : $2.strip
      long_name = $3.nil? ? nil : $3.strip
      uri = $4.nil? ? nil : $4.strip
      args = [ :start_doctype, name, pub_sys, long_name, uri ]
      if close == ">"
        # No internal subset: queue the matching :end_doctype right away.
        @document_status = :after_doctype
        @source.read if @source.buffer.size<2
        @source.match(/^\s*/um, true)
        @stack << [ :end_doctype ]
      else
        @document_status = :in_doctype
      end
      return args
    when /^\s+/
      # Leading whitespace: stay in the prologue and fall through.
    else
      @document_status = :after_doctype
      @source.read if @source.buffer.size<2
      @source.match(/\s*/um, true)
    end
  end

  # ---- DOCTYPE internal subset: one declaration per call.
  if @document_status == :in_doctype
    md = @source.match(/\s*(.*?>)/um)
    raise REXML::ParseException.new( "Malformed DOCTYPE: unterminated declaration", @source ) if md.nil?
    case md[1]
    when SYSTEMENTITY
      match = @source.match( SYSTEMENTITY, true )[1]
      return [ :externalentity, match ]

    when ELEMENTDECL_START
      return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]

    when ENTITY_START
      match = @source.match( ENTITYDECL, true ).to_a.compact
      match[0] = :entitydecl
      ref = false
      if match[1] == '%'
        # Parameter entity: remember that and drop the marker for now.
        ref = true
        match.delete_at 1
      end
      # Now we have to sort out what kind of entity reference this is
      if match[2] == 'SYSTEM'
        # External reference
        match[3] = match[3][1..-2] # PUBID
        match.delete_at(4) if match.size > 4 # Chop out NDATA decl
        # match is [ :entitydecl, name, SYSTEM, pubid(, ndata)? ]
      elsif match[2] == 'PUBLIC'
        # External reference
        match[3] = match[3][1..-2] # PUBID
        match[4] = match[4][1..-2] # HREF
        # match is [ :entitydecl, name, PUBLIC, pubid, href ]
      else
        # Internal entity: strip the surrounding quotes from the value.
        match[2] = match[2][1..-2]
        match.pop if match.size == 4
        # match is [ :entitydecl, name, value ]
      end
      match << '%' if ref
      return match
    when ATTLISTDECL_START
      md = @source.match( ATTLISTDECL_PATTERN, true )
      raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
      element = md[1]
      contents = md[0]

      # Collect attribute name => default value, skipping #IMPLIED ones.
      pairs = {}
      values = md[0].scan( ATTDEF_RE )
      values.each do |attdef|
        unless attdef[3] == "#IMPLIED"
          attdef.compact!
          val = attdef[3]
          val = attdef[4] if val == "#FIXED "
          pairs[attdef[0]] = val
        end
      end
      return [ :attlistdecl, element, pairs, contents ]
    when NOTATIONDECL_START
      md = nil
      if @source.match( PUBLIC )
        md = @source.match( PUBLIC, true )
        vals = [md[1],md[2],md[4],md[6]]
      elsif @source.match( SYSTEM )
        md = @source.match( SYSTEM, true )
        vals = [md[1],md[2],nil,md[4]]
      else
        raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
      end
      return [ :notationdecl, *vals ]
    when CDATA_END
      # "]>" closes the internal subset.
      @document_status = :after_doctype
      @source.match( CDATA_END, true )
      return [ :end_doctype ]
    end
  end

  # ---- Document content.
  begin
    if @source.buffer[0] == ?<
      if @source.buffer[1] == ?/
        last_tag = @tags.pop
        md = @source.match( CLOSE_MATCH, true )
        # Without this guard a malformed end tag surfaced as a
        # NoMethodError wrapped in a generic "Exception parsing" error.
        if md.nil?
          raise REXML::ParseException.new( "Malformed end tag; expected "+
            "end tag for '#{last_tag}'", @source)
        end
        raise REXML::ParseException.new( "Missing end tag for "+
          "'#{last_tag}' (got \"#{md[1]}\")",
          @source) unless last_tag == md[1]
        return [ :end_element, last_tag ]
      elsif @source.buffer[1] == ?!
        md = @source.match(/\A(\s*[^>]*>)/um)
        raise REXML::ParseException.new("Malformed node", @source) unless md
        if md[0][2] == ?-
          md = @source.match( COMMENT_PATTERN, true )
          return [ :comment, md[1] ] if md
        else
          md = @source.match( CDATA_PATTERN, true )
          return [ :cdata, md[1] ] if md
        end
        raise REXML::ParseException.new( "Declarations can only occur "+
          "in the doctype declaration.", @source)
      elsif @source.buffer[1] == ??
        md = @source.match( INSTRUCTION_PATTERN, true )
        return [ :processing_instruction, md[1], md[2] ] if md
        raise REXML::ParseException.new( "Bad instruction declaration",
          @source)
      else
        # Get the next tag
        md = @source.match(TAG_MATCH, true)
        unless md
          # Check for missing attribute quotes
          raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES )
          raise REXML::ParseException.new("malformed XML: missing tag start", @source)
        end
        attrs = []
        if md[2].size > 0
          attrs = md[2].scan( ATTRIBUTE_PATTERN )
          # $' is whatever followed the last attribute that scan matched.
          raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
        end

        if md[4]
          # Self-closing tag: the :end_element is emitted by the next call.
          @closed = md[1]
        else
          @tags.push( md[1] )
        end
        attributes = {}
        attrs.each { |a,b,c| attributes[a] = c }
        return [ :start_element, md[1], attributes ]
      end
    else
      md = @source.match( TEXT_PATTERN, true )
      if md[0].length == 0
        @source.match( /(\s+)/, true )
      end
      return [ :text, md[1] ]
    end
  rescue REXML::ParseException
    raise
  rescue StandardError => error
    # Only StandardError is wrapped, so Interrupt, SystemExit and similar
    # are not turned into parse errors.
    raise REXML::ParseException.new( "Exception parsing",
      @source, self, error )
  end
  return [ :dummy ]
end
|
---|
382 |
|
---|
# Resolves the entity named +reference+ to its replacement text.
#
# +entities+ (a Hash-like object, or nil) is consulted first; if it has
# no usable value, the raw character from DEFAULT_ENTITIES is used.  The
# value found is passed through #unnormalize with the same +entities+
# before being returned.  Returns nil when the name is unknown.
def entity( reference, entities )
  resolved = entities ? entities[ reference ] : nil
  unless resolved
    builtin = DEFAULT_ENTITIES[ reference ]
    resolved = builtin ? builtin[2] : nil
  end
  return nil unless resolved
  unnormalize( resolved, entities )
end
|
---|
392 |
|
---|
# Escapes +input+ for inclusion in XML and returns the result as a new
# String; +input+ itself is never modified and may be frozen.
#
# input::         the text to escape
# entities::      optional Hash of entity name => value; each occurrence
#                 of a value is replaced by "&name;"
# entity_filter:: optional collection of entity names that must NOT be
#                 substituted from +entities+
#
# Ampersands that do not already begin a named entity reference become
# "&amp;", and the raw characters listed in DEFAULT_ENTITIES are replaced
# by their escaped forms.
def normalize( input, entities=nil, entity_filter=nil )
  # dup, not clone: clone would carry over a frozen state and make the
  # in-place substitutions below raise.
  copy = input.dup
  # Doing it like this rather than in a loop improves the speed
  copy.gsub!( EREFERENCE, '&amp;' )
  entities.each do |key, value|
    # The filter holds entity names, so it is checked against the key.
    next if entity_filter and entity_filter.include?( key )
    copy.gsub!( value, "&#{key};" )
  end if entities
  # Entity values substituted above may themselves contain bare ampersands.
  copy.gsub!( EREFERENCE, '&amp;' )
  DEFAULT_ENTITIES.each do |key, value|
    copy.gsub!( value[3], value[1] )
  end
  copy
end
|
---|
408 |
|
---|
# Expands entity and character references in +string+ and returns the
# result as a new String; +string+ itself is never modified and may be
# frozen.
#
# string::   the text to unescape
# entities:: optional Hash of entity name => value, passed on to #entity
# filter::   optional collection of entity names that must be left alone
#
# Line endings are normalised first ("\r\n" and "\r" become "\n").  Then
# numeric character references (&#65; / &#x41;) are decoded, named
# references are replaced through #entity and DEFAULT_ENTITIES, and
# finally "&amp;" becomes "&".  "&amp;" is handled last so that text such
# as "&amp;lt;" yields "&lt;" rather than "<".
def unnormalize( string, entities=nil, filter=nil )
  # dup, not clone: clone would carry over a frozen state and make the
  # in-place substitutions below raise.
  rv = string.dup
  rv.gsub!( /\r\n?/, "\n" )
  matches = rv.scan( REFERENCE_RE )
  return rv if matches.size == 0
  # Numeric character references, decimal or hexadecimal.
  rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {|m|
    m=$1
    m = "0#{m}" if m[0] == ?x   # "x41" -> "0x41" so Integer() reads hex
    [Integer(m)].pack('U*')
  }
  # Keep only the names of named references; numeric ones captured nil.
  matches.collect!{|x|x[0]}.compact!
  if matches.size > 0
    matches.each do |entity_reference|
      unless filter and filter.include?(entity_reference)
        entity_value = entity( entity_reference, entities )
        if entity_value
          # Literal string pattern and block replacement: the entity name
          # is not treated as a regex and backslashes in the value are
          # inserted verbatim instead of being read as back-references.
          rv.gsub!( "&#{entity_reference};" ) { entity_value }
        end
      end
    end
    matches.each do |entity_reference|
      unless filter and filter.include?(entity_reference)
        er = DEFAULT_ENTITIES[entity_reference]
        rv.gsub!( er[0], er[2] ) if er
      end
    end
    rv.gsub!( /&amp;/, '&' )
  end
  rv
end
|
---|
441 | end
|
---|
442 | end
|
---|
443 | end
|
---|
444 |
|
---|
445 | =begin
|
---|
446 | case event[0]
|
---|
447 | when :start_element
|
---|
448 | when :text
|
---|
449 | when :end_element
|
---|
450 | when :processing_instruction
|
---|
451 | when :cdata
|
---|
452 | when :comment
|
---|
453 | when :xmldecl
|
---|
454 | when :start_doctype
|
---|
455 | when :end_doctype
|
---|
456 | when :externalentity
|
---|
457 | when :elementdecl
|
---|
458 | when :entitydecl
|
---|
459 | when :attlistdecl
|
---|
460 | when :notationdecl
|
---|
461 | when :end_doctype
|
---|
462 | end
|
---|
463 | =end
|
---|