1 | require 'forwardable'
|
---|
2 |
|
---|
3 | require 'rexml/parseexception'
|
---|
4 | require 'rexml/parsers/baseparser'
|
---|
5 | require 'rexml/xmltokens'
|
---|
6 |
|
---|
7 | module REXML
|
---|
8 | module Parsers
|
---|
# = Using the Pull Parser
# <em>This API is experimental, and subject to change.</em>
#  parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
#  while parser.has_next?
#    res = parser.pull
#    puts res[1]['att'] if res.start_element? and res[0] == 'b'
#  end
# See the PullEvent class for information on the content of the results.
# The data is identical to the arguments passed for the various events to
# the StreamListener API.
#
# Notice that:
#  parser = PullParser.new( "<a>BAD DOCUMENT" )
#  while parser.has_next?
#    res = parser.pull
#    raise res[1] if res.error?
#  end
#
# Nat Price gave me some good ideas for the API.
class PullParser
  include XMLTokens
  extend Forwardable

  # Forwarded unchanged to the underlying BaseParser.
  def_delegators( :@parser, :entity )
  def_delegators( :@parser, :source )

  # Creates a pull parser reading from +stream+, which is handed as-is to
  # BaseParser.new.
  def initialize(stream)
    @entities = {}      # entity name => value, filled from :entitydecl events
    @listeners = nil    # created lazily by #add_listener
    @parser = BaseParser.new( stream )
    @my_stack = []      # look-ahead buffer filled by #peek and #unshift
  end

  # True while an event is still available: either one is waiting in the
  # look-ahead buffer, or the underlying parser reports that it has more.
  # The buffer is checked so that events fetched early by #peek (or pushed
  # back with #unshift) are not lost once the parser itself is drained.
  def has_next?
    !@my_stack.empty? || @parser.has_next?
  end

  # The counterpart of #has_next?: true only when the look-ahead buffer is
  # empty and the underlying parser reports that it is empty.
  def empty?
    @my_stack.empty? && @parser.empty?
  end

  # Registers +listener+ and returns the list of registered listeners.
  # Within this class the listeners are only collected; nothing here
  # notifies them.
  def add_listener( listener )
    @listeners ||= []
    @listeners << listener
  end

  # Yields every remaining event, in order, until #has_next? is false.
  def each
    yield pull while has_next?
  end

  # Returns the event +depth+ positions ahead (0 is the event the next call
  # to #pull will return) without consuming it.  Events needed to reach that
  # depth are read from the parser and kept in the look-ahead buffer.
  #
  # Buffered events go through the same processing as events returned by
  # #pull, so entity declarations are recorded and text events carry their
  # unnormalized text even when they are first seen through #peek.
  def peek(depth = 0)
    @my_stack.push(next_event) while @my_stack.length <= depth
    @my_stack[depth]
  end

  # Returns the next event, taking it from the look-ahead buffer when one is
  # waiting there and from the underlying parser otherwise.
  def pull
    return @my_stack.shift unless @my_stack.empty?

    next_event
  end

  # Puts +token+ at the front of the look-ahead buffer so that it is the
  # next value returned by #pull.  The token is stored exactly as given; it
  # is not wrapped in a PullEvent.
  def unshift(token)
    @my_stack.unshift token
  end

  private

  # Reads one raw event from the underlying parser, applies the per-type
  # bookkeeping, and wraps the result in a PullEvent.
  def next_event
    event = @parser.pull
    case event[0]
    when :entitydecl
      # Only declarations whose value does not mention PUBLIC or SYSTEM are
      # remembered for later text unnormalization.
      @entities[ event[1] ] = event[2] unless event[2] =~ /PUBLIC|SYSTEM/
    when :text
      # Append the unnormalized form after the raw text.
      event << @parser.unnormalize( event[1], @entities )
    end
    PullEvent.new( event )
  end
end
84 |
|
---|
# A parsing event.  The contents of the event are accessed as an +Array+,
# and the type is given either by the ...? methods, or by calling
# +event_type+.  The contents of this object vary from event to event,
# but are identical to the arguments passed to +StreamListener+s for each
# event.
class PullEvent
  # Wraps +arg+, the raw event array: element 0 is the event type and the
  # remaining elements are the event's arguments.
  def initialize(arg)
    @contents = arg
  end

  # Indexes into the event's arguments, i.e. the contents with the leading
  # event type left out, so index 0 is the first argument.
  #
  # start:: a Numeric index or a Range of indexes
  # endd::  optional length, used only together with a Numeric +start+
  #
  # Accepts the same forms as Array#slice: <tt>ev[i]</tt>, <tt>ev[i, len]</tt>
  # and <tt>ev[range]</tt>.  Raises a RuntimeError for any other kind of
  # +start+.
  def []( start, endd=nil )
    # Slice the argument list itself rather than shifting indexes by one:
    # shifting only the start of a range dropped the last requested element,
    # and shifting a negative index pointed it at the type slot.
    arguments = @contents.drop(1)
    if start.kind_of? Range
      arguments.slice( start )
    elsif start.kind_of? Numeric
      endd.nil? ? arguments.slice( start ) : arguments.slice( start, endd )
    else
      raise "Illegal argument #{start.inspect} (#{start.class})"
    end
  end

  # The type of this event (the first element of the contents).  The
  # predicates below test for :start_element, :end_element, :text,
  # :processing_instruction, :comment, :start_doctype, :attlistdecl,
  # :elementdecl, :entitydecl, :notationdecl, :entity, :cdata, :xmldecl
  # and :error.
  def event_type
    @contents[0]
  end

  # Content: [ String tag_name, Hash attributes ]
  def start_element?
    event_type == :start_element
  end

  # Content: [ String tag_name ]
  def end_element?
    event_type == :end_element
  end

  # Content: [ String raw_text, String unnormalized_text ]
  def text?
    event_type == :text
  end

  # Content: [ String text ]
  def instruction?
    event_type == :processing_instruction
  end

  # Content: [ String text ]
  def comment?
    event_type == :comment
  end

  # Content: [ String name, String pub_sys, String long_name, String uri ]
  def doctype?
    event_type == :start_doctype
  end

  # Content: [ String text ]
  def attlistdecl?
    event_type == :attlistdecl
  end

  # Content: [ String text ]
  def elementdecl?
    event_type == :elementdecl
  end

  # Due to the wonders of DTDs, an entity declaration can be just about
  # anything.  There's no way to normalize it; you'll have to interpret the
  # content yourself.  However, the following is true:
  #
  # * If the entity declaration is an internal entity:
  #   [ String name, String value ]
  # Content: [ String text ]
  def entitydecl?
    event_type == :entitydecl
  end

  # Content: [ String text ]
  def notationdecl?
    event_type == :notationdecl
  end

  # Content: [ String text ]
  def entity?
    event_type == :entity
  end

  # Content: [ String text ]
  def cdata?
    event_type == :cdata
  end

  # Content: [ String version, String encoding, String standalone ]
  def xmldecl?
    event_type == :xmldecl
  end

  # True when this event reports an error.
  def error?
    event_type == :error
  end

  # Renders the event as "<type>: <arguments array>".
  def inspect
    "#{@contents[0]}: #{@contents[1..-1].inspect}"
  end
end
195 | end
|
---|
196 | end
|
---|