1 | require "rexml/parent"
|
---|
2 | require "rexml/parseexception"
|
---|
3 | require "rexml/namespace"
|
---|
4 | require 'rexml/entity'
|
---|
5 | require 'rexml/attlistdecl'
|
---|
6 | require 'rexml/xmltokens'
|
---|
7 |
|
---|
8 | module REXML
|
---|
# Represents an XML DOCTYPE declaration; that is, the contents of
# <!DOCTYPE ... >.  A DOCTYPE can be used to declare the DTD of a document,
# as well as to declare entities used in the document.
class DocType < Parent
  include XMLTokens

  START = "<!DOCTYPE"
  STOP = ">"
  SYSTEM = "SYSTEM"
  PUBLIC = "PUBLIC"

  # Entities every new DocType starts out with.  The hash itself is shared
  # by all instances until #add replaces it with a private copy, so it must
  # never be modified in place.
  DEFAULT_ENTITIES = {
    'gt'=>EntityConst::GT,
    'lt'=>EntityConst::LT,
    'quot'=>EntityConst::QUOT,
    "apos"=>EntityConst::APOS
  }

  # name is the name of the doctype
  # external_id is the referenced DTD, if given
  # entities is the name => entity table consulted by #entity
  # namespaces is never assigned in this class, so it reads as nil here
  attr_reader :name, :external_id, :entities, :namespaces

  # Constructor
  #
  #   dt = DocType.new( 'foo', '-//I/Hate/External/IDs' )
  #   # <!DOCTYPE foo '-//I/Hate/External/IDs'>
  #   dt = DocType.new( doctype_to_clone )
  #   # Incomplete.  Shallow clone of doctype
  #
  # What +first+ may be:
  # String::  the doctype name; the second argument is then stored as the
  #           external id, not used as a parent.
  # DocType:: name, external id, long name and URI are copied from it.
  #           Children and entities are _not_ copied.
  # Array::   [ name, external_id, long_name, uri ]
  # Source::  see the note below.
  #
  # Anything else only initialises the superclass.
  #
  # +Note+ that the constructor:
  #
  #   DocType.new( Source.new( "<!DOCTYPE foo 'bar'>" ) )
  #
  # is _deprecated_.  Do not use it.  It will probably disappear.
  def initialize( first, parent=nil )
    @entities = DEFAULT_ENTITIES
    @long_name = @uri = nil
    if first.kind_of? String
      super()
      @name = first
      @external_id = parent
    elsif first.kind_of? DocType
      super( parent )
      @name = first.name
      @external_id = first.external_id
      # The identifiers have no public reader, and without them #public,
      # #system and #write give different answers on the copy.
      @long_name = first.instance_variable_get( :@long_name )
      @uri = first.instance_variable_get( :@uri )
    elsif first.kind_of? Array
      super( parent )
      @name, @external_id, @long_name, @uri = first[0], first[1], first[2], first[3]
    elsif first.kind_of? Source
      super( parent )
      parser = Parsers::BaseParser.new( first )
      event = parser.pull
      if event[0] == :start_doctype
        @name, @external_id, @long_name, @uri, = event[1..-1]
      end
    else
      super()
    end
  end

  def node_type
    :doctype
  end

  # Returns an Array with one Attribute per key/value pair yielded by every
  # AttlistDecl child whose element_name equals +element+.
  def attributes_of element
    rv = []
    each do |child|
      next unless child.kind_of?( AttlistDecl ) && child.element_name == element
      child.each { |key, val| rv << Attribute.new( key, val ) }
    end
    rv
  end

  # Looks up +attribute+ in the first AttlistDecl child that is declared for
  # +element+ and includes that attribute.  Returns nil when there is none.
  def attribute_of element, attribute
    att_decl = find do |child|
      child.kind_of?( AttlistDecl ) &&
        child.element_name == element &&
        child.include?( attribute )
    end
    return nil unless att_decl
    att_decl[attribute]
  end

  # Returns a new DocType built from this one (see the DocType branch of
  # #initialize for what is carried over).
  def clone
    DocType.new self
  end

  # output::
  #   Where to write the string
  # indent::
  #   An integer.  It is handed to the +indent+ helper (not defined in this
  #   file) for the opening line, and children are written with indent + 1.
  # transitive::
  #   Not used by this method.
  # ie_hack::
  #   Not used by this method.
  def write( output, indent=0, transitive=false, ie_hack=false )
    # +indent+ is both a parameter and a helper method; the parenthesised
    # call with arguments resolves to the method.
    indent( output, indent )
    output << START
    output << ' '
    output << @name
    output << " #@external_id" if @external_id
    output << " #@long_name" if @long_name
    output << " #@uri" if @uri
    unless @children.empty?
      next_indent = indent + 1
      output << ' ['
      @children.each do |decl|
        output << "\n"
        decl.write( output, next_indent )
      end
      output << "\n]"
    end
    output << STOP
  end

  # Returns the parent's context, or nil when this doctype has no parent.
  def context
    @parent ? @parent.context : nil
  end

  # Returns the unnormalized value of the entity called +name+, or nil if no
  # such entity is known.
  def entity( name )
    found = @entities[name]
    found.unnormalized if found
  end

  # Adds +child+ to this doctype; an Entity child is additionally registered
  # in the entity table under its name.
  def add child
    super(child)
    # Identity, not equality: a private copy that still has the default
    # contents must not be cloned again on every call.
    @entities = DEFAULT_ENTITIES.clone if @entities.equal?( DEFAULT_ENTITIES )
    @entities[ child.name ] = child if child.kind_of? Entity
  end

  # This method retrieves the public identifier identifying the document's
  # DTD.  Returns nil unless the external id is "PUBLIC".
  #
  # Method contributed by Henrik Martensson
  def public
    case @external_id
    when SYSTEM
      nil
    when PUBLIC
      strip_quotes(@long_name)
    end
  end

  # This method retrieves the system identifier identifying the document's
  # DTD.  Returns nil when the external id is neither "SYSTEM" nor "PUBLIC",
  # or when it is "PUBLIC" and no string URI was stored.
  #
  # Method contributed by Henrik Martensson
  def system
    case @external_id
    when SYSTEM
      strip_quotes(@long_name)
    when PUBLIC
      @uri.kind_of?(String) ? strip_quotes(@uri) : nil
    end
  end

  # This method returns a list of notations that have been declared in the
  # _internal_ DTD subset. Notations in the external DTD subset are not
  # listed.
  #
  # Method contributed by Henrik Martensson
  def notations
    children().select {|node| node.kind_of?(REXML::NotationDecl)}
  end

  # Retrieves a named notation. Only notations declared in the internal
  # DTD subset can be retrieved.
  #
  # Method contributed by Henrik Martensson
  def notation(name)
    notations.find { |notation_decl| notation_decl.name == name }
  end

  private

  # Returns +quoted_string+ without its first and last character when it
  # both starts and ends with a quote character (' or "); otherwise returns
  # it unchanged.  nil is passed through as nil.
  #
  # The pattern is anchored to the whole string and lets "." span newlines,
  # so a quoted literal is handled even when it contains line breaks.
  #
  # Method contributed by Henrik Martensson
  def strip_quotes(quoted_string)
    if quoted_string =~ /\A['"].*['"]\z/m
      quoted_string[1, quoted_string.length-2]
    else
      quoted_string
    end
  end
end
205 |
|
---|
# We don't really handle any of these since we're not a validating
# parser, so we can be pretty dumb about them.  All we need to be able
# to do is write them back out unchanged on a write().
209 |
|
---|
# This is an abstract class.  You never use it directly; it serves as the
# parent class for the specific declarations.
class Declaration < Child
  # src:: the declaration text; #to_s appends the closing '>' to it.
  def initialize( src )
    super()
    @string = src
  end

  # Returns the stored declaration text followed by '>'.
  def to_s
    @string + '>'
  end

  # Appends the declaration to +output+, preceded by +indent+ spaces when
  # +indent+ is greater than zero.
  def write( output, indent )
    padding = indent > 0 ? ' ' * indent : nil
    output << padding if padding
    output << to_s
  end
end
227 |
|
---|
228 | public
|
---|
# An element declaration.  It adds nothing to Declaration; the constructor
# only forwards its argument.
class ElementDecl < Declaration
  def initialize( src )
    super( src )
  end
end
234 |
|
---|
# Holds the value given to the constructor and writes it back out verbatim.
class ExternalEntity < Child
  # src:: the value returned by #to_s and appended by #write.
  def initialize( src )
    super()
    @entity = src
  end

  # Returns the stored value as is.
  def to_s
    @entity
  end

  # Appends the stored value to +output+.  +indent+ is accepted but ignored.
  def write( output, indent )
    output << @entity
  end
end
247 |
|
---|
# A <!NOTATION ...> declaration.
class NotationDecl < Child
  attr_accessor :public, :system

  # This method retrieves the name of the notation.
  #
  # Method contributed by Henrik Martensson
  attr_reader :name

  # name::   the notation name
  # middle:: written verbatim right after the name by #to_s
  # pub::    value for #public; omitted from #to_s when nil or false
  # sys::    value for #system; omitted from #to_s when nil or false
  def initialize name, middle, pub, sys
    super(nil)
    @name = name
    @middle = middle
    @public = pub
    @system = sys
  end

  # Returns the declaration as text: the name, the middle part, then the
  # +inspect+ form of the public and system values, each preceded by a
  # space and each only when set.
  def to_s
    public_part = @public ? " #{@public.inspect}" : ''
    system_part = @system ? " #{@system.inspect}" : ''
    "<!NOTATION #@name #@middle#{public_part}#{system_part}>"
  end

  # Appends the declaration to +output+, preceded by +indent+ spaces when
  # +indent+ is greater than zero.
  def write( output, indent=-1 )
    output << (' '*indent) if indent > 0
    output << to_s
  end
end
278 | end
|
---|