source: extensions/gsdl-video/trunk/installed/cmdline/lib/ruby/1.8/rexml/text.rb@ 18425

Last change on this file since 18425 was 18425, checked in by davidb, 15 years ago

Video extension to Greenstone

File size: 11.5 KB
Line 
1require 'rexml/entity'
2require 'rexml/doctype'
3require 'rexml/child'
4require 'rexml/doctype'
5require 'rexml/parseexception'
6
7module REXML
8 # Represents text nodes in an XML document
9 class Text < Child
10 include Comparable
# Patterns for the characters that are escaped on output, listed in the
# order in which the substitutions occur.  The ampersand pattern skips any
# '&' that already starts something shaped like an entity or character
# reference.
SPECIALS = [ /&(?!#?[\w-]+;)/u, /</u, />/u, /"/u, /'/u, /\r/u ].freeze
# Replacement text for each entry of SPECIALS, index for index.
SUBSTITUTES = ['&amp;', '&lt;', '&gt;', '&quot;', '&apos;', '&#13;'].freeze
# Characters which are substituted in written strings
SLAICEPS = [ '<', '>', '"', "'", '&' ].freeze
# Patterns matching the escaped form of each entry of SLAICEPS, index for
# index ('&amp;' comes last).
SETUTITSBUS = [ /&lt;/u, /&gt;/u, /&quot;/u, /&apos;/u, /&amp;/u ].freeze
17
18 # If +raw+ is true, then REXML leaves the value alone
19 attr_accessor :raw
20
# Matches content that is rejected in raw text: a '<', or an '&' that is
# not followed by either an Entity::NAME or a decimal/hexadecimal character
# reference terminated by ';'.  Group 1 captures the offending character.
ILLEGAL = /(<|&(?!(#{Entity::NAME})|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));))/um
# Matches a decimal (&#NNN;) or hexadecimal (&#xHHH;) character reference.
# Group 1 captures the number without its leading zeros; hexadecimal values
# keep their 'x' prefix.
NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
23
24 # Constructor
25 # +arg+ if a String, the content is set to the String. If a Text,
26 # the object is shallowly cloned.
27 #
28 # +respect_whitespace+ (boolean, false) if true, whitespace is
29 # respected
30 #
31 # +parent+ (nil) if this is a Parent object, the parent
32 # will be set to this.
33 #
34 # +raw+ (nil) This argument can be given three values.
35 # If true, then the value used to construct this object is expected to
36 # contain no unescaped XML markup, and REXML will not change the text. If
37 # this value is false, the string may contain any characters, and REXML will
38 # escape any and all defined entities whose values are contained in the
39 # text. If this value is nil (the default), then the raw value of the
40 # parent will be used as the raw value for this node. If there is no raw
41 # value for the parent, and no value is supplied, the default is false.
42 # Use this field if you have entities defined for some text, and you don't
43 # want REXML to escape that text in output.
44 # Text.new( "<&", false, nil, false ) #-> "&lt;&amp;"
45 # Text.new( "&lt;&amp;", false, nil, false ) #-> "&amp;lt;&amp;amp;"
46 # Text.new( "<&", false, nil, true ) #-> Parse exception
47 # Text.new( "&lt;&amp;", false, nil, true ) #-> "&lt;&amp;"
48 # # Assume that the entity "s" is defined to be "sean"
49 # # and that the entity "r" is defined to be "russell"
50 # Text.new( "sean russell" ) #-> "&s; &r;"
51 # Text.new( "sean russell", false, nil, true ) #-> "sean russell"
52 #
53 # +entity_filter+ (nil) This can be an array of entities to match in the
54 # supplied text. This argument is only useful if +raw+ is set to false.
55 # Text.new( "sean russell", false, nil, false, ["s"] ) #-> "&s; russell"
56 # Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell"
57 # In the last example, the +entity_filter+ argument is ignored.
58 #
59 # +illegal+ INTERNAL USE ONLY
# Builds the node from a String or from another Text.
#
# Raises RuntimeError when +arg+ is neither a String nor a Text, and when
# the node ends up raw but its text matches +illegal+.
def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
  entity_filter=nil, illegal=ILLEGAL )

  @raw = false

  # Attach to the parent, if there is one, and start from its raw setting.
  if parent
    super( parent )
    @raw = parent.raw
  else
    @parent = nil
  end

  # An explicit +raw+ argument wins over the value taken from the parent.
  @raw = raw unless raw.nil?
  @entity_filter = entity_filter
  @normalized = @unnormalized = nil

  if arg.kind_of? String
    # dup rather than clone: clone keeps a frozen flag, and the in-place
    # edits below would then raise on frozen input.
    @string = arg.dup
    @string.squeeze!(" \n\t") unless respect_whitespace
  elsif arg.kind_of? Text
    # Copy the stored text, not to_s: for a non-raw node to_s is already
    # escaped, and storing that here would make the copy escape it again.
    @string = arg.instance_variable_get( :@string ).dup
    @raw = arg.raw
  else
    raise "Illegal argument of type #{arg.class} for Text constructor (#{arg})"
  end

  # Line ends are stored as a bare "\n".
  @string.gsub!( /\r\n?/, "\n" )

  # check for illegal characters
  if @raw
    if @string =~ illegal
      raise "Illegal character '#{$1}' in raw string \"#{@string}\""
    end
  end
end
95
# Node kind tag for this class: always the symbol :text.
def node_type
  return :text
end
99
# True when the stored text has no characters at all.
def empty?
  @string.empty?
end
103
104
# Returns a new Text built by passing this node to the constructor.
def clone
  Text.new( self )
end
108
109
110 # Appends text to this text node. The text is appended in the +raw+ mode
111 # of this text node.
# +to_append+ a String; its line ends are converted to "\n" before it is
# added to the stored text.
# +returns+ the stored string
def <<( to_append )
  @string << to_append.gsub( /\r\n?/, "\n" )
  # to_s and value memoize their results; discard them so the appended
  # text is visible the next time they are called.
  @normalized = @unnormalized = nil
  @string
end
115
116
117 # +other+ a String or a Text
118 # +returns+ the result of (to_s <=> arg.to_s)
# Orders this node against +other+ by comparing the two to_s strings.
def <=>( other )
  mine = to_s
  theirs = other.to_s
  mine <=> theirs
end
122
# Regexp built from Entity::REFERENCE.  Text::unnormalize scans text with
# it to find the references it has to expand.
REFERENCE = /#{Entity::REFERENCE}/
124 # Returns the string value of this text node. This string is always
125 # escaped, meaning that it is a valid XML text node string, and all
126 # entities that can be escaped, have been inserted. This method respects
127 # the entity filter set in the constructor.
128 #
129 # # Assume that the entity "s" is defined to be "sean", and that the
130 # # entity "r" is defined to be "russell"
131 # t = Text.new( "< & sean russell", false, nil, false, ['s'] )
132 # t.to_s #-> "&lt; &amp; &s; russell"
133 # t = Text.new( "< & &s; russell", false, nil, false )
134 # t.to_s #-> "&lt; &amp; &s; russell"
135 # u = Text.new( "sean russell", false, nil, true )
136 # u.to_s #-> "sean russell"
137 def to_s
138 return @string if @raw
139 return @normalized if @normalized
140
141 doctype = nil
142 if @parent
143 doc = @parent.document
144 doctype = doc.doctype if doc
145 end
146
147 @normalized = Text::normalize( @string, doctype, @entity_filter )
148 end
149
150 def inspect
151 @string.inspect
152 end
153
154 # Returns the string value of this text. This is the text without
155 # entities, as it might be used programmatically, or printed to the
156 # console. This ignores the 'raw' attribute setting, and any
157 # entity_filter.
158 #
159 # # Assume that the entity "s" is defined to be "sean", and that the
160 # # entity "r" is defined to be "russell"
161 # t = Text.new( "< & sean russell", false, nil, false, ['s'] )
162 # t.value #-> "< & sean russell"
163 # t = Text.new( "< & &s; russell", false, nil, false )
164 # t.value #-> "< & sean russell"
165 # u = Text.new( "sean russell", false, nil, true )
166 # u.value #-> "sean russell"
def value
  # Reuse the memoized result.  Without the explicit return the cached
  # string was evaluated and thrown away, and the text was unnormalized
  # again on every call.
  return @unnormalized if @unnormalized
  doctype = nil
  if @parent
    doc = @parent.document
    doctype = doc.doctype if doc
  end
  @unnormalized = Text::unnormalize( @string, doctype )
end
176
177 # Sets the contents of this text node. This expects the text to be
178 # unnormalized. It returns self.
179 #
180 # e = Element.new( "a" )
181 # e.add_text( "foo" ) # <a>foo</a>
182 # e[0].value = "bar" # <a>bar</a>
183 # e[0].value = "<a>" # <a>&lt;a&gt;</a>
def value=( val )
  # Store the new text with line ends converted to "\n"
  @string = val.gsub( /\r\n?/, "\n" )
  # Drop both memoized forms; they belonged to the old text
  @normalized = @unnormalized = nil
  # The node is no longer raw once its text has been replaced
  @raw = false
end
190
# Recursively wraps +string+ so that lines break at a space at or before
# +width+ characters.
#
# +addnewline+ if true, the result of this call is prefixed with a newline
# (the recursive calls never add one).
#
# A stretch with no space before the cutoff is broken at the first space
# after it instead; if there is no space left at all, the remainder is
# returned unbroken.
def wrap(string, width, addnewline=false)
  return string if string.length <= width
  # Last space at or before the cutoff, else the first one after it
  place = string.rindex(' ', width) || string.index(' ', width)
  # rindex alone returned nil here, and string[0, nil] then raised
  return string unless place
  wrapped = string[0,place] + "\n" + wrap(string[place+1..-1], width)
  addnewline ? "\n" + wrapped : wrapped
end
201
# Prefixes every line of +string+ with +style+ repeated +level+ times and
# strips the trailing whitespace of each line, including its line end, so
# the indented lines are joined directly.
#
# +level+ a negative level returns +string+ unchanged
# +indentfirstline+ if false, leading and trailing whitespace is stripped
# from the result, which removes the indent in front of the first line
def indent_text(string, level=1, style="\t", indentfirstline=true)
  return string if level < 0
  # The indent is the same for every line, so build it once
  indent_string = style * level
  new_string = ''
  # each_line rather than each: String no longer has an each method
  string.each_line { |line|
    new_string << (indent_string + line).sub(/[\s]+$/,'')
  }
  new_string.strip! unless indentfirstline
  return new_string
end
213
214 def write( writer, indent=-1, transitive=false, ie_hack=false )
215 s = to_s()
216 if not (@parent and @parent.whitespace) then
217 s = wrap(s, 60, false) if @parent and @parent.context[:wordwrap] == :all
218 if @parent and not @parent.context[:indentstyle].nil? and indent > 0 and s.count("\n") > 0
219 s = indent_text(s, indent, @parent.context[:indentstyle], false)
220 end
221 s.squeeze!(" \n\t") if @parent and [email protected]
222 end
223 writer << s
224 end
225
226 # FIXME
227 # This probably won't work properly
# Path of the parent followed by the "/text()" step.
def xpath
  @parent.xpath + "/text()"
end
233
234 # Writes out text, substituting special characters beforehand.
235 # +out+ A String, IO, or any other object supporting <<( String )
236 # +input+ the text to substitute and then write out
237 #
238 # z=utf8.unpack("U*")
239 # ascOut=""
240 # z.each{|r|
241 # if r < 0x100
242 # ascOut.concat(r.chr)
243 # else
244 # ascOut.concat(sprintf("&#x%x;", r))
245 # end
246 # }
247 # puts ascOut
# Escapes +input+ with the SPECIALS/SUBSTITUTES pairs, in order, and
# appends the result to +out+.  +input+ itself is left unchanged.
# +returns+ whatever +out+ << returns
def write_with_substitution out, input
  # dup rather than clone: clone keeps a frozen flag, and the in-place
  # substitutions below would then raise on frozen input.
  copy = input.dup
  # Doing it like this rather than in a loop improves the speed
  copy.gsub!( SPECIALS[0], SUBSTITUTES[0] )
  copy.gsub!( SPECIALS[1], SUBSTITUTES[1] )
  copy.gsub!( SPECIALS[2], SUBSTITUTES[2] )
  copy.gsub!( SPECIALS[3], SUBSTITUTES[3] )
  copy.gsub!( SPECIALS[4], SUBSTITUTES[4] )
  copy.gsub!( SPECIALS[5], SUBSTITUTES[5] )
  out << copy
end
259
260 # Reads text, substituting entities
# Returns a copy of +input+ with line ends converted to "\n" and with the
# five predefined entities and all numeric character references replaced
# by the characters they stand for.
#
# +illegal+ (nil) an optional Regexp; if the text matches it a
# ParseException is raised before anything is substituted.
def Text::read_with_substitution( input, illegal=nil )
  # dup rather than clone: clone keeps a frozen flag, and the in-place
  # edits below would then raise on frozen input.
  copy = input.dup

  if illegal and copy =~ illegal
    raise ParseException.new( "malformed text: Illegal character #$& in \"#{copy}\"" )
  end

  copy.gsub!( /\r\n?/, "\n" )
  return copy unless copy.include?( '&' )

  # Every reference is replaced in a single pass, so the output of one
  # substitution is never read as the start of another reference: with
  # separate passes "&amp;#65;" became "&#65;" and then "A".  Hexadecimal
  # digits are accepted in either case, as in NUMERICENTITY.
  copy.gsub!( /&(?:(lt|gt|quot|apos|amp)|#0*((?:\d+)|(?:x[a-fA-F0-9]+)));/ ) {
    name, number = $1, $2
    case name
    when 'lt'   then '<'
    when 'gt'   then '>'
    when 'quot' then '"'
    when 'apos' then "'"
    when 'amp'  then '&'
    else
      # Integer() needs the "0x" prefix to read a hexadecimal value
      number = "0#{number}" if number[0,1] == 'x'
      [Integer(number)].pack('U*')
    end
  }
  copy
end
284
# Matches an '&' that is not the start of an entity reference, i.e. is not
# followed by an Entity::NAME and a ';'.  In the code shown here it is only
# mentioned in a commented-out line of Text::normalize.
EREFERENCE = /&(?!#{Entity::NAME};)/
286 # Escapes all possible entities
# Returns an escaped copy of +input+.
#
# +doctype+ (nil) if given, the values of its entities are replaced by
# references to them; otherwise DocType::DEFAULT_ENTITIES is used.
#
# +entity_filter+ (nil) entities listed here, by name, are not substituted.
# It only applies when a +doctype+ is given.
# NOTE(review): the constructor documentation shows the filter selecting
# the entities that ARE substituted; this code, like Text::unnormalize,
# treats it as an exclusion list -- confirm which one is intended.
def Text::normalize( input, doctype=nil, entity_filter=nil )
  # Every ampersand is escaped first, including those that already begin
  # a reference.
  #copy = copy.gsub( EREFERENCE, '&amp;' )
  copy = input.gsub( "&", "&amp;" )
  if doctype
    # Replace each entity's value by a reference to the entity
    doctype.entities.each_value do |entity|
      next unless entity.value
      # The filter holds entity names (Text::unnormalize compares names
      # too); Entity objects are still honoured for callers that passed
      # them.
      next if entity_filter and
        ( entity_filter.include?( entity.name ) or
          entity_filter.include?( entity ) )
      copy = copy.gsub( entity.value, "&#{entity.name};" )
    end
  else
    # No doctype: only the default entities are substituted
    DocType::DEFAULT_ENTITIES.each_value do |entity|
      copy = copy.gsub(entity.value, "&#{entity.name};" )
    end
  end
  copy
end
307
308 # Unescapes all possible entities
# Returns an unescaped copy of +string+: line ends become "\n", numeric
# character references are decoded, and entity references are expanded
# from +doctype+ (or from DocType::DEFAULT_ENTITIES when no doctype is
# given).  '&amp;' is expanded last.
#
# +filter+ (nil) names of entities whose references are left untouched.
# +illegal+ is accepted but not used here.
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
  # dup rather than clone: clone keeps a frozen flag, and the in-place
  # edits below would then raise on frozen input.
  rv = string.dup
  rv.gsub!( /\r\n?/, "\n" )
  matches = rv.scan( REFERENCE )
  return rv if matches.size == 0

  rv.gsub!( NUMERICENTITY ) {
    number = $1
    # Integer() needs the "0x" prefix to read a hexadecimal value
    number = "0#{number}" if number[0,1] == 'x'
    [Integer(number)].pack('U*')
  }

  # Keep the first capture of every match -- presumably the entity name,
  # with nil for numeric references; verify against Entity::REFERENCE.
  # Each name needs expanding only once.
  names = matches.collect { |groups| groups[0] }.compact.uniq
  if names.size > 0
    names.each do |entity_reference|
      next if filter and filter.include?(entity_reference)
      re = /&#{entity_reference};/
      if doctype
        entity_value = doctype.entity( entity_reference )
      else
        entity = DocType::DEFAULT_ENTITIES[ entity_reference ]
        entity_value = entity ? entity.value : nil
      end
      rv.gsub!( re, entity_value ) if entity_value
    end
    rv.gsub!( /&amp;/, '&' )
  end
  rv
end
342 end
343end
Note: See TracBrowser for help on using the repository browser.