[18425] | 1 | require 'rexml/encoding'
|
---|
| 2 |
|
---|
| 3 | module REXML
|
---|
# Generates Source-s.  USE THIS CLASS.
class SourceFactory
  # Generates a Source object
  # @param arg Either a String, an IO-like object, or an existing Source
  # @return a new Source when +arg+ is a String; a new IOSource when +arg+
  #   responds to read, readline, nil? and eof?; +arg+ itself when it is
  #   already a Source
  # @raise RuntimeError if +arg+ is none of the above
  def SourceFactory::create_from(arg)
    if arg.kind_of? String
      Source.new(arg)
    elsif arg.respond_to?(:read) &&
          arg.respond_to?(:readline) &&
          arg.respond_to?(:nil?) &&
          arg.respond_to?(:eof?)
      # Duck-typed: anything that reads like an IO is wrapped, not only IO.
      IOSource.new(arg)
    elsif arg.kind_of? Source
      arg
    else
      # Report the class of the rejected argument.  (This used to reference
      # an undefined local, which raised NameError instead of this message.)
      raise "#{arg.class} is not a valid input stream. It must walk \n"+
        "like either a String, IO, or Source."
    end
  end
end
| 25 |
|
---|
# A Source can be searched for patterns, and wraps buffers and other
# objects and provides consumption of text
class Source
  include Encoding
  # The current buffer (what we're going to read next)
  attr_reader :buffer
  # The line number of the last consumed text.  Set to 0 by the constructor;
  # nothing in this class updates it afterwards.
  attr_reader :line
  attr_reader :encoding

  # Constructor
  # @param arg must be a String, and should be a valid XML document
  # @param encoding if non-null, sets the encoding of the source to this
  #   value, overriding all encoding detection
  def initialize(arg, encoding=nil)
    # @orig keeps the untouched input so position/current_line can relate
    # the remaining buffer back to it.
    @orig = @buffer = arg
    if encoding
      self.encoding = encoding
    else
      # check_encoding is not defined in this class; presumably it comes
      # from the Encoding mixin -- confirm there.
      self.encoding = check_encoding( @buffer )
    end
    @line = 0
  end

  # Inherited from Encoding
  # Overridden to support optimized en/decoding
  # @param enc the encoding to switch to
  def encoding=(enc)
    # Do nothing more when the inherited setter returns a false value.
    return unless super
    @line_break = encode( '>' )
    if enc != UTF_8
      @buffer = decode(@buffer)
      @to_utf = true
    else
      @to_utf = false
    end
  end

  # Scans the source for a given pattern.  Note, that this is not your
  # usual scan() method.  For one thing, the pattern argument has some
  # requirements; for another, the source can be consumed.  You can easily
  # confuse this method.  Originally, the patterns were easier
  # to construct and this method more robust, because this method
  # generated search regexes on the fly; however, this was
  # computationally expensive and slowed down the entire REXML package
  # considerably, since this is by far the most commonly called method.
  # @param pattern must be a Regexp, and must be in the form of
  #   /^\s*(#{your pattern, with no groups})(.*)/.  The first group
  #   will be returned; the second group is used if the consume flag is
  #   set.
  # @param cons if true, the pattern returned will be consumed, leaving
  #   everything after it in the Source.
  # @return the result of String#scan on the buffer, or nil if the buffer
  #   is nil.
  def scan(pattern, cons=false)
    return nil if @buffer.nil?
    rv = @buffer.scan(pattern)
    # $' is the text following the last match made by String#scan above.
    @buffer = $' if cons and rv.size>0
    rv
  end

  # Does nothing for an in-memory Source: there is no further input to pull.
  def read
  end

  # Drops everything up to and including the first match of +pattern+.
  # The buffer is left untouched when the pattern does not match.
  def consume( pattern )
    @buffer = $' if pattern.match( @buffer )
  end

  # @param char unused here
  # @param pattern the Regexp to match against the buffer
  # @return the MatchData, or nil; the buffer is not consumed
  def match_to( char, pattern )
    return pattern.match(@buffer)
  end

  # Like match_to, but consumes the matched text on success.
  # @param char unused here
  # @param pattern the Regexp to match against the buffer
  # @return the MatchData, or nil if the pattern did not match
  def match_to_consume( char, pattern )
    md = pattern.match(@buffer)
    # Only advance on success.  Assigning the post-match unconditionally
    # replaced the buffer with nil after a failed match, discarding the
    # remaining input; consume and match already guard the same way.
    @buffer = md.post_match if md
    return md
  end

  # @param pattern the Regexp to match against the buffer
  # @param cons if true and the pattern matches, the buffer is advanced
  #   past the match
  # @return the MatchData, or nil if the pattern did not match
  def match(pattern, cons=false)
    md = pattern.match(@buffer)
    @buffer = $' if cons and md
    return md
  end

  # @return true if the Source is exhausted
  def empty?
    @buffer == ""
  end

  # @return the index at which the remaining buffer is found in the
  #   original input (String#index), or nil if it is not found
  def position
    @orig.index( @buffer )
  end

  # @return the current line in the source
  def current_line
    # NOTE(review): String#split with no argument splits on whitespace, not
    # on newlines, and grep with a String compares for equality -- confirm
    # that this yields the intended line index.
    lines = @orig.split
    res = lines.grep @buffer[0..30]
    res = res[-1] if res.kind_of? Array
    lines.index( res ) if res
  end
end
| 127 |
|
---|
# A Source that wraps an IO.  See the Source class for method
# documentation
class IOSource < Source
  #attr_reader :block_size

  # block_size has been deprecated
  # @param arg the IO-like object to read from
  # @param block_size ignored; kept so existing callers keep working
  # @param encoding if non-null, used instead of sniffing the first bytes
  def initialize(arg, block_size=500, encoding=nil)
    # @source is set to nil once reading fails; @er_source keeps the
    # original handle for position/current_line reporting.
    @er_source = @source = arg
    @to_utf = false
    # Determining the encoding is a deceptively difficult issue to resolve.
    # First, we check the first two bytes for UTF-16.  Then we
    # assume that the encoding is at least ASCII enough for the '>', and
    # we read until we get one of those.  This gives us the XML declaration,
    # if there is one.  If there isn't one, the file MUST be UTF-8, as per
    # the XML spec.  If there is one, we can determine the encoding from
    # it.
    @buffer = ""
    str = @source.read( 2 )
    if encoding
      self.encoding = encoding
    elsif /\A(?:\xfe\xff|\xff\xfe)/n =~ str
      self.encoding = check_encoding( str )
    else
      @line_break = '>'
    end
    # Hand the two sniffed bytes plus everything up to the first line break
    # to Source#initialize.
    super str+@source.readline( @line_break )
  end

  # Same contract as Source#scan, but pulls more input from the underlying
  # source when the buffered text yields no match.
  def scan(pattern, cons=false)
    rv = super
    # You'll notice that this next section is very similar to the same
    # section in match(), but just a liiittle different.  This is
    # because it is a touch faster to do it this way with scan()
    # than the way match() does it; enough faster to warrant duplicating
    # some code
    if rv.size == 0
      until @buffer =~ pattern or @source.nil?
        begin
          # READLINE OPT
          #str = @source.read(@block_size)
          str = @source.readline(@line_break)
          str = decode(str) if @to_utf and str
          @buffer << str
        rescue Iconv::IllegalSequence
          # Conversion failures are real errors: let them propagate.
          raise
        rescue
          # Any other failure is taken to mean there is nothing more to read.
          @source = nil
        end
      end
      rv = super
    end
    rv.taint
    rv
  end

  # Appends the next chunk (up to and including @line_break) to the buffer.
  # On failure the source is marked as finished by setting @source to nil.
  def read
    begin
      str = @source.readline(@line_break)
      str = decode(str) if @to_utf and str
      @buffer << str
    rescue StandardError
      # Narrowed from Exception so interrupts and exit requests are no
      # longer swallowed; scan and match rescue the same class.
      @source = nil
    end
  end

  # Drops everything up to and including the first match of +pattern+,
  # reading more input if needed.
  def consume( pattern )
    match( pattern, true )
  end

  # Same contract as Source#match, but keeps reading from the underlying
  # source until the pattern matches or the source is used up.
  def match( pattern, cons=false )
    rv = pattern.match(@buffer)
    @buffer = $' if cons and rv
    while !rv and @source
      begin
        str = @source.readline(@line_break)
        str = decode(str) if @to_utf and str
        @buffer << str
        rv = pattern.match(@buffer)
        @buffer = $' if cons and rv
      rescue
        # A failed read ends the loop: there is nothing more to try.
        @source = nil
      end
    end
    rv.taint
    rv
  end

  # @return true if the buffer is empty and the underlying source is
  #   either gone or at end of file
  def empty?
    super and ( @source.nil? || @source.eof? )
  end

  # @return 0 for a pipe or for a source that cannot report a position,
  #   otherwise the position reported by the underlying source
  def position
    # SourceFactory admits any object with read/readline/nil?/eof?, so the
    # source is not guaranteed to offer stat (or pos).
    return 0 if @er_source.respond_to?(:stat) && @er_source.stat.pipe?
    @er_source.respond_to?(:pos) ? @er_source.pos : 0
  end

  # @return the current line in the source, as [pos, lineno, line]
  def current_line
    begin
      pos = @er_source.pos        # The byte position in the source
      lineno = @er_source.lineno  # The XML < position in the source
      # NOTE(review): the source is rewound and re-read here and its
      # position is not restored afterwards -- confirm that callers only
      # use this for error reporting.
      @er_source.rewind
      line = 0                    # The \r\n position in the source
      begin
        while @er_source.pos < pos
          @er_source.readline
          line += 1
        end
      rescue
        # Best effort: keep whatever line count was reached.
      end
    rescue IOError
      pos = -1
      line = -1
    end
    [pos, lineno, line]
  end
end
| 244 | end
|
---|