1 | module SM
|
---|
2 |
|
---|
# Registry of attribute names. Every name (a symbol) is paired with a
# distinct single-bit integer so that sets of attributes can be combined
# into, and tested against, an integer bitmap.
class Attribute
  # Bit reserved for "special" sequences; registered as :_SPECIAL_.
  SPECIAL = 1

  # name (Symbol) => bit value, in order of registration
  @@name_to_bitmap = { :_SPECIAL_ => SPECIAL }
  # bit that will be handed to the next previously unseen name
  @@next_bitmap = 2

  # Returns the bit registered for +name+. An unknown name is given the
  # next free bit (each new bit is the previous one shifted left by one)
  # and remembered for later calls.
  def self.bitmap_for(name)
    @@name_to_bitmap.fetch(name) do
      assigned = @@next_bitmap
      @@next_bitmap = assigned << 1
      @@name_to_bitmap[name] = assigned
    end
  end

  # Returns a comma-separated list of every registered name whose bit is
  # set in +bitmap+, or "none" when +bitmap+ is zero.
  def self.as_string(bitmap)
    return "none" if bitmap.zero?

    matching = @@name_to_bitmap.map do |name, bit|
      name unless (bitmap & bit) == 0
    end
    matching.compact.join(",")
  end

  # Yields, as a String, each registered name whose bit is set in
  # +bitmap+. The SPECIAL bit is never yielded.
  def self.each_name_of(bitmap)
    @@name_to_bitmap.each do |name, bit|
      yield name.to_s unless bit == SPECIAL || (bitmap & bit) == 0
    end
  end
end
38 |
|
---|
39 |
|
---|
# An AttrChanger records a change in attributes. It contains
# a bitmap of the attributes to turn on, and a bitmap of those to
# turn off.
AttrChanger = Struct.new(:turn_on, :turn_off)
class AttrChanger
  # Returns a description of the form "Attr: +<on names>/-<off names>",
  # with each bitmap rendered by Attribute.as_string.
  #
  # The members are read through the Struct accessors: Struct members
  # are not stored as instance variables, so @turn_on would be nil.
  def to_s
    "Attr: +#{Attribute.as_string(turn_on)}/-#{Attribute.as_string(turn_off)}"
  end
end
50 |
|
---|
# An array of attribute bitmaps which parallels the characters in a
# string: slot n holds the bitmap for character n.
class AttrSpan
  # Creates a span of +length+ slots, each starting with no bits set.
  def initialize(length)
    @attrs = Array.new(length, 0)
  end

  # ORs +bits+ into each of the +length+ slots beginning at +start+.
  def set_attrs(start, length, bits)
    (start...(start + length)).each { |pos| @attrs[pos] |= bits }
  end

  # Returns the bitmap stored in slot +n+.
  def [](n)
    @attrs[n]
  end
end
67 |
|
---|
##
# Hold details of a special sequence: the attribute bitmap (+type+)
# it was recognised with and the +text+ that matched.

class Special
  attr_reader :type
  attr_accessor :text

  def initialize(type, text)
    @type, @text = type, text
  end

  # Two specials are equal when both their text and their type are equal.
  #
  # Objects that do not expose +text+ and +type+ (a String or nil, for
  # instance) compare as not equal rather than raising NoMethodError, so
  # arrays that mix strings and specials can be compared safely.
  def ==(o)
    return false unless o.respond_to?(:text) && o.respond_to?(:type)

    text == o.text && type == o.type
  end

  # Returns "Special: type=<type>, text=<dumped text>".
  def to_s
    "Special: type=#{type}, text=#{text.dump}"
  end
end
87 |
|
---|
# Finds inline markup in a string (word pairs such as *bold*, HTML-like
# tags such as <b>..</b>, and "special" sequences matched by a regexp)
# and turns the string into a flow: an array of plain strings
# interleaved with AttrChanger and Special objects.
#
# Markup delimiters are overwritten with NUL characters, so the string
# keeps its length while it is processed, and the attribute bits that
# apply to each character are recorded in a parallel AttrSpan.
#
# The lookup tables below are constants, so they are shared by every
# instance; #initialize and the add_* methods write into them.
class AttributeManager

  NULL = "\000".freeze

  ##
  # We work by substituting non-printing characters in to the
  # text. For now I'm assuming that I can substitute
  # a character in the range 0..8 for a 7 bit character
  # without damaging the encoded string, but this might
  # be optimistic
  #

  A_PROTECT = 004
  PROTECT_ATTR = A_PROTECT.chr

  # This maps delimiters that occur around words (such as
  # *bold* or +tt+) where the start and end delimiters
  # are the same. This lets us optimize the regexp
  MATCHING_WORD_PAIRS = {}

  # And this is used when the delimiters aren't the same. In this
  # case the hash maps a pattern to the attribute bitmap
  WORD_PAIR_MAP = {}

  # This maps HTML tags to the corresponding attribute bitmap
  HTML_TAGS = {}

  # And this maps _special_ sequences to an attribute bitmap. A special
  # sequence is something like a WikiWord
  SPECIAL = {}

  # A \ in front of a character that would normally be
  # processed turns off processing. We do this by turning
  # \< into <#{PROTECT_ATTR}
  #
  # Holds the characters that may be protected this way;
  # #add_word_pair appends the first character of each start delimiter.
  PROTECTABLE = ["<", "\\"]

  # Registers the default markup: *bold*, _em_ and +tt+ word pairs, the
  # em/i/b/tt/code tags, and HTML comments as a special sequence.
  def initialize
    add_word_pair("*", "*", :BOLD)
    add_word_pair("_", "_", :EM)
    add_word_pair("+", "+", :TT)

    add_html("em", :EM)
    add_html("i", :EM)
    add_html("b", :BOLD)
    add_html("tt", :TT)
    add_html("code", :TT)

    add_special(/<!--(.*?)-->/, :COMMENT)
  end

  # Return an attribute object with the given turn_on
  # and turn_off bits set

  def attribute(turn_on, turn_off)
    AttrChanger.new(turn_on, turn_off)
  end

  # Returns the AttrChanger that moves from the bitmap +current+ to the
  # bitmap +new+: bits only in +new+ are turned on, bits only in
  # +current+ are turned off.
  def change_attribute(current, new)
    diff = current ^ new
    attribute(new & diff, current & diff)
  end

  # As #change_attribute, but both sides are given as collections of
  # attribute names, which are converted with Attribute.bitmap_for.
  def changed_attribute_by_name(current_set, new_set)
    current = new = 0
    current_set.each { |name| current |= Attribute.bitmap_for(name) }
    new_set.each { |name| new |= Attribute.bitmap_for(name) }
    change_attribute(current, new)
  end

  # Returns the characters of the working string from +start_pos+ up to
  # but excluding +end_pos+, with the NUL placeholders removed.
  def copy_string(start_pos, end_pos)
    @str[start_pos...end_pos].delete(NULL)
  end

  # Handles word-pair markup such as *bold*. Each delimiter in +str+ is
  # overwritten with NULs (one per delimiter character) and the bits of
  # the attribute are set in +attrs+ for the enclosed text.

  def convert_attrs(str, attrs)
    # first do matching ones; an empty table would otherwise produce
    # the invalid character class "[]"
    unless MATCHING_WORD_PAIRS.empty?
      tags = Regexp.escape(MATCHING_WORD_PAIRS.keys.join(""))
      re = Regexp.new("(^|\\W)([#{tags}])([A-Za-z_]+?)\\2(\\W|\$)")
      # repeat until a pass makes no substitution
      1 while str.gsub!(re) {
        m = Regexp.last_match
        attr = MATCHING_WORD_PAIRS[m[2]]
        attrs.set_attrs(m.pre_match.length + m[1].length + m[2].length,
                        m[3].length, attr)
        m[1] + NULL * m[2].length + m[3] + NULL * m[2].length + m[4]
      }
    end

    # then non-matching
    WORD_PAIR_MAP.each do |regexp, attr|
      str.gsub!(regexp) {
        m = Regexp.last_match
        attrs.set_attrs(m.pre_match.length + m[1].length, m[2].length, attr)
        NULL * m[1].length + m[2] + NULL * m[3].length
      }
    end
  end

  # Handles <tag>text</tag> markup for the registered tags, matched
  # without regard to case. Both tags are overwritten with NULs and the
  # attribute bits are set in +attrs+ for the enclosed text.
  def convert_html(str, attrs)
    return if HTML_TAGS.empty?

    tags = HTML_TAGS.keys.map { |tag| Regexp.escape(tag) }.join("|")
    re = Regexp.new("<(#{tags})>(.*?)</\\1>", Regexp::IGNORECASE)
    # repeat until a pass makes no substitution
    1 while str.gsub!(re) {
      m = Regexp.last_match
      attr = HTML_TAGS[m[1].downcase]
      html_length = m[1].length + 2 # the tag name plus "<" and ">"
      seq = NULL * html_length
      attrs.set_attrs(m.pre_match.length + html_length, m[2].length, attr)
      # the closing tag is one character longer because of its "/"
      seq + m[2] + seq + NULL
    }
  end

  # Marks every match of a registered special pattern in +attrs+ with
  # the pattern's bits plus Attribute::SPECIAL. +str+ is not modified.
  def convert_specials(str, attrs)
    SPECIAL.each do |regexp, attr|
      str.scan(regexp) do
        m = Regexp.last_match
        attrs.set_attrs(m.pre_match.length, m[0].length,
                        attr | Attribute::SPECIAL)
      end
    end
  end

  # Replaces a backslash followed by a protectable character with that
  # character followed by PROTECT_ATTR, so the length is unchanged and
  # the character is no longer seen as markup.
  def mask_protected_sequences
    protect_pattern = Regexp.new("\\\\([#{Regexp.escape(PROTECTABLE.join(''))}])")
    @str.gsub!(protect_pattern, "\\1#{PROTECT_ATTR}")
  end

  # Turns each PROTECT_ATTR marker into a NUL, which #copy_string later
  # removes.
  def unmask_protected_sequences
    @str.gsub!(/(.)#{PROTECT_ATTR}/, "\\1\000")
  end

  # Registers the delimiters +start+ and +stop+ as marking text with the
  # attribute +name+. Identical delimiters go into MATCHING_WORD_PAIRS;
  # otherwise a pattern is stored in WORD_PAIR_MAP. The first character
  # of +start+ also becomes protectable with a backslash.
  #
  # Raises RuntimeError if +start+ begins with "<".
  def add_word_pair(start, stop, name)
    # start[0, 1] is a one-character String on every Ruby version
    raise "Word flags may not start '<'" if start[0, 1] == "<"
    bitmap = Attribute.bitmap_for(name)
    if start == stop
      MATCHING_WORD_PAIRS[start] = bitmap
    else
      pattern = Regexp.new("(" + Regexp.escape(start) + ")" +
                           "(\\S+)" +
                           "(" + Regexp.escape(stop) + ")")
      WORD_PAIR_MAP[pattern] = bitmap
    end
    PROTECTABLE << start[0, 1]
    PROTECTABLE.uniq!
  end

  # Registers the tag +tag+ (stored lower-cased) as marking text with
  # the attribute +name+.
  def add_html(tag, name)
    HTML_TAGS[tag.downcase] = Attribute.bitmap_for(name)
  end

  # Registers +pattern+ as a special sequence with the attribute +name+.
  def add_special(pattern, name)
    SPECIAL[pattern] = Attribute.bitmap_for(name)
  end

  # Converts +str+ into a flow and returns it (see #split_into_flow).
  #
  # The work is done on a copy, so the caller's string is left untouched
  # and frozen strings are accepted.
  def flow(str)
    @str = str.dup

    puts("Before flow, str='#{@str.dump}'") if $DEBUG
    mask_protected_sequences

    @attrs = AttrSpan.new(@str.length)

    puts("After protecting, str='#{@str.dump}'") if $DEBUG
    convert_attrs(@str, @attrs)
    convert_html(@str, @attrs)
    convert_specials(@str, @attrs)
    unmask_protected_sequences
    puts("After flow, str='#{@str.dump}'") if $DEBUG
    split_into_flow
  end

  # Debugging aid: prints the working string with NULs shown as "!",
  # followed by one line per attribute bit (the lowest 16 bits) marking
  # the characters that have that bit set. Lines with no marks are
  # omitted.
  def display_attributes
    puts
    puts @str.tr(NULL, "!")
    bit = 1
    16.times do |bno|
      line = ""
      @str.length.times do |i|
        if (@attrs[i] & bit) == 0
          line << " "
        elsif bno.zero?
          line << "S" # bit 0 is Attribute::SPECIAL
        else
          line << ("%d" % (bno + 1))
        end
      end
      puts(line) unless line =~ /\A *\z/
      bit <<= 1
    end
  end

  # Walks the working string and returns the flow: the visible text cut
  # wherever the attribute bitmap changes, with an AttrChanger at each
  # change. A run of characters carrying the SPECIAL bit is emitted as a
  # single Special. A final AttrChanger turns off whatever is still on.
  def split_into_flow
    display_attributes if $DEBUG

    res = []
    current_attr = 0
    str_len = @str.length

    # skip leading invisible text
    i = 0
    i += 1 while i < str_len && null_at?(i)
    start_pos = i

    # then scan the string, chunking it on attribute changes
    while i < str_len
      new_attr = @attrs[i]
      if new_attr != current_attr
        if i > start_pos
          res << copy_string(start_pos, i)
          start_pos = i
        end

        res << change_attribute(current_attr, new_attr)
        current_attr = new_attr

        if (current_attr & Attribute::SPECIAL) != 0
          i += 1 while i < str_len && (@attrs[i] & Attribute::SPECIAL) != 0
          res << Special.new(current_attr, copy_string(start_pos, i))
          start_pos = i
          next
        end
      end

      # move on, skipping any invisible characters
      i += 1
      i += 1 while i < str_len && null_at?(i)
    end

    # tidy up trailing text
    res << copy_string(start_pos, str_len) if start_pos < str_len

    # and reset to all attributes off
    res << change_attribute(current_attr, 0) if current_attr != 0

    res
  end

  private

  # True when the character at +index+ of the working string is the NUL
  # placeholder. The two-argument slice returns a String on every Ruby
  # version, whereas @str[index] is an Integer on 1.8 and a String later.
  def null_at?(index)
    @str[index, 1] == NULL
  end

end
339 |
|
---|
340 | end
|
---|