source: extensions/gsdl-video/trunk/installed/cmdline/lib/ruby/1.8/rdoc/markup/simple_markup/inline.rb@ 18425

Last change on this file since 18425 was 18425, checked in by davidb, 15 years ago

Video extension to Greenstone

File size: 8.5 KB
Line 
1module SM
2
3 # We manage a set of attributes. Each attribute has a symbol name
4 # and a bit value
5
class Attribute
  # Bit reserved for special sequences; it is pre-registered under the
  # name :_SPECIAL_ and is skipped by each_name_of.
  SPECIAL = 1

  # Registry mapping an attribute name to its bit, and the next bit
  # that bitmap_for will hand out.
  @@name_to_bitmap = { :_SPECIAL_ => SPECIAL }
  @@next_bitmap = 2

  # Return the bit registered for +name+, allocating the next free bit
  # (and remembering it) the first time a name is seen.
  def self.bitmap_for(name)
    @@name_to_bitmap.fetch(name) do
      allocated = @@next_bitmap
      @@next_bitmap = allocated << 1
      @@name_to_bitmap[name] = allocated
    end
  end

  # Render +bitmap+ as a comma-separated list of the registered names
  # whose bit is set, or "none" when no bit is set at all.
  def self.as_string(bitmap)
    return "none" if bitmap.zero?
    matching = @@name_to_bitmap.keys.select do |name|
      (bitmap & @@name_to_bitmap[name]) != 0
    end
    matching.join(",")
  end

  # Yield the name (as a String) of every registered attribute whose
  # bit is set in +bitmap+, leaving out the SPECIAL bit.
  def self.each_name_of(bitmap)
    @@name_to_bitmap.each do |name, bit|
      yield name.to_s unless bit == SPECIAL || (bitmap & bit).zero?
    end
  end
end
38
39
40 # An AttrChanger records a change in attributes. It contains
41 # a bitmap of the attributes to turn on, and a bitmap of those to
42 # turn off
43
AttrChanger = Struct.new(:turn_on, :turn_off)
class AttrChanger
  # Human-readable summary: the names of the attributes being switched
  # on, then the names of those being switched off.
  #
  # Struct members are reached through their accessors; they are not
  # stored as instance variables, so @turn_on would always be nil here.
  def to_s
    "Attr: +#{Attribute.as_string(turn_on)}/-#{Attribute.as_string(turn_off)}"
  end
end
50
51 # An array of attributes which parallels the characters in a string
class AttrSpan
  # Create a span of +length+ slots, every slot starting with no
  # attribute bits set.
  def initialize(length)
    @attrs = Array.new(length, 0)
  end

  # OR +bits+ into each of the +length+ slots beginning at +start+.
  def set_attrs(start, length, bits)
    (start...(start + length)).each do |index|
      @attrs[index] |= bits
    end
  end

  # The attribute bitmap stored at position +n+.
  def [](n)
    @attrs[n]
  end
end
67
68 ##
69 # Hold details of a special sequence
70
class Special
  attr_reader :type
  attr_accessor :text

  # Remember the +type+ and the matched +text+ of the sequence.
  def initialize(type, text)
    @type = type
    @text = text
  end

  # Two specials are equal when both their text and their type match.
  def ==(o)
    text == o.text && type == o.type
  end

  # Debug representation; the text is shown in its escaped (dump) form.
  def to_s
    format("Special: type=%s, text=%s", type, text.dump)
  end
end
87
# Turns a string containing inline markup (word pairs such as *bold*,
# simple HTML tags such as <b>..</b>, and registered special sequences)
# into a flat array of plain strings, AttrChanger objects and Special
# objects (see #flow).
#
# Markup characters are not removed while scanning; they are overwritten
# with NULL characters so that every position in the string keeps
# lining up with the parallel AttrSpan. The NULLs are stripped when the
# text is finally copied out.
class AttributeManager

  NULL = "\000".freeze

  ##
  # We work by substituting non-printing characters into the
  # text. For now I'm assuming that I can substitute
  # a character in the range 0..8 for a 7 bit character
  # without damaging the encoded string, but this might
  # be optimistic
  #

  A_PROTECT = 004
  PROTECT_ATTR = A_PROTECT.chr

  # This maps delimiters that occur around words (such as
  # *bold* or +tt+) where the start and end delimiters
  # are the same. This lets us optimize the regexp
  MATCHING_WORD_PAIRS = {}

  # And this is used when the delimiters aren't the same. In this
  # case the hash maps a pattern to the attribute bitmap
  WORD_PAIR_MAP = {}

  # This maps HTML tags to the corresponding attribute bitmap
  HTML_TAGS = {}

  # And this maps _special_ sequences to a bitmap. A special sequence
  # is something like a WikiWord
  SPECIAL = {}

  # A \ in front of a character that would normally be
  # processed turns off processing. We do this by turning
  # \< into <#{PROTECT_ATTR}
  #
  # The list is joined into a regexp character class, so the single
  # seed entry covers both "<" and "\". add_word_pair appends the first
  # character of every start delimiter.
  PROTECTABLE = ["<\\"]

  # Return an attribute object with the given turn_on
  # and turn_off bits set
  def attribute(turn_on, turn_off)
    AttrChanger.new(turn_on, turn_off)
  end

  # Build the AttrChanger that moves from bitmap +current+ to bitmap
  # +new+: bits only in +new+ are turned on, bits only in +current+
  # are turned off.
  def change_attribute(current, new)
    diff = current ^ new
    attribute(new & diff, current & diff)
  end

  # Same as change_attribute, but both sides are given as collections
  # of attribute names rather than bitmaps.
  def changed_attribute_by_name(current_set, new_set)
    current = new = 0
    current_set.each { |name| current |= Attribute.bitmap_for(name) }
    new_set.each { |name| new |= Attribute.bitmap_for(name) }
    change_attribute(current, new)
  end

  # Return the text between +start_pos+ (inclusive) and +end_pos+
  # (exclusive) with the NULL placeholders removed.
  def copy_string(start_pos, end_pos)
    @str[start_pos...end_pos].delete(NULL)
  end

  # Find word-pair markup (*bold*, +tt+, ...) in +str+. The delimited
  # text gets its attribute bits set in +attrs+ and the delimiters are
  # overwritten in place with NULLs.
  def convert_attrs(str, attrs)
    # First the pairs whose start and end delimiter are identical. The
    # delimiters are escaped so that one such as "]" or "\" cannot break
    # the character class.
    tags = Regexp.escape(MATCHING_WORD_PAIRS.keys.join(""))
    pattern = Regexp.new("(^|\\W)([#{tags}])([A-Za-z_]+?)\\2(\\W|\$)")

    # A match consumes the non-word character after the closing
    # delimiter, so adjacent words need another pass; repeat until a
    # pass changes nothing (gsub! then returns nil).
    loop do
      changed = str.gsub!(pattern) do
        m = Regexp.last_match
        delim = m[2]
        attrs.set_attrs(m.pre_match.length + m[1].length + delim.length,
                        m[3].length,
                        MATCHING_WORD_PAIRS[delim])
        m[1] + NULL * delim.length + m[3] + NULL * delim.length + m[4]
      end
      break unless changed
    end

    # Then the pairs with differing delimiters, one pattern each.
    WORD_PAIR_MAP.each do |regexp, attr|
      str.gsub!(regexp) do
        m = Regexp.last_match
        attrs.set_attrs(m.pre_match.length + m[1].length, m[2].length, attr)
        NULL * m[1].length + m[2] + NULL * m[3].length
      end
    end
  end

  # Find <tag>text</tag> for every registered tag (case-insensitive).
  # The enclosed text gets the tag's bits in +attrs+ and both tags are
  # overwritten with NULLs.
  def convert_html(str, attrs)
    tags = HTML_TAGS.keys.map { |tag| Regexp.escape(tag) }.join("|")
    pattern = Regexp.new("<(#{tags})>(.*?)</\\1>", Regexp::IGNORECASE)

    # Repeat so that tags exposed by an earlier replacement are also
    # processed.
    loop do
      changed = str.gsub!(pattern) do
        m = Regexp.last_match
        attr = HTML_TAGS[m[1].downcase]
        html_length = m[1].length + 2          # "<" + name + ">"
        seq = NULL * html_length
        attrs.set_attrs(m.pre_match.length + html_length, m[2].length, attr)
        # The closing tag is one character longer because of its "/".
        seq + m[2] + seq + NULL
      end
      break unless changed
    end
  end

  # Mark every match of a registered special pattern in +attrs+ with
  # that pattern's bits plus Attribute::SPECIAL. +str+ is not modified.
  def convert_specials(str, attrs)
    SPECIAL.each do |regexp, attr|
      str.scan(regexp) do
        m = Regexp.last_match
        attrs.set_attrs(m.pre_match.length, m[0].length, attr | Attribute::SPECIAL)
      end
    end
  end

  # Replace "\c" (c being any protectable character) with c followed by
  # PROTECT_ATTR, so that the later conversion passes do not treat c as
  # the start of markup.
  def mask_protected_sequences
    protect_pattern = Regexp.new("\\\\([#{Regexp.escape(PROTECTABLE.join(''))}])")
    @str.gsub!(protect_pattern, "\\1#{PROTECT_ATTR}")
  end

  # Turn each PROTECT_ATTR marker left by mask_protected_sequences into
  # a NULL, which is dropped when the text is copied out.
  def unmask_protected_sequences
    @str.gsub!(/(.)#{PROTECT_ATTR}/, "\\1\000")
  end

  # Register the default markup: *bold*, _em_, +tt+, the HTML tags
  # em/i/b/tt/code, and HTML comments as a special sequence.
  def initialize
    add_word_pair("*", "*", :BOLD)
    add_word_pair("_", "_", :EM)
    add_word_pair("+", "+", :TT)

    add_html("em", :EM)
    add_html("i", :EM)
    add_html("b", :BOLD)
    add_html("tt", :TT)
    add_html("code", :TT)

    add_special(/<!--(.*?)-->/, :COMMENT)
  end

  # Register +start+ / +stop+ as delimiters for the attribute +name+.
  # Raises a RuntimeError when +start+ begins with "<".
  def add_word_pair(start, stop, name)
    # start[0, 1] is a one-character String on every Ruby version,
    # whereas start[0] is an Integer on 1.8 and a String on 1.9+.
    raise "Word flags may not start '<'" if start[0, 1] == "<"
    bitmap = Attribute.bitmap_for(name)
    if start == stop
      MATCHING_WORD_PAIRS[start] = bitmap
    else
      pattern = Regexp.new("(" + Regexp.escape(start) + ")" +
                           "(\\S+)" +
                           "(" + Regexp.escape(stop) + ")")
      WORD_PAIR_MAP[pattern] = bitmap
    end
    # The first delimiter character becomes escapable with a backslash.
    PROTECTABLE << start[0, 1]
    PROTECTABLE.uniq!
  end

  # Register the HTML tag +tag+ (stored lower-cased) for attribute +name+.
  def add_html(tag, name)
    HTML_TAGS[tag.downcase] = Attribute.bitmap_for(name)
  end

  # Register +pattern+ as a special sequence for attribute +name+.
  def add_special(pattern, name)
    SPECIAL[pattern] = Attribute.bitmap_for(name)
  end

  # Process +str+ and return the array produced by split_into_flow.
  # +str+ is modified in place, so pass a copy if the original text is
  # still needed.
  def flow(str)
    @str = str

    puts("Before flow, str='#{@str.dump}'") if $DEBUG
    mask_protected_sequences

    @attrs = AttrSpan.new(@str.length)

    puts("After protecting, str='#{@str.dump}'") if $DEBUG
    convert_attrs(@str, @attrs)
    convert_html(@str, @attrs)
    convert_specials(@str, @attrs)
    unmask_protected_sequences
    puts("After flow, str='#{@str.dump}'") if $DEBUG
    split_into_flow
  end

  # Debug aid: print the working string (NULLs shown as "!") followed
  # by one line per attribute bit that is set anywhere, marking the
  # positions where it is set.
  def display_attributes
    puts
    puts @str.tr(NULL, "!")
    bit = 1
    16.times do |bno|
      # Bit 0 is the SPECIAL bit and is shown as "S"; other bits are
      # shown by their 1-based number.
      marker = bno.zero? ? "S" : (bno + 1).to_s
      line = (0...@str.length).map { |i|
        (@attrs[i] & bit) == 0 ? " " : marker
      }.join("")
      puts(line) unless line =~ /^ *$/
      bit <<= 1
    end
  end

  # Walk the processed string and split it wherever the attribute
  # bitmap changes. Returns an array holding plain Strings, an
  # AttrChanger at each change, and a Special for each run carrying
  # the SPECIAL bit. A final AttrChanger switches everything off if
  # any attribute is still on at the end.
  def split_into_flow
    display_attributes if $DEBUG

    res = []
    current_attr = 0
    str_len = @str.length

    # skip leading invisible text
    # (@str[i, 1] is compared as a String so this works whether
    # String#[] with a single index returns an Integer or a String)
    i = 0
    i += 1 while i < str_len && @str[i, 1] == NULL
    start_pos = i

    # then scan the string, chunking it on attribute changes
    while i < str_len
      new_attr = @attrs[i]
      if new_attr != current_attr
        if i > start_pos
          res << copy_string(start_pos, i)
          start_pos = i
        end

        res << change_attribute(current_attr, new_attr)
        current_attr = new_attr

        if (current_attr & Attribute::SPECIAL) != 0
          # consume the whole special run as a single Special object
          i += 1 while i < str_len && (@attrs[i] & Attribute::SPECIAL) != 0
          res << Special.new(current_attr, copy_string(start_pos, i))
          start_pos = i
          next
        end
      end

      # move on, skipping any invisible characters
      i += 1
      i += 1 while i < str_len && @str[i, 1] == NULL
    end

    # tidy up trailing text
    res << copy_string(start_pos, str_len) if start_pos < str_len

    # and reset to all attributes off
    res << change_attribute(current_attr, 0) if current_attr != 0

    res
  end

end
339
340end
Note: See TracBrowser for help on using the repository browser.