1 | #include "WIKI2XML.h"
|
---|
2 |
|
---|
3 | TTableInfo::TTableInfo ()
|
---|
4 | {
|
---|
5 | tr_open = false ;
|
---|
6 | td_open = false ;
|
---|
7 | }
|
---|
8 |
|
---|
9 | string TTableInfo::close ()
|
---|
10 | {
|
---|
11 | string ret ;
|
---|
12 | if ( td_open ) ret += "</wikitablecell>" ;
|
---|
13 | if ( tr_open ) ret += "</wikitablerow>" ;
|
---|
14 | ret += "</wikitable>" ;
|
---|
15 | return ret ;
|
---|
16 | }
|
---|
17 |
|
---|
18 | string TTableInfo::new_row ()
|
---|
19 | {
|
---|
20 | string ret ;
|
---|
21 | if ( td_open ) ret += "</wikitablecell>" ;
|
---|
22 | if ( tr_open ) ret += "</wikitablerow>" ;
|
---|
23 | ret += "<wikitablerow>" ;
|
---|
24 | td_open = false ;
|
---|
25 | tr_open = true ;
|
---|
26 | return ret ;
|
---|
27 | }
|
---|
28 |
|
---|
29 | string TTableInfo::new_cell ( string type )
|
---|
30 | {
|
---|
31 | string ret ;
|
---|
32 | if ( !tr_open ) ret += new_row () ;
|
---|
33 | if ( td_open ) ret += "</wikitablecell>" ;
|
---|
34 | ret += "<wikitablecell type=\"" + upper ( type ) + "\">" ;
|
---|
35 | td_type = type ;
|
---|
36 | td_open = true ;
|
---|
37 | return ret ;
|
---|
38 | }
|
---|
39 |
|
---|
40 | // *****************************************************************************
|
---|
41 | // *****************************************************************************
|
---|
42 | //
|
---|
43 | // WIKI2XML
|
---|
44 | //
|
---|
45 | // *****************************************************************************
|
---|
46 | // *****************************************************************************
|
---|
47 |
|
---|
48 | void WIKI2XML::parse_symmetric ( string &l , int &from ,
|
---|
49 | string s1 , string s2 ,
|
---|
50 | string r1 , string r2 ,
|
---|
51 | bool extend )
|
---|
52 | {
|
---|
53 | int a , b ;
|
---|
54 | if ( !submatch ( l , s1 , from ) ) return ; // Left does not match
|
---|
55 | for ( a = from + s1.length() ; a + s2.length() <= l.length() ; a++ )
|
---|
56 | {
|
---|
57 | if ( !submatch ( l , s2 , a ) ) continue ;
|
---|
58 | for ( b = a+1 ; extend && submatch ( l , s2 , b ) ; b++ ) ;
|
---|
59 | b-- ;
|
---|
60 | l = l.substr ( 0 , from ) +
|
---|
61 | r1 +
|
---|
62 | l.substr ( from + s1.length() , b - from - s1.length() ) +
|
---|
63 | r2 +
|
---|
64 | l.substr ( b + s2.length() , l.length() ) ;
|
---|
65 | if ( debug ) cout << "newl : " << l << endl ;
|
---|
66 | break ;
|
---|
67 | }
|
---|
68 | }
|
---|
69 |
|
---|
70 | void WIKI2XML::parse_link ( string &l , int &from , char mode )
|
---|
71 | {
|
---|
72 | from += 1 ;
|
---|
73 | int a , cnt = 1 ;
|
---|
74 | chart par_open = '[' ; // mode 'L'
|
---|
75 | chart par_close = ']' ; // mode 'L'
|
---|
76 | if ( mode == 'T' ) { par_open = '{' ; par_close = '}' ; }
|
---|
77 | for ( a = from ; cnt > 0 && a+1 < l.length() ; a++ )
|
---|
78 | {
|
---|
79 | if ( l[a] == par_open && l[a+1] == par_open )
|
---|
80 | parse_link ( l , a ) ;
|
---|
81 | else if ( l[a] == par_close && l[a+1] == par_close )
|
---|
82 | cnt-- ;
|
---|
83 | }
|
---|
84 | if ( cnt > 0 ) return ; // Not a valid link
|
---|
85 |
|
---|
86 | int to = a-1 ; // Without "]]"
|
---|
87 | string link = l.substr ( from+1 , to-from-1 ) ;
|
---|
88 |
|
---|
89 | TXML x ;
|
---|
90 | vector <string> parts ;
|
---|
91 | explode ( '|' , link , parts ) ;
|
---|
92 | if ( mode == 'L' )
|
---|
93 | {
|
---|
94 | x.name = "wikilink" ;
|
---|
95 | x.add_key_value ( "type" , "internal" ) ;
|
---|
96 | }
|
---|
97 | else if ( mode == 'T' ) x.name = "wikitemplate" ;
|
---|
98 |
|
---|
99 | for ( a = 0 ; a < parts.size() ; a++ )
|
---|
100 | {
|
---|
101 | bool last = ( a + 1 == parts.size() ) ;
|
---|
102 | string p = parts[a] ;
|
---|
103 | parse_line_sub ( p ) ;
|
---|
104 |
|
---|
105 | if ( a > 0 && ( mode != 'L' || !last ) )
|
---|
106 | {
|
---|
107 | string key , value ;
|
---|
108 | vector <string> subparts ;
|
---|
109 | explode ( '=' , p , subparts ) ;
|
---|
110 | if ( subparts.size() == 1 )
|
---|
111 | {
|
---|
112 | value = xml_embed ( p , "value" ) ;
|
---|
113 | }
|
---|
114 | else
|
---|
115 | {
|
---|
116 | key = xml_embed ( subparts[0] , "key" ) ;
|
---|
117 | subparts.erase ( subparts.begin() ) ;
|
---|
118 | value = xml_embed ( implode ( "=" , subparts ) , "value" ) ;
|
---|
119 | }
|
---|
120 | p = key + value ;
|
---|
121 | }
|
---|
122 | else p = xml_embed ( p , "value" ) ;
|
---|
123 |
|
---|
124 | string param = "number=\"" + val ( a ) + "\"" ;
|
---|
125 | if ( last ) param += " last=\"1\"" ;
|
---|
126 | x.text += xml_embed ( p , "wikiparameter" , param ) ;
|
---|
127 | }
|
---|
128 |
|
---|
129 | if ( mode == 'L' ) // Try link trail
|
---|
130 | {
|
---|
131 | string trail ;
|
---|
132 | for ( a = to+2 ; a < l.length() && is_text_char ( l[a] ) ; a++ )
|
---|
133 | trail += l[a] ;
|
---|
134 | to = a-2 ;
|
---|
135 | if ( trail != "" ) x.text += xml_embed ( trail , "trail" ) ;
|
---|
136 | }
|
---|
137 |
|
---|
138 | x.add_key_value ( "parameters" , val ( parts.size() ) ) ;
|
---|
139 | string replacement = x.get_string () ;
|
---|
140 | parse_line_sub ( replacement ) ;
|
---|
141 |
|
---|
142 | l.erase ( from-1 , to-from+3 ) ;
|
---|
143 | l.insert ( from-1 , replacement ) ;
|
---|
144 | if ( debug ) cout << "Link : " << link << endl << "Replacement : " << replacement << endl ;
|
---|
145 | if ( debug ) cout << "Result : " << l << endl << endl ;
|
---|
146 | from = from + replacement.length() - 2 ;
|
---|
147 | }
|
---|
148 |
|
---|
149 | bool WIKI2XML::is_list_char ( chart c ) // For now...
|
---|
150 | {
|
---|
151 | if ( c == '*' ) return true ;
|
---|
152 | if ( c == '#' ) return true ;
|
---|
153 | if ( c == ':' ) return true ;
|
---|
154 | return false ;
|
---|
155 | }
|
---|
156 |
|
---|
157 | string WIKI2XML::get_list_tag ( chart c , bool open )
|
---|
158 | {
|
---|
159 | string ret ;
|
---|
160 | if ( debug ) cout << "get_list_tag : " << c << endl ;
|
---|
161 | if ( c == '*' ) ret = "ul" ;
|
---|
162 | if ( c == '#' ) ret = "ol" ;
|
---|
163 | if ( c == ':' ) ret = "dl" ;
|
---|
164 | if ( ret != "" )
|
---|
165 | {
|
---|
166 | string itemname = "li" ;
|
---|
167 | if ( c == ':' ) itemname = "dd" ;
|
---|
168 | if ( open ) ret = "<" + ret + "><" + itemname + ">" ;
|
---|
169 | else ret = "</" + itemname + "></" + ret + ">" ;
|
---|
170 | }
|
---|
171 | return ret ;
|
---|
172 | }
|
---|
173 |
|
---|
174 | string WIKI2XML::fix_list ( string &l )
|
---|
175 | {
|
---|
176 | int a , b ;
|
---|
177 | for ( a = 0 ; a < l.length() && is_list_char ( l[a] ) ; a++ ) ;
|
---|
178 | string newlist , pre ;
|
---|
179 | if ( a > 0 )
|
---|
180 | {
|
---|
181 | newlist = left ( l , a ) ;
|
---|
182 | while ( a < l.length() && l[a] == ' ' ) a++ ; // Removing leading blanks
|
---|
183 | l = l.substr ( a , l.length() ) ;
|
---|
184 | }
|
---|
185 | if ( debug ) cout << "fix_list : " << l << endl ;
|
---|
186 | if ( list == "" && newlist == "" ) return "" ;
|
---|
187 | for ( a = 0 ; a < list.length() &&
|
---|
188 | a < newlist.length() &&
|
---|
189 | list[a] == newlist[a] ; a++ ) ; // The common part, if any
|
---|
190 |
|
---|
191 | for ( b = a ; b < list.length() ; b++ )
|
---|
192 | pre = get_list_tag ( list[b] , false ) + pre ; // Close old list tags
|
---|
193 | for ( b = a ; b < newlist.length() ; b++ )
|
---|
194 | pre += get_list_tag ( newlist[b] , true ) ; // Open new ones
|
---|
195 |
|
---|
196 | if ( debug ) cout << "pre : " << pre << endl ;
|
---|
197 | if ( debug ) cout << "newlist : " << newlist << endl ;
|
---|
198 | list = newlist ;
|
---|
199 | return pre ;
|
---|
200 | }
|
---|
201 |
|
---|
202 | void WIKI2XML::parse_line ( string &l )
|
---|
203 | {
|
---|
204 | int a , b ;
|
---|
205 | if ( debug ) cout << l << endl ;
|
---|
206 | string pre ;
|
---|
207 | string oldlist = list ;
|
---|
208 | pre += fix_list ( l ) ;
|
---|
209 | if ( list != "" && list == oldlist )
|
---|
210 | {
|
---|
211 | string itemname = "li" ;
|
---|
212 | if ( right ( list , 1 ) == ":" ) itemname = "dd" ;
|
---|
213 | pre = "</" + itemname + "><" + itemname + ">" + pre ;
|
---|
214 | }
|
---|
215 |
|
---|
216 | if ( l == "" ) // Paragraph
|
---|
217 | {
|
---|
218 | l = "<p/>" ;
|
---|
219 | }
|
---|
220 | else if ( left ( l , 4 ) == "----" ) // <hr>
|
---|
221 | {
|
---|
222 | for ( a = 0 ; a < l.length() && l[a] == l[0] ; a++ ) ;
|
---|
223 | pre += "<wikiurlcounter action=\"reset\"/><hr/>" ;
|
---|
224 | l = l.substr ( a , l.length() - a ) ;
|
---|
225 | }
|
---|
226 | else if ( l != "" && l[0] == '=' ) // Heading
|
---|
227 | {
|
---|
228 | for ( a = 0 ; a < l.length() && l[a] == '=' && l[l.length()-a-1] == '=' ; a++ ) ;
|
---|
229 | string h = "h0" ;
|
---|
230 | if ( a >= l.length() ) h = "" ; // No heading
|
---|
231 | // else if ( l[a] != ' ' ) h = "" ;
|
---|
232 | // else if ( l[l.length()-a-1] != ' ' ) h = "" ;
|
---|
233 | else if ( a < 1 || a > 9 ) h = "" ;
|
---|
234 | if ( h != "" )
|
---|
235 | {
|
---|
236 | l = l.substr ( a , l.length() - a*2 ) ;
|
---|
237 | h[1] += a ;
|
---|
238 | l = xml_embed ( l , h ) ;
|
---|
239 | }
|
---|
240 | }
|
---|
241 | else if ( l != "" && l[0] == ' ' ) // Pre-formatted text
|
---|
242 | {
|
---|
243 | for ( a = 0 ; a < l.length() && l[a] == ' ' ; a++ ) ;
|
---|
244 | l = l.substr ( a , l.length() ) ;
|
---|
245 | if ( l != "" )
|
---|
246 | {
|
---|
247 | pre += "<pre>" + l + "</pre>" ;
|
---|
248 | l = "" ;
|
---|
249 | }
|
---|
250 | }
|
---|
251 | else if ( left ( l , 2 ) == "{|" || left ( l , 2 ) == "|}" ||
|
---|
252 | ( tables.size() > 0 && l != "" && ( l[0] == '|' || l[0] == '!' ) ) )
|
---|
253 | {
|
---|
254 | pre += table_markup ( l ) ;
|
---|
255 | l = "" ;
|
---|
256 | }
|
---|
257 |
|
---|
258 |
|
---|
259 | if ( l != "" ) parse_line_sub ( l ) ;
|
---|
260 |
|
---|
261 | if ( pre != "" ) l = pre + l ;
|
---|
262 | }
|
---|
263 |
|
---|
264 | bool WIKI2XML::is_external_link_protocol ( string protocol )
|
---|
265 | {
|
---|
266 | if ( protocol == "HTTP" ) return true ;
|
---|
267 | if ( protocol == "FTP" ) return true ;
|
---|
268 | if ( protocol == "MAILTO" ) return true ;
|
---|
269 | return false ;
|
---|
270 | }
|
---|
271 |
|
---|
272 | int WIKI2XML::scan_url ( string &l , int from )
|
---|
273 | {
|
---|
274 | int a ;
|
---|
275 | for ( a = from ; a < l.length() ; a++ )
|
---|
276 | {
|
---|
277 | if ( l[a] == ':' || l[a] == '/' || l[a] == '.' ) continue ;
|
---|
278 | if ( l[a] >= '0' && l[a] <= '9' ) continue ;
|
---|
279 | if ( is_text_char ( l[a] ) ) continue ;
|
---|
280 | break ; // End of URL
|
---|
281 | }
|
---|
282 | return a ;
|
---|
283 | }
|
---|
284 |
|
---|
285 | void WIKI2XML::parse_external_freelink ( string &l , int &from )
|
---|
286 | {
|
---|
287 | int a ;
|
---|
288 | for ( a = from - 1 ; a >= 0 && is_text_char ( l[a] ) ; a-- ) ;
|
---|
289 | if ( a == -1 ) return ;
|
---|
290 | a++ ;
|
---|
291 | string protocol = upper ( l.substr ( a , from - a ) ) ;
|
---|
292 | if ( debug ) cout << "protocol : " << protocol << endl ;
|
---|
293 | if ( !is_external_link_protocol ( protocol ) ) return ;
|
---|
294 | int to = scan_url ( l , a ) ;
|
---|
295 | string url = l.substr ( a , to - a ) ;
|
---|
296 | string replacement ;
|
---|
297 | replacement += xml_embed ( url , "url" ) ;
|
---|
298 | replacement += xml_embed ( url , "title" ) ;
|
---|
299 | l = left ( l , a ) + replacement + l.substr ( to , l.length() - to ) ;
|
---|
300 | from = a + replacement.length() - 1 ;
|
---|
301 | }
|
---|
302 |
|
---|
303 | void WIKI2XML::parse_external_link ( string &l , int &from )
|
---|
304 | {
|
---|
305 | string protocol = upper ( before_first ( ':' , l.substr ( from + 1 , l.length() - from ) ) ) ;
|
---|
306 | if ( !is_external_link_protocol ( protocol ) ) return ;
|
---|
307 | int to ;
|
---|
308 | for ( to = from + 1 ; to < l.length() && l[to] != ']' ; to++ ) ;
|
---|
309 | if ( to == l.length() ) return ;
|
---|
310 | string url = l.substr ( from + 1 , to - from - 1 ) ;
|
---|
311 | string title = after_first ( ' ' , url ) ;
|
---|
312 | url = before_first ( ' ' , url ) ;
|
---|
313 | string replacement ;
|
---|
314 | replacement += xml_embed ( url , "url" ) ;
|
---|
315 | if ( title == "" )
|
---|
316 | replacement += xml_embed ( "<wikiurlcounter action=\"add\"/>" , "title" ) ;
|
---|
317 | else replacement += xml_embed ( title , "title" ) ;
|
---|
318 | replacement = xml_embed ( replacement , "wikilink" , "type='external' protocol='" + protocol + "'" ) ;
|
---|
319 | l = left ( l , from ) + replacement + l.substr ( to + 1 , l.length() - to ) ;
|
---|
320 | from = from + replacement.length() - 1 ;
|
---|
321 | }
|
---|
322 |
|
---|
323 | void WIKI2XML::parse_line_sub ( string &l )
|
---|
324 | {
|
---|
325 | int a ;
|
---|
326 | for ( a = 0 ; a < l.length() ; a++ )
|
---|
327 | {
|
---|
328 | if ( l[a] == '[' && a+1 < l.length() && l[a+1] == '[' ) // [[Link]]
|
---|
329 | parse_link ( l , a , 'L' ) ;
|
---|
330 | else if ( l[a] == '{' && a+1 < l.length() && l[a+1] == '{' ) // {{Template}}
|
---|
331 | parse_link ( l , a , 'T' ) ;
|
---|
332 | else if ( l[a] == '[' ) // External link
|
---|
333 | parse_external_link ( l , a ) ;
|
---|
334 | else if ( a+2 < l.length() && l[a] == ':' && l[a+1] == '/' && l[a+2] == '/' ) // External freelink
|
---|
335 | parse_external_freelink ( l , a ) ;
|
---|
336 | else if ( l[a] == SINGLE_QUOTE ) // Bold and italics
|
---|
337 | {
|
---|
338 | parse_symmetric ( l , a , "'''" , "'''" , "<b>" , "</b>" , true ) ;
|
---|
339 | parse_symmetric ( l , a , "''" , "''" , "<i>" , "</i>" ) ;
|
---|
340 | }
|
---|
341 | }
|
---|
342 | }
|
---|
343 |
|
---|
344 | void WIKI2XML::parse_lines ( vector <string> &lines )
|
---|
345 | {
|
---|
346 | int a ;
|
---|
347 | for ( a = 0 ; a < lines.size() ; a++ )
|
---|
348 | {
|
---|
349 | parse_line ( lines[a] ) ;
|
---|
350 | }
|
---|
351 |
|
---|
352 | string end ;
|
---|
353 |
|
---|
354 | // Cleanup lists
|
---|
355 | end = fix_list ( end ) ;
|
---|
356 | if ( end != "" ) lines.push_back ( end ) ;
|
---|
357 |
|
---|
358 | // Cleanup tables
|
---|
359 | end = "" ;
|
---|
360 | while ( tables.size() )
|
---|
361 | {
|
---|
362 | end += tables[tables.size()-1].close () ;
|
---|
363 | tables.pop_back () ;
|
---|
364 | }
|
---|
365 | if ( end != "" ) lines.push_back ( end ) ;
|
---|
366 | }
|
---|
367 |
|
---|
368 | void WIKI2XML::init ( string s )
|
---|
369 | {
|
---|
370 | list = "" ;
|
---|
371 | lines.clear () ;
|
---|
372 |
|
---|
373 | // Now we remove evil HTML
|
---|
374 | allowed_html.clear () ;
|
---|
375 | allowed_html.push_back ( "b" ) ;
|
---|
376 | allowed_html.push_back ( "i" ) ;
|
---|
377 | allowed_html.push_back ( "p" ) ;
|
---|
378 | allowed_html.push_back ( "b" ) ;
|
---|
379 | allowed_html.push_back ( "br" ) ;
|
---|
380 | allowed_html.push_back ( "hr" ) ;
|
---|
381 | allowed_html.push_back ( "tt" ) ;
|
---|
382 | allowed_html.push_back ( "pre" ) ;
|
---|
383 | allowed_html.push_back ( "nowiki" ) ;
|
---|
384 | allowed_html.push_back ( "math" ) ;
|
---|
385 | allowed_html.push_back ( "strike" ) ;
|
---|
386 | allowed_html.push_back ( "u" ) ;
|
---|
387 | allowed_html.push_back ( "table" ) ;
|
---|
388 | allowed_html.push_back ( "caption" ) ;
|
---|
389 | allowed_html.push_back ( "tr" ) ;
|
---|
390 | allowed_html.push_back ( "td" ) ;
|
---|
391 | allowed_html.push_back ( "th" ) ;
|
---|
392 | allowed_html.push_back ( "li" ) ;
|
---|
393 | allowed_html.push_back ( "ul" ) ;
|
---|
394 | allowed_html.push_back ( "ol" ) ;
|
---|
395 | allowed_html.push_back ( "dl" ) ;
|
---|
396 | allowed_html.push_back ( "dd" ) ;
|
---|
397 | allowed_html.push_back ( "dt" ) ;
|
---|
398 | allowed_html.push_back ( "div" ) ;
|
---|
399 | allowed_html.push_back ( "h1" ) ;
|
---|
400 | allowed_html.push_back ( "h2" ) ;
|
---|
401 | allowed_html.push_back ( "h3" ) ;
|
---|
402 | allowed_html.push_back ( "h4" ) ;
|
---|
403 | allowed_html.push_back ( "h5" ) ;
|
---|
404 | allowed_html.push_back ( "h6" ) ;
|
---|
405 | allowed_html.push_back ( "h7" ) ;
|
---|
406 | allowed_html.push_back ( "h8" ) ;
|
---|
407 | allowed_html.push_back ( "h9" ) ;
|
---|
408 | allowed_html.push_back ( "small" ) ;
|
---|
409 | allowed_html.push_back ( "center" ) ;
|
---|
410 | // allowed_html.push_back ( "" ) ;
|
---|
411 | int a ;
|
---|
412 | for ( a = 0 ; a < allowed_html.size() ; a++ )
|
---|
413 | allowed_html[a] = upper ( allowed_html[a] ) ;
|
---|
414 |
|
---|
415 | vector <TXML> taglist ;
|
---|
416 | make_tag_list ( s , taglist ) ;
|
---|
417 | remove_evil_html ( s , taglist ) ;
|
---|
418 |
|
---|
419 | // Now evaluate each line
|
---|
420 | explode ( '\n' , s , lines ) ;
|
---|
421 | }
|
---|
422 |
|
---|
423 | string WIKI2XML::get_xml ()
|
---|
424 | {
|
---|
425 | string xmlheader = "<?xml version='1.0' encoding='UTF-8'?>" ;
|
---|
426 | string ret = xmlheader + "<text>" + implode ( " " , lines ) + "</text>" ;
|
---|
427 |
|
---|
428 | // Invalidating mdash
|
---|
429 | int a = ret.find ( "—" ) ;
|
---|
430 | while ( a >= 0 && a < ret.length() )
|
---|
431 | {
|
---|
432 | ret[a] = '!' ;
|
---|
433 | a = ret.find ( "—" , a ) ;
|
---|
434 | }
|
---|
435 |
|
---|
436 | return ret ;
|
---|
437 | }
|
---|
438 |
|
---|
439 | void WIKI2XML::replace_part ( string &s , int from , int to , string with )
|
---|
440 | {
|
---|
441 | s = s.substr ( 0 , from ) + with + s.substr ( to + 1 , s.length() - to - 1 ) ;
|
---|
442 | }
|
---|
443 |
|
---|
444 | void WIKI2XML::replace_part_sync ( string &s , int from , int to , string with , vector <TXML> &list )
|
---|
445 | {
|
---|
446 | int a , b ;
|
---|
447 | replace_part ( s , from , to , with ) ;
|
---|
448 | for ( a = 0 ; a < list.size() ; a++ )
|
---|
449 | {
|
---|
450 | for ( b = 0 ; b < with.length() ; b++ ) list[a].insert_at ( from ) ;
|
---|
451 | for ( b = from ; b <= to ; b++ ) list[a].remove_at ( from ) ;
|
---|
452 | }
|
---|
453 | }
|
---|
454 |
|
---|
455 | // ATTENTION : this doesn't handle all HTML comments correctly!
|
---|
456 | void WIKI2XML::make_tag_list ( string &s , vector <TXML> &list )
|
---|
457 | {
|
---|
458 | list.clear () ;
|
---|
459 | int a , b ;
|
---|
460 | for ( a = 0 ; a < s.length() ; a++ )
|
---|
461 | {
|
---|
462 | if ( s[a] == '>' ) // Rouge >
|
---|
463 | {
|
---|
464 | s[a] = ';' ;
|
---|
465 | s.insert ( a , ">" ) ;
|
---|
466 | continue ;
|
---|
467 | }
|
---|
468 | else if ( s[a] != '<' ) continue ;
|
---|
469 | b = find_next_unquoted ( '>' , s , a ) ;
|
---|
470 | if ( b == -1 ) // Rouge <
|
---|
471 | {
|
---|
472 | s[a] = ';' ;
|
---|
473 | s.insert ( a , "<" ) ;
|
---|
474 | continue ;
|
---|
475 | }
|
---|
476 | list.push_back ( TXML ( a , b , s ) ) ;
|
---|
477 | a = list[list.size()-1].to ;
|
---|
478 | }
|
---|
479 | }
|
---|
480 |
|
---|
481 | void WIKI2XML::remove_evil_html ( string &s , vector <TXML> &taglist )
|
---|
482 | {
|
---|
483 | int a , b ;
|
---|
484 | for ( a = 0 ; a < taglist.size() ; a++ )
|
---|
485 | {
|
---|
486 | string tag = upper ( taglist[a].name ) ;
|
---|
487 | for ( b = 0 ; b < allowed_html.size() && tag != allowed_html[b] ; b++ ) ;
|
---|
488 | if ( b < allowed_html.size() ) continue ;
|
---|
489 | replace_part_sync ( s , taglist[a].from , taglist[a].from , "<" , taglist ) ;
|
---|
490 | replace_part_sync ( s , taglist[a].to , taglist[a].to , ">" , taglist ) ;
|
---|
491 | }
|
---|
492 | }
|
---|
493 |
|
---|
494 | string WIKI2XML::table_markup ( string &l )
|
---|
495 | {
|
---|
496 | int a ;
|
---|
497 | string ret ;
|
---|
498 | if ( left ( l , 2 ) == "{|" ) // Open table
|
---|
499 | {
|
---|
500 | ret = "<wikitable>" ;
|
---|
501 | ret += xml_embed ( l.substr ( 2 , l.length() - 2 ) , "wikiparameter" ) ;
|
---|
502 | tables.push_back ( TTableInfo () ) ;
|
---|
503 | }
|
---|
504 | else if ( left ( l , 2 ) == "|}" )
|
---|
505 | {
|
---|
506 | ret = tables[tables.size()-1].close () ;
|
---|
507 | tables.pop_back () ;
|
---|
508 | }
|
---|
509 | else if ( left ( l , 2 ) == "|-" )
|
---|
510 | {
|
---|
511 | ret = tables[tables.size()-1].new_row () ;
|
---|
512 | for ( a = 1 ; a < l.length() && l[a] == '-' ; a++ ) ;
|
---|
513 | ret += xml_params ( l.substr ( a , l.length() - a ) ) ;
|
---|
514 | }
|
---|
515 | else
|
---|
516 | {
|
---|
517 | string init ;
|
---|
518 | if ( left ( l , 2 ) == "|+" )
|
---|
519 | {
|
---|
520 | init = "caption" ;
|
---|
521 | l = l.substr ( 2 , l.length() - 2 ) ;
|
---|
522 | }
|
---|
523 | else if ( l[0] == '!' )
|
---|
524 | {
|
---|
525 | init = "header" ;
|
---|
526 | l = l.substr ( 1 , l.length() - 1 ) ;
|
---|
527 | }
|
---|
528 | else if ( l[0] == '|' )
|
---|
529 | {
|
---|
530 | init = "cell" ;
|
---|
531 | l = l.substr ( 1 , l.length() - 1 ) ;
|
---|
532 | }
|
---|
533 | vector <string> sublines ;
|
---|
534 | for ( a = 0 ; a + 1 < l.length() ; a++ )
|
---|
535 | {
|
---|
536 | if ( l[a] == '|' && l[a+1] == '|' )
|
---|
537 | {
|
---|
538 | sublines.push_back ( left ( l , a ) ) ;
|
---|
539 | l = l.substr ( a + 2 , l.length() - a ) ;
|
---|
540 | a = -1 ;
|
---|
541 | }
|
---|
542 | }
|
---|
543 | if ( l != "" ) sublines.push_back ( l ) ;
|
---|
544 | for ( a = 0 ; a < sublines.size() ; a++ )
|
---|
545 | {
|
---|
546 | l = sublines[a] ;
|
---|
547 | parse_line_sub ( l ) ;
|
---|
548 | string params ;
|
---|
549 | int b = find_next_unquoted ( '|' , l ) ;
|
---|
550 | if ( b != -1 )
|
---|
551 | {
|
---|
552 | params = left ( l , b ) ;
|
---|
553 | l = l.substr ( b + 1 , l.length() - b ) ;
|
---|
554 | }
|
---|
555 | if ( params != "" ) l = xml_params ( params ) + l ;
|
---|
556 | ret += tables[tables.size()-1].new_cell ( init ) ;
|
---|
557 | ret += l ;
|
---|
558 | }
|
---|
559 | }
|
---|
560 | return ret ;
|
---|
561 | }
|
---|