[25141] | 1 | #include "WIKI2XML.h"
|
---|
| 2 |
|
---|
| 3 | TTableInfo::TTableInfo ()
|
---|
| 4 | {
|
---|
| 5 | tr_open = false ;
|
---|
| 6 | td_open = false ;
|
---|
| 7 | }
|
---|
| 8 |
|
---|
| 9 | string TTableInfo::close ()
|
---|
| 10 | {
|
---|
| 11 | string ret ;
|
---|
| 12 | if ( td_open ) ret += "</wikitablecell>" ;
|
---|
| 13 | if ( tr_open ) ret += "</wikitablerow>" ;
|
---|
| 14 | ret += "</wikitable>" ;
|
---|
| 15 | return ret ;
|
---|
| 16 | }
|
---|
| 17 |
|
---|
| 18 | string TTableInfo::new_row ()
|
---|
| 19 | {
|
---|
| 20 | string ret ;
|
---|
| 21 | if ( td_open ) ret += "</wikitablecell>" ;
|
---|
| 22 | if ( tr_open ) ret += "</wikitablerow>" ;
|
---|
| 23 | ret += "<wikitablerow>" ;
|
---|
| 24 | td_open = false ;
|
---|
| 25 | tr_open = true ;
|
---|
| 26 | return ret ;
|
---|
| 27 | }
|
---|
| 28 |
|
---|
| 29 | string TTableInfo::new_cell ( string type )
|
---|
| 30 | {
|
---|
| 31 | string ret ;
|
---|
| 32 | if ( !tr_open ) ret += new_row () ;
|
---|
| 33 | if ( td_open ) ret += "</wikitablecell>" ;
|
---|
| 34 | ret += "<wikitablecell type=\"" + upper ( type ) + "\">" ;
|
---|
| 35 | td_type = type ;
|
---|
| 36 | td_open = true ;
|
---|
| 37 | return ret ;
|
---|
| 38 | }
|
---|
| 39 |
|
---|
| 40 | // *****************************************************************************
|
---|
| 41 | // *****************************************************************************
|
---|
| 42 | //
|
---|
| 43 | // WIKI2XML
|
---|
| 44 | //
|
---|
| 45 | // *****************************************************************************
|
---|
| 46 | // *****************************************************************************
|
---|
| 47 |
|
---|
| 48 | void WIKI2XML::parse_symmetric ( string &l , int &from ,
|
---|
| 49 | string s1 , string s2 ,
|
---|
| 50 | string r1 , string r2 ,
|
---|
| 51 | bool extend )
|
---|
| 52 | {
|
---|
| 53 | int a , b ;
|
---|
| 54 | if ( !submatch ( l , s1 , from ) ) return ; // Left does not match
|
---|
| 55 | for ( a = from + s1.length() ; a + s2.length() <= l.length() ; a++ )
|
---|
| 56 | {
|
---|
| 57 | if ( !submatch ( l , s2 , a ) ) continue ;
|
---|
| 58 | for ( b = a+1 ; extend && submatch ( l , s2 , b ) ; b++ ) ;
|
---|
| 59 | b-- ;
|
---|
| 60 | l = l.substr ( 0 , from ) +
|
---|
| 61 | r1 +
|
---|
| 62 | l.substr ( from + s1.length() , b - from - s1.length() ) +
|
---|
| 63 | r2 +
|
---|
| 64 | l.substr ( b + s2.length() , l.length() ) ;
|
---|
| 65 | if ( debug ) cout << "newl : " << l << endl ;
|
---|
| 66 | break ;
|
---|
| 67 | }
|
---|
| 68 | }
|
---|
| 69 |
|
---|
| 70 | void WIKI2XML::parse_link ( string &l , int &from , char mode )
|
---|
| 71 | {
|
---|
| 72 | from += 1 ;
|
---|
| 73 | int a , cnt = 1 ;
|
---|
| 74 | chart par_open = '[' ; // mode 'L'
|
---|
| 75 | chart par_close = ']' ; // mode 'L'
|
---|
| 76 | if ( mode == 'T' ) { par_open = '{' ; par_close = '}' ; }
|
---|
| 77 | for ( a = from ; cnt > 0 && a+1 < l.length() ; a++ )
|
---|
| 78 | {
|
---|
| 79 | if ( l[a] == par_open && l[a+1] == par_open )
|
---|
| 80 | parse_link ( l , a ) ;
|
---|
| 81 | else if ( l[a] == par_close && l[a+1] == par_close )
|
---|
| 82 | cnt-- ;
|
---|
| 83 | }
|
---|
| 84 | if ( cnt > 0 ) return ; // Not a valid link
|
---|
| 85 |
|
---|
| 86 | int to = a-1 ; // Without "]]"
|
---|
| 87 | string link = l.substr ( from+1 , to-from-1 ) ;
|
---|
| 88 |
|
---|
| 89 | TXML x ;
|
---|
| 90 | vector <string> parts ;
|
---|
| 91 | explode ( '|' , link , parts ) ;
|
---|
| 92 | if ( mode == 'L' )
|
---|
| 93 | {
|
---|
| 94 | x.name = "wikilink" ;
|
---|
| 95 | x.add_key_value ( "type" , "internal" ) ;
|
---|
| 96 | }
|
---|
| 97 | else if ( mode == 'T' ) x.name = "wikitemplate" ;
|
---|
| 98 |
|
---|
| 99 | for ( a = 0 ; a < parts.size() ; a++ )
|
---|
| 100 | {
|
---|
| 101 | bool last = ( a + 1 == parts.size() ) ;
|
---|
| 102 | string p = parts[a] ;
|
---|
| 103 | parse_line_sub ( p ) ;
|
---|
| 104 |
|
---|
| 105 | if ( a > 0 && ( mode != 'L' || !last ) )
|
---|
| 106 | {
|
---|
| 107 | string key , value ;
|
---|
| 108 | vector <string> subparts ;
|
---|
| 109 | explode ( '=' , p , subparts ) ;
|
---|
| 110 | if ( subparts.size() == 1 )
|
---|
| 111 | {
|
---|
| 112 | value = xml_embed ( p , "value" ) ;
|
---|
| 113 | }
|
---|
| 114 | else
|
---|
| 115 | {
|
---|
| 116 | key = xml_embed ( subparts[0] , "key" ) ;
|
---|
| 117 | subparts.erase ( subparts.begin() ) ;
|
---|
| 118 | value = xml_embed ( implode ( "=" , subparts ) , "value" ) ;
|
---|
| 119 | }
|
---|
| 120 | p = key + value ;
|
---|
| 121 | }
|
---|
| 122 | else p = xml_embed ( p , "value" ) ;
|
---|
| 123 |
|
---|
| 124 | string param = "number=\"" + val ( a ) + "\"" ;
|
---|
| 125 | if ( last ) param += " last=\"1\"" ;
|
---|
| 126 | x.text += xml_embed ( p , "wikiparameter" , param ) ;
|
---|
| 127 | }
|
---|
| 128 |
|
---|
| 129 | if ( mode == 'L' ) // Try link trail
|
---|
| 130 | {
|
---|
| 131 | string trail ;
|
---|
| 132 | for ( a = to+2 ; a < l.length() && is_text_char ( l[a] ) ; a++ )
|
---|
| 133 | trail += l[a] ;
|
---|
| 134 | to = a-2 ;
|
---|
| 135 | if ( trail != "" ) x.text += xml_embed ( trail , "trail" ) ;
|
---|
| 136 | }
|
---|
| 137 |
|
---|
| 138 | x.add_key_value ( "parameters" , val ( parts.size() ) ) ;
|
---|
| 139 | string replacement = x.get_string () ;
|
---|
| 140 | parse_line_sub ( replacement ) ;
|
---|
| 141 |
|
---|
| 142 | l.erase ( from-1 , to-from+3 ) ;
|
---|
| 143 | l.insert ( from-1 , replacement ) ;
|
---|
| 144 | if ( debug ) cout << "Link : " << link << endl << "Replacement : " << replacement << endl ;
|
---|
| 145 | if ( debug ) cout << "Result : " << l << endl << endl ;
|
---|
| 146 | from = from + replacement.length() - 2 ;
|
---|
| 147 | }
|
---|
| 148 |
|
---|
| 149 | bool WIKI2XML::is_list_char ( chart c ) // For now...
|
---|
| 150 | {
|
---|
| 151 | if ( c == '*' ) return true ;
|
---|
| 152 | if ( c == '#' ) return true ;
|
---|
| 153 | if ( c == ':' ) return true ;
|
---|
| 154 | return false ;
|
---|
| 155 | }
|
---|
| 156 |
|
---|
| 157 | string WIKI2XML::get_list_tag ( chart c , bool open )
|
---|
| 158 | {
|
---|
| 159 | string ret ;
|
---|
| 160 | if ( debug ) cout << "get_list_tag : " << c << endl ;
|
---|
| 161 | if ( c == '*' ) ret = "ul" ;
|
---|
| 162 | if ( c == '#' ) ret = "ol" ;
|
---|
| 163 | if ( c == ':' ) ret = "dl" ;
|
---|
| 164 | if ( ret != "" )
|
---|
| 165 | {
|
---|
| 166 | string itemname = "li" ;
|
---|
| 167 | if ( c == ':' ) itemname = "dd" ;
|
---|
| 168 | if ( open ) ret = "<" + ret + "><" + itemname + ">" ;
|
---|
| 169 | else ret = "</" + itemname + "></" + ret + ">" ;
|
---|
| 170 | }
|
---|
| 171 | return ret ;
|
---|
| 172 | }
|
---|
| 173 |
|
---|
| 174 | string WIKI2XML::fix_list ( string &l )
|
---|
| 175 | {
|
---|
| 176 | int a , b ;
|
---|
| 177 | for ( a = 0 ; a < l.length() && is_list_char ( l[a] ) ; a++ ) ;
|
---|
| 178 | string newlist , pre ;
|
---|
| 179 | if ( a > 0 )
|
---|
| 180 | {
|
---|
| 181 | newlist = left ( l , a ) ;
|
---|
| 182 | while ( a < l.length() && l[a] == ' ' ) a++ ; // Removing leading blanks
|
---|
| 183 | l = l.substr ( a , l.length() ) ;
|
---|
| 184 | }
|
---|
| 185 | if ( debug ) cout << "fix_list : " << l << endl ;
|
---|
| 186 | if ( list == "" && newlist == "" ) return "" ;
|
---|
| 187 | for ( a = 0 ; a < list.length() &&
|
---|
| 188 | a < newlist.length() &&
|
---|
| 189 | list[a] == newlist[a] ; a++ ) ; // The common part, if any
|
---|
| 190 |
|
---|
| 191 | for ( b = a ; b < list.length() ; b++ )
|
---|
| 192 | pre = get_list_tag ( list[b] , false ) + pre ; // Close old list tags
|
---|
| 193 | for ( b = a ; b < newlist.length() ; b++ )
|
---|
| 194 | pre += get_list_tag ( newlist[b] , true ) ; // Open new ones
|
---|
| 195 |
|
---|
| 196 | if ( debug ) cout << "pre : " << pre << endl ;
|
---|
| 197 | if ( debug ) cout << "newlist : " << newlist << endl ;
|
---|
| 198 | list = newlist ;
|
---|
| 199 | return pre ;
|
---|
| 200 | }
|
---|
| 201 |
|
---|
| 202 | void WIKI2XML::parse_line ( string &l )
|
---|
| 203 | {
|
---|
| 204 | int a , b ;
|
---|
| 205 | if ( debug ) cout << l << endl ;
|
---|
| 206 | string pre ;
|
---|
| 207 | string oldlist = list ;
|
---|
| 208 | pre += fix_list ( l ) ;
|
---|
| 209 | if ( list != "" && list == oldlist )
|
---|
| 210 | {
|
---|
| 211 | string itemname = "li" ;
|
---|
| 212 | if ( right ( list , 1 ) == ":" ) itemname = "dd" ;
|
---|
| 213 | pre = "</" + itemname + "><" + itemname + ">" + pre ;
|
---|
| 214 | }
|
---|
| 215 |
|
---|
| 216 | if ( l == "" ) // Paragraph
|
---|
| 217 | {
|
---|
| 218 | l = "<p/>" ;
|
---|
| 219 | }
|
---|
| 220 | else if ( left ( l , 4 ) == "----" ) // <hr>
|
---|
| 221 | {
|
---|
| 222 | for ( a = 0 ; a < l.length() && l[a] == l[0] ; a++ ) ;
|
---|
| 223 | pre += "<wikiurlcounter action=\"reset\"/><hr/>" ;
|
---|
| 224 | l = l.substr ( a , l.length() - a ) ;
|
---|
| 225 | }
|
---|
| 226 | else if ( l != "" && l[0] == '=' ) // Heading
|
---|
| 227 | {
|
---|
| 228 | for ( a = 0 ; a < l.length() && l[a] == '=' && l[l.length()-a-1] == '=' ; a++ ) ;
|
---|
| 229 | string h = "h0" ;
|
---|
| 230 | if ( a >= l.length() ) h = "" ; // No heading
|
---|
| 231 | // else if ( l[a] != ' ' ) h = "" ;
|
---|
| 232 | // else if ( l[l.length()-a-1] != ' ' ) h = "" ;
|
---|
| 233 | else if ( a < 1 || a > 9 ) h = "" ;
|
---|
| 234 | if ( h != "" )
|
---|
| 235 | {
|
---|
| 236 | l = l.substr ( a , l.length() - a*2 ) ;
|
---|
| 237 | h[1] += a ;
|
---|
| 238 | l = xml_embed ( l , h ) ;
|
---|
| 239 | }
|
---|
| 240 | }
|
---|
| 241 | else if ( l != "" && l[0] == ' ' ) // Pre-formatted text
|
---|
| 242 | {
|
---|
| 243 | for ( a = 0 ; a < l.length() && l[a] == ' ' ; a++ ) ;
|
---|
| 244 | l = l.substr ( a , l.length() ) ;
|
---|
| 245 | if ( l != "" )
|
---|
| 246 | {
|
---|
| 247 | pre += "<pre>" + l + "</pre>" ;
|
---|
| 248 | l = "" ;
|
---|
| 249 | }
|
---|
| 250 | }
|
---|
| 251 | else if ( left ( l , 2 ) == "{|" || left ( l , 2 ) == "|}" ||
|
---|
| 252 | ( tables.size() > 0 && l != "" && ( l[0] == '|' || l[0] == '!' ) ) )
|
---|
| 253 | {
|
---|
| 254 | pre += table_markup ( l ) ;
|
---|
| 255 | l = "" ;
|
---|
| 256 | }
|
---|
| 257 |
|
---|
| 258 |
|
---|
| 259 | if ( l != "" ) parse_line_sub ( l ) ;
|
---|
| 260 |
|
---|
| 261 | if ( pre != "" ) l = pre + l ;
|
---|
| 262 | }
|
---|
| 263 |
|
---|
| 264 | bool WIKI2XML::is_external_link_protocol ( string protocol )
|
---|
| 265 | {
|
---|
| 266 | if ( protocol == "HTTP" ) return true ;
|
---|
| 267 | if ( protocol == "FTP" ) return true ;
|
---|
| 268 | if ( protocol == "MAILTO" ) return true ;
|
---|
| 269 | return false ;
|
---|
| 270 | }
|
---|
| 271 |
|
---|
| 272 | int WIKI2XML::scan_url ( string &l , int from )
|
---|
| 273 | {
|
---|
| 274 | int a ;
|
---|
| 275 | for ( a = from ; a < l.length() ; a++ )
|
---|
| 276 | {
|
---|
| 277 | if ( l[a] == ':' || l[a] == '/' || l[a] == '.' ) continue ;
|
---|
| 278 | if ( l[a] >= '0' && l[a] <= '9' ) continue ;
|
---|
| 279 | if ( is_text_char ( l[a] ) ) continue ;
|
---|
| 280 | break ; // End of URL
|
---|
| 281 | }
|
---|
| 282 | return a ;
|
---|
| 283 | }
|
---|
| 284 |
|
---|
| 285 | void WIKI2XML::parse_external_freelink ( string &l , int &from )
|
---|
| 286 | {
|
---|
| 287 | int a ;
|
---|
| 288 | for ( a = from - 1 ; a >= 0 && is_text_char ( l[a] ) ; a-- ) ;
|
---|
| 289 | if ( a == -1 ) return ;
|
---|
| 290 | a++ ;
|
---|
| 291 | string protocol = upper ( l.substr ( a , from - a ) ) ;
|
---|
| 292 | if ( debug ) cout << "protocol : " << protocol << endl ;
|
---|
| 293 | if ( !is_external_link_protocol ( protocol ) ) return ;
|
---|
| 294 | int to = scan_url ( l , a ) ;
|
---|
| 295 | string url = l.substr ( a , to - a ) ;
|
---|
| 296 | string replacement ;
|
---|
| 297 | replacement += xml_embed ( url , "url" ) ;
|
---|
| 298 | replacement += xml_embed ( url , "title" ) ;
|
---|
| 299 | l = left ( l , a ) + replacement + l.substr ( to , l.length() - to ) ;
|
---|
| 300 | from = a + replacement.length() - 1 ;
|
---|
| 301 | }
|
---|
| 302 |
|
---|
| 303 | void WIKI2XML::parse_external_link ( string &l , int &from )
|
---|
| 304 | {
|
---|
| 305 | string protocol = upper ( before_first ( ':' , l.substr ( from + 1 , l.length() - from ) ) ) ;
|
---|
| 306 | if ( !is_external_link_protocol ( protocol ) ) return ;
|
---|
| 307 | int to ;
|
---|
| 308 | for ( to = from + 1 ; to < l.length() && l[to] != ']' ; to++ ) ;
|
---|
| 309 | if ( to == l.length() ) return ;
|
---|
| 310 | string url = l.substr ( from + 1 , to - from - 1 ) ;
|
---|
| 311 | string title = after_first ( ' ' , url ) ;
|
---|
| 312 | url = before_first ( ' ' , url ) ;
|
---|
| 313 | string replacement ;
|
---|
| 314 | replacement += xml_embed ( url , "url" ) ;
|
---|
| 315 | if ( title == "" )
|
---|
| 316 | replacement += xml_embed ( "<wikiurlcounter action=\"add\"/>" , "title" ) ;
|
---|
| 317 | else replacement += xml_embed ( title , "title" ) ;
|
---|
| 318 | replacement = xml_embed ( replacement , "wikilink" , "type='external' protocol='" + protocol + "'" ) ;
|
---|
| 319 | l = left ( l , from ) + replacement + l.substr ( to + 1 , l.length() - to ) ;
|
---|
| 320 | from = from + replacement.length() - 1 ;
|
---|
| 321 | }
|
---|
| 322 |
|
---|
| 323 | void WIKI2XML::parse_line_sub ( string &l )
|
---|
| 324 | {
|
---|
| 325 | int a ;
|
---|
| 326 | for ( a = 0 ; a < l.length() ; a++ )
|
---|
| 327 | {
|
---|
| 328 | if ( l[a] == '[' && a+1 < l.length() && l[a+1] == '[' ) // [[Link]]
|
---|
| 329 | parse_link ( l , a , 'L' ) ;
|
---|
| 330 | else if ( l[a] == '{' && a+1 < l.length() && l[a+1] == '{' ) // {{Template}}
|
---|
| 331 | parse_link ( l , a , 'T' ) ;
|
---|
| 332 | else if ( l[a] == '[' ) // External link
|
---|
| 333 | parse_external_link ( l , a ) ;
|
---|
| 334 | else if ( a+2 < l.length() && l[a] == ':' && l[a+1] == '/' && l[a+2] == '/' ) // External freelink
|
---|
| 335 | parse_external_freelink ( l , a ) ;
|
---|
| 336 | else if ( l[a] == SINGLE_QUOTE ) // Bold and italics
|
---|
| 337 | {
|
---|
| 338 | parse_symmetric ( l , a , "'''" , "'''" , "<b>" , "</b>" , true ) ;
|
---|
| 339 | parse_symmetric ( l , a , "''" , "''" , "<i>" , "</i>" ) ;
|
---|
| 340 | }
|
---|
| 341 | }
|
---|
| 342 | }
|
---|
| 343 |
|
---|
| 344 | void WIKI2XML::parse_lines ( vector <string> &lines )
|
---|
| 345 | {
|
---|
| 346 | int a ;
|
---|
| 347 | for ( a = 0 ; a < lines.size() ; a++ )
|
---|
| 348 | {
|
---|
| 349 | parse_line ( lines[a] ) ;
|
---|
| 350 | }
|
---|
| 351 |
|
---|
| 352 | string end ;
|
---|
| 353 |
|
---|
| 354 | // Cleanup lists
|
---|
| 355 | end = fix_list ( end ) ;
|
---|
| 356 | if ( end != "" ) lines.push_back ( end ) ;
|
---|
| 357 |
|
---|
| 358 | // Cleanup tables
|
---|
| 359 | end = "" ;
|
---|
| 360 | while ( tables.size() )
|
---|
| 361 | {
|
---|
| 362 | end += tables[tables.size()-1].close () ;
|
---|
| 363 | tables.pop_back () ;
|
---|
| 364 | }
|
---|
| 365 | if ( end != "" ) lines.push_back ( end ) ;
|
---|
| 366 | }
|
---|
| 367 |
|
---|
| 368 | void WIKI2XML::init ( string s )
|
---|
| 369 | {
|
---|
| 370 | list = "" ;
|
---|
| 371 | lines.clear () ;
|
---|
| 372 |
|
---|
| 373 | // Now we remove evil HTML
|
---|
| 374 | allowed_html.clear () ;
|
---|
| 375 | allowed_html.push_back ( "b" ) ;
|
---|
| 376 | allowed_html.push_back ( "i" ) ;
|
---|
| 377 | allowed_html.push_back ( "p" ) ;
|
---|
| 378 | allowed_html.push_back ( "b" ) ;
|
---|
| 379 | allowed_html.push_back ( "br" ) ;
|
---|
| 380 | allowed_html.push_back ( "hr" ) ;
|
---|
| 381 | allowed_html.push_back ( "tt" ) ;
|
---|
| 382 | allowed_html.push_back ( "pre" ) ;
|
---|
| 383 | allowed_html.push_back ( "nowiki" ) ;
|
---|
| 384 | allowed_html.push_back ( "math" ) ;
|
---|
| 385 | allowed_html.push_back ( "strike" ) ;
|
---|
| 386 | allowed_html.push_back ( "u" ) ;
|
---|
| 387 | allowed_html.push_back ( "table" ) ;
|
---|
| 388 | allowed_html.push_back ( "caption" ) ;
|
---|
| 389 | allowed_html.push_back ( "tr" ) ;
|
---|
| 390 | allowed_html.push_back ( "td" ) ;
|
---|
| 391 | allowed_html.push_back ( "th" ) ;
|
---|
| 392 | allowed_html.push_back ( "li" ) ;
|
---|
| 393 | allowed_html.push_back ( "ul" ) ;
|
---|
| 394 | allowed_html.push_back ( "ol" ) ;
|
---|
| 395 | allowed_html.push_back ( "dl" ) ;
|
---|
| 396 | allowed_html.push_back ( "dd" ) ;
|
---|
| 397 | allowed_html.push_back ( "dt" ) ;
|
---|
| 398 | allowed_html.push_back ( "div" ) ;
|
---|
| 399 | allowed_html.push_back ( "h1" ) ;
|
---|
| 400 | allowed_html.push_back ( "h2" ) ;
|
---|
| 401 | allowed_html.push_back ( "h3" ) ;
|
---|
| 402 | allowed_html.push_back ( "h4" ) ;
|
---|
| 403 | allowed_html.push_back ( "h5" ) ;
|
---|
| 404 | allowed_html.push_back ( "h6" ) ;
|
---|
| 405 | allowed_html.push_back ( "h7" ) ;
|
---|
| 406 | allowed_html.push_back ( "h8" ) ;
|
---|
| 407 | allowed_html.push_back ( "h9" ) ;
|
---|
| 408 | allowed_html.push_back ( "small" ) ;
|
---|
| 409 | allowed_html.push_back ( "center" ) ;
|
---|
| 410 | // allowed_html.push_back ( "" ) ;
|
---|
| 411 | int a ;
|
---|
| 412 | for ( a = 0 ; a < allowed_html.size() ; a++ )
|
---|
| 413 | allowed_html[a] = upper ( allowed_html[a] ) ;
|
---|
| 414 |
|
---|
| 415 | vector <TXML> taglist ;
|
---|
| 416 | make_tag_list ( s , taglist ) ;
|
---|
| 417 | remove_evil_html ( s , taglist ) ;
|
---|
| 418 |
|
---|
| 419 | // Now evaluate each line
|
---|
| 420 | explode ( '\n' , s , lines ) ;
|
---|
| 421 | }
|
---|
| 422 |
|
---|
| 423 | string WIKI2XML::get_xml ()
|
---|
| 424 | {
|
---|
| 425 | string xmlheader = "<?xml version='1.0' encoding='UTF-8'?>" ;
|
---|
| 426 | string ret = xmlheader + "<text>" + implode ( " " , lines ) + "</text>" ;
|
---|
| 427 |
|
---|
| 428 | // Invalidating mdash
|
---|
| 429 | int a = ret.find ( "—" ) ;
|
---|
| 430 | while ( a >= 0 && a < ret.length() )
|
---|
| 431 | {
|
---|
| 432 | ret[a] = '!' ;
|
---|
| 433 | a = ret.find ( "—" , a ) ;
|
---|
| 434 | }
|
---|
| 435 |
|
---|
| 436 | return ret ;
|
---|
| 437 | }
|
---|
| 438 |
|
---|
| 439 | void WIKI2XML::replace_part ( string &s , int from , int to , string with )
|
---|
| 440 | {
|
---|
| 441 | s = s.substr ( 0 , from ) + with + s.substr ( to + 1 , s.length() - to - 1 ) ;
|
---|
| 442 | }
|
---|
| 443 |
|
---|
| 444 | void WIKI2XML::replace_part_sync ( string &s , int from , int to , string with , vector <TXML> &list )
|
---|
| 445 | {
|
---|
| 446 | int a , b ;
|
---|
| 447 | replace_part ( s , from , to , with ) ;
|
---|
| 448 | for ( a = 0 ; a < list.size() ; a++ )
|
---|
| 449 | {
|
---|
| 450 | for ( b = 0 ; b < with.length() ; b++ ) list[a].insert_at ( from ) ;
|
---|
| 451 | for ( b = from ; b <= to ; b++ ) list[a].remove_at ( from ) ;
|
---|
| 452 | }
|
---|
| 453 | }
|
---|
| 454 |
|
---|
| 455 | // ATTENTION : this doesn't handle all HTML comments correctly!
|
---|
| 456 | void WIKI2XML::make_tag_list ( string &s , vector <TXML> &list )
|
---|
| 457 | {
|
---|
| 458 | list.clear () ;
|
---|
| 459 | int a , b ;
|
---|
| 460 | for ( a = 0 ; a < s.length() ; a++ )
|
---|
| 461 | {
|
---|
| 462 | if ( s[a] == '>' ) // Rouge >
|
---|
| 463 | {
|
---|
| 464 | s[a] = ';' ;
|
---|
| 465 | s.insert ( a , ">" ) ;
|
---|
| 466 | continue ;
|
---|
| 467 | }
|
---|
| 468 | else if ( s[a] != '<' ) continue ;
|
---|
| 469 | b = find_next_unquoted ( '>' , s , a ) ;
|
---|
| 470 | if ( b == -1 ) // Rouge <
|
---|
| 471 | {
|
---|
| 472 | s[a] = ';' ;
|
---|
| 473 | s.insert ( a , "<" ) ;
|
---|
| 474 | continue ;
|
---|
| 475 | }
|
---|
| 476 | list.push_back ( TXML ( a , b , s ) ) ;
|
---|
| 477 | a = list[list.size()-1].to ;
|
---|
| 478 | }
|
---|
| 479 | }
|
---|
| 480 |
|
---|
| 481 | void WIKI2XML::remove_evil_html ( string &s , vector <TXML> &taglist )
|
---|
| 482 | {
|
---|
| 483 | int a , b ;
|
---|
| 484 | for ( a = 0 ; a < taglist.size() ; a++ )
|
---|
| 485 | {
|
---|
| 486 | string tag = upper ( taglist[a].name ) ;
|
---|
| 487 | for ( b = 0 ; b < allowed_html.size() && tag != allowed_html[b] ; b++ ) ;
|
---|
| 488 | if ( b < allowed_html.size() ) continue ;
|
---|
| 489 | replace_part_sync ( s , taglist[a].from , taglist[a].from , "<" , taglist ) ;
|
---|
| 490 | replace_part_sync ( s , taglist[a].to , taglist[a].to , ">" , taglist ) ;
|
---|
| 491 | }
|
---|
| 492 | }
|
---|
| 493 |
|
---|
| 494 | string WIKI2XML::table_markup ( string &l )
|
---|
| 495 | {
|
---|
| 496 | int a ;
|
---|
| 497 | string ret ;
|
---|
| 498 | if ( left ( l , 2 ) == "{|" ) // Open table
|
---|
| 499 | {
|
---|
| 500 | ret = "<wikitable>" ;
|
---|
| 501 | ret += xml_embed ( l.substr ( 2 , l.length() - 2 ) , "wikiparameter" ) ;
|
---|
| 502 | tables.push_back ( TTableInfo () ) ;
|
---|
| 503 | }
|
---|
| 504 | else if ( left ( l , 2 ) == "|}" )
|
---|
| 505 | {
|
---|
| 506 | ret = tables[tables.size()-1].close () ;
|
---|
| 507 | tables.pop_back () ;
|
---|
| 508 | }
|
---|
| 509 | else if ( left ( l , 2 ) == "|-" )
|
---|
| 510 | {
|
---|
| 511 | ret = tables[tables.size()-1].new_row () ;
|
---|
| 512 | for ( a = 1 ; a < l.length() && l[a] == '-' ; a++ ) ;
|
---|
| 513 | ret += xml_params ( l.substr ( a , l.length() - a ) ) ;
|
---|
| 514 | }
|
---|
| 515 | else
|
---|
| 516 | {
|
---|
| 517 | string init ;
|
---|
| 518 | if ( left ( l , 2 ) == "|+" )
|
---|
| 519 | {
|
---|
| 520 | init = "caption" ;
|
---|
| 521 | l = l.substr ( 2 , l.length() - 2 ) ;
|
---|
| 522 | }
|
---|
| 523 | else if ( l[0] == '!' )
|
---|
| 524 | {
|
---|
| 525 | init = "header" ;
|
---|
| 526 | l = l.substr ( 1 , l.length() - 1 ) ;
|
---|
| 527 | }
|
---|
| 528 | else if ( l[0] == '|' )
|
---|
| 529 | {
|
---|
| 530 | init = "cell" ;
|
---|
| 531 | l = l.substr ( 1 , l.length() - 1 ) ;
|
---|
| 532 | }
|
---|
| 533 | vector <string> sublines ;
|
---|
| 534 | for ( a = 0 ; a + 1 < l.length() ; a++ )
|
---|
| 535 | {
|
---|
| 536 | if ( l[a] == '|' && l[a+1] == '|' )
|
---|
| 537 | {
|
---|
| 538 | sublines.push_back ( left ( l , a ) ) ;
|
---|
| 539 | l = l.substr ( a + 2 , l.length() - a ) ;
|
---|
| 540 | a = -1 ;
|
---|
| 541 | }
|
---|
| 542 | }
|
---|
| 543 | if ( l != "" ) sublines.push_back ( l ) ;
|
---|
| 544 | for ( a = 0 ; a < sublines.size() ; a++ )
|
---|
| 545 | {
|
---|
| 546 | l = sublines[a] ;
|
---|
| 547 | parse_line_sub ( l ) ;
|
---|
| 548 | string params ;
|
---|
| 549 | int b = find_next_unquoted ( '|' , l ) ;
|
---|
| 550 | if ( b != -1 )
|
---|
| 551 | {
|
---|
| 552 | params = left ( l , b ) ;
|
---|
| 553 | l = l.substr ( b + 1 , l.length() - b ) ;
|
---|
| 554 | }
|
---|
| 555 | if ( params != "" ) l = xml_params ( params ) + l ;
|
---|
| 556 | ret += tables[tables.size()-1].new_cell ( init ) ;
|
---|
| 557 | ret += l ;
|
---|
| 558 | }
|
---|
| 559 | }
|
---|
| 560 | return ret ;
|
---|
| 561 | }
|
---|