source: gs2-extensions/ngramj/src/wiki/wiki2xml/WIKI2XML.h@ 25141

Last change on this file since 25141 was 25141, checked in by papitha, 12 years ago

NGRAMJ PERL MODULE ADDED /MAORI LANGUAGE GUESSING WORKING WELL!!

File size: 1.8 KB
Line 
1#ifndef _WIKI2XML_H_
2#define _WIKI2XML_H_
3
4#include "global.h"
5#include "TXML.h"
6
/// Per-table state used while wiki table markup is being converted.
///
/// Only declarations are visible in this header; the member functions are
/// defined elsewhere. WIKI2XML keeps a vector of these objects (by value),
/// so the class must stay copyable.
///
/// NOTE(review): judging by the member names, the flags record whether a row
/// (tr) or a cell (td) is currently open and the methods return the markup
/// needed to move to the next cell/row or to finish the table -- confirm
/// against the implementation file.
class TTableInfo
    {
    public :
    TTableInfo () ;

    // The class has virtual member functions, so it is meant to be usable as
    // a base class. A virtual destructor makes destroying a derived object
    // through a TTableInfo pointer well-defined.
    virtual ~TTableInfo () {}

    /// @param type  kind of cell to start (presumably stored in td_type -- TODO confirm)
    /// @return      a string produced for the new cell
    virtual string new_cell ( string type ) ;

    /// @return a string produced when a new row is started
    virtual string new_row () ;

    /// @return a string produced when the table is closed
    virtual string close () ;

    bool tr_open ;   // presumably: a row is currently open
    bool td_open ;   // presumably: a cell is currently open
    string td_type ; // presumably: type of the currently open cell
    } ;
17
18class WIKI2XML
19 {
20 public :
21 WIKI2XML () {} ;
22 WIKI2XML ( string &s ) { init ( s ) ; }
23 WIKI2XML ( vector <string> &l ) { init ( l ) ; }
24 virtual void init ( string s ) ;
25 virtual void init ( vector <string> &l ) { init ( implode ( "\n" , l ) ) ; }
26 virtual void parse () { parse_lines ( lines ) ; }
27 virtual string get_xml () ;
28
29 private :
30 virtual void make_tag_list ( string &s , vector <TXML> &list ) ;
31 virtual void parse_symmetric ( string &l , int &from ,
32 string s1 , string s2 ,
33 string r1 , string r2 , bool extend = false ) ;
34 virtual void parse_link ( string &l , int &from , char mode = 'L' ) ;
35 virtual void parse_line_sub ( string &l ) ;
36 virtual void parse_line ( string &l ) ;
37 virtual void parse_lines ( vector <string> &lines ) ;
38 virtual string fix_list ( string &l ) ;
39 virtual string get_list_tag ( chart c , bool open ) ;
40 virtual bool is_list_char ( chart c ) ;
41 virtual void remove_evil_html ( string &s , vector <TXML> &taglist ) ;
42 virtual void replace_part ( string &s , int from , int to , string with ) ;
43 virtual void replace_part_sync ( string &s , int from , int to , string with , vector <TXML> &list ) ;
44 virtual void parse_external_freelink ( string &l , int &from ) ;
45 virtual void parse_external_link ( string &l , int &from ) ;
46 virtual bool is_external_link_protocol ( string protocol ) ;
47 virtual int scan_url ( string &l , int from ) ;
48 virtual string table_markup ( string &l ) ;
49
50 // Variables
51 vector <string> lines , allowed_html ;
52 vector <TTableInfo> tables ;
53 string list ;
54 } ;
55
56#endif
Note: See TracBrowser for help on using the repository browser.