source: gs2-extensions/ngramj/src/wiki/wiki2xml/global.cpp@ 25141

Last change on this file since 25141 was 25141, checked in by papitha, 12 years ago

NGRAMJ PERL MODULE ADDED /MAORI LANGUAGE GUESSING WORKING WELL!!

File size: 5.3 KB
Line 
1#include "global.h"
2
3// *****************************************************************************
4// *****************************************************************************
5//
6// global string functions
7//
8// *****************************************************************************
9// *****************************************************************************
10
11// The following functions should be language specific
12bool is_text_char ( chart ch )
13 {
14 if ( ch >= 'a' && ch <= 'z' ) return true ;
15 if ( ch >= 'A' && ch <= 'Z' ) return true ;
16 return false ;
17 }
18
19
// The helpers from here on are not language specific.

// Returns the first `num` characters of `s`.
// A non-positive `num` yields an empty string; a `num` that reaches or
// exceeds the length of `s` yields a copy of the whole string.
string left(string &s, int num)
{
    if (num > 0) {
        // num is positive here, so the signed/unsigned comparison is safe.
        return (num < s.length()) ? s.substr(0, num) : s;
    }
    return "";
}
28
// Returns the last `num` characters of `s`.
// A non-positive `num` yields an empty string; a `num` that reaches or
// exceeds the length of `s` yields a copy of the whole string.
string right(string &s, int num)
{
    if (num <= 0) return "";

    // Index of the first character to keep; zero or negative means
    // "keep everything".
    const int start = s.length() - num;
    if (start > 0) return s.substr(start);
    return s;
}
38
// Returns a copy of `s` with the ASCII letters 'a'-'z' converted to upper
// case. All other characters are left untouched (good enough for internal
// purposes, as the original author noted).
string upper(string s)
{
    for (string::iterator it = s.begin(); it != s.end(); ++it) {
        if (*it >= 'a' && *it <= 'z') {
            *it = *it - 'a' + 'A';
        }
    }
    return s;
}
48
49void explode ( chart ch , string &l , vector <string> &parts )
50 {
51 parts.clear () ;
52 int a , b ;
53 for ( a = b = 0 ; a < l.length() ; a++ )
54 {
55 if ( l[a] == ch )
56 {
57 parts.push_back ( l.substr ( b , a - b ) ) ;
58 b = a+1 ;
59 }
60 }
61 parts.push_back ( l.substr ( b , a - b ) ) ;
62
63 if ( debug ) cout << "Explode : " << l << endl ;
64 for ( a = 0 ; a < parts.size() ; a++ )
65 if ( debug ) cout << a << " " << parts[a] << endl ;
66 if ( debug ) cout << endl ;
67 }
68
// Joins the elements of `parts` into one string, placing `mid` between
// consecutive elements. An empty vector gives an empty string; a single
// element is returned as is.
string implode(string mid, vector<string> &parts)
{
    string joined;
    for (vector<string>::const_iterator it = parts.begin(); it != parts.end(); ++it) {
        if (it != parts.begin()) joined += mid; // separator only between elements
        joined += *it;
    }
    return joined;
}
78
79string unquote ( chart quote , string &s )
80 {
81 int a ;
82 for ( a = 0 ; a < s.length() ; a++ )
83 {
84 if ( s[a] == quote && ( a == 0 || ( a > 0 && s[a-1] != '\\' ) ) )
85 {
86 s.insert ( a , "\\" ) ;
87 a++ ;
88 }
89 }
90 return s ;
91 }
92
// Tests whether `sub` occurs in `main` starting exactly at index `from`.
// Returns false when `from` is negative or when `sub` would extend past the
// end of `main`. An empty `sub` matches at any offset from 0 up to and
// including main.length().
bool submatch(string &main, string &sub, int from)
{
    // A negative offset used to wrap around in the unsigned length check and
    // then index in front of the string; reject it explicitly.
    if (from < 0) return false;

    const string::size_type offset = from;
    // Written as subtractions so the bounds check itself cannot overflow.
    if (offset > main.length()) return false;
    if (sub.length() > main.length() - offset) return false;

    return main.compare(offset, sub.length(), sub) == 0;
}
103
104int find_first ( chart c , string &s )
105 {
106 int a ;
107 for ( a = 0 ; a < s.length() && s[a] != c ; a++ ) ;
108 if ( a == s.length() ) return -1 ;
109 return a ;
110 }
111
112int find_last ( chart c , string &s )
113 {
114 int a , b = -1 ;
115 for ( a = 0 ; a < s.length() ; a++ )
116 {
117 if ( s[a] == c ) b = a ;
118 }
119 return b ;
120 }
121
122string before_first ( chart c , string s )
123 {
124 int pos = find_first ( c , s ) ;
125 if ( pos == -1 ) return s ;
126 return s.substr ( 0 , pos ) ;
127 }
128
129string before_last ( chart c , string s )
130 {
131 int pos = find_last ( c , s ) ;
132 if ( pos == -1 ) return "" ;
133 return s.substr ( 0 , pos ) ;
134 }
135
136string after_first ( chart c , string s )
137 {
138 int pos = find_first ( c , s ) ;
139 if ( pos == -1 ) return "" ;
140 return s.substr ( pos+1 , s.length() ) ;
141 }
142
143string after_last ( chart c , string s )
144 {
145 int pos = find_last ( c , s ) ;
146 if ( pos == -1 ) return s ;
147 return s.substr ( pos+1 , s.length() ) ;
148 }
149
// Returns a copy of `s` without leading and trailing blanks.
// Only the space character ' ' is stripped; tabs, newlines and other
// whitespace are kept. `s` itself is not modified.
string trim(string &s)
{
    const string::size_type first_kept = s.find_first_not_of(' ');
    if (first_kept == string::npos) return ""; // empty, or nothing but spaces

    const string::size_type last_kept = s.find_last_not_of(' ');
    return s.substr(first_kept, last_kept - first_kept + 1);
}
159
160int find_next_unquoted ( chart c , string &s , int start )
161 {
162 int a ;
163 chart lastquote = ' ' ;
164 for ( a = start ; a < s.length() ; a++ )
165 {
166 if ( s[a] == c && lastquote == ' ' ) return a ; // Success!
167 if ( s[a] != SINGLE_QUOTE && s[a] != DOUBLE_QUOTE ) continue ; // No quotes, next
168 if ( a > 0 && s[a-1] == '\\' ) continue ; // Ignore \' and \"
169 if ( lastquote == ' ' ) lastquote = s[a] ; // Remember opening quote, text now quoted
170 else if ( lastquote == s[a] ) lastquote = ' ' ; // Close quote, not quoted anymore
171 }
172 return -1 ;
173 }
174
// Returns the decimal text representation of `a` (as produced by "%d").
string val(int a)
{
    char digits[20]; // comfortably larger than any "%d" rendering of an int
    const int written = sprintf(digits, "%d", a);
    return string(digits, written);
}
181
182string xml_embed ( string inside , string tag , string param )
183 {
184 string ret ;
185 ret = "<" + tag ;
186 if ( param != "" ) ret += " " + param ;
187 if ( inside == "" ) return ret + "/>" ;
188 return ret + ">" + trim ( inside ) + "</" + tag + ">" ;
189 }
190
191string xml_params ( string l ) // Yes, this function is thin...
192 {
193 string ret ;
194 vector <string> params ;
195 while ( l != "" )
196 {
197 int p = find_next_unquoted ( ' ' , l ) ;
198 string first ;
199 if ( p == -1 )
200 {
201 first = l ;
202 l = "" ;
203 }
204 else
205 {
206 first = left ( l , p ) ;
207 l = l.substr ( p , l.length() - p ) ;
208 }
209 first = trim ( first ) ;
210 l = trim ( l ) ;
211 if ( first == "" ) continue ;
212
213 p = find_next_unquoted ( '=' , first ) ;
214 if ( p == -1 ) first = xml_embed ( first , "value" ) ;
215 else
216 {
217 first = xml_embed ( left ( first , p ) , "key" ) +
218 xml_embed ( first.substr ( p + 1 , first.length() - p ) , "value" ) ;
219 }
220 first = xml_embed ( first , "wikiparameter" ) ;
221 ret += first ;
222 }
223 return ret ;
224 }
225
Note: See TracBrowser for help on using the repository browser.