Last change
on this file since 25141 was 25141, checked in by papitha, 12 years ago |
NGRAMJ PERL MODULE ADDED /MAORI LANGUAGE GUESSING WORKING WELL!!
|
File size:
4.5 KB
|
Line | |
---|
1 | <?php
|
---|
2 | /**
|
---|
3 | * This file is to compensate for a bug in PHP4 and early PHP5 versions
|
---|
4 | * which do not replace some entities correctly
|
---|
5 | */
|
---|
6 |
|
---|
// Lookup table: entity name (the text between '&' and ';') => Unicode code point.
// Entry order is significant to any consumer that applies the entries one
// after another, so it must not be rearranged.
//
// The table was generated with:
// less /usr/share/xml/entities/xhtml/*.ent|grep '^<!ENTITY'|sed -e 's/^<\!ENTITY[ \t]*\([A-Za-z0-9]*\)[ \t]*"&#\([0-9]*\);".*$/"\1"=>\2,/' > /home/dom/data/2005/04/entities-table
$html_named_entities_mapping_mine = array(
    // Code points 160-255
    'nbsp'     => 160,
    'iexcl'    => 161,
    'cent'     => 162,
    'pound'    => 163,
    'curren'   => 164,
    'yen'      => 165,
    'brvbar'   => 166,
    'sect'     => 167,
    'uml'      => 168,
    'copy'     => 169,
    'ordf'     => 170,
    'laquo'    => 171,
    'not'      => 172,
    'shy'      => 173,
    'reg'      => 174,
    'macr'     => 175,
    'deg'      => 176,
    'plusmn'   => 177,
    'sup2'     => 178,
    'sup3'     => 179,
    'acute'    => 180,
    'micro'    => 181,
    'para'     => 182,
    'middot'   => 183,
    'cedil'    => 184,
    'sup1'     => 185,
    'ordm'     => 186,
    'raquo'    => 187,
    'frac14'   => 188,
    'frac12'   => 189,
    'frac34'   => 190,
    'iquest'   => 191,
    'Agrave'   => 192,
    'Aacute'   => 193,
    'Acirc'    => 194,
    'Atilde'   => 195,
    'Auml'     => 196,
    'Aring'    => 197,
    'AElig'    => 198,
    'Ccedil'   => 199,
    'Egrave'   => 200,
    'Eacute'   => 201,
    'Ecirc'    => 202,
    'Euml'     => 203,
    'Igrave'   => 204,
    'Iacute'   => 205,
    'Icirc'    => 206,
    'Iuml'     => 207,
    'ETH'      => 208,
    'Ntilde'   => 209,
    'Ograve'   => 210,
    'Oacute'   => 211,
    'Ocirc'    => 212,
    'Otilde'   => 213,
    'Ouml'     => 214,
    'times'    => 215,
    'Oslash'   => 216,
    'Ugrave'   => 217,
    'Uacute'   => 218,
    'Ucirc'    => 219,
    'Uuml'     => 220,
    'Yacute'   => 221,
    'THORN'    => 222,
    'szlig'    => 223,
    'agrave'   => 224,
    'aacute'   => 225,
    'acirc'    => 226,
    'atilde'   => 227,
    'auml'     => 228,
    'aring'    => 229,
    'aelig'    => 230,
    'ccedil'   => 231,
    'egrave'   => 232,
    'eacute'   => 233,
    'ecirc'    => 234,
    'euml'     => 235,
    'igrave'   => 236,
    'iacute'   => 237,
    'icirc'    => 238,
    'iuml'     => 239,
    'eth'      => 240,
    'ntilde'   => 241,
    'ograve'   => 242,
    'oacute'   => 243,
    'ocirc'    => 244,
    'otilde'   => 245,
    'ouml'     => 246,
    'divide'   => 247,
    'oslash'   => 248,
    'ugrave'   => 249,
    'uacute'   => 250,
    'ucirc'    => 251,
    'uuml'     => 252,
    'yacute'   => 253,
    'thorn'    => 254,
    'yuml'     => 255,

    // Markup characters, extra Latin letters, spacing and punctuation
    'quot'     => 34,
    'amp'      => 38,
    'lt'       => 60,
    'gt'       => 62,
    'OElig'    => 338,
    'oelig'    => 339,
    'Scaron'   => 352,
    'scaron'   => 353,
    'Yuml'     => 376,
    'circ'     => 710,
    'tilde'    => 732,
    'ensp'     => 8194,
    'emsp'     => 8195,
    'thinsp'   => 8201,
    'zwnj'     => 8204,
    'zwj'      => 8205,
    'lrm'      => 8206,
    'rlm'      => 8207,
    'ndash'    => 8211,
    'mdash'    => 8212,
    'lsquo'    => 8216,
    'rsquo'    => 8217,
    'sbquo'    => 8218,
    'ldquo'    => 8220,
    'rdquo'    => 8221,
    'bdquo'    => 8222,
    'dagger'   => 8224,
    'Dagger'   => 8225,
    'permil'   => 8240,
    'lsaquo'   => 8249,
    'rsaquo'   => 8250,
    'euro'     => 8364,

    // Greek letters, arrows, mathematical and miscellaneous symbols
    'fnof'     => 402,
    'Alpha'    => 913,
    'Beta'     => 914,
    'Gamma'    => 915,
    'Delta'    => 916,
    'Epsilon'  => 917,
    'Zeta'     => 918,
    'Eta'      => 919,
    'Theta'    => 920,
    'Iota'     => 921,
    'Kappa'    => 922,
    'Lambda'   => 923,
    'Mu'       => 924,
    'Nu'       => 925,
    'Xi'       => 926,
    'Omicron'  => 927,
    'Pi'       => 928,
    'Rho'      => 929,
    'Sigma'    => 931,
    'Tau'      => 932,
    'Upsilon'  => 933,
    'Phi'      => 934,
    'Chi'      => 935,
    'Psi'      => 936,
    'Omega'    => 937,
    'alpha'    => 945,
    'beta'     => 946,
    'gamma'    => 947,
    'delta'    => 948,
    'epsilon'  => 949,
    'zeta'     => 950,
    'eta'      => 951,
    'theta'    => 952,
    'iota'     => 953,
    'kappa'    => 954,
    'lambda'   => 955,
    'mu'       => 956,
    'nu'       => 957,
    'xi'       => 958,
    'omicron'  => 959,
    'pi'       => 960,
    'rho'      => 961,
    'sigmaf'   => 962,
    'sigma'    => 963,
    'tau'      => 964,
    'upsilon'  => 965,
    'phi'      => 966,
    'chi'      => 967,
    'psi'      => 968,
    'omega'    => 969,
    'thetasym' => 977,
    'upsih'    => 978,
    'piv'      => 982,
    'bull'     => 8226,
    'hellip'   => 8230,
    'prime'    => 8242,
    'Prime'    => 8243,
    'oline'    => 8254,
    'frasl'    => 8260,
    'weierp'   => 8472,
    'image'    => 8465,
    'real'     => 8476,
    'trade'    => 8482,
    'alefsym'  => 8501,
    'larr'     => 8592,
    'uarr'     => 8593,
    'rarr'     => 8594,
    'darr'     => 8595,
    'harr'     => 8596,
    'crarr'    => 8629,
    'lArr'     => 8656,
    'uArr'     => 8657,
    'rArr'     => 8658,
    'dArr'     => 8659,
    'hArr'     => 8660,
    'forall'   => 8704,
    'part'     => 8706,
    'exist'    => 8707,
    'empty'    => 8709,
    'nabla'    => 8711,
    'isin'     => 8712,
    'notin'    => 8713,
    'ni'       => 8715,
    'prod'     => 8719,
    'sum'      => 8721,
    'minus'    => 8722,
    'lowast'   => 8727,
    'radic'    => 8730,
    'prop'     => 8733,
    'infin'    => 8734,
    'ang'      => 8736,
    'and'      => 8743,
    'or'       => 8744,
    'cap'      => 8745,
    'cup'      => 8746,
    'int'      => 8747,
    'there4'   => 8756,
    'sim'      => 8764,
    'cong'     => 8773,
    'asymp'    => 8776,
    'ne'       => 8800,
    'equiv'    => 8801,
    'le'       => 8804,
    'ge'       => 8805,
    'sub'      => 8834,
    'sup'      => 8835,
    'nsub'     => 8836,
    'sube'     => 8838,
    'supe'     => 8839,
    'oplus'    => 8853,
    'otimes'   => 8855,
    'perp'     => 8869,
    'sdot'     => 8901,
    'lceil'    => 8968,
    'rceil'    => 8969,
    'lfloor'   => 8970,
    'rfloor'   => 8971,
    'lang'     => 9001,
    'rang'     => 9002,
    'loz'      => 9674,
    'spades'   => 9824,
    'clubs'    => 9827,
    'hearts'   => 9829,
    'diams'    => 9830,

    // Numeric-looking key: PHP stores it as the integer key 32 (code point 32 is a space).
    '32'       => 32,
);
|
---|
264 |
|
---|
/**
 * Encode a single Unicode code point as a UTF-8 byte string.
 *
 * Code points below 0x80 yield 1 byte, below 0x800 yield 2 bytes, below
 * 0x10000 yield 3 bytes and below 0x200000 yield 4 bytes.
 *
 * @param int $code Code point to encode.
 * @return string The UTF-8 byte sequence, or an empty string when $code is
 *                0x200000 or greater (not representable in 4 bytes).
 */
function utf8_chr($code)
{
    if ($code < 0x80) {
        return chr($code);
    }
    if ($code < 0x800) {
        return chr(0xC0 | ($code >> 6))
             . chr(0x80 | ($code & 0x3F));
    }
    if ($code < 0x10000) {
        return chr(0xE0 | ($code >> 12))
             . chr(0x80 | (($code >> 6) & 0x3F))
             . chr(0x80 | ($code & 0x3F));
    }
    if ($code < 0x200000) {
        // Every shift/mask is parenthesised: in PHP "+" binds tighter than
        // ">>" and "&", so an unparenthesised "$code>>18+240" shifts by 258
        // and "$code&63+128" masks with 191, producing wrong bytes.
        return chr(0xF0 | ($code >> 18))
             . chr(0x80 | (($code >> 12) & 0x3F))
             . chr(0x80 | (($code >> 6) & 0x3F))
             . chr(0x80 | ($code & 0x3F));
    }
    // Out of range: return an explicit empty string instead of falling off
    // the end of the function with an implicit NULL.
    return '';
}
|
---|
272 |
|
---|
/**
 * Replace named HTML entities in $content with their UTF-8 characters.
 *
 * Each key of the global $html_named_entities_mapping_mine table is wrapped
 * as "&key;" and replaced by utf8_chr() of its code point. "&amp;" is
 * replaced last: it is the only entity whose replacement ("&") can start a
 * new entity, so decoding it earlier would turn "&amp;lt;" into "&lt;" and
 * then into "<" (double decoding).
 *
 * @param string $content Text to decode; modified in place (by reference).
 * @return void
 */
function filter_named_entities(&$content) {
    global $html_named_entities_mapping_mine;

    $search = array();
    $replace = array();
    foreach ($html_named_entities_mapping_mine as $name => $value) {
        if ($name === 'amp') {
            // Deferred until every other entity has been handled.
            continue;
        }
        $search[] = '&' . $name . ';';
        $replace[] = utf8_chr($value);
    }
    if (isset($html_named_entities_mapping_mine['amp'])) {
        $search[] = '&amp;';
        $replace[] = utf8_chr($html_named_entities_mapping_mine['amp']);
    }

    // str_replace() applies array pairs in order, so "&amp;" really is last.
    $content = str_replace($search, $replace, $content);
    $content = str_replace('í', 'i', $content); # Ugly hack
}
|
---|
280 |
|
---|
281 | ?>
|
---|
Note:
See
TracBrowser
for help on using the repository browser.