1 | <?php
|
---|
2 | /**
|
---|
3 | * UTF8 helper functions
|
---|
4 | *
|
---|
5 | * @license LGPL 2.1 (http://www.gnu.org/copyleft/lesser.html)
|
---|
6 | * @author Andreas Gohr <[email protected]>
|
---|
7 | */
|
---|
8 |
|
---|
/**
 * Decide whether the mbstring extension should back the helpers in this file.
 *
 * UTF8_MBSTRING may be pre-defined by the including script. Otherwise it is
 * set to 1 when mb_substr() exists and UTF8_NOMBSTRING has not been defined,
 * and to 0 in every other case.
 */
if(!defined('UTF8_MBSTRING')){
    define(
        'UTF8_MBSTRING',
        (function_exists('mb_substr') && !defined('UTF8_NOMBSTRING')) ? 1 : 0
    );
}

// make mbstring treat every string as UTF-8 when it is in use
if(UTF8_MBSTRING){
    mb_internal_encoding('UTF-8');
}
21 |
|
---|
if(!function_exists('utf8_isASCII')){
    /**
     * Tell whether a string consists of 7bit ASCII bytes only
     *
     * @param string $str string to inspect
     * @return bool false as soon as one byte outside 0x00-0x7F is found
     */
    function utf8_isASCII($str){
        $foundHighByte = preg_match('/(?:[^\x00-\x7F])/', $str);
        if($foundHighByte === 1){
            return false;
        }
        return true;
    }
}
32 |
|
---|
if(!function_exists('utf8_strip')){
    /**
     * Strips all highbyte chars
     *
     * Every byte outside 0x00-0x7F is removed, so the result is a pure
     * ASCII7 string. The previous byte-by-byte loop used the curly brace
     * string offset syntax ($str{$i}), which is a parse error on PHP 8.
     *
     * @param string $str input string
     * @return string the input without any high bytes
     */
    function utf8_strip($str){
        // byte mode (no /u flag) on purpose: we filter bytes, not characters
        return preg_replace('/[^\x00-\x7F]/', '', (string)$str);
    }
}
52 |
|
---|
if(!function_exists('utf8_check')){
    /**
     * Heuristic test whether a byte string is shaped like UTF-8
     *
     * Only the structure is verified: every lead byte must be followed by
     * the number of 10bbbbbb continuation bytes it announces. Lead bytes
     * announcing up to five continuation bytes are accepted, and overlong
     * forms are not rejected.
     *
     * @param string $Str bytes to examine
     * @return bool true when every sequence is structurally complete
     */
    function utf8_check($Str) {
        $total = strlen($Str);
        $pos   = 0;

        while ($pos < $total) {
            $lead = ord($Str[$pos]);

            if ($lead < 0x80) {                  // 0bbbbbbb
                $pos++;
                continue;
            }

            if (($lead & 0xE0) == 0xC0) {        // 110bbbbb
                $trail = 1;
            } elseif (($lead & 0xF0) == 0xE0) {  // 1110bbbb
                $trail = 2;
            } elseif (($lead & 0xF8) == 0xF0) {  // 11110bbb
                $trail = 3;
            } elseif (($lead & 0xFC) == 0xF8) {  // 111110bb
                $trail = 4;
            } elseif (($lead & 0xFE) == 0xFC) {  // 1111110b
                $trail = 5;
            } else {
                return false;                    // matches no lead byte model
            }

            // the announced number of continuation bytes has to follow
            for ($k = 0; $k < $trail; $k++) {
                $pos++;
                if ($pos == $total) {
                    return false;
                }
                if ((ord($Str[$pos]) & 0xC0) != 0x80) {
                    return false;
                }
            }
            $pos++;
        }
        return true;
    }
}
80 |
|
---|
if(!function_exists('utf8_strlen')){
    /**
     * Unicode aware replacement for strlen()
     *
     * Each UTF-8 character contains exactly one byte that is not a
     * continuation byte (0x80-0xBF), so the character count is the byte
     * count minus the number of continuation bytes. This replaces the former
     * strlen(utf8_decode()) trick, because utf8_decode() is deprecated as of
     * PHP 8.2. For well-formed UTF-8 both give the same result; for malformed
     * input stray continuation bytes are simply not counted.
     *
     * @see strlen()
     * @param string $string UTF-8 encoded string
     * @return int number of characters
     */
    function utf8_strlen($string){
        $string = (string)$string;
        // no /u flag: continuation bytes are counted in byte mode
        return strlen($string) - (int)preg_match_all('/[\x80-\xBF]/', $string);
    }
}
97 |
|
---|
if(!function_exists('utf8_substr')){
    /**
     * UTF-8 aware alternative to substr
     *
     * Return part of a string given character offset (and optionally length).
     * When UTF8_MBSTRING is set the call is delegated to mb_substr().
     * Otherwise PCRE with the 'u' flag is used:
     *
     *  - PCRE only supports repetition counts below 65536, so larger offsets
     *    and lengths are expressed as repeated groups of 65535 characters.
     *  - Like mb_substr(), an empty string (never false) is returned when
     *    nothing can be extracted.
     *  - Counting the characters of the string is comparatively expensive and
     *    therefore only done when needed (negative offset or given length).
     *    The count no longer relies on utf8_decode(), which is deprecated as
     *    of PHP 8.2.
     *
     * @param string  $str    UTF-8 encoded string
     * @param integer $offset number of UTF-8 characters offset (from left)
     * @param integer $length (optional) length in UTF-8 characters from offset
     * @return string the requested part, '' if there is none
     */
    function utf8_substr($str, $offset, $length = null) {
        if(UTF8_MBSTRING){
            if($length === null){
                return mb_substr($str, $offset);
            }
            return mb_substr($str, $offset, $length);
        }

        // cast parameters to appropriate types to avoid multiple notices/warnings
        $str    = (string)$str;
        $offset = (int)$offset;
        if ($length !== null) $length = (int)$length;

        // handle trivial cases
        if ($length === 0) return '';
        if ($offset < 0 && $length !== null && $length < 0 && $length < $offset) return '';

        $offset_pattern = '';
        $length_pattern = '';
        $strlen         = null; // character count, computed lazily

        // normalise negative offsets (tail anchored patterns are very slow)
        if ($offset < 0) {
            // characters = bytes minus continuation bytes
            $strlen = strlen($str) - (int)preg_match_all('/[\x80-\xBF]/', $str);
            $offset = $strlen + $offset;
            if ($offset < 0) $offset = 0;
        }

        // pattern for the offset: a non-captured group of $offset characters
        if ($offset > 0) {
            $Ox = (int)($offset/65535);
            $Oy = $offset%65535;

            if ($Ox) $offset_pattern = '(?:.{65535}){'.$Ox.'}';
            $offset_pattern = '^(?:'.$offset_pattern.'.{'.$Oy.'})';
        } else {
            $offset_pattern = '^'; // offset == 0; just anchor the pattern
        }

        // pattern for the length
        if ($length === null) {
            $length_pattern = '(.*)$'; // the rest of the string
        } else {
            if ($strlen === null) {
                $strlen = strlen($str) - (int)preg_match_all('/[\x80-\xBF]/', $str);
            }
            if ($offset > $strlen) return ''; // another trivial case

            if ($length > 0) {
                // reduce any length that would go past the end of the string
                $length = min($strlen-$offset, $length);

                $Lx = (int)($length/65535);
                $Ly = $length%65535;

                // positive length: capture a group of $length characters
                if ($Lx) $length_pattern = '(?:.{65535}){'.$Lx.'}';
                $length_pattern = '('.$length_pattern.'.{'.$Ly.'})';

            } else if ($length < 0) {
                if ($length < ($offset - $strlen)) return '';

                $Lx = (int)((-$length)/65535);
                $Ly = (-$length)%65535;

                // negative length: capture everything except a group of
                // -$length characters anchored at the end of the string
                if ($Lx) $length_pattern = '(?:.{65535}){'.$Lx.'}';
                $length_pattern = '(.*)(?:'.$length_pattern.'.{'.$Ly.'})$';
            }
        }

        if (!preg_match('#'.$offset_pattern.$length_pattern.'#us',$str,$match)) return '';
        return $match[1];
    }
}
201 |
|
---|
if(!function_exists('utf8_substr_replace')){
    /**
     * Unicode aware replacement for substr_replace()
     *
     * Positions are counted in UTF-8 characters. Unlike substr_replace() the
     * default $length is 0, i.e. the replacement is inserted at $start
     * without removing anything.
     *
     * Negative values follow substr_replace(): a negative $start counts from
     * the end of the string, a negative $length is the number of characters
     * from the end at which replacing stops. Previously a negative $start
     * silently dropped everything in front of the replaced part.
     *
     * @see substr_replace()
     * @param string  $string      UTF-8 encoded input
     * @param string  $replacement text to put in place
     * @param integer $start       character position where replacing starts
     * @param integer $length      number of characters to replace
     * @return string
     */
    function utf8_substr_replace($string, $replacement, $start , $length=0 ){
        $start  = (int)$start;
        $length = (int)$length;

        // the character count is only needed to resolve negative arguments
        if($start < 0 || $length < 0){
            $total = utf8_strlen($string);
            if($start < 0)      $start  = max(0, $total + $start);
            if($start > $total) $start  = $total;
            if($length < 0)     $length = max(0, $total - $start + $length);
        }

        $ret = '';
        if($start > 0) $ret .= utf8_substr($string, 0, $start);
        $ret .= $replacement;
        $ret .= utf8_substr($string, $start+$length);
        return $ret;
    }
}
217 |
|
---|
if(!function_exists('utf8_ltrim')){
    /**
     * Unicode aware replacement for ltrim()
     *
     * Without a charlist the native ltrim() is used. With a charlist every
     * character in it is taken literally (ranges such as "a..z" are not
     * supported). The list is escaped with preg_quote(), so characters that
     * are special inside a character class, notably '^', no longer break or
     * negate the pattern.
     *
     * @see ltrim()
     * @param string $str      UTF-8 encoded string
     * @param string $charlist characters to strip from the left
     * @return string
     */
    function utf8_ltrim($str,$charlist=''){
        if($charlist == '') return ltrim($str);

        // quote charlist for use in a character class
        $class = preg_quote($charlist, '/');

        return preg_replace('/^['.$class.']+/u','',$str);
    }
}
235 |
|
---|
if(!function_exists('utf8_rtrim')){
    /**
     * Unicode aware replacement for rtrim()
     *
     * Without a charlist the native rtrim() is used. With a charlist every
     * character in it is taken literally (ranges such as "a..z" are not
     * supported). The list is escaped with preg_quote(), so characters such
     * as '^' cannot break or negate the character class. The D modifier pins
     * '$' to the very end of the string; without it, characters in front of
     * a trailing newline were stripped as well.
     *
     * @see rtrim()
     * @param string $str      UTF-8 encoded string
     * @param string $charlist characters to strip from the right
     * @return string
     */
    function utf8_rtrim($str,$charlist=''){
        if($charlist == '') return rtrim($str);

        // quote charlist for use in a character class
        $class = preg_quote($charlist, '/');

        return preg_replace('/['.$class.']+$/uD','',$str);
    }
}
253 |
|
---|
if(!function_exists('utf8_trim')){
    /**
     * Unicode aware replacement for trim()
     *
     * Falls back to the native trim() when no charlist is given; otherwise
     * the right side is trimmed first via utf8_rtrim(), then the left side
     * via utf8_ltrim().
     *
     * @see trim()
     * @param string $str      UTF-8 encoded string
     * @param string $charlist characters to strip from both ends
     * @return string
     */
    function utf8_trim($str,$charlist='') {
        if($charlist != ''){
            $rightTrimmed = utf8_rtrim($str,$charlist);
            return utf8_ltrim($rightTrimmed,$charlist);
        }
        return trim($str);
    }
}
268 |
|
---|
if(!function_exists('utf8_strtolower')){
    /**
     * Unicode aware replacement for strtolower()
     *
     * Delegates to mb_strtolower() when UTF8_MBSTRING is set; otherwise the
     * string is translated through the global $UTF8_UPPER_TO_LOWER table.
     *
     * @see strtolower()
     * @see utf8_strtoupper()
     * @param string $string UTF-8 encoded string
     * @return string
     */
    function utf8_strtolower($string){
        if(!UTF8_MBSTRING){
            global $UTF8_UPPER_TO_LOWER;
            return strtr($string,$UTF8_UPPER_TO_LOWER);
        }

        return mb_strtolower($string,'utf-8');
    }
}
286 |
|
---|
if(!function_exists('utf8_strtoupper')){
    /**
     * Unicode aware replacement for strtoupper()
     *
     * Delegates to mb_strtoupper() when UTF8_MBSTRING is set; otherwise the
     * string is translated through the global $UTF8_LOWER_TO_UPPER table.
     *
     * @see strtoupper()
     * @see utf8_strtolower()
     * @param string $string UTF-8 encoded string
     * @return string
     */
    function utf8_strtoupper($string){
        if(!UTF8_MBSTRING){
            global $UTF8_LOWER_TO_UPPER;
            return strtr($string,$UTF8_LOWER_TO_UPPER);
        }

        return mb_strtoupper($string,'utf-8');
    }
}
304 |
|
---|
if(!function_exists('utf8_ucfirst')){
    /**
     * UTF-8 aware alternative to ucfirst
     *
     * Uppercases the first character of a string via utf8_strtoupper() and
     * leaves the remainder untouched.
     *
     * @param string $str UTF-8 encoded string
     * @return string with first character as upper case (if applicable)
     */
    function utf8_ucfirst($str){
        $chars = utf8_strlen($str);

        if($chars == 0){
            return '';
        }
        if($chars == 1){
            return utf8_strtoupper($str);
        }

        // split into the first character and everything after it
        preg_match('/^(.{1})(.*)$/us', $str, $matches);
        $head = $matches[1];
        $tail = $matches[2];
        return utf8_strtoupper($head).$tail;
    }
}
326 |
|
---|
if(!function_exists('utf8_ucwords')){
    /**
     * UTF-8 aware alternative to ucwords
     *
     * Uppercases the first character of each word in a string. A word is a
     * run of characters delimited by form feed, horizontal tab, vertical
     * tab, line feed, carriage return or space, which is the word definition
     * given at http://www.php.net/ucwords
     *
     * @param string $str UTF-8 encoded string
     * @return string with first char of each word uppercase
     * @see http://www.php.net/ucwords
     */
    function utf8_ucwords($str) {
        // group 2: leading whitespace, group 3: first character of the word
        $wordRegex = '/(^|([\x0c\x09\x0b\x0a\x0d\x20]+))([^\x0c\x09\x0b\x0a\x0d\x20]{1})[^\x0c\x09\x0b\x0a\x0d\x20]*/u';

        return preg_replace_callback($wordRegex, 'utf8_ucwords_callback', $str);
    }

    /**
     * Callback for the preg_replace_callback() call in utf8_ucwords
     *
     * Not meant to be called directly.
     *
     * @param array $matches matches corresponding to a single word
     * @return string the word with its first character in uppercase,
     *                prefixed by the whitespace that preceded it
     * @see utf8_ucwords
     * @see utf8_strtoupper
     */
    function utf8_ucwords_callback($matches) {
        $word    = ltrim($matches[0]);
        $initial = utf8_strtoupper($matches[3]);

        // swap the first character of the word for its uppercase form
        return $matches[2] . utf8_substr_replace($word, $initial, 0, 1);
    }
}
363 |
|
---|
if(!function_exists('utf8_deaccent')){
    /**
     * Replace accented UTF-8 characters by their unaccented equivalents
     *
     * The replacements come from the global tables $UTF8_LOWER_ACCENTS and
     * $UTF8_UPPER_ACCENTS.
     *
     * @param string $string UTF-8 encoded string
     * @param int    $case   -1 deaccents lower case letters only, 1 upper
     *                       case letters only, 0 (default) both
     * @return string
     */
    function utf8_deaccent($string,$case=0){
        $result = $string;

        if($case <= 0){
            global $UTF8_LOWER_ACCENTS;
            $result = strtr($result, $UTF8_LOWER_ACCENTS);
        }

        if($case >= 0){
            global $UTF8_UPPER_ACCENTS;
            $result = strtr($result, $UTF8_UPPER_ACCENTS);
        }

        return $result;
    }
}
385 |
|
---|
if(!function_exists('utf8_romanize')){
    /**
     * Romanize a non-latin string
     *
     * Strings that are pure ASCII are returned unchanged; everything else is
     * translated through the global $UTF8_ROMANIZATION table.
     *
     * @param string $string UTF-8 encoded string
     * @return string
     */
    function utf8_romanize($string){
        if(!utf8_isASCII($string)){
            global $UTF8_ROMANIZATION;
            return strtr($string,$UTF8_ROMANIZATION);
        }

        // nothing to do for plain ASCII
        return $string;
    }
}
399 |
|
---|
if(!function_exists('utf8_stripspecials')){
    /**
     * Removes special characters (nonalphanumeric) from a UTF-8 string
     *
     * The characters to strip are taken from the global $UTF8_SPECIAL_CHARS2
     * string. The bytes 0x00 to 0x19 are always added to that set, and the
     * caller may add more through $additional. The quoted special character
     * list is built on the first call and cached for subsequent calls.
     *
     * @param string $string     The UTF8 string to strip of special chars
     * @param string $repl       Replace special with this string
     * @param string $additional Additional chars to strip (inserted verbatim
     *                           into a regexp char class, so the caller has
     *                           to escape them)
     * @return string
     */
    function utf8_stripspecials($string,$repl='',$additional=''){
        global $UTF8_SPECIAL_CHARS;
        global $UTF8_SPECIAL_CHARS2;

        static $specials = null;
        if($specials === null){
            $specials = preg_quote($UTF8_SPECIAL_CHARS2, '/');
        }

        $charClass = '['.$additional.'\x00-\x19'.$specials.']';
        return preg_replace('/'.$charClass.'/u', $repl, $string);
    }
}
425 |
|
---|
if(!function_exists('utf8_strpos')){
    /**
     * Unicode aware replacement for strpos()
     *
     * The needle is located with the byte oriented strpos() and the byte
     * position is then converted into a character position. Because
     * $offset is given in characters, the search is repeated with a
     * corrected byte offset until a hit at or behind $offset is found.
     *
     * @see strpos()
     * @param string  $haystack UTF-8 encoded string to search in
     * @param string  $needle   string to search for
     * @param integer $offset   character position to start searching at
     * @return integer|false character position of the needle, false if absent
     */
    function utf8_strpos($haystack, $needle, $offset=0){
        $byteShift = 0;   // bytes to add to $offset to reach the right byte
        $charPos   = null;

        do {
            $bytePos = strpos($haystack, $needle, $offset + $byteShift);
            if ($bytePos === false) {
                return false;
            }

            // translate the byte position into a character position
            $charPos = utf8_strlen(substr($haystack, 0, $bytePos));

            if ($charPos < $offset) {
                // hit lies before the wanted offset: skip the extra bytes
                $byteShift = $bytePos - $charPos;
            }
        } while ($charPos < $offset);

        return $charPos;
    }
}
456 |
|
---|
if(!function_exists('utf8_tohtml')){
    /**
     * Encodes UTF-8 characters to HTML entities
     *
     * ASCII characters are kept as they are, code points below 0x100 become
     * decimal entities and all others hexadecimal entities.
     *
     * @link http://www.php.net/manual/en/function.utf8-decode.php
     * @param string $str UTF-8 encoded string
     * @return string
     */
    function utf8_tohtml ($str) {
        $html = '';
        foreach (utf8_to_unicode($str) as $codepoint) {
            if ($codepoint >= 0x100) {
                $html .= '&#x'.dechex($codepoint).';';
            } elseif ($codepoint >= 0x80) {
                $html .= "&#$codepoint;";
            } else {
                $html .= chr($codepoint);
            }
        }
        return $html;
    }
}
478 |
|
---|
if(!function_exists('utf8_unhtml')){
    /**
     * Decodes HTML entities to UTF-8 characters
     *
     * By default only numeric entities (&#38; or &#x26;) are converted. When
     * $entities is anything but null (e.g. HTML_ENTITIES), named entities
     * such as &amp; and &lt; are handled as well.
     *
     * Numeric and named entities are decoded in one single pass. Running two
     * separate passes instead would decode text twice: an input such as
     * "&amp;#38;" would first become "&#38;" and then "&".
     *
     * @param string  $str      UTF-8 encoded string
     * @param boolean $entities Flag controlling decoding of named entities.
     * @return string UTF-8 encoded string with numeric (and named) entities replaced.
     */
    function utf8_unhtml($str, $entities=null) {
        static $decoder = null;
        if ($decoder === null) {
            $decoder = new utf8_entity_decoder();
        }

        if ($entities !== null) {
            // numeric and named entities
            return preg_replace_callback(
                '/&(#)?([Xx])?([0-9A-Za-z]+);/m',
                array($decoder, 'decode'),
                $str
            );
        }

        // numeric entities only
        return preg_replace_callback(
            '/(&#([Xx])?([0-9A-Za-z]+);)/m',
            'utf8_decode_numeric',
            $str
        );
    }
}
510 |
|
---|
if(!function_exists('utf8_decode_numeric')){
    /**
     * Callback decoding a single numeric HTML entity to UTF-8
     *
     * Expects the match array produced by the patterns in utf8_unhtml():
     * index 0 holds the complete entity, index 2 an optional 'x'/'X' marking
     * hexadecimal notation and index 3 the digits.
     *
     * Those patterns accept any alphanumeric payload, so something like
     * "&#foo;" reaches this callback too. Such text used to be decoded to
     * code point 0 (a NUL byte); it is now returned unchanged.
     *
     * @param array $ent regexp matches of one entity
     * @return string the UTF-8 character, or the untouched entity text when
     *                the payload is not a valid number
     */
    function utf8_decode_numeric($ent) {
        $digits = $ent[3];

        if ($ent[2] === 'x' || $ent[2] === 'X') {
            if (!preg_match('/^[0-9A-Fa-f]+$/D', $digits)) return $ent[0];
            $cp = hexdec($digits);
        } else {
            if (!preg_match('/^[0-9]+$/D', $digits)) return $ent[0];
            $cp = intval($digits);
        }

        return unicode_to_utf8(array($cp));
    }
}
525 |
|
---|
if(!class_exists('utf8_entity_decoder')){
    /**
     * Helper for utf8_unhtml() that decodes named and numeric HTML entities
     *
     * The lookup table is built in __construct(). The former PHP4 style
     * constructor (a method named like the class) is not run as constructor
     * on PHP 8, which left $table empty. The translation table is requested
     * in UTF-8 explicitly, because get_html_translation_table() returns
     * UTF-8 by default on current PHP versions and running ord() over such
     * multi byte characters produced wrong code points.
     */
    class utf8_entity_decoder {
        /** @var array maps entity text (e.g. "&eacute;") to its UTF-8 character */
        public $table;

        /**
         * Build the entity => UTF-8 character lookup table
         */
        function __construct() {
            $table = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'UTF-8');
            $this->table = array_flip($table);
        }

        /**
         * Convert a single byte character (ISO-8859-1) to UTF-8
         *
         * No longer needed by the constructor; kept for callers that use it.
         *
         * @param string $c single byte character
         * @return string
         */
        function makeutf8($c) {
            return unicode_to_utf8(array(ord($c)));
        }

        /**
         * Callback decoding one entity matched by utf8_unhtml()
         *
         * @param array $ent regexp matches; index 0 is the whole entity,
         *                   index 1 is '#' for numeric entities
         * @return string decoded character or the unchanged text if unknown
         */
        function decode($ent) {
            if ($ent[1] == '#') {
                return utf8_decode_numeric($ent);
            }
            if (array_key_exists($ent[0], $this->table)) {
                return $this->table[$ent[0]];
            }
            return $ent[0];
        }
    }
}
548 |
|
---|
if(!function_exists('utf8_to_unicode')){
    /**
     * Takes an UTF-8 string and returns an array of ints representing the
     * Unicode characters. Astral planes are supported ie. the ints in the
     * output can be > 0xFFFF. Occurrences of the BOM are ignored.
     *
     * If $strict is set to true the function returns false if the input
     * string isn't a valid UTF-8 octet sequence and raises a PHP error at
     * level E_USER_WARNING. This now includes a multi-octet sequence that is
     * cut off by the end of the string. Without $strict, bad lead and
     * continuation bytes are skipped silently, and completed sequences are
     * output even when they are overlong, surrogates or out of range.
     *
     * Bytes are read with $str[$i]; the curly brace offset syntax used
     * before is a parse error on PHP 8.
     *
     * @param string  $str    UTF-8 encoded string
     * @param boolean $strict Check for invalid sequences?
     * @return mixed array of unicode code points or false if UTF-8 invalid
     * @see unicode_to_utf8
     * @link http://hsivonen.iki.fi/php-utf8/
     * @link http://sourceforge.net/projects/phputf8/
     */
    function utf8_to_unicode($str,$strict=false) {
        $mState = 0; // octets still expected before the current sequence ends
        $mUcs4  = 0; // code point assembled so far
        $mBytes = 1; // total number of octets of the current sequence

        $out = array();
        $len = strlen($str);

        for($i = 0; $i < $len; $i++) {
            $in = ord($str[$i]);

            if ($mState == 0) {
                // expect either a US-ASCII character or a lead octet
                if (0 == (0x80 & $in)) {
                    // US-ASCII, pass straight through.
                    $out[] = $in;
                    $mBytes = 1;

                } else if (0xC0 == (0xE0 & $in)) {
                    // First octet of 2 octet sequence
                    $mUcs4  = ($in & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;

                } else if (0xE0 == (0xF0 & $in)) {
                    // First octet of 3 octet sequence
                    $mUcs4  = ($in & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;

                } else if (0xF0 == (0xF8 & $in)) {
                    // First octet of 4 octet sequence
                    $mUcs4  = ($in & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;

                } else if (0xF8 == (0xFC & $in)) {
                    // First octet of 5 octet sequence. Always illegal (either
                    // not the shortest form or outside 0-0x10FFFF). Instead
                    // of resynchronizing, the sequence is consumed and the
                    // check at its end reports it.
                    $mUcs4  = ($in & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;

                } else if (0xFC == (0xFE & $in)) {
                    // First octet of 6 octet sequence, see 5 octet sequence.
                    $mUcs4  = ($in & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;

                } elseif($strict) {
                    // neither US-ASCII nor a legal first octet
                    trigger_error(
                        'utf8_to_unicode: Illegal sequence identifier '.
                        'in UTF-8 at byte '.$i,
                        E_USER_WARNING
                    );
                    return false;
                }

            } else {
                // inside a multi-octet sequence: expect a continuation octet
                if (0x80 == (0xC0 & $in)) {
                    // Legal continuation.
                    $shift  = ($mState - 1) * 6;
                    $mUcs4 |= ($in & 0x0000003F) << $shift;

                    if (0 == --$mState) {
                        // End of the sequence, $mUcs4 is the final code point.
                        // Check for illegal sequences and code points:
                        // non-shortest forms (illegal from Unicode 3.1),
                        // surrogates (illegal from Unicode 3.2) and values
                        // outside the Unicode range.
                        if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
                            ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
                            ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
                            (4 < $mBytes) ||
                            (($mUcs4 & 0xFFFFF800) == 0xD800) ||
                            ($mUcs4 > 0x10FFFF)) {

                            if($strict){
                                trigger_error(
                                    'utf8_to_unicode: Illegal sequence or codepoint '.
                                    'in UTF-8 at byte '.$i,
                                    E_USER_WARNING
                                );
                                return false;
                            }
                        }

                        if (0xFEFF != $mUcs4) {
                            // BOM is legal but we don't want to output it
                            $out[] = $mUcs4;
                        }

                        // reset for the next character
                        $mState = 0;
                        $mUcs4  = 0;
                        $mBytes = 1;
                    }

                } elseif($strict) {
                    // a continuation octet was expected but not found
                    trigger_error(
                        'utf8_to_unicode: Incomplete multi-octet '.
                        ' sequence in UTF-8 at byte '.$i,
                        E_USER_WARNING
                    );
                    return false;
                }
            }
        }

        // the string ended in the middle of a multi-octet sequence
        if ($strict && $mState != 0) {
            trigger_error(
                'utf8_to_unicode: Incomplete multi-octet '.
                ' sequence in UTF-8 at byte '.$len,
                E_USER_WARNING
            );
            return false;
        }

        return $out;
    }
}
722 |
|
---|
if(!function_exists('unicode_to_utf8')){
    /**
     * Takes an array of ints representing the Unicode characters and returns
     * a UTF-8 string. Astral planes are supported ie. the ints in the
     * input can be > 0xFFFF. Occurrences of the BOM are ignored. Surrogates
     * are not allowed.
     *
     * If $strict is set to true the function returns false if the input
     * array contains ints that represent surrogates or are outside the
     * Unicode range and raises a PHP error at level E_USER_WARNING. Without
     * $strict such values are skipped.
     *
     * The result is assembled by plain string concatenation. The earlier
     * output buffering approach left a buffer open whenever strict mode
     * returned early. Negative values are now treated as out of range
     * instead of being encoded as a bogus two byte sequence.
     *
     * @param array   $arr    unicode code points representing a string
     * @param boolean $strict Check for invalid sequences?
     * @return mixed UTF-8 string or false if array contains invalid code points
     * @see utf8_to_unicode
     * @link http://hsivonen.iki.fi/php-utf8/
     * @link http://sourceforge.net/projects/phputf8/
     */
    function unicode_to_utf8($arr,$strict=false) {
        if (!is_array($arr)) return '';

        $utf8 = '';

        foreach ($arr as $k => $cp) {

            if ($cp >= 0 && $cp <= 0x007f) {
                // ASCII range (including control chars)
                $utf8 .= chr($cp);

            } else if ($cp >= 0x0080 && $cp <= 0x07ff) {
                // 2 byte sequence
                $utf8 .= chr(0xc0 | ($cp >> 6))
                       . chr(0x80 | ($cp & 0x003f));

            } else if ($cp == 0xFEFF) {
                // Byte order mark: skipped on purpose

            } else if ($cp >= 0xD800 && $cp <= 0xDFFF) {
                // surrogates are illegal as code points
                if($strict){
                    trigger_error(
                        'unicode_to_utf8: Illegal surrogate '.
                        'at index: '.$k.', value: '.$cp,
                        E_USER_WARNING
                    );
                    return false;
                }

            } else if ($cp >= 0x0800 && $cp <= 0xffff) {
                // 3 byte sequence
                $utf8 .= chr(0xe0 | ($cp >> 12))
                       . chr(0x80 | (($cp >> 6) & 0x003f))
                       . chr(0x80 | ($cp & 0x003f));

            } else if ($cp >= 0x10000 && $cp <= 0x10ffff) {
                // 4 byte sequence
                $utf8 .= chr(0xf0 | ($cp >> 18))
                       . chr(0x80 | (($cp >> 12) & 0x3f))
                       . chr(0x80 | (($cp >> 6) & 0x3f))
                       . chr(0x80 | ($cp & 0x3f));

            } elseif($strict) {
                // negative or above 0x10FFFF
                trigger_error(
                    'unicode_to_utf8: Codepoint out of Unicode range '.
                    'at index: '.$k.', value: '.$cp,
                    E_USER_WARNING
                );
                return false;
            }
        }

        return $utf8;
    }
}
815 |
|
---|
if(!function_exists('utf8_to_utf16be')){
    /**
     * UTF-8 to UTF-16BE conversion.
     *
     * Uses mb_convert_encoding() when UTF8_MBSTRING is set. The fallback
     * decodes the string with utf8_to_unicode() and writes each code point
     * as big endian 16 bit unit(s). Code points above 0xFFFF are written as
     * a surrogate pair; they used to be truncated to their low 16 bits.
     *
     * @param string  $str UTF-8 encoded string (taken by reference, not modified)
     * @param boolean $bom prepend the big endian byte order mark?
     * @return string UTF-16BE encoded string
     */
    function utf8_to_utf16be(&$str, $bom = false) {
        $out = $bom ? "\xFE\xFF" : '';
        if(UTF8_MBSTRING) return $out.mb_convert_encoding($str,'UTF-16BE','UTF-8');

        $uni = utf8_to_unicode($str);
        foreach($uni as $cp){
            if($cp > 0xFFFF && $cp <= 0x10FFFF){
                // astral plane: split into high and low surrogate
                $cp  -= 0x10000;
                $out .= pack('nn', 0xD800 | ($cp >> 10), 0xDC00 | ($cp & 0x3FF));
            }else{
                $out .= pack('n',$cp);
            }
        }
        return $out;
    }
}
833 |
|
---|
if(!function_exists('utf16be_to_utf8')){
    /**
     * UTF-16BE to UTF-8 conversion.
     *
     * The string is split into big endian 16 bit units. A high surrogate
     * directly followed by a low surrogate is combined into one astral plane
     * code point before everything is handed to unicode_to_utf8().
     * Previously both halves were passed on separately and therefore dropped
     * as illegal surrogates. Unpaired surrogates are still passed on as is.
     *
     * @param string $str UTF-16BE encoded string (taken by reference, not modified)
     * @return string UTF-8 encoded string
     */
    function utf16be_to_utf8(&$str) {
        $units = unpack('n*',$str);
        if(!is_array($units)) return '';

        $units = array_values($units); // unpack() indexes from 1
        $count = count($units);
        $uni   = array();

        for($i = 0; $i < $count; $i++){
            $unit = $units[$i];

            $isPair = $unit >= 0xD800 && $unit <= 0xDBFF
                   && $i + 1 < $count
                   && $units[$i+1] >= 0xDC00 && $units[$i+1] <= 0xDFFF;

            if($isPair){
                $uni[] = 0x10000 + (($unit - 0xD800) << 10) + ($units[$i+1] - 0xDC00);
                $i++; // the low surrogate has been consumed as well
            }else{
                $uni[] = $unit;
            }
        }

        return unicode_to_utf8($uni);
    }
}
845 |
|
---|
if(!function_exists('utf8_bad_replace')){
    /**
     * Replace bad bytes with an alternative character
     *
     * The string is walked sequence by sequence with a PCRE pattern that
     * recognises well formed UTF-8 (taken from the W3 FAQ "Multilingual
     * Forms", extended to the full ASCII range including control chars).
     * Every byte that does not start a well formed sequence is replaced.
     *
     * The pattern is anchored with \G at a moving offset, so the input is
     * scanned once; the string is no longer re-sliced after every match and
     * no output buffer is used.
     *
     * @see http://www.w3.org/International/questions/qa-forms-utf-8
     * @param string $str     string to search
     * @param string $replace string to replace bad bytes with (defaults to
     *                        '', i.e. bad bytes are removed) - use ASCII
     * @return string
     */
    function utf8_bad_replace($str, $replace = '') {
        $UTF8_BAD =
            '([\x00-\x7F]'.                          # ASCII (including control chars)
            '|[\xC2-\xDF][\x80-\xBF]'.               # non-overlong 2-byte
            '|\xE0[\xA0-\xBF][\x80-\xBF]'.           # excluding overlongs
            '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'.    # straight 3-byte
            '|\xED[\x80-\x9F][\x80-\xBF]'.           # excluding surrogates
            '|\xF0[\x90-\xBF][\x80-\xBF]{2}'.        # planes 1-3
            '|[\xF1-\xF3][\x80-\xBF]{3}'.            # planes 4-15
            '|\xF4[\x80-\x8F][\x80-\xBF]{2}'.        # plane 16
            '|(.{1}))';                              # invalid byte

        $str    = (string)$str;
        $len    = strlen($str);
        $pos    = 0;
        $result = '';

        while ($pos < $len && preg_match('/\G'.$UTF8_BAD.'/S', $str, $matches, 0, $pos)) {
            // group 2 is only set when the "invalid byte" alternative matched
            $result .= isset($matches[2]) ? $replace : $matches[0];
            $pos    += strlen($matches[0]);
        }

        return $result;
    }
}
887 |
|
---|
if(!function_exists('utf8_correctIdx')){
    /**
     * Snap a byte offset inside a UTF-8 string to a character boundary
     *
     * Offsets at or below zero yield 0, offsets at or beyond the end of the
     * string yield the string length. Otherwise the offset is moved off any
     * continuation byte (10xxxxxx), either backwards or forwards.
     *
     * @param $str  string utf8 character string
     * @param $i    int    byte index into $str
     * @param $next bool   direction to search for boundary,
     *                       false = up (current character)
     *                       true = down (next character)
     *
     * @return int byte index into $str now pointing to a utf8 character boundary
     *
     * @author chris smith <[email protected]>
     */
    function utf8_correctIdx(&$str,$i,$next=false) {

        if ($i <= 0) return 0;

        $length = strlen($str);
        if ($i >= $length) return $length;

        if ($next) {
            // walk forward until a non-continuation byte or the end is reached
            for (; $i < $length; $i++) {
                if ((ord($str[$i]) & 0xC0) != 0x80) break;
            }
        } else {
            // walk backward until a non-continuation byte or the start is reached
            for (; $i; $i--) {
                if ((ord($str[$i]) & 0xC0) != 0x80) break;
            }
        }

        return $i;
    }
}
|
---|
918 |
|
---|
919 | // only needed if no mb_string available
|
---|
920 | if(!UTF8_MBSTRING){
|
---|
921 | /**
|
---|
922 | * UTF-8 Case lookup table
|
---|
923 | *
|
---|
924 |  * This lookup table maps the lower case letters to their corresponding
|
---|
925 |  * upper case letter in UTF-8
|
---|
926 | *
|
---|
927 | * @author Andreas Gohr <[email protected]>
|
---|
928 | */
|
---|
929 | global $UTF8_LOWER_TO_UPPER;
|
---|
930 | if(empty($UTF8_LOWER_TO_UPPER)) $UTF8_LOWER_TO_UPPER = array(
|
---|
931 | "ïœ"=>"","ïœ"=>"","ïœ"=>"","ïœ"=>"","ïœ"=>"","ïœ"=>"","ïœ"=>"","ïœ"=>"","ïœ"=>"","ïœ"=>"",
|
---|
932 | "ïœ"=>"","ïœ"=>"","ïœ"=>"","ïœ"=>"ïŒ","ïœ"=>"","ïœ"=>"","ïœ"=>"","ïœ"=>"","ïœ"=>"","ïœ"=>"",
|
---|
933 | "ïœ"=>"","ïœ
|
---|
934 | "=>"","ïœ"=>"","ïœ"=>"","ïœ"=>"","ïœ"=>"","ῳ"=>"á¿Œ","á¿¥"=>"Ῥ","á¿¡"=>"á¿©","á¿"=>"á¿",
|
---|
935 | "á¿"=>"á¿","á¿"=>"á¿","៟"=>"Î","៳"=>"៌","៱"=>"៹","៰"=>"៞","៧"=>"៯","៊"=>"៮","៥"=>"áŸ","ៀ"=>"៬",
|
---|
936 | "៣"=>"៫","២"=>"៪","១"=>"៩","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ",
|
---|
937 | "áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ
|
---|
938 | "=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","᜜"=>"á¿»",
|
---|
939 | "ᜌ"=>"Ὼ","᜻"=>"á¿«","᜺"=>"Ὺ","᜹"=>"Ό","᜞"=>"á¿ž","᜷"=>"á¿","᜶"=>"á¿","᜵"=>"á¿","ᜎ"=>"á¿","ᜳ"=>"á¿",
|
---|
940 | "ᜲ"=>"á¿","ᜱ"=>"៻","ᜰ"=>"៺","ᜧ"=>"ᜯ","ᜊ"=>"ᜮ","ᜥ"=>"áœ","ᜀ"=>"ᜬ","ᜣ"=>"ᜫ","ᜢ"=>"ᜪ","ᜡ"=>"ᜩ",
|
---|
941 | "áœ"=>"áœ","áœ"=>"áœ","áœ"=>"áœ","áœ"=>"áœ","áœ
|
---|
942 | "=>"áœ","áœ"=>"áœ","áœ"=>"áœ","áœ"=>"áœ","áœ"=>"áœ","áœ"=>"áœ",
|
---|
943 | "ጷ"=>"ጿ","ጶ"=>"ጟ","ጵ"=>"ጜ","ጎ"=>"ጌ","ጳ"=>"ጻ","ጲ"=>"ጺ","ጱ"=>"ጹ","ጰ"=>"ጞ","ጧ"=>"ጯ","ጊ"=>"ጮ",
|
---|
944 | "ጥ"=>"áŒ","ጀ"=>"ጬ","ጣ"=>"ጫ","ጢ"=>"ጪ","ጡ"=>"ጩ","áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ",
|
---|
945 | "áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ
|
---|
946 | "=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ","ỹ"=>"Ở",
|
---|
947 | "á»·"=>"Ỷ","ỵ"=>"Ỏ","ỳ"=>"Ỳ","á»±"=>"á»°","ữ"=>"á»®","á»"=>"Ử","ừ"=>"Ừ","ứ"=>"Ớ","ủ"=>"Ị","ụ"=>"Ề",
|
---|
948 | "ợ"=>"Ợ","ỡ"=>"á» ","á»"=>"á»","á»"=>"á»","á»"=>"á»","á»"=>"á»","á»"=>"á»","á»"=>"á»","á»"=>"á»","á»"=>"á»",
|
---|
949 | "á»"=>"á»","á»"=>"á»","á»"=>"á»","á»"=>"á»","á»"=>"á»","á»
|
---|
950 | "=>"á»","á»"=>"á»","á»"=>"á»","ế"=>"ẟ","ẜ"=>"Ẍ",
|
---|
951 | "ẻ"=>"Ẻ","ẹ"=>"ẞ","ặ"=>"Ặ","ẵ"=>"Ẏ","ẳ"=>"Ẳ","ằ"=>"Ằ","ắ"=>"Ắ","áº"=>"Ậ","ẫ"=>"Ẫ","ẩ"=>"ẚ",
|
---|
952 | "ầ"=>"Ẋ","ấ"=>"Ẁ","ả"=>"Ả","ạ"=>"Ạ","áº"=>"á¹ ","áº"=>"áº","áº"=>"áº","áº"=>"áº","áº"=>"áº","áº"=>"áº",
|
---|
953 | "áº"=>"áº","áº"=>"áº","áº"=>"áº","áº
|
---|
954 | "=>"áº","áº"=>"áº","áº"=>"áº","ṿ"=>"ṟ","Ṝ"=>"Ṍ","á¹»"=>"Ṻ","á¹¹"=>"Ṟ",
|
---|
955 | "á¹·"=>"Ṷ","á¹µ"=>"Ṏ","á¹³"=>"á¹²","á¹±"=>"á¹°","ṯ"=>"á¹®","á¹"=>"Ṭ","ṫ"=>"Ṫ","ṩ"=>"Ṛ","ṧ"=>"Ṋ","á¹¥"=>"á¹€",
|
---|
956 | "á¹£"=>"á¹¢","ṡ"=>"á¹ ","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹",
|
---|
957 | "á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹
|
---|
958 | "=>"á¹","á¹"=>"á¹","á¹"=>"á¹","áž¿"=>"ស","ážœ"=>"ឌ",
|
---|
959 | "áž»"=>"ឺ","áž¹"=>"ážž","áž·"=>"ា","ážµ"=>"ណ","áž³"=>"áž²","áž±"=>"áž°","ឯ"=>"áž®","áž"=>"ឬ","áž«"=>"ឪ","áž©"=>"ážš",
|
---|
960 | "ឧ"=>"ដ","ឥ"=>"ក","ឣ"=>"អ","áž¡"=>"áž ","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž",
|
---|
961 | "áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž
|
---|
962 | "=>"áž","áž"=>"áž","áž"=>"áž",
|
---|
963 | "Ö"=>"Õ","Ö
|
---|
964 | "=>"Õ","Ö"=>"Õ","Ö"=>"Õ","Ö"=>"Õ","Ö"=>"Õ","Ö"=>"Õ","Õ¿"=>"Õ","ÕŸ"=>"Õ","Õœ"=>"Õ",
|
---|
965 | "ÕŒ"=>"Õ","Õ»"=>"Õ","Õº"=>"Õ","Õ¹"=>"Õ","Õž"=>"Õ","Õ·"=>"Õ","Õ¶"=>"Õ","Õµ"=>"Õ
|
---|
966 | ","ÕŽ"=>"Õ","Õ³"=>"Õ",
|
---|
967 | "Õ²"=>"Õ","Õ±"=>"Õ","Õ°"=>"Õ","Õ¯"=>"Ô¿","Õ®"=>"ÔŸ","Õ"=>"Ôœ","Õ¬"=>"ÔŒ","Õ«"=>"Ô»","Õª"=>"Ôº","Õ©"=>"Ô¹",
|
---|
968 | "Õš"=>"Ôž","Õ§"=>"Ô·","ÕŠ"=>"Ô¶","Õ¥"=>"Ôµ","Õ€"=>"ÔŽ","Õ£"=>"Ô³","Õ¢"=>"Ô²","Õ¡"=>"Ô±","Ô"=>"Ô","Ô"=>"Ô",
|
---|
969 | "Ô"=>"Ô","Ô"=>"Ô","Ô"=>"Ô","Ô
|
---|
970 | "=>"Ô","Ô"=>"Ô","Ô"=>"Ô","Ó¹"=>"Óž","Óµ"=>"ÓŽ","Ó³"=>"Ó²","Ó±"=>"Ó°",
|
---|
971 | "Ó¯"=>"Ó®","Ó"=>"Ó¬","Ó«"=>"Óª","Ó©"=>"Óš","Ó§"=>"ÓŠ","Ó¥"=>"Ó€","Ó£"=>"Ó¢","Ó¡"=>"Ó ","Ó"=>"Ó","Ó"=>"Ó",
|
---|
972 | "Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó",
|
---|
973 | "Ó"=>"Ó
|
---|
974 | ","Ó"=>"Ó","Ó"=>"Ó","Ò¿"=>"ÒŸ","Òœ"=>"ÒŒ","Ò»"=>"Òº","Ò¹"=>"Òž","Ò·"=>"Ò¶","Òµ"=>"ÒŽ","Ò³"=>"Ò²",
|
---|
975 | "Ò±"=>"Ò°","Ò¯"=>"Ò®","Ò"=>"Ò¬","Ò«"=>"Òª","Ò©"=>"Òš","Ò§"=>"ÒŠ","Ò¥"=>"Ò€","Ò£"=>"Ò¢","Ò¡"=>"Ò ","Ò"=>"Ò",
|
---|
976 | "Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò",
|
---|
977 | "Ò"=>"Ò","Ñ¿"=>"ÑŸ","Ñœ"=>"ÑŒ","Ñ»"=>"Ѻ","ѹ"=>"Ñž","Ñ·"=>"Ѷ","ѵ"=>"ÑŽ","ѳ"=>"Ѳ","ѱ"=>"Ñ°","ѯ"=>"Ñ®",
|
---|
978 | "Ñ"=>"Ѭ","Ñ«"=>"Ѫ","Ñ©"=>"Ñš","ѧ"=>"ÑŠ","Ñ¥"=>"Ñ€","Ñ£"=>"Ñ¢","Ñ¡"=>"Ñ ","Ñ"=>"Ð","Ñ"=>"Ð","Ñ"=>"Ð",
|
---|
979 | "Ñ"=>"Ð","Ñ"=>"Ð","Ñ"=>"Ð","Ñ"=>"Ð","Ñ"=>"Ð","Ñ"=>"Ð","Ñ"=>"Ð","Ñ"=>"Ð
|
---|
980 | ","Ñ"=>"Ð","Ñ"=>"Ð",
|
---|
981 | "Ñ"=>"Ð","Ñ"=>"Ð","Ñ"=>"Ð","Ñ"=>"Я","Ñ"=>"Ю","Ñ"=>"Ð","Ñ"=>"Ь","Ñ"=>"Ы","Ñ"=>"Ъ","Ñ"=>"Щ",
|
---|
982 | "Ñ"=>"К","Ñ"=>"Ч","Ñ"=>"Њ","Ñ
|
---|
983 | "=>"Ð¥","Ñ"=>"Ѐ","Ñ"=>"У","Ñ"=>"Т","Ñ"=>"С","Ñ"=>"Ð ","п"=>"Ð",
|
---|
984 | "П"=>"Ð","Ðœ"=>"Ð","ÐŒ"=>"Ð","л"=>"Ð","к"=>"Ð","й"=>"Ð","О"=>"Ð","з"=>"Ð","ж"=>"Ð","е"=>"Ð",
|
---|
985 | "ÐŽ"=>"Ð","г"=>"Ð","в"=>"Ð","б"=>"Ð","а"=>"Ð","ϵ"=>"Î","ϲ"=>"Σ","ϱ"=>"Ρ","Ï°"=>"Î","ϯ"=>"Ï®",
|
---|
986 | "Ï"=>"Ϭ","Ï«"=>"Ϫ","Ï©"=>"Ïš","ϧ"=>"ÏŠ","Ï¥"=>"Ï€","Ï£"=>"Ï¢","Ï¡"=>"Ï ","Ï"=>"Ï","Ï"=>"Ï","Ï"=>"Ï",
|
---|
987 | "Ï"=>"Ï","Ï"=>"Î ","Ï"=>"Ί","Ï"=>"Î","Ï"=>"Î","Ï"=>"Î","Ï"=>"Î","Ï"=>"Î","Ï"=>"Ϋ","Ï"=>"Ϊ",
|
---|
988 | "Ï"=>"Ω","Ï"=>"Κ","Ï"=>"Χ","Ï"=>"Ί","Ï
|
---|
989 | "=>"Î¥","Ï"=>"΀","Ï"=>"Σ","Ï"=>"Σ","Ï"=>"Ρ","Ï"=>"Î ",
|
---|
990 | "ο"=>"Î","Ο"=>"Î","Îœ"=>"Î","ÎŒ"=>"Î","λ"=>"Î","κ"=>"Î","ι"=>"Î","Ξ"=>"Î","η"=>"Î","ζ"=>"Î",
|
---|
991 | "ε"=>"Î","ÎŽ"=>"Î","γ"=>"Î","β"=>"Î","α"=>"Î","ί"=>"Î","ή"=>"Î","Î"=>"Î","ά"=>"Î","Ê"=>"Æ·",
|
---|
992 | "Ê"=>"Ʋ","Ê"=>"Ʊ","Ê"=>"Æ®","Ê"=>"Æ©","Ê"=>"ÆŠ","ɵ"=>"Æ","ɲ"=>"Æ","ɯ"=>"Æ","É©"=>"Æ","Éš"=>"Æ",
|
---|
993 | "É£"=>"Æ","É"=>"Æ","É"=>"Æ","É"=>"Æ","É"=>"Æ","É"=>"Æ","É"=>"Æ","ȳ"=>"Ȳ","ȱ"=>"È°","ȯ"=>"È®",
|
---|
994 | "È"=>"Ȭ","È«"=>"Ȫ","È©"=>"Èš","ȧ"=>"ÈŠ","È¥"=>"È€","È£"=>"È¢","È"=>"È","È"=>"È","È"=>"È","È"=>"È",
|
---|
995 | "È"=>"È","È"=>"È","È"=>"È","È"=>"È","È"=>"È","È"=>"È","È"=>"È","È"=>"È","È"=>"È","È
|
---|
996 | "=>"È",
|
---|
997 | "È"=>"È","È"=>"È","Ç¿"=>"ÇŸ","Çœ"=>"ÇŒ","Ç»"=>"Ǻ","ǹ"=>"Çž","ǵ"=>"ÇŽ","dz"=>"Dz","ǯ"=>"Ç®","Ç"=>"Ǭ",
|
---|
998 | "Ç«"=>"Ǫ","Ç©"=>"Çš","ǧ"=>"ÇŠ","Ç¥"=>"Ç€","Ç£"=>"Ç¢","Ç¡"=>"Ç ","Ç"=>"Ç","Ç"=>"Æ","Ç"=>"Ç","Ç"=>"Ç",
|
---|
999 | "Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç
|
---|
1000 | ","Æ¿"=>"Ç·",
|
---|
1001 | "Æœ"=>"ÆŒ","ƹ"=>"Æž","ƶ"=>"Ƶ","ÆŽ"=>"Ƴ","Æ°"=>"Ư","Æ"=>"Ƭ","Æš"=>"Ƨ","Æ¥"=>"Æ€","Æ£"=>"Æ¢","Æ¡"=>"Æ ",
|
---|
1002 | "Æ"=>"È ","Æ"=>"Æ","Æ"=>"Ƕ","Æ"=>"Æ","Æ"=>"Æ","Æ"=>"Æ","Æ
|
---|
1003 | "=>"Æ","Æ"=>"Æ","Å¿"=>"S","ÅŸ"=>"Åœ",
|
---|
1004 | "ÅŒ"=>"Å»","ź"=>"Ź","Å·"=>"Ŷ","ŵ"=>"ÅŽ","ų"=>"Ų","ű"=>"Å°","ů"=>"Å®","Å"=>"Ŭ","Å«"=>"Ū","Å©"=>"Åš",
|
---|
1005 | "ŧ"=>"ÅŠ","Å¥"=>"Å€","Å£"=>"Å¢","Å¡"=>"Å ","Å"=>"Å","Å"=>"Å","Å"=>"Å","Å"=>"Å","Å"=>"Å","Å"=>"Å",
|
---|
1006 | "Å"=>"Å","Å"=>"Å","Å"=>"Å","Å"=>"Å","Å"=>"Å","Å"=>"Å","Å"=>"Å
|
---|
1007 | ","Å"=>"Å","Å"=>"Å","Å"=>"Ä¿",
|
---|
1008 | "ÄŸ"=>"Äœ","ÄŒ"=>"Ä»","ĺ"=>"Ĺ","Ä·"=>"Ķ","ĵ"=>"ÄŽ","ij"=>"IJ","ı"=>"I","į"=>"Ä®","Ä"=>"Ĭ","Ä«"=>"Ī",
|
---|
1009 | "Ä©"=>"Äš","ħ"=>"ÄŠ","Ä¥"=>"Ä€","Ä£"=>"Ä¢","Ä¡"=>"Ä ","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä",
|
---|
1010 | "Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä
|
---|
1011 | "=>"Ä","Ä"=>"Ä",
|
---|
1012 | "Ä"=>"Ä","ÿ"=>"Åž","ß"=>"Ã","Ãœ"=>"Ã","ÃŒ"=>"Ã","û"=>"Ã","ú"=>"Ã","ù"=>"Ã","Þ"=>"Ã","ö"=>"Ã",
|
---|
1013 | "õ"=>"Ã","ÃŽ"=>"Ã","ó"=>"Ã","ò"=>"Ã","ñ"=>"Ã","ð"=>"Ã","ï"=>"Ã","î"=>"Ã","Ã"=>"Ã","ì"=>"Ã",
|
---|
1014 | "ë"=>"Ã","ê"=>"Ã","é"=>"Ã","Ú"=>"Ã","ç"=>"Ã","Ê"=>"Ã","Ã¥"=>"Ã
|
---|
1015 | ","À"=>"Ã","ã"=>"Ã","â"=>"Ã",
|
---|
1016 | "á"=>"Ã","à "=>"Ã","µ"=>"Î","z"=>"Z","y"=>"Y","x"=>"X","w"=>"W","v"=>"V","u"=>"U","t"=>"T",
|
---|
1017 | "s"=>"S","r"=>"R","q"=>"Q","p"=>"P","o"=>"O","n"=>"N","m"=>"M","l"=>"L","k"=>"K","j"=>"J",
|
---|
1018 | "i"=>"I","h"=>"H","g"=>"G","f"=>"F","e"=>"E","d"=>"D","c"=>"C","b"=>"B","a"=>"A"
|
---|
1019 | );
|
---|
1020 |
|
---|
1021 | /**
|
---|
1022 | * UTF-8 Case lookup table
|
---|
1023 | *
|
---|
1024 |  * This lookup table maps the upper case letters to their corresponding
|
---|
1025 |  * lower case letter in UTF-8
|
---|
1026 | *
|
---|
1027 | * @author Andreas Gohr <[email protected]>
|
---|
1028 | */
|
---|
1029 | global $UTF8_UPPER_TO_LOWER;
|
---|
1030 | if(empty($UTF8_UPPER_TO_LOWER)) $UTF8_UPPER_TO_LOWER = array (
|
---|
1031 | ""=>"ïœ",""=>"ïœ",""=>"ïœ",""=>"ïœ",""=>"ïœ",""=>"ïœ",""=>"ïœ",""=>"ïœ",""=>"ïœ",""=>"ïœ",
|
---|
1032 | ""=>"ïœ",""=>"ïœ",""=>"ïœ","ïŒ"=>"ïœ",""=>"ïœ",""=>"ïœ",""=>"ïœ",""=>"ïœ",""=>"ïœ",""=>"ïœ",
|
---|
1033 | ""=>"ïœ",""=>"ïœ
|
---|
1034 | ",""=>"ïœ",""=>"ïœ",""=>"ïœ",""=>"ïœ","á¿Œ"=>"ῳ","Ῥ"=>"á¿¥","á¿©"=>"á¿¡","á¿"=>"á¿",
|
---|
1035 | "á¿"=>"á¿","á¿"=>"á¿","Î"=>"៟","៌"=>"៳","៹"=>"៱","៞"=>"៰","៯"=>"៧","៮"=>"៊","áŸ"=>"៥","៬"=>"ៀ",
|
---|
1036 | "៫"=>"៣","៪"=>"២","៩"=>"១","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ",
|
---|
1037 | "áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ
|
---|
1038 | ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","á¿»"=>"᜜",
|
---|
1039 | "Ὼ"=>"ᜌ","á¿«"=>"᜻","Ὺ"=>"᜺","Ό"=>"᜹","á¿ž"=>"᜞","á¿"=>"᜷","á¿"=>"᜶","á¿"=>"᜵","á¿"=>"ᜎ","á¿"=>"ᜳ",
|
---|
1040 | "á¿"=>"ᜲ","៻"=>"ᜱ","៺"=>"ᜰ","ᜯ"=>"ᜧ","ᜮ"=>"ᜊ","áœ"=>"ᜥ","ᜬ"=>"ᜀ","ᜫ"=>"ᜣ","ᜪ"=>"ᜢ","ᜩ"=>"ᜡ",
|
---|
1041 | "áœ"=>"áœ","áœ"=>"áœ","áœ"=>"áœ","áœ"=>"áœ","áœ"=>"áœ
|
---|
1042 | ","áœ"=>"áœ","áœ"=>"áœ","áœ"=>"áœ","áœ"=>"áœ","áœ"=>"áœ",
|
---|
1043 | "ጿ"=>"ጷ","ጟ"=>"ጶ","ጜ"=>"ጵ","ጌ"=>"ጎ","ጻ"=>"ጳ","ጺ"=>"ጲ","ጹ"=>"ጱ","ጞ"=>"ጰ","ጯ"=>"ጧ","ጮ"=>"ጊ",
|
---|
1044 | "áŒ"=>"ጥ","ጬ"=>"ጀ","ጫ"=>"ጣ","ጪ"=>"ጢ","ጩ"=>"ጡ","áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ",
|
---|
1045 | "áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ
|
---|
1046 | ","áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ","Ở"=>"ỹ",
|
---|
1047 | "Ỷ"=>"á»·","Ỏ"=>"ỵ","Ỳ"=>"ỳ","á»°"=>"á»±","á»®"=>"ữ","Ử"=>"á»","Ừ"=>"ừ","Ớ"=>"ứ","Ị"=>"ủ","Ề"=>"ụ",
|
---|
1048 | "Ợ"=>"ợ","á» "=>"ỡ","á»"=>"á»","á»"=>"á»","á»"=>"á»","á»"=>"á»","á»"=>"á»","á»"=>"á»","á»"=>"á»","á»"=>"á»",
|
---|
1049 | "á»"=>"á»","á»"=>"á»","á»"=>"á»","á»"=>"á»","á»"=>"á»","á»"=>"á»
|
---|
1050 | ","á»"=>"á»","á»"=>"á»","ẟ"=>"ế","Ẍ"=>"ẜ",
|
---|
1051 | "Ẻ"=>"ẻ","ẞ"=>"ẹ","Ặ"=>"ặ","Ẏ"=>"ẵ","Ẳ"=>"ẳ","Ằ"=>"ằ","Ắ"=>"ắ","Ậ"=>"áº","Ẫ"=>"ẫ","ẚ"=>"ẩ",
|
---|
1052 | "Ẋ"=>"ầ","Ẁ"=>"ấ","Ả"=>"ả","Ạ"=>"ạ","á¹ "=>"áº","áº"=>"áº","áº"=>"áº","áº"=>"áº","áº"=>"áº","áº"=>"áº",
|
---|
1053 | "áº"=>"áº","áº"=>"áº","áº"=>"áº","áº"=>"áº
|
---|
1054 | ","áº"=>"áº","áº"=>"áº","ṟ"=>"ṿ","Ṍ"=>"Ṝ","Ṻ"=>"á¹»","Ṟ"=>"á¹¹",
|
---|
1055 | "Ṷ"=>"á¹·","Ṏ"=>"á¹µ","á¹²"=>"á¹³","á¹°"=>"á¹±","á¹®"=>"ṯ","Ṭ"=>"á¹","Ṫ"=>"ṫ","Ṛ"=>"ṩ","Ṋ"=>"ṧ","á¹€"=>"á¹¥",
|
---|
1056 | "á¹¢"=>"á¹£","á¹ "=>"ṡ","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹",
|
---|
1057 | "á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹
|
---|
1058 | ","á¹"=>"á¹","á¹"=>"á¹","ស"=>"áž¿","ឌ"=>"ážœ",
|
---|
1059 | "ឺ"=>"áž»","ážž"=>"áž¹","ា"=>"áž·","ណ"=>"ážµ","áž²"=>"áž³","áž°"=>"áž±","áž®"=>"ឯ","ឬ"=>"áž","ឪ"=>"áž«","ážš"=>"áž©",
|
---|
1060 | "ដ"=>"ឧ","ក"=>"ឥ","អ"=>"ឣ","áž "=>"áž¡","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž",
|
---|
1061 | "áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž
|
---|
1062 | ","áž"=>"áž","áž"=>"áž",
|
---|
1063 | "Õ"=>"Ö","Õ"=>"Ö
|
---|
1064 | ","Õ"=>"Ö","Õ"=>"Ö","Õ"=>"Ö","Õ"=>"Ö","Õ"=>"Ö","Õ"=>"Õ¿","Õ"=>"ÕŸ","Õ"=>"Õœ",
|
---|
1065 | "Õ"=>"ÕŒ","Õ"=>"Õ»","Õ"=>"Õº","Õ"=>"Õ¹","Õ"=>"Õž","Õ"=>"Õ·","Õ"=>"Õ¶","Õ
|
---|
1066 | "=>"Õµ","Õ"=>"ÕŽ","Õ"=>"Õ³",
|
---|
1067 | "Õ"=>"Õ²","Õ"=>"Õ±","Õ"=>"Õ°","Ô¿"=>"Õ¯","ÔŸ"=>"Õ®","Ôœ"=>"Õ","ÔŒ"=>"Õ¬","Ô»"=>"Õ«","Ôº"=>"Õª","Ô¹"=>"Õ©",
|
---|
1068 | "Ôž"=>"Õš","Ô·"=>"Õ§","Ô¶"=>"ÕŠ","Ôµ"=>"Õ¥","ÔŽ"=>"Õ€","Ô³"=>"Õ£","Ô²"=>"Õ¢","Ô±"=>"Õ¡","Ô"=>"Ô","Ô"=>"Ô",
|
---|
1069 | "Ô"=>"Ô","Ô"=>"Ô","Ô"=>"Ô","Ô"=>"Ô
|
---|
1070 | ","Ô"=>"Ô","Ô"=>"Ô","Óž"=>"Ó¹","ÓŽ"=>"Óµ","Ó²"=>"Ó³","Ó°"=>"Ó±",
|
---|
1071 | "Ó®"=>"Ó¯","Ó¬"=>"Ó","Óª"=>"Ó«","Óš"=>"Ó©","ÓŠ"=>"Ó§","Ó€"=>"Ó¥","Ó¢"=>"Ó£","Ó "=>"Ó¡","Ó"=>"Ó","Ó"=>"Ó",
|
---|
1072 | "Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó",
|
---|
1073 | "Ó
|
---|
1074 | "=>"Ó","Ó"=>"Ó","Ó"=>"Ó","ÒŸ"=>"Ò¿","ÒŒ"=>"Òœ","Òº"=>"Ò»","Òž"=>"Ò¹","Ò¶"=>"Ò·","ÒŽ"=>"Òµ","Ò²"=>"Ò³",
|
---|
1075 | "Ò°"=>"Ò±","Ò®"=>"Ò¯","Ò¬"=>"Ò","Òª"=>"Ò«","Òš"=>"Ò©","ÒŠ"=>"Ò§","Ò€"=>"Ò¥","Ò¢"=>"Ò£","Ò "=>"Ò¡","Ò"=>"Ò",
|
---|
1076 | "Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò",
|
---|
1077 | "Ò"=>"Ò","ÑŸ"=>"Ñ¿","ÑŒ"=>"Ñœ","Ѻ"=>"Ñ»","Ñž"=>"ѹ","Ѷ"=>"Ñ·","ÑŽ"=>"ѵ","Ѳ"=>"ѳ","Ñ°"=>"ѱ","Ñ®"=>"ѯ",
|
---|
1078 | "Ѭ"=>"Ñ","Ѫ"=>"Ñ«","Ñš"=>"Ñ©","ÑŠ"=>"ѧ","Ñ€"=>"Ñ¥","Ñ¢"=>"Ñ£","Ñ "=>"Ñ¡","Ð"=>"Ñ","Ð"=>"Ñ","Ð"=>"Ñ",
|
---|
1079 | "Ð"=>"Ñ","Ð"=>"Ñ","Ð"=>"Ñ","Ð"=>"Ñ","Ð"=>"Ñ","Ð"=>"Ñ","Ð"=>"Ñ","Ð
|
---|
1080 | "=>"Ñ","Ð"=>"Ñ","Ð"=>"Ñ",
|
---|
1081 | "Ð"=>"Ñ","Ð"=>"Ñ","Ð"=>"Ñ","Я"=>"Ñ","Ю"=>"Ñ","Ð"=>"Ñ","Ь"=>"Ñ","Ы"=>"Ñ","Ъ"=>"Ñ","Щ"=>"Ñ",
|
---|
1082 | "К"=>"Ñ","Ч"=>"Ñ","Њ"=>"Ñ","Ð¥"=>"Ñ
|
---|
1083 | ","Ѐ"=>"Ñ","У"=>"Ñ","Т"=>"Ñ","С"=>"Ñ","Ð "=>"Ñ","Ð"=>"п",
|
---|
1084 | "Ð"=>"П","Ð"=>"Ðœ","Ð"=>"ÐŒ","Ð"=>"л","Ð"=>"к","Ð"=>"й","Ð"=>"О","Ð"=>"з","Ð"=>"ж","Ð"=>"е",
|
---|
1085 | "Ð"=>"ÐŽ","Ð"=>"г","Ð"=>"в","Ð"=>"б","Ð"=>"а","Î"=>"ϵ","Σ"=>"ϲ","Ρ"=>"ϱ","Î"=>"Ï°","Ï®"=>"ϯ",
|
---|
1086 | "Ϭ"=>"Ï","Ϫ"=>"Ï«","Ïš"=>"Ï©","ÏŠ"=>"ϧ","Ï€"=>"Ï¥","Ï¢"=>"Ï£","Ï "=>"Ï¡","Ï"=>"Ï","Ï"=>"Ï","Ï"=>"Ï",
|
---|
1087 | "Ï"=>"Ï","Î "=>"Ï","Ί"=>"Ï","Î"=>"Ï","Î"=>"Ï","Î"=>"Ï","Î"=>"Ï","Î"=>"Ï","Ϋ"=>"Ï","Ϊ"=>"Ï",
|
---|
1088 | "Ω"=>"Ï","Κ"=>"Ï","Χ"=>"Ï","Ί"=>"Ï","Î¥"=>"Ï
|
---|
1089 | ","΀"=>"Ï","Σ"=>"Ï","Σ"=>"Ï","Ρ"=>"Ï","Î "=>"Ï",
|
---|
1090 | "Î"=>"ο","Î"=>"Ο","Î"=>"Îœ","Î"=>"ÎŒ","Î"=>"λ","Î"=>"κ","Î"=>"ι","Î"=>"Ξ","Î"=>"η","Î"=>"ζ",
|
---|
1091 | "Î"=>"ε","Î"=>"ÎŽ","Î"=>"γ","Î"=>"β","Î"=>"α","Î"=>"ί","Î"=>"ή","Î"=>"Î","Î"=>"ά","Æ·"=>"Ê",
|
---|
1092 | "Ʋ"=>"Ê","Ʊ"=>"Ê","Æ®"=>"Ê","Æ©"=>"Ê","ÆŠ"=>"Ê","Æ"=>"ɵ","Æ"=>"ɲ","Æ"=>"ɯ","Æ"=>"É©","Æ"=>"Éš",
|
---|
1093 | "Æ"=>"É£","Æ"=>"É","Æ"=>"É","Æ"=>"É","Æ"=>"É","Æ"=>"É","Æ"=>"É","Ȳ"=>"ȳ","È°"=>"ȱ","È®"=>"ȯ",
|
---|
1094 | "Ȭ"=>"È","Ȫ"=>"È«","Èš"=>"È©","ÈŠ"=>"ȧ","È€"=>"È¥","È¢"=>"È£","È"=>"È","È"=>"È","È"=>"È","È"=>"È",
|
---|
1095 | "È"=>"È","È"=>"È","È"=>"È","È"=>"È","È"=>"È","È"=>"È","È"=>"È","È"=>"È","È"=>"È","È"=>"È
|
---|
1096 | ",
|
---|
1097 | "È"=>"È","È"=>"È","ÇŸ"=>"Ç¿","ÇŒ"=>"Çœ","Ǻ"=>"Ç»","Çž"=>"ǹ","ÇŽ"=>"ǵ","Dz"=>"dz","Ç®"=>"ǯ","Ǭ"=>"Ç",
|
---|
1098 | "Ǫ"=>"Ç«","Çš"=>"Ç©","ÇŠ"=>"ǧ","Ç€"=>"Ç¥","Ç¢"=>"Ç£","Ç "=>"Ç¡","Ç"=>"Ç","Æ"=>"Ç","Ç"=>"Ç","Ç"=>"Ç",
|
---|
1099 | "Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç
|
---|
1100 | "=>"Ç","Ç·"=>"Æ¿",
|
---|
1101 | "ÆŒ"=>"Æœ","Æž"=>"ƹ","Ƶ"=>"ƶ","Ƴ"=>"ÆŽ","Ư"=>"Æ°","Ƭ"=>"Æ","Ƨ"=>"Æš","Æ€"=>"Æ¥","Æ¢"=>"Æ£","Æ "=>"Æ¡",
|
---|
1102 | "È "=>"Æ","Æ"=>"Æ","Ƕ"=>"Æ","Æ"=>"Æ","Æ"=>"Æ","Æ"=>"Æ","Æ"=>"Æ
|
---|
1103 | ","Æ"=>"Æ","S"=>"Å¿","Åœ"=>"ÅŸ",
|
---|
1104 | "Å»"=>"ÅŒ","Ź"=>"ź","Ŷ"=>"Å·","ÅŽ"=>"ŵ","Ų"=>"ų","Å°"=>"ű","Å®"=>"ů","Ŭ"=>"Å","Ū"=>"Å«","Åš"=>"Å©",
|
---|
1105 | "ÅŠ"=>"ŧ","Å€"=>"Å¥","Å¢"=>"Å£","Å "=>"Å¡","Å"=>"Å","Å"=>"Å","Å"=>"Å","Å"=>"Å","Å"=>"Å","Å"=>"Å",
|
---|
1106 | "Å"=>"Å","Å"=>"Å","Å"=>"Å","Å"=>"Å","Å"=>"Å","Å"=>"Å","Å
|
---|
1107 | "=>"Å","Å"=>"Å","Å"=>"Å","Ä¿"=>"Å",
|
---|
1108 | "Äœ"=>"ÄŸ","Ä»"=>"ÄŒ","Ĺ"=>"ĺ","Ķ"=>"Ä·","ÄŽ"=>"ĵ","IJ"=>"ij","I"=>"ı","Ä®"=>"į","Ĭ"=>"Ä","Ī"=>"Ä«",
|
---|
1109 | "Äš"=>"Ä©","ÄŠ"=>"ħ","Ä€"=>"Ä¥","Ä¢"=>"Ä£","Ä "=>"Ä¡","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä",
|
---|
1110 | "Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä
|
---|
1111 | ","Ä"=>"Ä",
|
---|
1112 | "Ä"=>"Ä","Åž"=>"ÿ","Ã"=>"ß","Ã"=>"Ãœ","Ã"=>"ÃŒ","Ã"=>"û","Ã"=>"ú","Ã"=>"ù","Ã"=>"Þ","Ã"=>"ö",
|
---|
1113 | "Ã"=>"õ","Ã"=>"ÃŽ","Ã"=>"ó","Ã"=>"ò","Ã"=>"ñ","Ã"=>"ð","Ã"=>"ï","Ã"=>"î","Ã"=>"Ã","Ã"=>"ì",
|
---|
1114 | "Ã"=>"ë","Ã"=>"ê","Ã"=>"é","Ã"=>"Ú","Ã"=>"ç","Ã"=>"Ê","Ã
|
---|
1115 | "=>"Ã¥","Ã"=>"À","Ã"=>"ã","Ã"=>"â",
|
---|
1116 | "Ã"=>"á","Ã"=>"à ","Î"=>"µ","Z"=>"z","Y"=>"y","X"=>"x","W"=>"w","V"=>"v","U"=>"u","T"=>"t",
|
---|
1117 | "S"=>"s","R"=>"r","Q"=>"q","P"=>"p","O"=>"o","N"=>"n","M"=>"m","L"=>"l","K"=>"k","J"=>"j",
|
---|
1118 | "I"=>"i","H"=>"h","G"=>"g","F"=>"f","E"=>"e","D"=>"d","C"=>"c","B"=>"b","A"=>"a"
|
---|
1119 | );
|
---|
1120 | }; // end of case lookup tables
|
---|
1121 |
|
---|
1122 | /**
|
---|
1123 | * UTF-8 lookup table for lower case accented letters
|
---|
1124 | *
|
---|
1125 |  * This lookup table defines replacements from the ASCII-7 range for accented
|
---|
1126 |  * characters. These are lower case letters only.
|
---|
1127 | *
|
---|
1128 | * @author Andreas Gohr <[email protected]>
|
---|
1129 | * @see utf8_deaccent()
|
---|
1130 | */
|
---|
1131 | global $UTF8_LOWER_ACCENTS;
|
---|
1132 | if(empty($UTF8_LOWER_ACCENTS)) $UTF8_LOWER_ACCENTS = array(
|
---|
1133 | 'à ' => 'a', 'ÃŽ' => 'o', 'Ä' => 'd', 'áž' => 'f', 'ë' => 'e', 'Å¡' => 's', 'Æ¡' => 'o',
|
---|
1134 | 'Ã' => 'ss', 'Ä' => 'a', 'Å' => 'r', 'È' => 't', 'Å' => 'n', 'Ä' => 'a', 'Ä·' => 'k',
|
---|
1135 | 'Å' => 's', 'ỳ' => 'y', 'Å' => 'n', 'ĺ' => 'l', 'ħ' => 'h', 'á¹' => 'p', 'ó' => 'o',
|
---|
1136 | 'ú' => 'u', 'Ä' => 'e', 'é' => 'e', 'ç' => 'c', 'áº' => 'w', 'Ä' => 'c', 'õ' => 'o',
|
---|
1137 | 'ṡ' => 's', 'Þ' => 'o', 'Ä£' => 'g', 'ŧ' => 't', 'È' => 's', 'Ä' => 'e', 'Ä' => 'c',
|
---|
1138 | 'Å' => 's', 'î' => 'i', 'ű' => 'u', 'Ä' => 'c', 'Ä' => 'e', 'ŵ' => 'w', 'ṫ' => 't',
|
---|
1139 | 'Å«' => 'u', 'Ä' => 'c', 'ö' => 'oe', 'Ú' => 'e', 'Å·' => 'y', 'Ä
|
---|
1140 | ' => 'a', 'Å' => 'l',
|
---|
1141 | 'ų' => 'u', 'ů' => 'u', 'Å' => 's', 'Ä' => 'g', 'ÄŒ' => 'l', 'Æ' => 'f', 'ÅŸ' => 'z',
|
---|
1142 | 'áº' => 'w', 'áž' => 'b', 'Ã¥' => 'a', 'ì' => 'i', 'ï' => 'i', 'áž' => 'd', 'Å¥' => 't',
|
---|
1143 | 'Å' => 'r', 'À' => 'ae', 'Ã' => 'i', 'Å' => 'r', 'ê' => 'e', 'ÃŒ' => 'ue', 'ò' => 'o',
|
---|
1144 | 'Ä' => 'e', 'ñ' => 'n', 'Å' => 'n', 'Ä¥' => 'h', 'Ä' => 'g', 'Ä' => 'd', 'ĵ' => 'j',
|
---|
1145 | 'ÿ' => 'y', 'Å©' => 'u', 'Å' => 'u', 'Æ°' => 'u', 'Å£' => 't', 'Ãœ' => 'y', 'Å' => 'o',
|
---|
1146 | 'â' => 'a', 'ÄŸ' => 'l', 'áº
|
---|
1147 | ' => 'w', 'Ō' => 'z', 'ī' => 'i', 'ã' => 'a', 'ġ' => 'g',
|
---|
1148 | 'á¹' => 'm', 'Å' => 'o', 'Ä©' => 'i', 'ù' => 'u', 'į' => 'i', 'ź' => 'z', 'á' => 'a',
|
---|
1149 | 'û' => 'u', 'ß' => 'th', 'ð' => 'dh', 'Ê' => 'ae', 'µ' => 'u', 'Ä' => 'e',
|
---|
1150 | );
|
---|
1151 |
|
---|
1152 | /**
|
---|
1153 | * UTF-8 lookup table for upper case accented letters
|
---|
1154 | *
|
---|
1155 |  * This lookup table defines replacements from the ASCII-7 range for accented
|
---|
1156 |  * characters. These are upper case letters only.
|
---|
1157 | *
|
---|
1158 | * @author Andreas Gohr <[email protected]>
|
---|
1159 | * @see utf8_deaccent()
|
---|
1160 | */
|
---|
1161 | global $UTF8_UPPER_ACCENTS;
|
---|
1162 | if(empty($UTF8_UPPER_ACCENTS)) $UTF8_UPPER_ACCENTS = array(
|
---|
1163 | 'Ã' => 'A', 'Ã' => 'O', 'Ä' => 'D', 'áž' => 'F', 'Ã' => 'E', 'Å ' => 'S', 'Æ ' => 'O',
|
---|
1164 | 'Ä' => 'A', 'Å' => 'R', 'È' => 'T', 'Å' => 'N', 'Ä' => 'A', 'Ķ' => 'K',
|
---|
1165 | 'Å' => 'S', 'Ỳ' => 'Y', 'Å
|
---|
1166 | ' => 'N', 'Ĺ' => 'L', 'ÄŠ' => 'H', 'á¹' => 'P', 'Ã' => 'O',
|
---|
1167 | 'Ã' => 'U', 'Ä' => 'E', 'Ã' => 'E', 'Ã' => 'C', 'áº' => 'W', 'Ä' => 'C', 'Ã' => 'O',
|
---|
1168 | 'á¹ ' => 'S', 'Ã' => 'O', 'Ä¢' => 'G', 'ÅŠ' => 'T', 'È' => 'S', 'Ä' => 'E', 'Ä' => 'C',
|
---|
1169 | 'Å' => 'S', 'Ã' => 'I', 'Å°' => 'U', 'Ä' => 'C', 'Ä' => 'E', 'ÅŽ' => 'W', 'Ṫ' => 'T',
|
---|
1170 | 'Ū' => 'U', 'Ä' => 'C', 'Ã' => 'Oe', 'Ã' => 'E', 'Ŷ' => 'Y', 'Ä' => 'A', 'Å' => 'L',
|
---|
1171 | 'Ų' => 'U', 'Å®' => 'U', 'Å' => 'S', 'Ä' => 'G', 'Ä»' => 'L', 'Æ' => 'F', 'Åœ' => 'Z',
|
---|
1172 | 'áº' => 'W', 'áž' => 'B', 'Ã
|
---|
1173 | ' => 'A', 'Ã' => 'I', 'Ã' => 'I', 'áž' => 'D', 'Å€' => 'T',
|
---|
1174 | 'Å' => 'R', 'Ã' => 'Ae', 'Ã' => 'I', 'Å' => 'R', 'Ã' => 'E', 'Ã' => 'Ue', 'Ã' => 'O',
|
---|
1175 | 'Ä' => 'E', 'Ã' => 'N', 'Å' => 'N', 'Ä€' => 'H', 'Ä' => 'G', 'Ä' => 'D', 'ÄŽ' => 'J',
|
---|
1176 | 'Åž' => 'Y', 'Åš' => 'U', 'Ŭ' => 'U', 'Ư' => 'U', 'Å¢' => 'T', 'Ã' => 'Y', 'Å' => 'O',
|
---|
1177 | 'Ã' => 'A', 'Äœ' => 'L', 'áº' => 'W', 'Å»' => 'Z', 'Ī' => 'I', 'Ã' => 'A', 'Ä ' => 'G',
|
---|
1178 | 'á¹' => 'M', 'Å' => 'O', 'Äš' => 'I', 'Ã' => 'U', 'Ä®' => 'I', 'Ź' => 'Z', 'Ã' => 'A',
|
---|
1179 | 'Ã' => 'U', 'Ã' => 'Th', 'Ã' => 'Dh', 'Ã' => 'Ae', 'Ä' => 'E',
|
---|
1180 | );
|
---|
1181 |
|
---|
/**
 * UTF-8 array of common special characters
 *
 * This array should contain all special characters (not a letter or digit)
 * defined in the various local charsets - it's not a complete list of non-alphanum
 * characters in UTF-8. It's not perfect but should match most cases of special
 * chars.
 *
 * Every entry is a Unicode code point (not a UTF-8 byte sequence).
 *
 * The controlchars 0x00 to 0x19 are _not_ included in this array. The space 0x20 is!
 * These chars are _not_ in the array either: _ (0x5f), : 0x3a, . 0x2e, - 0x2d, * 0x2a
 *
 * @author Andreas Gohr <[email protected]>
 * @see    utf8_stripspecials()
 */
global $UTF8_SPECIAL_CHARS;
if(empty($UTF8_SPECIAL_CHARS)) $UTF8_SPECIAL_CHARS = array(
    0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023,
    0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029,                 0x002b, 0x002c,
            0x002f,         0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x005b,
    0x005c, 0x005d, 0x005e,         0x0060, 0x007b, 0x007c, 0x007d, 0x007e,
    0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088,
    0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 0x0090, 0x0091, 0x0092,
    0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c,
    0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6,
    0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0,
    0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba,
    0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 0x00d7, 0x00f7, 0x02c7, 0x02d8, 0x02d9,
    0x02da, 0x02db, 0x02dc, 0x02dd, 0x0300, 0x0301, 0x0303, 0x0309, 0x0323, 0x0384,
    0x0385, 0x0387, 0x03c6, 0x03d1, 0x03d2, 0x03d5, 0x03d6, 0x05b0, 0x05b1,
    0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7, 0x05b8, 0x05b9, 0x05bb, 0x05bc,
    0x05bd, 0x05be, 0x05bf, 0x05c0, 0x05c1, 0x05c2, 0x05c3, 0x05f3, 0x05f4, 0x060c,
    0x061b, 0x061f, 0x0640, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 0x0650, 0x0651,
    0x0652, 0x066a, 0x0e3f, 0x200c, 0x200d, 0x200e, 0x200f, 0x2013, 0x2014, 0x2015,
    0x2017, 0x2018, 0x2019, 0x201a, 0x201c, 0x201d, 0x201e, 0x2020, 0x2021, 0x2022,
    0x2026, 0x2030, 0x2032, 0x2033, 0x2039, 0x203a, 0x2044, 0x20a7, 0x20aa, 0x20ab,
    0x20ac, 0x2116, 0x2118, 0x2122, 0x2126, 0x2135, 0x2190, 0x2191, 0x2192, 0x2193,
    0x2194, 0x2195, 0x21b5, 0x21d0, 0x21d1, 0x21d2, 0x21d3, 0x21d4, 0x2200, 0x2202,
    0x2203, 0x2205, 0x2206, 0x2207, 0x2208, 0x2209, 0x220b, 0x220f, 0x2211, 0x2212,
    0x2215, 0x2217, 0x2219, 0x221a, 0x221d, 0x221e, 0x2220, 0x2227, 0x2228, 0x2229,
    0x222a, 0x222b, 0x2234, 0x223c, 0x2245, 0x2248, 0x2260, 0x2261, 0x2264, 0x2265,
    0x2282, 0x2283, 0x2284, 0x2286, 0x2287, 0x2295, 0x2297, 0x22a5, 0x22c5, 0x2310,
    0x2320, 0x2321, 0x2329, 0x232a, 0x2469, 0x2500, 0x2502, 0x250c, 0x2510, 0x2514,
    0x2518, 0x251c, 0x2524, 0x252c, 0x2534, 0x253c, 0x2550, 0x2551, 0x2552, 0x2553,
    0x2554, 0x2555, 0x2556, 0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d,
    0x255e, 0x255f, 0x2560, 0x2561, 0x2562, 0x2563, 0x2564, 0x2565, 0x2566, 0x2567,
    0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590,
    0x2591, 0x2592, 0x2593, 0x25a0, 0x25b2, 0x25bc, 0x25c6, 0x25ca, 0x25cf, 0x25d7,
    0x2605, 0x260e, 0x261b, 0x261e, 0x2660, 0x2663, 0x2665, 0x2666, 0x2701, 0x2702,
    0x2703, 0x2704, 0x2706, 0x2707, 0x2708, 0x2709, 0x270c, 0x270d, 0x270e, 0x270f,
    0x2710, 0x2711, 0x2712, 0x2713, 0x2714, 0x2715, 0x2716, 0x2717, 0x2718, 0x2719,
    0x271a, 0x271b, 0x271c, 0x271d, 0x271e, 0x271f, 0x2720, 0x2721, 0x2722, 0x2723,
    0x2724, 0x2725, 0x2726, 0x2727, 0x2729, 0x272a, 0x272b, 0x272c, 0x272d, 0x272e,
    0x272f, 0x2730, 0x2731, 0x2732, 0x2733, 0x2734, 0x2735, 0x2736, 0x2737, 0x2738,
    0x2739, 0x273a, 0x273b, 0x273c, 0x273d, 0x273e, 0x273f, 0x2740, 0x2741, 0x2742,
    0x2743, 0x2744, 0x2745, 0x2746, 0x2747, 0x2748, 0x2749, 0x274a, 0x274b, 0x274d,
    0x274f, 0x2750, 0x2751, 0x2752, 0x2756, 0x2758, 0x2759, 0x275a, 0x275b, 0x275c,
    0x275d, 0x275e, 0x2761, 0x2762, 0x2763, 0x2764, 0x2765, 0x2766, 0x2767, 0x277f,
    0x2789, 0x2793, 0x2794, 0x2798, 0x2799, 0x279a, 0x279b, 0x279c, 0x279d, 0x279e,
    0x279f, 0x27a0, 0x27a1, 0x27a2, 0x27a3, 0x27a4, 0x27a5, 0x27a6, 0x27a7, 0x27a8,
    0x27a9, 0x27aa, 0x27ab, 0x27ac, 0x27ad, 0x27ae, 0x27af, 0x27b1, 0x27b2, 0x27b3,
    0x27b4, 0x27b5, 0x27b6, 0x27b7, 0x27b8, 0x27b9, 0x27ba, 0x27bb, 0x27bc, 0x27bd,
    0x27be, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x300c,
    0x300d, 0x300e, 0x300f, 0x3010, 0x3011, 0x3012, 0x3014, 0x3015, 0x3016, 0x3017,
    0x3018, 0x3019, 0x301a, 0x301b, 0x3036,
    0xf6d9, 0xf6da, 0xf6db, 0xf8d7, 0xf8d8, 0xf8d9, 0xf8da, 0xf8db, 0xf8dc,
    0xf8dd, 0xf8de, 0xf8df, 0xf8e0, 0xf8e1, 0xf8e2, 0xf8e3, 0xf8e4, 0xf8e5, 0xf8e6,
    0xf8e7, 0xf8e8, 0xf8e9, 0xf8ea, 0xf8eb, 0xf8ec, 0xf8ed, 0xf8ee, 0xf8ef, 0xf8f0,
    0xf8f1, 0xf8f2, 0xf8f3, 0xf8f4, 0xf8f5, 0xf8f6, 0xf8f7, 0xf8f8, 0xf8f9, 0xf8fa,
    0xf8fb, 0xf8fc, 0xf8fd, 0xf8fe, 0xfe7c, 0xfe7d,
            0xff01, 0xff02, 0xff03, 0xff04, 0xff05, 0xff06, 0xff07, 0xff08,
    0xff09, 0xff0a, 0xff0b, 0xff0c, 0xff0d, 0xff0e, 0xff0f, 0xff1a, 0xff1b, 0xff1c,
    0xff1d, 0xff1e, 0xff1f, 0xff20, 0xff3b, 0xff3c, 0xff3d, 0xff3e, 0xff40, 0xff5b,
    0xff5c, 0xff5d, 0xff5e, 0xff5f, 0xff60, 0xff61, 0xff62, 0xff63, 0xff64, 0xff65,
    0xffe0, 0xffe1, 0xffe2, 0xffe3, 0xffe4, 0xffe5, 0xffe6, 0xffe8, 0xffe9, 0xffea,
    0xffeb, 0xffec, 0xffed, 0xffee,
    0x01d6fc, 0x01d6fd, 0x01d6fe, 0x01d6ff, 0x01d700, 0x01d701, 0x01d702, 0x01d703,
    0x01d704, 0x01d705, 0x01d706, 0x01d707, 0x01d708, 0x01d709, 0x01d70a, 0x01d70b,
    0x01d70c, 0x01d70d, 0x01d70e, 0x01d70f, 0x01d710, 0x01d711, 0x01d712, 0x01d713,
    0x01d714, 0x01d715, 0x01d716, 0x01d717, 0x01d718, 0x01d719, 0x01d71a, 0x01d71b,
    // figure space, narrow no-break space, word joiner, zero width no-break space.
    // These used to be listed as UTF-8 byte sequences (0xe28087, ...) instead of
    // code points; the no-break space 0x00a0 is already part of the list above.
    0x2007, 0x202f, 0x2060, 0xfeff,
);
|
---|
1263 |
|
---|
1264 | // utf8 version of above data
|
---|
1265 | global $UTF8_SPECIAL_CHARS2;
|
---|
1266 | if(empty($UTF8_SPECIAL_CHARS2)) $UTF8_SPECIAL_CHARS2 =
|
---|
1267 | "\x1A".' |
---|
1268 | |
---|
1269 | |
---|
1270 | !"#$%&\'()+,/;<=>?@[\]^`{|}~ÂÂÂÂÂÂ
|
---|
1271 | ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂï¿œ'.
|
---|
1272 | 'ᅵ ¡¢£€¥Š§š©ª«¬Â®¯°±²³Žµ¶·ž¹º»Œœᅵ'.
|
---|
1273 | 'ᅵ¿Ã÷ËËËËËËËÌÌÌÌÌ£ÎÎ
|
---|
1274 | ÎÏְֱֲֳ֎ֵֶַֹֻ֞֌ֿ֜֟ᅵ'.
|
---|
1275 | 'ï¿œ×××׳׎ØØØÙÙÙÙÙÙÙÙÙÙªàž¿âââââââââââââï¿œ'.
|
---|
1276 | 'ᅵᅵâ â¡â¢âŠâ°â²â³â¹âºââ§âªâ«â¬âââ¢âŠâµâââââââµ'.
|
---|
1277 | 'âââââââââ
|
---|
1278 | âââââââââââââââ â§âšï¿œ'.
|
---|
1279 | 'ï¿œâªâ«âŽâŒâ
|
---|
1280 | ââ â¡â€â¥ââââââââ¥â
|
---|
1281 | ââ â¡â©âªâ©âï¿œ'.
|
---|
1282 | 'ᅵᅵââââââ€â¬âŽâŒâââââââââââââââââ '.
|
---|
1283 | 'â¡â¢â£â€â¥âŠâ§âšâ©âªâ«â¬âââââââââ â²âŒâââï¿œ'.
|
---|
1284 | 'ï¿œâ
|
---|
1285 | ââââ â£â¥âŠââââââââââââââââââï¿œ'.
|
---|
1286 | 'ᅵᅵââââââââââ â¡â¢â£â€â¥âŠâ§â©âªâ«â¬ââ®â¯â°â±'.
|
---|
1287 | 'â²â³âŽâµâ¶â·âžâ¹âºâ»âŒâœâŸâ¿ââââââ
|
---|
1288 | ââââââï¿œ'.
|
---|
1289 | 'ï¿œâââââââââââââ¡â¢â£â€â¥âŠâ§â¿ââââââï¿œ'.
|
---|
1290 | 'ᅵᅵâââââ â¡â¢â£â€â¥âŠâ§âšâ©âªâ«â¬ââ®â¯â±â²â³âŽâµâ¶'.
|
---|
1291 | 'â·âžâ¹âºâ»âŒâœâŸ'.
|
---|
1292 | 'ãããããããããããããããããããããããã¶'.
|
---|
1293 | 'ïïïï£ï£ï£ï£ï£ï£ï£ï£ï£ï£ ᅵ'.
|
---|
1294 | 'ᅵï£ï£®ï£¯ï£°ï£±ï£²ï£³ï£Žï£µï£¶ï£·ï£žï£¹ï£ºï£»ï£Œï£œï£Ÿï¹Œï¹œ'.
|
---|
1295 | 'ïŒïŒïŒïŒïŒ
|
---|
1296 | ïŒïŒïŒïŒïŒïŒïŒïŒïŒïŒïŒïŒïŒïŒïŒïŒïŒ ïœïœïœïœïœ'.
|
---|
1297 | 'ïœïœ ïœ¡ïœ¢ïœ£ïœ€ïœ¥ï¿ ï¿¡ï¿¢ï¿£ï¿€ï¿¥ï¿Šï¿šï¿©ï¿ªï¿«ï¿¬ï¿ï¿®'.
|
---|
1298 | 'ðŒðœðŸð¿ðððððð
|
---|
1299 | ðððððððððððððððððððððð'.
|
---|
1300 | ' ââ¯â ';
|
---|
1301 |
|
---|
1302 | /**
|
---|
1303 | * Romanization lookup table
|
---|
1304 | *
|
---|
1305 |  * This lookup table provides a way to transform strings written in a language
|
---|
1306 | * different from the ones based upon latin letters into plain ASCII.
|
---|
1307 | *
|
---|
1308 | * Please note: this is not a scientific transliteration table. It only works
|
---|
1309 | * one-way from non-Latin to ASCII and it works by simple character replacement
|
---|
1310 | * only. Specialities of each language are not supported.
|
---|
1311 | *
|
---|
1312 | * @author Andreas Gohr <[email protected]>
|
---|
1313 | * @author Vitaly Blokhin <[email protected]>
|
---|
1314 | * @link http://www.uconv.com/translit.htm
|
---|
1315 | * @author Bisqwit <[email protected]>
|
---|
1316 | * @link http://kanjidict.stc.cx/hiragana.php?src=2
|
---|
1317 | * @link http://www.translatum.gr/converter/greek-transliteration.htm
|
---|
1318 | * @link http://en.wikipedia.org/wiki/Royal_Thai_General_System_of_Transcription
|
---|
1319 | * @link http://www.btranslations.com/resources/romanization/korean.asp
|
---|
1320 | * @author Arthit Suriyawongkul <[email protected]>
|
---|
1321 | * @author Denis Scheither <[email protected]>
|
---|
1322 | */
|
---|
1323 | global $UTF8_ROMANIZATION;
|
---|
1324 | if(empty($UTF8_ROMANIZATION)) $UTF8_ROMANIZATION = array(
|
---|
1325 | // scandinavian - differs from what we do in deaccent
|
---|
1326 | 'Ã¥'=>'a','Ã
|
---|
1327 | '=>'A','À'=>'a','Ã'=>'A','ö'=>'o','Ã'=>'O',
|
---|
1328 |
|
---|
1329 | //russian cyrillic
|
---|
1330 | 'а'=>'a','Ð'=>'A','б'=>'b','Ð'=>'B','в'=>'v','Ð'=>'V','г'=>'g','Ð'=>'G',
|
---|
1331 | 'ÐŽ'=>'d','Ð'=>'D','е'=>'e','Ð'=>'E','Ñ'=>'jo','Ð'=>'Jo','ж'=>'zh','Ð'=>'Zh',
|
---|
1332 | 'з'=>'z','Ð'=>'Z','О'=>'i','Ð'=>'I','й'=>'j','Ð'=>'J','к'=>'k','Ð'=>'K',
|
---|
1333 | 'л'=>'l','Ð'=>'L','ÐŒ'=>'m','Ð'=>'M','Ðœ'=>'n','Ð'=>'N','П'=>'o','Ð'=>'O',
|
---|
1334 | 'п'=>'p','Ð'=>'P','Ñ'=>'r','Ð '=>'R','Ñ'=>'s','С'=>'S','Ñ'=>'t','Т'=>'T',
|
---|
1335 | 'Ñ'=>'u','У'=>'U','Ñ'=>'f','Ѐ'=>'F','Ñ
|
---|
1336 | '=>'x','Ð¥'=>'X','Ñ'=>'c','Њ'=>'C',
|
---|
1337 | 'Ñ'=>'ch','Ч'=>'Ch','Ñ'=>'sh','К'=>'Sh','Ñ'=>'sch','Щ'=>'Sch','Ñ'=>'',
|
---|
1338 | 'Ъ'=>'','Ñ'=>'y','Ы'=>'Y','Ñ'=>'','Ь'=>'','Ñ'=>'eh','Ð'=>'Eh','Ñ'=>'ju',
|
---|
1339 | 'Ю'=>'Ju','Ñ'=>'ja','Я'=>'Ja',
|
---|
1340 | // Ukrainian cyrillic
|
---|
1341 | 'Ò'=>'Gh','Ò'=>'gh','Ð'=>'Je','Ñ'=>'je','Ð'=>'I','Ñ'=>'i','Ð'=>'Ji','Ñ'=>'ji',
|
---|
1342 | // Georgian
|
---|
1343 | 'á'=>'a','á'=>'b','á'=>'g','á'=>'d','á'=>'e','á'=>'v','á'=>'z','á'=>'th',
|
---|
1344 | 'á'=>'i','á'=>'p','á'=>'l','á'=>'m','á'=>'n','á'=>'o','á'=>'p','á'=>'zh',
|
---|
1345 | 'á '=>'r','á¡'=>'s','á¢'=>'t','á£'=>'u','á€'=>'ph','á¥'=>'kh','áŠ'=>'gh','á§'=>'q',
|
---|
1346 | 'áš'=>'sh','á©'=>'ch','áª'=>'c','á«'=>'dh','á¬'=>'w','á'=>'j','á®'=>'x','á¯'=>'jh',
|
---|
1347 | 'á°'=>'xh',
|
---|
1348 | //Sanskrit
|
---|
1349 | 'à€
|
---|
1350 | '=>'a','à€'=>'ah','à€'=>'i','à€'=>'ih','à€'=>'u','à€'=>'uh','à€'=>'ry',
|
---|
1351 | 'ॠ'=>'ryh','à€'=>'ly','ॡ'=>'lyh','à€'=>'e','à€'=>'ay','à€'=>'o','à€'=>'aw',
|
---|
1352 | 'à€
|
---|
1353 | à€'=>'amh','à€
|
---|
1354 | à€'=>'aq','à€'=>'k','à€'=>'kh','à€'=>'g','à€'=>'gh','à€'=>'nh',
|
---|
1355 | 'à€'=>'c','à€'=>'ch','à€'=>'j','à€'=>'jh','à€'=>'ny','à€'=>'tq','à€ '=>'tqh',
|
---|
1356 | 'à€¡'=>'dq','à€¢'=>'dqh','à€£'=>'nq','à€€'=>'t','à€¥'=>'th','à€Š'=>'d','à€§'=>'dh',
|
---|
1357 | 'à€š'=>'n','à€ª'=>'p','à€«'=>'ph','à€¬'=>'b','à€'=>'bh','à€®'=>'m','à€¯'=>'z','à€°'=>'r',
|
---|
1358 | 'à€²'=>'l','à€µ'=>'v','à€¶'=>'sh','à€·'=>'sqh','à€ž'=>'s','à€¹'=>'x',
|
---|
1359 | //Hebrew
|
---|
1360 | '×'=>'a', '×'=>'b','×'=>'g','×'=>'d','×'=>'h','×'=>'v','×'=>'z','×'=>'kh','×'=>'th',
|
---|
1361 | '×'=>'y','×'=>'h','×'=>'k','×'=>'l','×'=>'m','×'=>'m','×'=>'n','× '=>'n',
|
---|
1362 | 'ס'=>'s','ע'=>'ah','ף'=>'f','׀'=>'p','ץ'=>'c','׊'=>'c','ק'=>'q','ך'=>'r',
|
---|
1363 | 'ש'=>'sh','ת'=>'t',
|
---|
1364 | //Arabic
|
---|
1365 | 'ا'=>'a','Øš'=>'b','ت'=>'t','Ø«'=>'th','ج'=>'g','Ø'=>'xh','Ø®'=>'x','د'=>'d',
|
---|
1366 | 'ذ'=>'dh','ر'=>'r','ز'=>'z','س'=>'s','؎'=>'sh','ص'=>'s\'','ض'=>'d\'',
|
---|
1367 | 'Ø·'=>'t\'','Øž'=>'z\'','ع'=>'y','غ'=>'gh','Ù'=>'f','Ù'=>'q','Ù'=>'k',
|
---|
1368 | 'Ù'=>'l','Ù
|
---|
1369 | '=>'m','Ù'=>'n','Ù'=>'x\'','Ù'=>'u','Ù'=>'i',
|
---|
1370 |
|
---|
1371 | // Japanese characters (last update: 2008-05-09)
|
---|
1372 |
|
---|
1373 | // Japanese hiragana
|
---|
1374 |
|
---|
1375 | // 3 character syllables, 㣠doubles the consonant after
|
---|
1376 | 'ã£ã¡ã'=>'ccha','ã£ã¡ã'=>'cche','ã£ã¡ã'=>'ccho','ã£ã¡ã
|
---|
1377 | '=>'cchu',
|
---|
1378 | 'ã£ã³ã'=>'bbya','ã£ã³ã'=>'bbye','ã£ã³ã'=>'bbyi','ã£ã³ã'=>'bbyo','ã£ã³ã
|
---|
1379 | '=>'bbyu',
|
---|
1380 | 'ã£ãŽã'=>'ppya','ã£ãŽã'=>'ppye','ã£ãŽã'=>'ppyi','ã£ãŽã'=>'ppyo','ã£ãŽã
|
---|
1381 | '=>'ppyu',
|
---|
1382 | 'ã£ã¡ã'=>'ccha','ã£ã¡ã'=>'cche','ã£ã¡'=>'cchi','ã£ã¡ã'=>'ccho','ã£ã¡ã
|
---|
1383 | '=>'cchu',
|
---|
1384 | // 'ã£ã²ã'=>'hya','ã£ã²ã'=>'hye','ã£ã²ã'=>'hyi','ã£ã²ã'=>'hyo','ã£ã²ã
|
---|
1385 | '=>'hyu',
|
---|
1386 | 'ã£ãã'=>'kkya','ã£ãã'=>'kkye','ã£ãã'=>'kkyi','ã£ãã'=>'kkyo','ã£ãã
|
---|
1387 | '=>'kkyu',
|
---|
1388 | 'ã£ãã'=>'ggya','ã£ãã'=>'ggye','ã£ãã'=>'ggyi','ã£ãã'=>'ggyo','ã£ãã
|
---|
1389 | '=>'ggyu',
|
---|
1390 | 'ã£ã¿ã'=>'mmya','ã£ã¿ã'=>'mmye','ã£ã¿ã'=>'mmyi','ã£ã¿ã'=>'mmyo','ã£ã¿ã
|
---|
1391 | '=>'mmyu',
|
---|
1392 | 'ã£ã«ã'=>'nnya','ã£ã«ã'=>'nnye','ã£ã«ã'=>'nnyi','ã£ã«ã'=>'nnyo','ã£ã«ã
|
---|
1393 | '=>'nnyu',
|
---|
1394 | 'ã£ãã'=>'rrya','ã£ãã'=>'rrye','ã£ãã'=>'rryi','ã£ãã'=>'rryo','ã£ãã
|
---|
1395 | '=>'rryu',
|
---|
1396 | 'ã£ãã'=>'ssha','ã£ãã'=>'sshe','ã£ã'=>'sshi','ã£ãã'=>'ssho','ã£ãã
|
---|
1397 | '=>'sshu',
|
---|
1398 |
|
---|
1399 | // separate hiragana 'n' ('n' + 'i' != 'ni', normally we would write "kon'nichi wa" but the apostrophe would be converted to _ anyway)
|
---|
1400 | 'ãã'=>'n_a','ãã'=>'n_e','ãã'=>'n_i','ãã'=>'n_o','ãã'=>'n_u',
|
---|
1401 | 'ãã'=>'n_ya','ãã'=>'n_yo','ãã'=>'n_yu',
|
---|
1402 |
|
---|
1403 | // 2 character syllables - normal
|
---|
1404 | 'ãµã'=>'fa','ãµã'=>'fe','ãµã'=>'fi','ãµã'=>'fo',
|
---|
1405 | 'ã¡ã'=>'cha','ã¡ã'=>'che','ã¡'=>'chi','ã¡ã'=>'cho','ã¡ã
|
---|
1406 | '=>'chu',
|
---|
1407 | 'ã²ã'=>'hya','ã²ã'=>'hye','ã²ã'=>'hyi','ã²ã'=>'hyo','ã²ã
|
---|
1408 | '=>'hyu',
|
---|
1409 | 'ã³ã'=>'bya','ã³ã'=>'bye','ã³ã'=>'byi','ã³ã'=>'byo','ã³ã
|
---|
1410 | '=>'byu',
|
---|
1411 | 'ãŽã'=>'pya','ãŽã'=>'pye','ãŽã'=>'pyi','ãŽã'=>'pyo','ãŽã
|
---|
1412 | '=>'pyu',
|
---|
1413 | 'ãã'=>'kya','ãã'=>'kye','ãã'=>'kyi','ãã'=>'kyo','ãã
|
---|
1414 | '=>'kyu',
|
---|
1415 | 'ãã'=>'gya','ãã'=>'gye','ãã'=>'gyi','ãã'=>'gyo','ãã
|
---|
1416 | '=>'gyu',
|
---|
1417 | 'ã¿ã'=>'mya','ã¿ã'=>'mye','ã¿ã'=>'myi','ã¿ã'=>'myo','ã¿ã
|
---|
1418 | '=>'myu',
|
---|
1419 | 'ã«ã'=>'nya','ã«ã'=>'nye','ã«ã'=>'nyi','ã«ã'=>'nyo','ã«ã
|
---|
1420 | '=>'nyu',
|
---|
1421 | 'ãã'=>'rya','ãã'=>'rye','ãã'=>'ryi','ãã'=>'ryo','ãã
|
---|
1422 | '=>'ryu',
|
---|
1423 | 'ãã'=>'sha','ãã'=>'she','ã'=>'shi','ãã'=>'sho','ãã
|
---|
1424 | '=>'shu',
|
---|
1425 | 'ãã'=>'ja','ãã'=>'je','ãã'=>'jo','ãã
|
---|
1426 | '=>'ju',
|
---|
1427 | 'ãã'=>'we','ãã'=>'wi',
|
---|
1428 | 'ãã'=>'ye',
|
---|
1429 |
|
---|
1430 | // 2 character syllables, 㣠doubles the consonant after
|
---|
1431 | 'ã£ã°'=>'bba','ã£ã¹'=>'bbe','ã£ã³'=>'bbi','ã£ãŒ'=>'bbo','ã£ã¶'=>'bbu',
|
---|
1432 | 'ã£ã±'=>'ppa','ã£ãº'=>'ppe','ã£ãŽ'=>'ppi','ã£ãœ'=>'ppo','ã£ã·'=>'ppu',
|
---|
1433 | 'ã£ã'=>'tta','ã£ãŠ'=>'tte','ã£ã¡'=>'cchi','ã£ãš'=>'tto','ã£ã€'=>'ttsu',
|
---|
1434 | 'ã£ã '=>'dda','ã£ã§'=>'dde','ã£ã¢'=>'ddi','ã£ã©'=>'ddo','ã£ã¥'=>'ddu',
|
---|
1435 | 'ã£ã'=>'gga','ã£ã'=>'gge','ã£ã'=>'ggi','ã£ã'=>'ggo','ã£ã'=>'ggu',
|
---|
1436 | 'ã£ã'=>'kka','ã£ã'=>'kke','ã£ã'=>'kki','ã£ã'=>'kko','ã£ã'=>'kku',
|
---|
1437 | 'ã£ãŸ'=>'mma','ã£ã'=>'mme','ã£ã¿'=>'mmi','ã£ã'=>'mmo','ã£ã'=>'mmu',
|
---|
1438 | 'ã£ãª'=>'nna','ã£ã'=>'nne','ã£ã«'=>'nni','ã£ã®'=>'nno','ã£ã¬'=>'nnu',
|
---|
1439 | 'ã£ã'=>'rra','ã£ã'=>'rre','ã£ã'=>'rri','ã£ã'=>'rro','ã£ã'=>'rru',
|
---|
1440 | 'ã£ã'=>'ssa','ã£ã'=>'sse','ã£ã'=>'sshi','ã£ã'=>'sso','ã£ã'=>'ssu',
|
---|
1441 | 'ã£ã'=>'zza','ã£ã'=>'zze','ã£ã'=>'jji','ã£ã'=>'zzo','ã£ã'=>'zzu',
|
---|
1442 |
|
---|
1443 | // 1 character syllables
|
---|
1444 | 'ã'=>'a','ã'=>'e','ã'=>'i','ã'=>'o','ã'=>'u','ã'=>'n',
|
---|
1445 | 'ã¯'=>'ha','ãž'=>'he','ã²'=>'hi','ã»'=>'ho','ãµ'=>'fu',
|
---|
1446 | 'ã°'=>'ba','ã¹'=>'be','ã³'=>'bi','ãŒ'=>'bo','ã¶'=>'bu',
|
---|
1447 | 'ã±'=>'pa','ãº'=>'pe','ãŽ'=>'pi','ãœ'=>'po','ã·'=>'pu',
|
---|
1448 | 'ã'=>'ta','ãŠ'=>'te','ã¡'=>'chi','ãš'=>'to','ã€'=>'tsu',
|
---|
1449 | 'ã '=>'da','ã§'=>'de','ã¢'=>'di','ã©'=>'do','ã¥'=>'du',
|
---|
1450 | 'ã'=>'ga','ã'=>'ge','ã'=>'gi','ã'=>'go','ã'=>'gu',
|
---|
1451 | 'ã'=>'ka','ã'=>'ke','ã'=>'ki','ã'=>'ko','ã'=>'ku',
|
---|
1452 | 'ãŸ'=>'ma','ã'=>'me','ã¿'=>'mi','ã'=>'mo','ã'=>'mu',
|
---|
1453 | 'ãª'=>'na','ã'=>'ne','ã«'=>'ni','ã®'=>'no','ã¬'=>'nu',
|
---|
1454 | 'ã'=>'ra','ã'=>'re','ã'=>'ri','ã'=>'ro','ã'=>'ru',
|
---|
1455 | 'ã'=>'sa','ã'=>'se','ã'=>'shi','ã'=>'so','ã'=>'su',
|
---|
1456 | 'ã'=>'wa','ã'=>'wo',
|
---|
1457 | 'ã'=>'za','ã'=>'ze','ã'=>'ji','ã'=>'zo','ã'=>'zu',
|
---|
1458 | 'ã'=>'ya','ã'=>'yo','ã'=>'yu',
|
---|
1459 | // old characters
|
---|
1460 | 'ã'=>'we','ã'=>'wi',
|
---|
1461 |
|
---|
1462 | // convert what's left (probably only kicks in when something's missing above)
|
---|
1463 | // 'ã'=>'a','ã'=>'e','ã'=>'i','ã'=>'o','ã
|
---|
1464 | '=>'u',
|
---|
1465 | // 'ã'=>'ya','ã'=>'yo','ã
|
---|
1466 | '=>'yu',
|
---|
1467 |
|
---|
1468 | // never seen one of those (disabled for the moment)
|
---|
1469 | // 'ãŽã'=>'va','ãŽã'=>'ve','ãŽã'=>'vi','ãŽã'=>'vo','ãŽ'=>'vu',
|
---|
1470 | // 'ã§ã'=>'dha','ã§ã'=>'dhe','ã§ã'=>'dhi','ã§ã'=>'dho','ã§ã
|
---|
1471 | '=>'dhu',
|
---|
1472 | // 'ã©ã'=>'dwa','ã©ã'=>'dwe','ã©ã'=>'dwi','ã©ã'=>'dwo','ã©ã
|
---|
1473 | '=>'dwu',
|
---|
1474 | // 'ã¢ã'=>'dya','ã¢ã'=>'dye','ã¢ã'=>'dyi','ã¢ã'=>'dyo','ã¢ã
|
---|
1475 | '=>'dyu',
|
---|
1476 | // 'ãµã'=>'fwa','ãµã'=>'fwe','ãµã'=>'fwi','ãµã'=>'fwo','ãµã
|
---|
1477 | '=>'fwu',
|
---|
1478 | // 'ãµã'=>'fya','ãµã'=>'fye','ãµã'=>'fyi','ãµã'=>'fyo','ãµã
|
---|
1479 | '=>'fyu',
|
---|
1480 | // 'ãã'=>'swa','ãã'=>'swe','ãã'=>'swi','ãã'=>'swo','ãã
|
---|
1481 | '=>'swu',
|
---|
1482 | // 'ãŠã'=>'tha','ãŠã'=>'the','ãŠã'=>'thi','ãŠã'=>'tho','ãŠã
|
---|
1483 | '=>'thu',
|
---|
1484 | // 'ã€ã'=>'tsa','ã€ã'=>'tse','ã€ã'=>'tsi','ã€ã'=>'tso','ã€'=>'tsu',
|
---|
1485 | // 'ãšã'=>'twa','ãšã'=>'twe','ãšã'=>'twi','ãšã'=>'two','ãšã
|
---|
1486 | '=>'twu',
|
---|
1487 | // 'ãŽã'=>'vya','ãŽã'=>'vye','ãŽã'=>'vyi','ãŽã'=>'vyo','ãŽã
|
---|
1488 | '=>'vyu',
|
---|
1489 | // 'ãã'=>'wha','ãã'=>'whe','ãã'=>'whi','ãã'=>'who','ãã
|
---|
1490 | '=>'whu',
|
---|
1491 | // 'ãã'=>'zha','ãã'=>'zhe','ãã'=>'zhi','ãã'=>'zho','ãã
|
---|
1492 | '=>'zhu',
|
---|
1493 | // 'ãã'=>'zya','ãã'=>'zye','ãã'=>'zyi','ãã'=>'zyo','ãã
|
---|
1494 | '=>'zyu',
|
---|
1495 |
|
---|
1496 | // 'spare' characters from other romanization systems
|
---|
1497 | // 'ã '=>'da','ã§'=>'de','ã¢'=>'di','ã©'=>'do','ã¥'=>'du',
|
---|
1498 | // 'ã'=>'la','ã'=>'le','ã'=>'li','ã'=>'lo','ã'=>'lu',
|
---|
1499 | // 'ã'=>'sa','ã'=>'se','ã'=>'si','ã'=>'so','ã'=>'su',
|
---|
1500 | // 'ã¡ã'=>'cya','ã¡ã'=>'cye','ã¡ã'=>'cyi','ã¡ã'=>'cyo','ã¡ã
|
---|
1501 | '=>'cyu',
|
---|
1502 | //'ãã'=>'jya','ãã'=>'jye','ãã'=>'jyi','ãã'=>'jyo','ãã
|
---|
1503 | '=>'jyu',
|
---|
1504 | //'ãã'=>'lya','ãã'=>'lye','ãã'=>'lyi','ãã'=>'lyo','ãã
|
---|
1505 | '=>'lyu',
|
---|
1506 | //'ãã'=>'sya','ãã'=>'sye','ãã'=>'syi','ãã'=>'syo','ãã
|
---|
1507 | '=>'syu',
|
---|
1508 | //'ã¡ã'=>'tya','ã¡ã'=>'tye','ã¡ã'=>'tyi','ã¡ã'=>'tyo','ã¡ã
|
---|
1509 | '=>'tyu',
|
---|
1510 | //'ã'=>'ci',,ã'=>'yi','ã¢'=>'dzi',
|
---|
1511 | //'ã£ãã'=>'jja','ã£ãã'=>'jje','ã£ã'=>'jji','ã£ãã'=>'jjo','ã£ãã
|
---|
1512 | '=>'jju',
|
---|
1513 |
|
---|
1514 |
|
---|
1515 | // Japanese katakana
|
---|
1516 |
|
---|
1517 | // 4 character syllables: ã doubles the consonant after, ㌠doubles the vowel before (usualy written with macron, but we don't want that in our URLs)
|
---|
1518 | 'ããã£ãŒ'=>'bbyaa','ããã§ãŒ'=>'bbyee','ããã£ãŒ'=>'bbyii','ããã§ãŒ'=>'bbyoo','ããã¥ãŒ'=>'bbyuu',
|
---|
1519 | 'ããã£ãŒ'=>'ppyaa','ããã§ãŒ'=>'ppyee','ããã£ãŒ'=>'ppyii','ããã§ãŒ'=>'ppyoo','ããã¥ãŒ'=>'ppyuu',
|
---|
1520 | 'ããã£ãŒ'=>'kkyaa','ããã§ãŒ'=>'kkyee','ããã£ãŒ'=>'kkyii','ããã§ãŒ'=>'kkyoo','ããã¥ãŒ'=>'kkyuu',
|
---|
1521 | 'ãã®ã£ãŒ'=>'ggyaa','ãã®ã§ãŒ'=>'ggyee','ãã®ã£ãŒ'=>'ggyii','ãã®ã§ãŒ'=>'ggyoo','ãã®ã¥ãŒ'=>'ggyuu',
|
---|
1522 | 'ããã£ãŒ'=>'mmyaa','ããã§ãŒ'=>'mmyee','ããã£ãŒ'=>'mmyii','ããã§ãŒ'=>'mmyoo','ããã¥ãŒ'=>'mmyuu',
|
---|
1523 | 'ããã£ãŒ'=>'nnyaa','ããã§ãŒ'=>'nnyee','ããã£ãŒ'=>'nnyii','ããã§ãŒ'=>'nnyoo','ããã¥ãŒ'=>'nnyuu',
|
---|
1524 | 'ããªã£ãŒ'=>'rryaa','ããªã§ãŒ'=>'rryee','ããªã£ãŒ'=>'rryii','ããªã§ãŒ'=>'rryoo','ããªã¥ãŒ'=>'rryuu',
|
---|
1525 | 'ãã·ã£ãŒ'=>'sshaa','ãã·ã§ãŒ'=>'sshee','ãã·ãŒ'=>'sshii','ãã·ã§ãŒ'=>'sshoo','ãã·ã¥ãŒ'=>'sshuu',
|
---|
1526 | 'ããã£ãŒ'=>'cchaa','ããã§ãŒ'=>'cchee','ãããŒ'=>'cchii','ããã§ãŒ'=>'cchoo','ããã¥ãŒ'=>'cchuu',
|
---|
1527 | 'ããã£ãŒ'=>'ttii',
|
---|
1528 | 'ããã£ãŒ'=>'ddii',
|
---|
1529 |
|
---|
1530 | // 3 character syllables - doubled vowels
|
---|
1531 | 'ãã¡ãŒ'=>'faa','ãã§ãŒ'=>'fee','ãã£ãŒ'=>'fii','ãã©ãŒ'=>'foo',
|
---|
1532 | 'ãã£ãŒ'=>'fyaa','ãã§ãŒ'=>'fyee','ãã£ãŒ'=>'fyii','ãã§ãŒ'=>'fyoo','ãã¥ãŒ'=>'fyuu',
|
---|
1533 | 'ãã£ãŒ'=>'hyaa','ãã§ãŒ'=>'hyee','ãã£ãŒ'=>'hyii','ãã§ãŒ'=>'hyoo','ãã¥ãŒ'=>'hyuu',
|
---|
1534 | 'ãã£ãŒ'=>'byaa','ãã§ãŒ'=>'byee','ãã£ãŒ'=>'byii','ãã§ãŒ'=>'byoo','ãã¥ãŒ'=>'byuu',
|
---|
1535 | 'ãã£ãŒ'=>'pyaa','ãã§ãŒ'=>'pyee','ãã£ãŒ'=>'pyii','ãã§ãŒ'=>'pyoo','ãã¥ãŒ'=>'pyuu',
|
---|
1536 | 'ãã£ãŒ'=>'kyaa','ãã§ãŒ'=>'kyee','ãã£ãŒ'=>'kyii','ãã§ãŒ'=>'kyoo','ãã¥ãŒ'=>'kyuu',
|
---|
1537 | 'ã®ã£ãŒ'=>'gyaa','ã®ã§ãŒ'=>'gyee','ã®ã£ãŒ'=>'gyii','ã®ã§ãŒ'=>'gyoo','ã®ã¥ãŒ'=>'gyuu',
|
---|
1538 | 'ãã£ãŒ'=>'myaa','ãã§ãŒ'=>'myee','ãã£ãŒ'=>'myii','ãã§ãŒ'=>'myoo','ãã¥ãŒ'=>'myuu',
|
---|
1539 | 'ãã£ãŒ'=>'nyaa','ãã§ãŒ'=>'nyee','ãã£ãŒ'=>'nyii','ãã§ãŒ'=>'nyoo','ãã¥ãŒ'=>'nyuu',
|
---|
1540 | 'ãªã£ãŒ'=>'ryaa','ãªã§ãŒ'=>'ryee','ãªã£ãŒ'=>'ryii','ãªã§ãŒ'=>'ryoo','ãªã¥ãŒ'=>'ryuu',
|
---|
1541 | 'ã·ã£ãŒ'=>'shaa','ã·ã§ãŒ'=>'shee','ã·ãŒ'=>'shii','ã·ã§ãŒ'=>'shoo','ã·ã¥ãŒ'=>'shuu',
|
---|
1542 | 'ãžã£ãŒ'=>'jaa','ãžã§ãŒ'=>'jee','ãžãŒ'=>'jii','ãžã§ãŒ'=>'joo','ãžã¥ãŒ'=>'juu',
|
---|
1543 | 'ã¹ã¡ãŒ'=>'swaa','ã¹ã§ãŒ'=>'swee','ã¹ã£ãŒ'=>'swii','ã¹ã©ãŒ'=>'swoo','ã¹ã¥ãŒ'=>'swuu',
|
---|
1544 | 'ãã¡ãŒ'=>'daa','ãã§ãŒ'=>'dee','ãã£ãŒ'=>'dii','ãã©ãŒ'=>'doo','ãã¥ãŒ'=>'duu',
|
---|
1545 | 'ãã£ãŒ'=>'chaa','ãã§ãŒ'=>'chee','ããŒ'=>'chii','ãã§ãŒ'=>'choo','ãã¥ãŒ'=>'chuu',
|
---|
1546 | 'ãã£ãŒ'=>'dyaa','ãã§ãŒ'=>'dyee','ãã£ãŒ'=>'dyii','ãã§ãŒ'=>'dyoo','ãã¥ãŒ'=>'dyuu',
|
---|
1547 | 'ãã£ãŒ'=>'tsaa','ãã§ãŒ'=>'tsee','ãã£ãŒ'=>'tsii','ãã§ãŒ'=>'tsoo','ããŒ'=>'tsuu',
|
---|
1548 | 'ãã¡ãŒ'=>'twaa','ãã§ãŒ'=>'twee','ãã£ãŒ'=>'twii','ãã©ãŒ'=>'twoo','ãã¥ãŒ'=>'twuu',
|
---|
1549 | 'ãã¡ãŒ'=>'dwaa','ãã§ãŒ'=>'dwee','ãã£ãŒ'=>'dwii','ãã©ãŒ'=>'dwoo','ãã¥ãŒ'=>'dwuu',
|
---|
1550 | 'ãŠã¡ãŒ'=>'whaa','ãŠã§ãŒ'=>'whee','ãŠã£ãŒ'=>'whii','ãŠã©ãŒ'=>'whoo','ãŠã¥ãŒ'=>'whuu',
|
---|
1551 | 'ãŽã£ãŒ'=>'vyaa','ãŽã§ãŒ'=>'vyee','ãŽã£ãŒ'=>'vyii','ãŽã§ãŒ'=>'vyoo','ãŽã¥ãŒ'=>'vyuu',
|
---|
1552 | 'ãŽã¡ãŒ'=>'vaa','ãŽã§ãŒ'=>'vee','ãŽã£ãŒ'=>'vii','ãŽã©ãŒ'=>'voo','ãŽãŒ'=>'vuu',
|
---|
1553 | 'ãŠã§ãŒ'=>'wee','ãŠã£ãŒ'=>'wii',
|
---|
1554 | 'ã€ã§ãŒ'=>'yee',
|
---|
1555 | 'ãã£ãŒ'=>'tii',
|
---|
1556 | 'ãã£ãŒ'=>'dii',
|
---|
1557 |
|
---|
1558 | // 3 character syllables - doubled consonants
|
---|
1559 | 'ããã£'=>'bbya','ããã§'=>'bbye','ããã£'=>'bbyi','ããã§'=>'bbyo','ããã¥'=>'bbyu',
|
---|
1560 | 'ããã£'=>'ppya','ããã§'=>'ppye','ããã£'=>'ppyi','ããã§'=>'ppyo','ããã¥'=>'ppyu',
|
---|
1561 | 'ããã£'=>'kkya','ããã§'=>'kkye','ããã£'=>'kkyi','ããã§'=>'kkyo','ããã¥'=>'kkyu',
|
---|
1562 | 'ãã®ã£'=>'ggya','ãã®ã§'=>'ggye','ãã®ã£'=>'ggyi','ãã®ã§'=>'ggyo','ãã®ã¥'=>'ggyu',
|
---|
1563 | 'ããã£'=>'mmya','ããã§'=>'mmye','ããã£'=>'mmyi','ããã§'=>'mmyo','ããã¥'=>'mmyu',
|
---|
1564 | 'ããã£'=>'nnya','ããã§'=>'nnye','ããã£'=>'nnyi','ããã§'=>'nnyo','ããã¥'=>'nnyu',
|
---|
1565 | 'ããªã£'=>'rrya','ããªã§'=>'rrye','ããªã£'=>'rryi','ããªã§'=>'rryo','ããªã¥'=>'rryu',
|
---|
1566 | 'ãã·ã£'=>'ssha','ãã·ã§'=>'sshe','ãã·'=>'sshi','ãã·ã§'=>'ssho','ãã·ã¥'=>'sshu',
|
---|
1567 | 'ããã£'=>'ccha','ããã§'=>'cche','ãã'=>'cchi','ããã§'=>'ccho','ããã¥'=>'cchu',
|
---|
1568 | 'ããã£'=>'tti',
|
---|
1569 | 'ããã£'=>'ddi',
|
---|
1570 |
|
---|
1571 | // 3 character syllables - doubled vowel and consonants
|
---|
1572 | 'ãããŒ'=>'bbaa','ãããŒ'=>'bbee','ãããŒ'=>'bbii','ãããŒ'=>'bboo','ãããŒ'=>'bbuu',
|
---|
1573 | 'ãããŒ'=>'ppaa','ãããŒ'=>'ppee','ãããŒ'=>'ppii','ãããŒ'=>'ppoo','ãããŒ'=>'ppuu',
|
---|
1574 | 'ãã±ãŒ'=>'kkee','ãããŒ'=>'kkii','ãã³ãŒ'=>'kkoo','ãã¯ãŒ'=>'kkuu','ãã«ãŒ'=>'kkaa',
|
---|
1575 | 'ãã¬ãŒ'=>'ggaa','ãã²ãŒ'=>'ggee','ãã®ãŒ'=>'ggii','ããŽãŒ'=>'ggoo','ãã°ãŒ'=>'gguu',
|
---|
1576 | 'ãããŒ'=>'maa','ãã¡ãŒ'=>'mee','ãããŒ'=>'mii','ãã¢ãŒ'=>'moo','ãã ãŒ'=>'muu',
|
---|
1577 | 'ãããŒ'=>'nnaa','ãããŒ'=>'nnee','ãããŒ'=>'nnii','ãããŒ'=>'nnoo','ãããŒ'=>'nnuu',
|
---|
1578 | 'ãã©ãŒ'=>'rraa','ãã¬ãŒ'=>'rree','ããªãŒ'=>'rrii','ãããŒ'=>'rroo','ãã«ãŒ'=>'rruu',
|
---|
1579 | 'ããµãŒ'=>'ssaa','ãã»ãŒ'=>'ssee','ãã·ãŒ'=>'sshii','ããœãŒ'=>'ssoo','ãã¹ãŒ'=>'ssuu',
|
---|
1580 | 'ãã¶ãŒ'=>'zzaa','ããŒãŒ'=>'zzee','ããžãŒ'=>'jjii','ããŸãŒ'=>'zzoo','ããºãŒ'=>'zzuu',
|
---|
1581 | 'ãã¿ãŒ'=>'ttaa','ãããŒ'=>'ttee','ãããŒ'=>'chii','ãããŒ'=>'ttoo','ãããŒ'=>'ttsuu',
|
---|
1582 | 'ãããŒ'=>'ddaa','ãããŒ'=>'ddee','ãããŒ'=>'ddii','ãããŒ'=>'ddoo','ãã
|
---|
1583 | ãŒ'=>'dduu',
|
---|
1584 |
|
---|
1585 | // 2 character syllables - normal
|
---|
1586 | 'ãã¡'=>'fa','ãã§'=>'fe','ãã£'=>'fi','ãã©'=>'fo','ãã¥'=>'fu',
|
---|
1587 | // 'ãã£'=>'fya','ãã§'=>'fye','ãã£'=>'fyi','ãã§'=>'fyo','ãã¥'=>'fyu',
|
---|
1588 | 'ãã£'=>'fa','ãã§'=>'fe','ãã£'=>'fi','ãã§'=>'fo','ãã¥'=>'fu',
|
---|
1589 | 'ãã£'=>'hya','ãã§'=>'hye','ãã£'=>'hyi','ãã§'=>'hyo','ãã¥'=>'hyu',
|
---|
1590 | 'ãã£'=>'bya','ãã§'=>'bye','ãã£'=>'byi','ãã§'=>'byo','ãã¥'=>'byu',
|
---|
1591 | 'ãã£'=>'pya','ãã§'=>'pye','ãã£'=>'pyi','ãã§'=>'pyo','ãã¥'=>'pyu',
|
---|
1592 | 'ãã£'=>'kya','ãã§'=>'kye','ãã£'=>'kyi','ãã§'=>'kyo','ãã¥'=>'kyu',
|
---|
1593 | 'ã®ã£'=>'gya','ã®ã§'=>'gye','ã®ã£'=>'gyi','ã®ã§'=>'gyo','ã®ã¥'=>'gyu',
|
---|
1594 | 'ãã£'=>'mya','ãã§'=>'mye','ãã£'=>'myi','ãã§'=>'myo','ãã¥'=>'myu',
|
---|
1595 | 'ãã£'=>'nya','ãã§'=>'nye','ãã£'=>'nyi','ãã§'=>'nyo','ãã¥'=>'nyu',
|
---|
1596 | 'ãªã£'=>'rya','ãªã§'=>'rye','ãªã£'=>'ryi','ãªã§'=>'ryo','ãªã¥'=>'ryu',
|
---|
1597 | 'ã·ã£'=>'sha','ã·ã§'=>'she','ã·ã§'=>'sho','ã·ã¥'=>'shu',
|
---|
1598 | 'ãžã£'=>'ja','ãžã§'=>'je','ãžã§'=>'jo','ãžã¥'=>'ju',
|
---|
1599 | 'ã¹ã¡'=>'swa','ã¹ã§'=>'swe','ã¹ã£'=>'swi','ã¹ã©'=>'swo','ã¹ã¥'=>'swu',
|
---|
1600 | 'ãã¡'=>'da','ãã§'=>'de','ãã£'=>'di','ãã©'=>'do','ãã¥'=>'du',
|
---|
1601 | 'ãã£'=>'cha','ãã§'=>'che','ã'=>'chi','ãã§'=>'cho','ãã¥'=>'chu',
|
---|
1602 | // 'ãã£'=>'dya','ãã§'=>'dye','ãã£'=>'dyi','ãã§'=>'dyo','ãã¥'=>'dyu',
|
---|
1603 | 'ãã£'=>'tsa','ãã§'=>'tse','ãã£'=>'tsi','ãã§'=>'tso','ã'=>'tsu',
|
---|
1604 | 'ãã¡'=>'twa','ãã§'=>'twe','ãã£'=>'twi','ãã©'=>'two','ãã¥'=>'twu',
|
---|
1605 | 'ãã¡'=>'dwa','ãã§'=>'dwe','ãã£'=>'dwi','ãã©'=>'dwo','ãã¥'=>'dwu',
|
---|
1606 | 'ãŠã¡'=>'wha','ãŠã§'=>'whe','ãŠã£'=>'whi','ãŠã©'=>'who','ãŠã¥'=>'whu',
|
---|
1607 | 'ãŽã£'=>'vya','ãŽã§'=>'vye','ãŽã£'=>'vyi','ãŽã§'=>'vyo','ãŽã¥'=>'vyu',
|
---|
1608 | 'ãŽã¡'=>'va','ãŽã§'=>'ve','ãŽã£'=>'vi','ãŽã©'=>'vo','ãŽ'=>'vu',
|
---|
1609 | 'ãŠã§'=>'we','ãŠã£'=>'wi',
|
---|
1610 | 'ã€ã§'=>'ye',
|
---|
1611 | 'ãã£'=>'ti',
|
---|
1612 | 'ãã£'=>'di',
|
---|
1613 |
|
---|
1614 | // 2 character syllables - doubled vowel
|
---|
1615 | 'ã¢ãŒ'=>'aa','ãšãŒ'=>'ee','ã€ãŒ'=>'ii','ãªãŒ'=>'oo','ãŠãŒ'=>'uu',
|
---|
1616 | 'ããŒ'=>'daa','ããŒ'=>'dee','ããŒ'=>'dii','ããŒ'=>'doo','ã
|
---|
1617 | ãŒ'=>'duu',
|
---|
1618 | 'ããŒ'=>'haa','ããŒ'=>'hee','ããŒ'=>'hii','ããŒ'=>'hoo','ããŒ'=>'fuu',
|
---|
1619 | 'ããŒ'=>'baa','ããŒ'=>'bee','ããŒ'=>'bii','ããŒ'=>'boo','ããŒ'=>'buu',
|
---|
1620 | 'ããŒ'=>'paa','ããŒ'=>'pee','ããŒ'=>'pii','ããŒ'=>'poo','ããŒ'=>'puu',
|
---|
1621 | 'ã±ãŒ'=>'kee','ããŒ'=>'kii','ã³ãŒ'=>'koo','ã¯ãŒ'=>'kuu','ã«ãŒ'=>'kaa',
|
---|
1622 | 'ã¬ãŒ'=>'gaa','ã²ãŒ'=>'gee','ã®ãŒ'=>'gii','ãŽãŒ'=>'goo','ã°ãŒ'=>'guu',
|
---|
1623 | 'ããŒ'=>'maa','ã¡ãŒ'=>'mee','ããŒ'=>'mii','ã¢ãŒ'=>'moo','ã ãŒ'=>'muu',
|
---|
1624 | 'ããŒ'=>'naa','ããŒ'=>'nee','ããŒ'=>'nii','ããŒ'=>'noo','ããŒ'=>'nuu',
|
---|
1625 | 'ã©ãŒ'=>'raa','ã¬ãŒ'=>'ree','ãªãŒ'=>'rii','ããŒ'=>'roo','ã«ãŒ'=>'ruu',
|
---|
1626 | 'ãµãŒ'=>'saa','ã»ãŒ'=>'see','ã·ãŒ'=>'shii','ãœãŒ'=>'soo','ã¹ãŒ'=>'suu',
|
---|
1627 | 'ã¶ãŒ'=>'zaa','ãŒãŒ'=>'zee','ãžãŒ'=>'jii','ãŸãŒ'=>'zoo','ãºãŒ'=>'zuu',
|
---|
1628 | 'ã¿ãŒ'=>'taa','ããŒ'=>'tee','ããŒ'=>'chii','ããŒ'=>'too','ããŒ'=>'tsuu',
|
---|
1629 | 'ã¯ãŒ'=>'waa','ã²ãŒ'=>'woo',
|
---|
1630 | 'ã€ãŒ'=>'yaa','ãšãŒ'=>'yoo','ãŠãŒ'=>'yuu',
|
---|
1631 | 'ãµãŒ'=>'kaa','ã¶ãŒ'=>'kee',
|
---|
1632 | // old characters
|
---|
1633 | 'ã±ãŒ'=>'wee','ã°ãŒ'=>'wii',
|
---|
1634 |
|
---|
1635 | // separate katakana 'n'
|
---|
1636 | 'ã³ã¢'=>'n_a','ã³ãš'=>'n_e','ã³ã€'=>'n_i','ã³ãª'=>'n_o','ã³ãŠ'=>'n_u',
|
---|
1637 | 'ã³ã€'=>'n_ya','ã³ãš'=>'n_yo','ã³ãŠ'=>'n_yu',
|
---|
1638 |
|
---|
1639 | // 2 character syllables - doubled consonants
|
---|
1640 | 'ãã'=>'bba','ãã'=>'bbe','ãã'=>'bbi','ãã'=>'bbo','ãã'=>'bbu',
|
---|
1641 | 'ãã'=>'ppa','ãã'=>'ppe','ãã'=>'ppi','ãã'=>'ppo','ãã'=>'ppu',
|
---|
1642 | 'ãã±'=>'kke','ãã'=>'kki','ãã³'=>'kko','ãã¯'=>'kku','ãã«'=>'kka',
|
---|
1643 | 'ãã¬'=>'gga','ãã²'=>'gge','ãã®'=>'ggi','ããŽ'=>'ggo','ãã°'=>'ggu',
|
---|
1644 | 'ãã'=>'ma','ãã¡'=>'me','ãã'=>'mi','ãã¢'=>'mo','ãã '=>'mu',
|
---|
1645 | 'ãã'=>'nna','ãã'=>'nne','ãã'=>'nni','ãã'=>'nno','ãã'=>'nnu',
|
---|
1646 | 'ãã©'=>'rra','ãã¬'=>'rre','ããª'=>'rri','ãã'=>'rro','ãã«'=>'rru',
|
---|
1647 | 'ããµ'=>'ssa','ãã»'=>'sse','ãã·'=>'sshi','ããœ'=>'sso','ãã¹'=>'ssu',
|
---|
1648 | 'ãã¶'=>'zza','ããŒ'=>'zze','ããž'=>'jji','ããŸ'=>'zzo','ããº'=>'zzu',
|
---|
1649 | 'ãã¿'=>'tta','ãã'=>'tte','ãã'=>'cchi','ãã'=>'tto','ãã'=>'ttsu',
|
---|
1650 | 'ãã'=>'dda','ãã'=>'dde','ãã'=>'ddi','ãã'=>'ddo','ãã
|
---|
1651 | '=>'ddu',
|
---|
1652 |
|
---|
1653 | // 1 character syllables
|
---|
1654 | 'ã¢'=>'a','ãš'=>'e','ã€'=>'i','ãª'=>'o','ãŠ'=>'u','ã³'=>'n',
|
---|
1655 | 'ã'=>'ha','ã'=>'he','ã'=>'hi','ã'=>'ho','ã'=>'fu',
|
---|
1656 | 'ã'=>'ba','ã'=>'be','ã'=>'bi','ã'=>'bo','ã'=>'bu',
|
---|
1657 | 'ã'=>'pa','ã'=>'pe','ã'=>'pi','ã'=>'po','ã'=>'pu',
|
---|
1658 | 'ã±'=>'ke','ã'=>'ki','ã³'=>'ko','ã¯'=>'ku','ã«'=>'ka',
|
---|
1659 | 'ã¬'=>'ga','ã²'=>'ge','ã®'=>'gi','ãŽ'=>'go','ã°'=>'gu',
|
---|
1660 | 'ã'=>'ma','ã¡'=>'me','ã'=>'mi','ã¢'=>'mo','ã '=>'mu',
|
---|
1661 | 'ã'=>'na','ã'=>'ne','ã'=>'ni','ã'=>'no','ã'=>'nu',
|
---|
1662 | 'ã©'=>'ra','ã¬'=>'re','ãª'=>'ri','ã'=>'ro','ã«'=>'ru',
|
---|
1663 | 'ãµ'=>'sa','ã»'=>'se','ã·'=>'shi','ãœ'=>'so','ã¹'=>'su',
|
---|
1664 | 'ã¶'=>'za','ãŒ'=>'ze','ãž'=>'ji','ãŸ'=>'zo','ãº'=>'zu',
|
---|
1665 | 'ã¿'=>'ta','ã'=>'te','ã'=>'chi','ã'=>'to','ã'=>'tsu',
|
---|
1666 | 'ã'=>'da','ã'=>'de','ã'=>'di','ã'=>'do','ã
|
---|
1667 | '=>'du',
|
---|
1668 | 'ã¯'=>'wa','ã²'=>'wo',
|
---|
1669 | 'ã€'=>'ya','ãš'=>'yo','ãŠ'=>'yu',
|
---|
1670 | 'ãµ'=>'ka','ã¶'=>'ke',
|
---|
1671 | // old characters
|
---|
1672 | 'ã±'=>'we','ã°'=>'wi',
|
---|
1673 |
|
---|
1674 | // convert what's left (probably only kicks in when something's missing above)
|
---|
1675 | 'ã¡'=>'a','ã§'=>'e','ã£'=>'i','ã©'=>'o','ã¥'=>'u',
|
---|
1676 | 'ã£'=>'ya','ã§'=>'yo','ã¥'=>'yu',
|
---|
1677 |
|
---|
1678 | // special characters
|
---|
1679 | 'ã»'=>'_','ã'=>'_',
|
---|
1680 | 'ãŒ'=>'_', // when used with hiragana (seldom), this character would not be converted otherwise
|
---|
1681 |
|
---|
1682 | // 'ã©'=>'la','ã¬'=>'le','ãª'=>'li','ã'=>'lo','ã«'=>'lu',
|
---|
1683 | // 'ãã£'=>'cya','ãã§'=>'cye','ãã£'=>'cyi','ãã§'=>'cyo','ãã¥'=>'cyu',
|
---|
1684 | //'ãã£'=>'dha','ãã§'=>'dhe','ãã£'=>'dhi','ãã§'=>'dho','ãã¥'=>'dhu',
|
---|
1685 | // 'ãªã£'=>'lya','ãªã§'=>'lye','ãªã£'=>'lyi','ãªã§'=>'lyo','ãªã¥'=>'lyu',
|
---|
1686 | // 'ãã£'=>'tha','ãã§'=>'the','ãã£'=>'thi','ãã§'=>'tho','ãã¥'=>'thu',
|
---|
1687 | //'ãã¡'=>'fwa','ãã§'=>'fwe','ãã£'=>'fwi','ãã©'=>'fwo','ãã¥'=>'fwu',
|
---|
1688 | //'ãã£'=>'tya','ãã§'=>'tye','ãã£'=>'tyi','ãã§'=>'tyo','ãã¥'=>'tyu',
|
---|
1689 | // 'ãžã£'=>'jya','ãžã§'=>'jye','ãžã£'=>'jyi','ãžã§'=>'jyo','ãžã¥'=>'jyu',
|
---|
1690 | // 'ãžã£'=>'zha','ãžã§'=>'zhe','ãžã£'=>'zhi','ãžã§'=>'zho','ãžã¥'=>'zhu',
|
---|
1691 | //'ãžã£'=>'zya','ãžã§'=>'zye','ãžã£'=>'zyi','ãžã§'=>'zyo','ãžã¥'=>'zyu',
|
---|
1692 | //'ã·ã£'=>'sya','ã·ã§'=>'sye','ã·ã£'=>'syi','ã·ã§'=>'syo','ã·ã¥'=>'syu',
|
---|
1693 | //'ã·'=>'ci','ã'=>'hu',ã·'=>'si','ã'=>'ti','ã'=>'tu','ã€'=>'yi','ã'=>'dzi',
|
---|
1694 |
|
---|
1695 | // "Greeklish"
|
---|
1696 | 'Î'=>'G','Î'=>'E','Î'=>'Th','Î'=>'L','Î'=>'X','Î '=>'P','Σ'=>'S','Ί'=>'F','Κ'=>'Ps',
|
---|
1697 | 'γ'=>'g','ÎŽ'=>'e','Ξ'=>'th','λ'=>'l','Ο'=>'x','Ï'=>'p','Ï'=>'s','Ï'=>'f','Ï'=>'ps',
|
---|
1698 |
|
---|
1699 | // Thai
|
---|
1700 | 'àž'=>'k','àž'=>'kh','àž'=>'kh','àž'=>'kh','àž
|
---|
1701 | '=>'kh','àž'=>'kh','àž'=>'ng','àž'=>'ch',
|
---|
1702 | 'àž'=>'ch','àž'=>'ch','àž'=>'s','àž'=>'ch','àž'=>'y','àž'=>'d','àž'=>'t','àž'=>'th',
|
---|
1703 | 'àž'=>'d','àž'=>'th','àž'=>'n','àž'=>'d','àž'=>'t','àž'=>'th','àž'=>'th','àž'=>'th',
|
---|
1704 | 'àž'=>'n','àž'=>'b','àž'=>'p','àž'=>'ph','àž'=>'f','àž'=>'ph','àž'=>'f','àž '=>'ph',
|
---|
1705 | 'àž¡'=>'m','àž¢'=>'y','àž£'=>'r','àž€'=>'rue','àž€à¹
|
---|
1706 | '=>'rue','àž¥'=>'l','àžŠ'=>'lue',
|
---|
1707 | 'àžŠà¹
|
---|
1708 | '=>'lue','àž§'=>'w','àžš'=>'s','àž©'=>'s','àžª'=>'s','àž«'=>'h','àž¬'=>'l','àž®'=>'h',
|
---|
1709 | 'àž°'=>'a','àž±'=>'a','àž£àž£'=>'a','àž²'=>'a','à¹
|
---|
1710 | '=>'a','àž³'=>'am','à¹àž²'=>'am',
|
---|
1711 | 'àžŽ'=>'i','àžµ'=>'i','àž¶'=>'ue','àžµ'=>'ue','àžž'=>'u','àž¹'=>'u',
|
---|
1712 | 'à¹'=>'e','à¹'=>'ae','à¹'=>'o','àž'=>'o',
|
---|
1713 | 'àžµàž¢àž°'=>'ia','àžµàž¢'=>'ia','àž·àžàž°'=>'uea','àž·àž'=>'uea','àž±àž§àž°'=>'ua','àž±àž§'=>'ua',
|
---|
1714 | 'à¹'=>'ai','à¹'=>'ai','àž±àž¢'=>'ai','àž²àž¢'=>'ai','àž²àž§'=>'ao',
|
---|
1715 | 'àžžàž¢'=>'ui','àžàž¢'=>'oi','àž·àžàž¢'=>'ueai','àž§àž¢'=>'uai',
|
---|
1716 | 'àžŽàž§'=>'io','à¹àž§'=>'eo','àžµàž¢àž§'=>'iao',
|
---|
1717 | 'à¹'=>'','à¹'=>'','à¹'=>'','à¹'=>'','à¹'=>'',
|
---|
1718 | 'à¹'=>'','à¹'=>'','à¹'=>'','àžº'=>'',
|
---|
1719 | 'à¹'=>'2','à¹'=>'o','àž¯'=>'-','à¹'=>'-','à¹'=>'-',
|
---|
1720 | 'à¹'=>'0','à¹'=>'1','à¹'=>'2','à¹'=>'3','à¹'=>'4',
|
---|
1721 | 'à¹'=>'5','à¹'=>'6','à¹'=>'7','à¹'=>'8','à¹'=>'9',
|
---|
1722 |
|
---|
1723 | // Korean
|
---|
1724 | 'ã±'=>'k','ã
|
---|
1725 | '=>'kh','ã²'=>'kk','ã·'=>'t','ã
|
---|
1726 | '=>'th','ãž'=>'tt','ã
|
---|
1727 | '=>'p',
|
---|
1728 | 'ã
|
---|
1729 | '=>'ph','ã
|
---|
1730 | '=>'pp','ã
|
---|
1731 | '=>'c','ã
|
---|
1732 | '=>'ch','ã
|
---|
1733 | '=>'cc','ã
|
---|
1734 |
|
---|
1735 | '=>'s','ã
|
---|
1736 | '=>'ss',
|
---|
1737 | 'ã
|
---|
1738 | '=>'h','ã
|
---|
1739 | '=>'ng','ãŽ'=>'n','ã¹'=>'l','ã
|
---|
1740 | '=>'m', 'ã
|
---|
1741 | '=>'a','ã
|
---|
1742 | '=>'e','ã
|
---|
1743 | '=>'o',
|
---|
1744 | 'ã
|
---|
1745 | '=>'wu','ã
|
---|
1746 | ¡'=>'u','ã
|
---|
1747 | £'=>'i','ã
|
---|
1748 | '=>'ay','ã
|
---|
1749 | '=>'ey','ã
|
---|
1750 | '=>'oy','ã
|
---|
1751 | '=>'wa','ã
|
---|
1752 | '=>'we',
|
---|
1753 | 'ã
|
---|
1754 | '=>'wi','ã
|
---|
1755 | '=>'way','ã
|
---|
1756 | '=>'wey','ã
|
---|
1757 | ¢'=>'uy','ã
|
---|
1758 | '=>'ya','ã
|
---|
1759 | '=>'ye','ã
|
---|
1760 | '=>'oy',
|
---|
1761 | 'ã
|
---|
1762 | '=>'yu','ã
|
---|
1763 | '=>'yay','ã
|
---|
1764 | '=>'yey',
|
---|
1765 | );
|
---|
1766 |
|
---|
1767 |
|
---|