Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: documentation/trunk/packages/dokuwiki-2011-05-25a/inc/utf8.php@ 30098

Last change on this file since 30098 was 25027, checked in by jmt12, 12 years ago
Adding the packages directory, and within it a configured version of dokuwiki all ready to run
File size: 83.4 KB

Line
1	<?php
2	/**
3	* UTF8 helper functions
4	*
5	* @license LGPL 2.1 (http://www.gnu.org/copyleft/lesser.html)
6	* @author Andreas Gohr <[email protected]>
7	*/
8
/**
 * Decide whether the mbstring extension backs the helpers in this file.
 *
 * A UTF8_MBSTRING value defined before this point is left untouched.
 * Otherwise it becomes 1 when mb_substr() exists and UTF8_NOMBSTRING is
 * not defined, and 0 in every other case.
 */
if (!defined('UTF8_MBSTRING')) {
    define(
        'UTF8_MBSTRING',
        (function_exists('mb_substr') && !defined('UTF8_NOMBSTRING')) ? 1 : 0
    );
}

// With mbstring in use, make UTF-8 its internal encoding.
if (UTF8_MBSTRING) {
    mb_internal_encoding('UTF-8');
}
21
if (!function_exists('utf8_isASCII')) {
    /**
     * Tell whether a string consists of 7-bit ASCII bytes only.
     *
     * @author Andreas Haerter
     * @param string $str string to inspect
     * @return bool false only when the pattern finds a byte outside 0x00-0x7F
     */
    function utf8_isASCII($str)
    {
        $highByteFound = preg_match('/(?:[^\x00-\x7F])/', $str);

        return $highByteFound !== 1;
    }
}
32
if (!function_exists('utf8_strip')) {
    /**
     * Strips all high-byte chars and returns a pure 7-bit ASCII string.
     *
     * Every byte >= 0x80 is removed; bytes below 0x80 are kept in order.
     * (The previous implementation used the `$str{$i}` offset syntax,
     * which is a fatal error on PHP 8.)
     *
     * @author Andreas Gohr
     * @param string $str input bytes
     * @return string the input without any byte >= 0x80
     */
    function utf8_strip($str)
    {
        // No 'u' modifier: the class works on raw bytes, not code points.
        return preg_replace('/[\x80-\xFF]+/', '', $str);
    }
}
52
if (!function_exists('utf8_check')) {
    /**
     * Tries to detect if a string is in Unicode (UTF-8) encoding.
     *
     * This is a structural check only: each lead byte must be followed by
     * the number of 10bbbbbb continuation bytes its bit pattern announces.
     * Lead bytes announcing 5- and 6-byte sequences are accepted, and the
     * decoded values are not range-checked.
     *
     * @link http://www.php.net/manual/en/function.utf8-encode.php
     * @param string $Str bytes to examine
     * @return bool true when every sequence is structurally complete
     */
    function utf8_check($Str)
    {
        $len = strlen($Str);
        for ($i = 0; $i < $len; $i++) {
            $b = ord($Str[$i]);
            if ($b < 0x80) continue;                 // 0bbbbbbb
            elseif (($b & 0xE0) == 0xC0) $n = 1;     // 110bbbbb
            elseif (($b & 0xF0) == 0xE0) $n = 2;     // 1110bbbb
            elseif (($b & 0xF8) == 0xF0) $n = 3;     // 11110bbb
            elseif (($b & 0xFC) == 0xF8) $n = 4;     // 111110bb
            elseif (($b & 0xFE) == 0xFC) $n = 5;     // 1111110b
            else return false;                       // does not match any model

            // $n bytes matching 10bbbbbb must follow
            for ($j = 0; $j < $n; $j++) {
                if ((++$i == $len) || ((ord($Str[$i]) & 0xC0) != 0x80)) {
                    return false;
                }
            }
        }
        return true;
    }
}
80
if (!function_exists('utf8_strlen')) {
    /**
     * Unicode aware replacement for strlen().
     *
     * utf8_decode() maps every character that does not exist in
     * ISO-8859-1 to a single '?', so the byte length of its result is
     * used as the character count.
     *
     * @author <chernyshevsky at hotmail dot com>
     * @see strlen()
     * @see utf8_decode()
     * @param string $string UTF-8 string
     * @return int byte length of the utf8_decode()d string
     */
    function utf8_strlen($string)
    {
        $singleByte = utf8_decode($string);

        return strlen($singleByte);
    }
}
97
if (!function_exists('utf8_substr')) {
    /**
     * UTF-8 aware alternative to substr().
     *
     * Returns the part of a string selected by a character offset and an
     * optional character length.
     *
     * @author Harry Fuecks
     * @author Chris Smith
     * @param string  $str    UTF-8 string
     * @param integer $offset number of UTF-8 characters to skip (negative counts from the end)
     * @param integer $length (optional) length in UTF-8 characters from offset
     * @return string the selected part; '' when nothing matches
     */
    function utf8_substr($str, $offset, $length = null)
    {
        if (UTF8_MBSTRING) {
            return ($length === null)
                ? mb_substr($str, $offset)
                : mb_substr($str, $offset, $length);
        }

        /*
         * Fallback without mbstring: a PCRE pattern with the 'u' modifier.
         *
         * - PCRE repetition counts must stay below 65536, so larger offsets
         *   and lengths are written as N groups of 65535 characters plus a
         *   remainder.
         * - Like mb_substr(), '' is returned where substr() might return false.
         * - Counting characters is relatively expensive, so it only happens
         *   for a negative offset or when a length is given.
         */

        // normalise argument types up front to avoid repeated notices
        $str    = (string)$str;
        $offset = (int)$offset;
        if ($length !== null) {
            $length = (int)$length;
        }

        // trivial cases
        if ($length === 0) {
            return '';
        }
        if ($offset < 0 && $length < 0 && $length < $offset) {
            return '';
        }

        $charCount = null; // filled lazily, see notes above

        // turn a negative offset into the equivalent positive one
        if ($offset < 0) {
            $charCount = strlen(utf8_decode($str));
            $offset    = max(0, $charCount + $offset);
        }

        // non-capturing prefix that consumes exactly $offset characters
        $head = '^';
        if ($offset > 0) {
            $chunks = (int)($offset / 65535);
            $rest   = $offset % 65535;
            $head   = '^(?:' . ($chunks ? '(?:.{65535}){' . $chunks . '}' : '') . '.{' . $rest . '})';
        }

        // capturing part that selects the wanted characters
        $tail = '';
        if ($length === null) {
            $tail = '(.*)$'; // everything up to the end
        } else {
            if ($charCount === null) {
                $charCount = strlen(utf8_decode($str));
            }
            if ($offset > $charCount) {
                return '';
            }

            if ($length > 0) {
                // never ask for more characters than remain after the offset
                $length = min($charCount - $offset, $length);

                $chunks = (int)($length / 65535);
                $rest   = $length % 65535;
                $tail   = '(' . ($chunks ? '(?:.{65535}){' . $chunks . '}' : '') . '.{' . $rest . '})';
            } elseif ($length < 0) {
                if ($length < ($offset - $charCount)) {
                    return '';
                }

                // capture everything except -$length characters anchored at the end
                $chunks = (int)((-$length) / 65535);
                $rest   = (-$length) % 65535;
                $tail   = '(.*)(?:' . ($chunks ? '(?:.{65535}){' . $chunks . '}' : '') . '.{' . $rest . '})$';
            }
        }

        if (!preg_match('#' . $head . $tail . '#us', $str, $match)) {
            return '';
        }
        return $match[1];
    }
}
201
if (!function_exists('utf8_substr_replace')) {
    /**
     * Unicode aware replacement for substr_replace().
     *
     * The result is built from three parts: the first $start characters
     * (only when $start is positive), the replacement, and everything
     * from character $start+$length onwards.
     *
     * @author Andreas Gohr
     * @see substr_replace()
     * @param string  $string      UTF-8 string
     * @param string  $replacement text to insert
     * @param integer $start       character offset of the replaced span
     * @param integer $length      number of characters replaced (default 0)
     * @return string
     */
    function utf8_substr_replace($string, $replacement, $start, $length = 0)
    {
        $head = '';
        if ($start > 0) {
            $head = utf8_substr($string, 0, $start);
        }
        $tail = utf8_substr($string, $start + $length);

        return $head . $replacement . $tail;
    }
}
217
if (!function_exists('utf8_ltrim')) {
    /**
     * Unicode aware replacement for ltrim().
     *
     * With an empty $charlist this simply delegates to ltrim(). Otherwise
     * every character of $charlist is treated literally (no ranges) and
     * stripped from the start of $str.
     *
     * @author Andreas Gohr
     * @see ltrim()
     * @param string $str      UTF-8 string
     * @param string $charlist characters to strip
     * @return string
     */
    function utf8_ltrim($str, $charlist = '')
    {
        if ($charlist == '') return ltrim($str);

        // Quote the charlist for use inside a character class. preg_quote()
        // also escapes '^', which the previous hand-written escape missed:
        // a charlist starting with '^' produced a negated or broken class.
        $charlist = preg_quote($charlist, '/');

        return preg_replace('/^[' . $charlist . ']+/u', '', $str);
    }
}
235
if (!function_exists('utf8_rtrim')) {
    /**
     * Unicode aware replacement for rtrim().
     *
     * With an empty $charlist this simply delegates to rtrim(). Otherwise
     * every character of $charlist is treated literally (no ranges) and
     * stripped from the end of $str.
     *
     * @author Andreas Gohr
     * @see rtrim()
     * @param string $str      UTF-8 string
     * @param string $charlist characters to strip
     * @return string
     */
    function utf8_rtrim($str, $charlist = '')
    {
        if ($charlist == '') return rtrim($str);

        // Quote the charlist for use inside a character class. preg_quote()
        // also escapes '^', which the previous hand-written escape missed:
        // a charlist starting with '^' produced a negated or broken class.
        $charlist = preg_quote($charlist, '/');

        return preg_replace('/[' . $charlist . ']+$/u', '', $str);
    }
}
253
if (!function_exists('utf8_trim')) {
    /**
     * Unicode aware replacement for trim().
     *
     * An empty $charlist delegates to trim(); otherwise the string is
     * passed through utf8_rtrim() and then utf8_ltrim().
     *
     * @author Andreas Gohr
     * @see trim()
     * @param string $str      UTF-8 string
     * @param string $charlist characters to strip from both ends
     * @return string
     */
    function utf8_trim($str, $charlist = '')
    {
        if ($charlist == '') {
            return trim($str);
        }

        $rightTrimmed = utf8_rtrim($str, $charlist);

        return utf8_ltrim($rightTrimmed, $charlist);
    }
}
268
if (!function_exists('utf8_strtolower')) {
    /**
     * Unicode aware replacement for strtolower().
     *
     * Uses mb_strtolower() when UTF8_MBSTRING is set, otherwise translates
     * through the global $UTF8_UPPER_TO_LOWER lookup table.
     *
     * @author Leo Feyer
     * @see strtolower()
     * @see utf8_strtoupper()
     * @param string $string UTF-8 string
     * @return string
     */
    function utf8_strtolower($string)
    {
        if (!UTF8_MBSTRING) {
            global $UTF8_UPPER_TO_LOWER;
            return strtr($string, $UTF8_UPPER_TO_LOWER);
        }

        return mb_strtolower($string, 'utf-8');
    }
}
286
if (!function_exists('utf8_strtoupper')) {
    /**
     * Unicode aware replacement for strtoupper().
     *
     * Uses mb_strtoupper() when UTF8_MBSTRING is set, otherwise translates
     * through the global $UTF8_LOWER_TO_UPPER lookup table.
     *
     * @author Leo Feyer
     * @see strtoupper()
     * @see utf8_strtolower()
     * @param string $string UTF-8 string
     * @return string
     */
    function utf8_strtoupper($string)
    {
        if (!UTF8_MBSTRING) {
            global $UTF8_LOWER_TO_UPPER;
            return strtr($string, $UTF8_LOWER_TO_UPPER);
        }

        return mb_strtoupper($string, 'utf-8');
    }
}
304
if (!function_exists('utf8_ucfirst')) {
    /**
     * UTF-8 aware alternative to ucfirst(): make a string's first
     * character uppercase.
     *
     * @author Harry Fuecks
     * @param string $str UTF-8 string
     * @return string with first character as upper case (if applicable)
     */
    function utf8_ucfirst($str)
    {
        $chars = utf8_strlen($str);

        if ($chars == 0) {
            return '';
        }
        if ($chars == 1) {
            return utf8_strtoupper($str);
        }

        // split into the first character and the remainder
        preg_match('/^(.{1})(.*)$/us', $str, $matches);
        $first = $matches[1];
        $rest  = $matches[2];

        return utf8_strtoupper($first) . $rest;
    }
}
326
if (!function_exists('utf8_ucwords')) {
    /**
     * UTF-8 aware alternative to ucwords(): uppercase the first character
     * of each word in a string.
     *
     * @author Harry Fuecks
     * @param string $str UTF-8 string
     * @return string with first char of each word uppercase
     * @see http://www.php.net/ucwords
     */
    function utf8_ucwords($str)
    {
        // [\x0c\x09\x0b\x0a\x0d\x20] matches form feeds, horizontal tabs,
        // vertical tabs, linefeeds, carriage returns and spaces, i.e. the
        // word separators described at http://www.php.net/ucwords
        //
        // Group 2: whitespace run before the word ('' at string start)
        // Group 3: first character of the word
        $pattern = '/(^|([\x0c\x09\x0b\x0a\x0d\x20]+))([^\x0c\x09\x0b\x0a\x0d\x20]{1})[^\x0c\x09\x0b\x0a\x0d\x20]*/u';

        return preg_replace_callback($pattern, 'utf8_ucwords_callback', $str);
    }

    /**
     * Callback for the preg_replace_callback() call in utf8_ucwords().
     * You don't need to call this yourself.
     *
     * @author Harry Fuecks
     * @param array $matches matches corresponding to a single word
     * @return string leading whitespace plus the word with its first char in uppercase
     * @see utf8_ucwords
     * @see utf8_strtoupper
     */
    function utf8_ucwords_callback($matches)
    {
        $leadingws = $matches[2];
        $firstChar = $matches[3];

        // Cut the separators off by their known byte length. ltrim() does
        // not strip form feeds (\x0c), so after a form feed the wrong
        // character used to be replaced.
        $word = substr($matches[0], strlen($leadingws));
        $rest = substr($word, strlen($firstChar));

        return $leadingws . utf8_strtoupper($firstChar) . $rest;
    }
}
363
if (!function_exists('utf8_deaccent')) {
    /**
     * Replace accented UTF-8 characters by unaccented ASCII-7 equivalents.
     *
     * $case selects which lookup tables are applied: -1 only the
     * lower-case table, 1 only the upper-case table, 0 (default) both,
     * lower-case table first.
     *
     * @author Andreas Gohr
     * @param string  $string UTF-8 string
     * @param integer $case   -1, 0 or 1 as described above
     * @return string
     */
    function utf8_deaccent($string, $case = 0)
    {
        $result = $string;

        if ($case <= 0) {
            global $UTF8_LOWER_ACCENTS;
            $result = strtr($result, $UTF8_LOWER_ACCENTS);
        }

        if ($case >= 0) {
            global $UTF8_UPPER_ACCENTS;
            $result = strtr($result, $UTF8_UPPER_ACCENTS);
        }

        return $result;
    }
}
385
if (!function_exists('utf8_romanize')) {
    /**
     * Romanize a non-latin string.
     *
     * Pure ASCII input is returned unchanged; everything else is
     * translated through the global $UTF8_ROMANIZATION table.
     *
     * @author Andreas Gohr
     * @param string $string UTF-8 string
     * @return string
     */
    function utf8_romanize($string)
    {
        if (!utf8_isASCII($string)) {
            global $UTF8_ROMANIZATION;
            return strtr($string, $UTF8_ROMANIZATION);
        }

        return $string; // nothing to do
    }
}
399
if (!function_exists('utf8_stripspecials')) {
    /**
     * Removes special characters (nonalphanumeric) from a UTF-8 string.
     *
     * Besides the characters listed in the global $UTF8_SPECIAL_CHARS2
     * string, the bytes 0x00 to 0x19 and anything passed in $additional
     * are replaced as well. The quoted special-char list is computed on
     * the first call and cached for later calls.
     *
     * @author Andreas Gohr
     * @param string $string     The UTF8 string to strip of special chars
     * @param string $repl       Replace special with this string
     * @param string $additional Additional chars to strip (used verbatim in a regexp char class)
     * @return string
     */
    function utf8_stripspecials($string, $repl = '', $additional = '')
    {
        global $UTF8_SPECIAL_CHARS;
        global $UTF8_SPECIAL_CHARS2;

        static $specials = null;
        if ($specials === null) {
            // the pre-built UTF-8 string is used rather than converting
            // the $UTF8_SPECIAL_CHARS code point list
            $specials = preg_quote($UTF8_SPECIAL_CHARS2, '/');
        }

        $pattern = '/[' . $additional . '\x00-\x19' . $specials . ']/u';

        return preg_replace($pattern, $repl, $string);
    }
}
425
if (!function_exists('utf8_strpos')) {
    /**
     * Unicode aware replacement for strpos().
     *
     * The needle is located with the byte-based strpos(); the byte
     * position is then converted to a character position by measuring
     * the prefix with utf8_strlen(). While the hit lies before the
     * requested character offset, the search restarts further into the
     * string.
     *
     * @author Leo Feyer
     * @see strpos()
     * @param string  $haystack UTF-8 string to search in
     * @param string  $needle   string to look for
     * @param integer $offset   character offset to start searching from
     * @return integer|false character position of the needle, false if not found
     */
    function utf8_strpos($haystack, $needle, $offset = 0)
    {
        $comp   = 0;    // bytes-minus-characters correction for the byte offset
        $length = null; // character position of the latest hit

        while (is_null($length) || $length < $offset) {
            $pos = strpos($haystack, $needle, $offset + $comp);

            if ($pos === false) {
                return false;
            }

            $length = utf8_strlen(substr($haystack, 0, $pos));

            if ($length < $offset) {
                $comp = $pos - $length;
            }
        }

        return $length;
    }
}
456
if (!function_exists('utf8_tohtml')) {
    /**
     * Encodes UTF-8 characters to HTML entities.
     *
     * Code points below 0x80 are emitted as the character itself, those
     * below 0x100 as decimal entities and all others as hexadecimal
     * entities.
     *
     * @author Tom N Harris
     * @author <vpribish at shopping dot com>
     * @link http://www.php.net/manual/en/function.utf8-decode.php
     * @param string $str UTF-8 string
     * @return string
     */
    function utf8_tohtml ($str)
    {
        $pieces = array();

        foreach (utf8_to_unicode($str) as $cp) {
            if ($cp >= 0x100) {
                $pieces[] = '&#x' . dechex($cp) . ';';
            } elseif ($cp >= 0x80) {
                $pieces[] = '&#' . $cp . ';';
            } else {
                $pieces[] = chr($cp);
            }
        }

        return implode('', $pieces);
    }
}
478
if (!function_exists('utf8_unhtml')) {
    /**
     * Decodes HTML entities to UTF-8 characters.
     *
     * By default (no $entities argument) only numeric entities (&#..;
     * and &#x..;) are converted, via utf8_decode_numeric(). When
     * $entities is anything other than null, named entities are handed
     * to utf8_entity_decoder::decode() in the same pass, so the output
     * of one kind of decoding is not fed into the other.
     *
     * @author Tom N Harris
     * @param string  $str      UTF-8 encoded string
     * @param boolean $entities Flag controlling decoding of named entities.
     * @return string UTF-8 encoded string with numeric (and named) entities replaced.
     */
    function utf8_unhtml($str, $entities = null)
    {
        // one decoder instance is shared by all calls
        static $decoder = null;
        if ($decoder === null) {
            $decoder = new utf8_entity_decoder();
        }

        if ($entities === null) {
            $pattern  = '/(&#([Xx])?([0-9A-Za-z]+);)/m';
            $callback = 'utf8_decode_numeric';
        } else {
            $pattern  = '/&(#)?([Xx])?([0-9A-Za-z]+);/m';
            $callback = array(&$decoder, 'decode');
        }

        return preg_replace_callback($pattern, $callback, $str);
    }
}
510
if (!function_exists('utf8_decode_numeric')) {
    /**
     * Callback turning one matched numeric entity into its UTF-8 bytes.
     *
     * @param array $ent regexp match: [2] is 'X' or 'x' for hexadecimal
     *                   entities, [3] holds the digits
     * @return string result of unicode_to_utf8() for the decoded code point
     */
    function utf8_decode_numeric($ent)
    {
        $isHex = in_array($ent[2], array('X', 'x'));
        $cp    = $isHex ? hexdec($ent[3]) : intval($ent[3]);

        return unicode_to_utf8(array($cp));
    }
}
525
if (!class_exists('utf8_entity_decoder')) {
    /**
     * Decoder for named and numeric HTML entities, used as the
     * preg_replace_callback() target of utf8_unhtml().
     */
    class utf8_entity_decoder {
        /** @var array map of '&name;' entity => UTF-8 encoded character */
        var $table;

        /**
         * Builds the entity lookup table.
         *
         * A real __construct() is required: a method named after the class
         * is no longer treated as a constructor on PHP 8, which left
         * $table unset.
         */
        function __construct() {
            // Ask explicitly for the ISO-8859-1 table. Its characters are
            // single bytes equal to their code point, which is what
            // makeutf8() relies on; the UTF-8 table that newer PHP
            // versions return by default has multi-byte keys.
            $table = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'ISO-8859-1');
            $table = array_flip($table);
            $this->table = array_map(array($this, 'makeutf8'), $table);
        }

        /**
         * Converts a single ISO-8859-1 character to UTF-8.
         *
         * @param string $c one-byte character
         * @return string
         */
        function makeutf8($c) {
            return unicode_to_utf8(array(ord($c)));
        }

        /**
         * preg_replace_callback() handler for the pattern used by utf8_unhtml().
         *
         * @param array $ent match: [0] whole entity, [1] '#' for numeric ones
         * @return string decoded character, or the entity itself when unknown
         */
        function decode($ent) {
            if ($ent[1] == '#') {
                return utf8_decode_numeric($ent);
            } elseif (array_key_exists($ent[0], $this->table)) {
                return $this->table[$ent[0]];
            } else {
                return $ent[0];
            }
        }
    }
}
548
if (!function_exists('utf8_to_unicode')) {
    /**
     * Takes a UTF-8 string and returns an array of ints representing the
     * Unicode characters. Astral planes are supported, i.e. the ints in
     * the output can be > 0xFFFF. Occurrences of the BOM (U+FEFF) are
     * left out of the output.
     *
     * If $strict is true the function returns false and raises a PHP
     * error of level E_USER_WARNING as soon as the input turns out not to
     * be a valid UTF-8 octet sequence (illegal lead byte, missing
     * continuation byte, non-shortest form, surrogate, value above
     * 0x10FFFF).
     *
     * If $strict is false, bytes that fit nowhere are silently skipped
     * and illegally encoded values are still appended to the output.
     *
     * @author Harry Fuecks
     * @param string  $str    UTF-8 encoded string
     * @param boolean $strict Check for invalid sequences?
     * @return mixed array of unicode code points or false if UTF-8 invalid
     * @see unicode_to_utf8
     * @link http://hsivonen.iki.fi/php-utf8/
     * @link http://sourceforge.net/projects/phputf8/
     */
    function utf8_to_unicode($str, $strict = false)
    {
        $mState = 0; // octets still expected before the current sequence is complete
        $mUcs4  = 0; // code point assembled so far
        $mBytes = 1; // total number of octets in the current sequence

        $out = array();
        $len = strlen($str);

        for ($i = 0; $i < $len; $i++) {

            // square-bracket offset: the {} form is a fatal error on PHP 8
            $in = ord($str[$i]);

            if ($mState == 0) {

                // Expect either a US-ASCII character or the first octet
                // of a multi-octet sequence.
                if (0 == (0x80 & $in)) {
                    // US-ASCII, pass straight through.
                    $out[] = $in;
                    $mBytes = 1;

                } elseif (0xC0 == (0xE0 & $in)) {
                    // First octet of 2 octet sequence
                    $mUcs4  = ($in & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;

                } elseif (0xE0 == (0xF0 & $in)) {
                    // First octet of 3 octet sequence
                    $mUcs4  = ($in & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;

                } elseif (0xF0 == (0xF8 & $in)) {
                    // First octet of 4 octet sequence
                    $mUcs4  = ($in & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;

                } elseif (0xF8 == (0xFC & $in)) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be
                     * either (a) not the shortest form or (b) outside the
                     * Unicode range of 0-0x10FFFF. Rather than trying to
                     * resynchronize, carry on until the end of the sequence
                     * and let the later error handling code catch it.
                     */
                    $mUcs4  = ($in & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;

                } elseif (0xFC == (0xFE & $in)) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4  = ($in & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;

                } elseif ($strict) {
                    // Neither US-ASCII nor a legal first octet of a
                    // multi-octet sequence.
                    trigger_error(
                        'utf8_to_unicode: Illegal sequence identifier '.
                        'in UTF-8 at byte '.$i,
                        E_USER_WARNING
                    );
                    return false;
                }

            } else {

                // Inside a multi-octet sequence: a continuation octet is expected.
                if (0x80 == (0xC0 & $in)) {

                    // Legal continuation: merge its six payload bits.
                    $shift  = ($mState - 1) * 6;
                    $mUcs4 |= ($in & 0x0000003F) << $shift;

                    // End of the multi-octet sequence: $mUcs4 now holds
                    // the final code point.
                    if (0 == --$mState) {

                        $illegal =
                            // From Unicode 3.1, non-shortest form is illegal
                            ((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
                            ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
                            ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
                            (4 < $mBytes) ||
                            // From Unicode 3.2, surrogate characters are illegal
                            (($mUcs4 & 0xFFFFF800) == 0xD800) ||
                            // Codepoints outside the Unicode range are illegal
                            ($mUcs4 > 0x10FFFF);

                        if ($illegal && $strict) {
                            trigger_error(
                                'utf8_to_unicode: Illegal sequence or codepoint '.
                                'in UTF-8 at byte '.$i,
                                E_USER_WARNING
                            );
                            return false;
                        }

                        if (0xFEFF != $mUcs4) {
                            // BOM is legal but we don't want to output it
                            $out[] = $mUcs4;
                        }

                        // reset for the next character
                        $mState = 0;
                        $mUcs4  = 0;
                        $mBytes = 1;
                    }

                } elseif ($strict) {
                    // A sequence was started but this octet is not a continuation.
                    trigger_error(
                        'utf8_to_unicode: Incomplete multi-octet '.
                        ' sequence in UTF-8 at byte '.$i,
                        E_USER_WARNING
                    );
                    return false;
                }
            }
        }
        return $out;
    }
}
722
if (!function_exists('unicode_to_utf8')) {
    /**
     * Takes an array of ints representing the Unicode characters and
     * returns a UTF-8 string. Astral planes are supported, i.e. the ints
     * in the input can be > 0xFFFF. Occurrences of the BOM (U+FEFF) are
     * skipped. Surrogates and values outside 0..0x10FFFF are not encoded.
     *
     * If $strict is true the function returns false and raises a PHP
     * error of level E_USER_WARNING when the array contains a surrogate
     * or a value outside the Unicode range; otherwise such values are
     * silently skipped.
     *
     * The result is built by plain string concatenation. The previous
     * output-buffer based version left its buffer open whenever strict
     * mode returned false.
     *
     * @param array   $arr    unicode code points representing a string
     * @param boolean $strict Check for invalid sequences?
     * @return mixed UTF-8 string ('' for non-array input) or false if array contains invalid code points
     * @author Harry Fuecks
     * @see utf8_to_unicode
     * @link http://hsivonen.iki.fi/php-utf8/
     * @link http://sourceforge.net/projects/phputf8/
     */
    function unicode_to_utf8($arr, $strict = false)
    {
        if (!is_array($arr)) return '';

        $utf8 = '';

        foreach ($arr as $k => $cp) {

            if ($cp < 0 || $cp > 0x10ffff) {
                // Out of range. Negative values used to slip into the
                // 2-byte branch and produce garbage bytes.
                if ($strict) {
                    trigger_error(
                        'unicode_to_utf8: Codepoint out of Unicode range '.
                        'at index: '.$k.', value: '.$cp,
                        E_USER_WARNING
                    );
                    return false;
                }

            } elseif ($cp <= 0x007f) {
                // ASCII range (including control chars)
                $utf8 .= chr($cp);

            } elseif ($cp <= 0x07ff) {
                // 2 byte sequence
                $utf8 .= chr(0xc0 | ($cp >> 6));
                $utf8 .= chr(0x80 | ($cp & 0x003f));

            } elseif ($cp == 0xFEFF) {
                // Byte order mark: skip

            } elseif ($cp >= 0xD800 && $cp <= 0xDFFF) {
                // Surrogates are illegal as code points
                if ($strict) {
                    trigger_error(
                        'unicode_to_utf8: Illegal surrogate '.
                        'at index: '.$k.', value: '.$cp,
                        E_USER_WARNING
                    );
                    return false;
                }

            } elseif ($cp <= 0xffff) {
                // 3 byte sequence
                $utf8 .= chr(0xe0 | ($cp >> 12));
                $utf8 .= chr(0x80 | (($cp >> 6) & 0x003f));
                $utf8 .= chr(0x80 | ($cp & 0x003f));

            } else {
                // 4 byte sequence (0x10000..0x10ffff)
                $utf8 .= chr(0xf0 | ($cp >> 18));
                $utf8 .= chr(0x80 | (($cp >> 12) & 0x3f));
                $utf8 .= chr(0x80 | (($cp >> 6) & 0x3f));
                $utf8 .= chr(0x80 | ($cp & 0x3f));
            }
        }

        return $utf8;
    }
}
815
if (!function_exists('utf8_to_utf16be')) {
    /**
     * UTF-8 to UTF-16BE conversion.
     *
     * Uses mb_convert_encoding() when UTF8_MBSTRING is set. The fallback
     * decodes with utf8_to_unicode() and writes each code point as a
     * big-endian 16-bit unit; code points in 0x10000..0x10FFFF are written
     * as a surrogate pair instead of being truncated to their low 16 bits.
     *
     * @param string  $str UTF-8 string (taken by reference, not modified)
     * @param boolean $bom prepend the big-endian byte order mark?
     * @return string
     */
    function utf8_to_utf16be(&$str, $bom = false)
    {
        $out = $bom ? "\xFE\xFF" : '';
        if (UTF8_MBSTRING) return $out.mb_convert_encoding($str, 'UTF-16BE', 'UTF-8');

        $uni = utf8_to_unicode($str);
        foreach ($uni as $cp) {
            if ($cp > 0xFFFF && $cp <= 0x10FFFF) {
                // split into high/low surrogate
                $cp  -= 0x10000;
                $out .= pack('nn', 0xD800 | ($cp >> 10), 0xDC00 | ($cp & 0x3FF));
            } else {
                $out .= pack('n', $cp);
            }
        }
        return $out;
    }
}
833
if (!function_exists('utf16be_to_utf8')) {
    /**
     * UTF-16BE to UTF-8 conversion.
     *
     * The input is unpacked into big-endian 16-bit units. A high
     * surrogate directly followed by a low surrogate is combined into one
     * code point; all other units are handed to unicode_to_utf8() as they
     * are.
     *
     * @param string $str UTF-16BE bytes (taken by reference, not modified)
     * @return string result of unicode_to_utf8() for the decoded units
     */
    function utf16be_to_utf8(&$str)
    {
        $units = unpack('n*', $str);
        if (!is_array($units)) {
            // let unicode_to_utf8() deal with a failed unpack() as before
            return unicode_to_utf8($units);
        }

        $uni  = array();
        $high = null; // pending high surrogate, if any

        foreach ($units as $unit) {
            if ($high !== null) {
                if ($unit >= 0xDC00 && $unit <= 0xDFFF) {
                    $uni[] = 0x10000 + (($high - 0xD800) << 10) + ($unit - 0xDC00);
                    $high  = null;
                    continue;
                }
                // unpaired high surrogate: pass it on unchanged
                $uni[] = $high;
                $high  = null;
            }

            if ($unit >= 0xD800 && $unit <= 0xDBFF) {
                $high = $unit;
            } else {
                $uni[] = $unit;
            }
        }
        if ($high !== null) {
            $uni[] = $high;
        }

        return unicode_to_utf8($uni);
    }
}
845
if (!function_exists('utf8_bad_replace')) {
    /**
     * Replace bad bytes with an alternative character.
     *
     * An ASCII character is recommended as replacement.
     *
     * The PCRE pattern used to locate bad bytes comes from the W3 FAQ
     * "Multilingual Forms", modified to include the full ASCII range
     * including control chars. It is applied in byte mode; whenever only
     * its last alternative (a single arbitrary byte) matches, that byte is
     * treated as bad.
     *
     * @author Harry Fuecks
     * @see http://www.w3.org/International/questions/qa-forms-utf-8
     * @param string $str     string to search
     * @param string $replace string to replace bad bytes with (defaults to '', i.e. bad bytes are removed) - use ASCII
     * @return string
     */
    function utf8_bad_replace($str, $replace = '')
    {
        $UTF8_BAD =
            '([\x00-\x7F]'.                          // ASCII (including control chars)
            '|[\xC2-\xDF][\x80-\xBF]'.               // non-overlong 2-byte
            '|\xE0[\xA0-\xBF][\x80-\xBF]'.           // excluding overlongs
            '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'.    // straight 3-byte
            '|\xED[\x80-\x9F][\x80-\xBF]'.           // excluding surrogates
            '|\xF0[\x90-\xBF][\x80-\xBF]{2}'.        // planes 1-3
            '|[\xF1-\xF3][\x80-\xBF]{3}'.            // planes 4-15
            '|\xF4[\x80-\x8F][\x80-\xBF]{2}'.        // plane 16
            '|(.{1}))';                              // invalid byte
        $pattern = '/'.$UTF8_BAD.'/S';

        $result = '';
        $offset = 0;

        // Walk the string with a moving offset rather than re-copying the
        // remainder after every match. Some alternative matches at every
        // byte position, so each match starts exactly at $offset.
        while (preg_match($pattern, $str, $matches, 0, $offset)) {
            // group 2 only takes part in the match for an invalid byte
            $result .= isset($matches[2]) ? $replace : $matches[0];
            $offset += strlen($matches[0]);
        }

        return $result;
    }
}
887
if (!function_exists('utf8_correctIdx')) {
    /**
     * Adjust a byte index into a UTF-8 string so that it does not point
     * at a continuation byte (10bbbbbb).
     *
     * @param string $str  utf8 character string (taken by reference, not modified)
     * @param int    $i    byte index into $str
     * @param bool   $next direction to search for the boundary:
     *                     false = move towards the start (current character),
     *                     true  = move towards the end (next character)
     *
     * @return int byte index in the range 0..strlen($str)
     *
     * @author chris smith
     */
    function utf8_correctIdx(&$str, $i, $next = false)
    {
        if ($i <= 0) {
            return 0;
        }

        $limit = strlen($str);
        if ($i >= $limit) {
            return $limit;
        }

        if ($next) {
            // step forward past continuation bytes
            while ($i < $limit) {
                if ((ord($str[$i]) & 0xC0) != 0x80) break;
                $i++;
            }
        } else {
            // step back to the lead byte of the current character
            while ($i) {
                if ((ord($str[$i]) & 0xC0) != 0x80) break;
                $i--;
            }
        }

        return $i;
    }
}
918
919	// only needed if no mb_string available
920	if(!UTF8_MBSTRING){
921	/**
922	* UTF-8 Case lookup table
923	*
924	* This lookuptable defines the upper case letters to their correspponding
925	* lower case letter in UTF-8
926	*
927	* @author Andreas Gohr <[email protected]>
928	*/
929	global $UTF8_LOWER_TO_UPPER;
930	if(empty($UTF8_LOWER_TO_UPPER)) $UTF8_LOWER_TO_UPPER = array(
931	"ïœ"=>"ïŒº","ïœ"=>"ïŒ¹","ïœ"=>"ïŒž","ïœ"=>"ïŒ·","ïœ"=>"ïŒ¶","ïœ"=>"ïŒµ","ïœ"=>"ïŒŽ","ïœ"=>"ïŒ³","ïœ"=>"ïŒ²","ïœ"=>"ïŒ±",
932	"ïœ"=>"ïŒ°","ïœ"=>"ïŒ¯","ïœ"=>"ïŒ®","ïœ"=>"ïŒ","ïœ"=>"ïŒ¬","ïœ"=>"ïŒ«","ïœ"=>"ïŒª","ïœ"=>"ïŒ©","ïœ"=>"ïŒš","ïœ"=>"ïŒ§",
933	"ïœ"=>"ïŒŠ","ïœ
934	"=>"ïŒ¥","ïœ"=>"ïŒ€","ïœ"=>"ïŒ£","ïœ"=>"ïŒ¢","ïœ"=>"ïŒ¡","á¿³"=>"á¿Œ","á¿¥"=>"á¿¬","á¿¡"=>"á¿©","á¿"=>"á¿",
935	"á¿"=>"á¿","á¿"=>"á¿","áŸŸ"=>"Î","áŸ³"=>"áŸŒ","áŸ±"=>"áŸ¹","áŸ°"=>"áŸž","áŸ§"=>"áŸ¯","áŸŠ"=>"áŸ®","áŸ¥"=>"áŸ","áŸ€"=>"áŸ¬",
936	"áŸ£"=>"áŸ«","áŸ¢"=>"áŸª","áŸ¡"=>"áŸ©","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ",
937	"áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ
938	"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áœœ"=>"á¿»",
939	"áœŒ"=>"á¿º","áœ»"=>"á¿«","áœº"=>"á¿ª","áœ¹"=>"á¿¹","áœž"=>"á¿ž","áœ·"=>"á¿","áœ¶"=>"á¿","áœµ"=>"á¿","áœŽ"=>"á¿","áœ³"=>"á¿",
940	"áœ²"=>"á¿","áœ±"=>"áŸ»","áœ°"=>"áŸº","áœ§"=>"áœ¯","áœŠ"=>"áœ®","áœ¥"=>"áœ","áœ€"=>"áœ¬","áœ£"=>"áœ«","áœ¢"=>"áœª","áœ¡"=>"áœ©",
941	"áœ"=>"áœ","áœ"=>"áœ","áœ"=>"áœ","áœ"=>"áœ","áœ
942	"=>"áœ","áœ"=>"áœ","áœ"=>"áœ","áœ"=>"áœ","áœ"=>"áœ","áœ"=>"áœ",
943	"áŒ·"=>"áŒ¿","áŒ¶"=>"áŒŸ","áŒµ"=>"áŒœ","áŒŽ"=>"áŒŒ","áŒ³"=>"áŒ»","áŒ²"=>"áŒº","áŒ±"=>"áŒ¹","áŒ°"=>"áŒž","áŒ§"=>"áŒ¯","áŒŠ"=>"áŒ®",
944	"áŒ¥"=>"áŒ","áŒ€"=>"áŒ¬","áŒ£"=>"áŒ«","áŒ¢"=>"áŒª","áŒ¡"=>"áŒ©","áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ",
945	"áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ
946	"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ","á»¹"=>"á»ž",
947	"á»·"=>"á»¶","á»µ"=>"á»Ž","á»³"=>"á»²","á»±"=>"á»°","á»¯"=>"á»®","á»"=>"á»¬","á»«"=>"á»ª","á»©"=>"á»š","á»§"=>"á»Š","á»¥"=>"á»€",
948	"á»£"=>"á»¢","á»¡"=>"á» ","á»"=>"á»","á»"=>"á»","á»"=>"á»","á»"=>"á»","á»"=>"á»","á»"=>"á»","á»"=>"á»","á»"=>"á»",
949	"á»"=>"á»","á»"=>"á»","á»"=>"á»","á»"=>"á»","á»"=>"á»","á»
950	"=>"á»","á»"=>"á»","á»"=>"á»","áº¿"=>"áºŸ","áºœ"=>"áºŒ",
951	"áº»"=>"áºº","áº¹"=>"áºž","áº·"=>"áº¶","áºµ"=>"áºŽ","áº³"=>"áº²","áº±"=>"áº°","áº¯"=>"áº®","áº"=>"áº¬","áº«"=>"áºª","áº©"=>"áºš",
952	"áº§"=>"áºŠ","áº¥"=>"áº€","áº£"=>"áº¢","áº¡"=>"áº ","áº"=>"á¹ ","áº"=>"áº","áº"=>"áº","áº"=>"áº","áº"=>"áº","áº"=>"áº",
953	"áº"=>"áº","áº"=>"áº","áº"=>"áº","áº
954	"=>"áº","áº"=>"áº","áº"=>"áº","á¹¿"=>"á¹Ÿ","á¹œ"=>"á¹Œ","á¹»"=>"á¹º","á¹¹"=>"á¹ž",
955	"á¹·"=>"á¹¶","á¹µ"=>"á¹Ž","á¹³"=>"á¹²","á¹±"=>"á¹°","á¹¯"=>"á¹®","á¹"=>"á¹¬","á¹«"=>"á¹ª","á¹©"=>"á¹š","á¹§"=>"á¹Š","á¹¥"=>"á¹€",
956	"á¹£"=>"á¹¢","á¹¡"=>"á¹ ","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹",
957	"á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹
958	"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","áž¿"=>"ážŸ","ážœ"=>"ážŒ",
959	"áž»"=>"ážº","áž¹"=>"ážž","áž·"=>"áž¶","ážµ"=>"ážŽ","áž³"=>"áž²","áž±"=>"áž°","áž¯"=>"áž®","áž"=>"áž¬","áž«"=>"ážª","áž©"=>"ážš",
960	"áž§"=>"ážŠ","áž¥"=>"áž€","áž£"=>"áž¢","áž¡"=>"áž ","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž",
961	"áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž
962	"=>"áž","áž"=>"áž","áž"=>"áž",
963	"Ö"=>"Õ","Ö
964	"=>"Õ","Ö"=>"Õ","Ö"=>"Õ","Ö"=>"Õ","Ö"=>"Õ","Ö"=>"Õ","Õ¿"=>"Õ","ÕŸ"=>"Õ","Õœ"=>"Õ",
965	"ÕŒ"=>"Õ","Õ»"=>"Õ","Õº"=>"Õ","Õ¹"=>"Õ","Õž"=>"Õ","Õ·"=>"Õ","Õ¶"=>"Õ","Õµ"=>"Õ
966	","ÕŽ"=>"Õ","Õ³"=>"Õ",
967	"Õ²"=>"Õ","Õ±"=>"Õ","Õ°"=>"Õ","Õ¯"=>"Ô¿","Õ®"=>"ÔŸ","Õ"=>"Ôœ","Õ¬"=>"ÔŒ","Õ«"=>"Ô»","Õª"=>"Ôº","Õ©"=>"Ô¹",
968	"Õš"=>"Ôž","Õ§"=>"Ô·","ÕŠ"=>"Ô¶","Õ¥"=>"Ôµ","Õ€"=>"ÔŽ","Õ£"=>"Ô³","Õ¢"=>"Ô²","Õ¡"=>"Ô±","Ô"=>"Ô","Ô"=>"Ô",
969	"Ô"=>"Ô","Ô"=>"Ô","Ô"=>"Ô","Ô
970	"=>"Ô","Ô"=>"Ô","Ô"=>"Ô","Ó¹"=>"Óž","Óµ"=>"ÓŽ","Ó³"=>"Ó²","Ó±"=>"Ó°",
971	"Ó¯"=>"Ó®","Ó"=>"Ó¬","Ó«"=>"Óª","Ó©"=>"Óš","Ó§"=>"ÓŠ","Ó¥"=>"Ó€","Ó£"=>"Ó¢","Ó¡"=>"Ó ","Ó"=>"Ó","Ó"=>"Ó",
972	"Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó",
973	"Ó"=>"Ó
974	","Ó"=>"Ó","Ó"=>"Ó","Ò¿"=>"ÒŸ","Òœ"=>"ÒŒ","Ò»"=>"Òº","Ò¹"=>"Òž","Ò·"=>"Ò¶","Òµ"=>"ÒŽ","Ò³"=>"Ò²",
975	"Ò±"=>"Ò°","Ò¯"=>"Ò®","Ò"=>"Ò¬","Ò«"=>"Òª","Ò©"=>"Òš","Ò§"=>"ÒŠ","Ò¥"=>"Ò€","Ò£"=>"Ò¢","Ò¡"=>"Ò ","Ò"=>"Ò",
976	"Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò",
977	"Ò"=>"Ò","Ñ¿"=>"ÑŸ","Ñœ"=>"ÑŒ","Ñ»"=>"Ñº","Ñ¹"=>"Ñž","Ñ·"=>"Ñ¶","Ñµ"=>"ÑŽ","Ñ³"=>"Ñ²","Ñ±"=>"Ñ°","Ñ¯"=>"Ñ®",
978	"Ñ"=>"Ñ¬","Ñ«"=>"Ñª","Ñ©"=>"Ñš","Ñ§"=>"ÑŠ","Ñ¥"=>"Ñ€","Ñ£"=>"Ñ¢","Ñ¡"=>"Ñ ","Ñ"=>"Ð","Ñ"=>"Ð","Ñ"=>"Ð",
979	"Ñ"=>"Ð","Ñ"=>"Ð","Ñ"=>"Ð","Ñ"=>"Ð","Ñ"=>"Ð","Ñ"=>"Ð","Ñ"=>"Ð","Ñ"=>"Ð
980	","Ñ"=>"Ð","Ñ"=>"Ð",
981	"Ñ"=>"Ð","Ñ"=>"Ð","Ñ"=>"Ð","Ñ"=>"Ð¯","Ñ"=>"Ð®","Ñ"=>"Ð","Ñ"=>"Ð¬","Ñ"=>"Ð«","Ñ"=>"Ðª","Ñ"=>"Ð©",
982	"Ñ"=>"Ðš","Ñ"=>"Ð§","Ñ"=>"ÐŠ","Ñ
983	"=>"Ð¥","Ñ"=>"Ð€","Ñ"=>"Ð£","Ñ"=>"Ð¢","Ñ"=>"Ð¡","Ñ"=>"Ð ","Ð¿"=>"Ð",
984	"ÐŸ"=>"Ð","Ðœ"=>"Ð","ÐŒ"=>"Ð","Ð»"=>"Ð","Ðº"=>"Ð","Ð¹"=>"Ð","Ðž"=>"Ð","Ð·"=>"Ð","Ð¶"=>"Ð","Ðµ"=>"Ð",
985	"ÐŽ"=>"Ð","Ð³"=>"Ð","Ð²"=>"Ð","Ð±"=>"Ð","Ð°"=>"Ð","Ïµ"=>"Î","Ï²"=>"Î£","Ï±"=>"Î¡","Ï°"=>"Î","Ï¯"=>"Ï®",
986	"Ï"=>"Ï¬","Ï«"=>"Ïª","Ï©"=>"Ïš","Ï§"=>"ÏŠ","Ï¥"=>"Ï€","Ï£"=>"Ï¢","Ï¡"=>"Ï ","Ï"=>"Ï","Ï"=>"Ï","Ï"=>"Ï",
987	"Ï"=>"Ï","Ï"=>"Î ","Ï"=>"ÎŠ","Ï"=>"Î","Ï"=>"Î","Ï"=>"Î","Ï"=>"Î","Ï"=>"Î","Ï"=>"Î«","Ï"=>"Îª",
988	"Ï"=>"Î©","Ï"=>"Îš","Ï"=>"Î§","Ï"=>"ÎŠ","Ï
989	"=>"Î¥","Ï"=>"Î€","Ï"=>"Î£","Ï"=>"Î£","Ï"=>"Î¡","Ï"=>"Î ",
990	"Î¿"=>"Î","ÎŸ"=>"Î","Îœ"=>"Î","ÎŒ"=>"Î","Î»"=>"Î","Îº"=>"Î","Î¹"=>"Î","Îž"=>"Î","Î·"=>"Î","Î¶"=>"Î",
991	"Îµ"=>"Î","ÎŽ"=>"Î","Î³"=>"Î","Î²"=>"Î","Î±"=>"Î","Î¯"=>"Î","Î®"=>"Î","Î"=>"Î","Î¬"=>"Î","Ê"=>"Æ·",
992	"Ê"=>"Æ²","Ê"=>"Æ±","Ê"=>"Æ®","Ê"=>"Æ©","Ê"=>"ÆŠ","Éµ"=>"Æ","É²"=>"Æ","É¯"=>"Æ","É©"=>"Æ","Éš"=>"Æ",
993	"É£"=>"Æ","É"=>"Æ","É"=>"Æ","É"=>"Æ","É"=>"Æ","É"=>"Æ","É"=>"Æ","È³"=>"È²","È±"=>"È°","È¯"=>"È®",
994	"È"=>"È¬","È«"=>"Èª","È©"=>"Èš","È§"=>"ÈŠ","È¥"=>"È€","È£"=>"È¢","È"=>"È","È"=>"È","È"=>"È","È"=>"È",
995	"È"=>"È","È"=>"È","È"=>"È","È"=>"È","È"=>"È","È"=>"È","È"=>"È","È"=>"È","È"=>"È","È
996	"=>"È",
997	"È"=>"È","È"=>"È","Ç¿"=>"ÇŸ","Çœ"=>"ÇŒ","Ç»"=>"Çº","Ç¹"=>"Çž","Çµ"=>"ÇŽ","Ç³"=>"Ç²","Ç¯"=>"Ç®","Ç"=>"Ç¬",
998	"Ç«"=>"Çª","Ç©"=>"Çš","Ç§"=>"ÇŠ","Ç¥"=>"Ç€","Ç£"=>"Ç¢","Ç¡"=>"Ç ","Ç"=>"Ç","Ç"=>"Æ","Ç"=>"Ç","Ç"=>"Ç",
999	"Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç
1000	","Æ¿"=>"Ç·",
1001	"Æœ"=>"ÆŒ","Æ¹"=>"Æž","Æ¶"=>"Æµ","ÆŽ"=>"Æ³","Æ°"=>"Æ¯","Æ"=>"Æ¬","Æš"=>"Æ§","Æ¥"=>"Æ€","Æ£"=>"Æ¢","Æ¡"=>"Æ ",
1002	"Æ"=>"È ","Æ"=>"Æ","Æ"=>"Ç¶","Æ"=>"Æ","Æ"=>"Æ","Æ"=>"Æ","Æ
1003	"=>"Æ","Æ"=>"Æ","Å¿"=>"S","ÅŸ"=>"Åœ",
1004	"ÅŒ"=>"Å»","Åº"=>"Å¹","Å·"=>"Å¶","Åµ"=>"ÅŽ","Å³"=>"Å²","Å±"=>"Å°","Å¯"=>"Å®","Å"=>"Å¬","Å«"=>"Åª","Å©"=>"Åš",
1005	"Å§"=>"ÅŠ","Å¥"=>"Å€","Å£"=>"Å¢","Å¡"=>"Å ","Å"=>"Å","Å"=>"Å","Å"=>"Å","Å"=>"Å","Å"=>"Å","Å"=>"Å",
1006	"Å"=>"Å","Å"=>"Å","Å"=>"Å","Å"=>"Å","Å"=>"Å","Å"=>"Å","Å"=>"Å
1007	","Å"=>"Å","Å"=>"Å","Å"=>"Ä¿",
1008	"ÄŸ"=>"Äœ","ÄŒ"=>"Ä»","Äº"=>"Ä¹","Ä·"=>"Ä¶","Äµ"=>"ÄŽ","Ä³"=>"Ä²","Ä±"=>"I","Ä¯"=>"Ä®","Ä"=>"Ä¬","Ä«"=>"Äª",
1009	"Ä©"=>"Äš","Ä§"=>"ÄŠ","Ä¥"=>"Ä€","Ä£"=>"Ä¢","Ä¡"=>"Ä ","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä",
1010	"Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä
1011	"=>"Ä","Ä"=>"Ä",
1012	"Ä"=>"Ä","Ã¿"=>"Åž","ÃŸ"=>"Ã","Ãœ"=>"Ã","ÃŒ"=>"Ã","Ã»"=>"Ã","Ãº"=>"Ã","Ã¹"=>"Ã","Ãž"=>"Ã","Ã¶"=>"Ã",
1013	"Ãµ"=>"Ã","ÃŽ"=>"Ã","Ã³"=>"Ã","Ã²"=>"Ã","Ã±"=>"Ã","Ã°"=>"Ã","Ã¯"=>"Ã","Ã®"=>"Ã","Ã"=>"Ã","Ã¬"=>"Ã",
1014	"Ã«"=>"Ã","Ãª"=>"Ã","Ã©"=>"Ã","Ãš"=>"Ã","Ã§"=>"Ã","ÃŠ"=>"Ã","Ã¥"=>"Ã
1015	","Ã€"=>"Ã","Ã£"=>"Ã","Ã¢"=>"Ã",
1016	"Ã¡"=>"Ã","Ã "=>"Ã","Âµ"=>"Î","z"=>"Z","y"=>"Y","x"=>"X","w"=>"W","v"=>"V","u"=>"U","t"=>"T",
1017	"s"=>"S","r"=>"R","q"=>"Q","p"=>"P","o"=>"O","n"=>"N","m"=>"M","l"=>"L","k"=>"K","j"=>"J",
1018	"i"=>"I","h"=>"H","g"=>"G","f"=>"F","e"=>"E","d"=>"D","c"=>"C","b"=>"B","a"=>"A"
1019	);
1020
1021	/**
1022	* UTF-8 Case lookup table
1023	*
1024	* This lookup table maps upper case letters to their corresponding
1025	* lower case letters in UTF-8
1026	*
1027	* @author Andreas Gohr <[email protected]>
1028	*/
1029	global $UTF8_UPPER_TO_LOWER;
1030	if(empty($UTF8_UPPER_TO_LOWER)) $UTF8_UPPER_TO_LOWER = array (
1031	"ïŒº"=>"ïœ","ïŒ¹"=>"ïœ","ïŒž"=>"ïœ","ïŒ·"=>"ïœ","ïŒ¶"=>"ïœ","ïŒµ"=>"ïœ","ïŒŽ"=>"ïœ","ïŒ³"=>"ïœ","ïŒ²"=>"ïœ","ïŒ±"=>"ïœ",
1032	"ïŒ°"=>"ïœ","ïŒ¯"=>"ïœ","ïŒ®"=>"ïœ","ïŒ"=>"ïœ","ïŒ¬"=>"ïœ","ïŒ«"=>"ïœ","ïŒª"=>"ïœ","ïŒ©"=>"ïœ","ïŒš"=>"ïœ","ïŒ§"=>"ïœ",
1033	"ïŒŠ"=>"ïœ","ïŒ¥"=>"ïœ
1034	","ïŒ€"=>"ïœ","ïŒ£"=>"ïœ","ïŒ¢"=>"ïœ","ïŒ¡"=>"ïœ","á¿Œ"=>"á¿³","á¿¬"=>"á¿¥","á¿©"=>"á¿¡","á¿"=>"á¿",
1035	"á¿"=>"á¿","á¿"=>"á¿","Î"=>"áŸŸ","áŸŒ"=>"áŸ³","áŸ¹"=>"áŸ±","áŸž"=>"áŸ°","áŸ¯"=>"áŸ§","áŸ®"=>"áŸŠ","áŸ"=>"áŸ¥","áŸ¬"=>"áŸ€",
1036	"áŸ«"=>"áŸ£","áŸª"=>"áŸ¢","áŸ©"=>"áŸ¡","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ",
1037	"áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ
1038	","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","áŸ"=>"áŸ","á¿»"=>"áœœ",
1039	"á¿º"=>"áœŒ","á¿«"=>"áœ»","á¿ª"=>"áœº","á¿¹"=>"áœ¹","á¿ž"=>"áœž","á¿"=>"áœ·","á¿"=>"áœ¶","á¿"=>"áœµ","á¿"=>"áœŽ","á¿"=>"áœ³",
1040	"á¿"=>"áœ²","áŸ»"=>"áœ±","áŸº"=>"áœ°","áœ¯"=>"áœ§","áœ®"=>"áœŠ","áœ"=>"áœ¥","áœ¬"=>"áœ€","áœ«"=>"áœ£","áœª"=>"áœ¢","áœ©"=>"áœ¡",
1041	"áœ"=>"áœ","áœ"=>"áœ","áœ"=>"áœ","áœ"=>"áœ","áœ"=>"áœ
1042	","áœ"=>"áœ","áœ"=>"áœ","áœ"=>"áœ","áœ"=>"áœ","áœ"=>"áœ",
1043	"áŒ¿"=>"áŒ·","áŒŸ"=>"áŒ¶","áŒœ"=>"áŒµ","áŒŒ"=>"áŒŽ","áŒ»"=>"áŒ³","áŒº"=>"áŒ²","áŒ¹"=>"áŒ±","áŒž"=>"áŒ°","áŒ¯"=>"áŒ§","áŒ®"=>"áŒŠ",
1044	"áŒ"=>"áŒ¥","áŒ¬"=>"áŒ€","áŒ«"=>"áŒ£","áŒª"=>"áŒ¢","áŒ©"=>"áŒ¡","áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ",
1045	"áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ
1046	","áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ","áŒ"=>"áŒ","á»ž"=>"á»¹",
1047	"á»¶"=>"á»·","á»Ž"=>"á»µ","á»²"=>"á»³","á»°"=>"á»±","á»®"=>"á»¯","á»¬"=>"á»","á»ª"=>"á»«","á»š"=>"á»©","á»Š"=>"á»§","á»€"=>"á»¥",
1048	"á»¢"=>"á»£","á» "=>"á»¡","á»"=>"á»","á»"=>"á»","á»"=>"á»","á»"=>"á»","á»"=>"á»","á»"=>"á»","á»"=>"á»","á»"=>"á»",
1049	"á»"=>"á»","á»"=>"á»","á»"=>"á»","á»"=>"á»","á»"=>"á»","á»"=>"á»
1050	","á»"=>"á»","á»"=>"á»","áºŸ"=>"áº¿","áºŒ"=>"áºœ",
1051	"áºº"=>"áº»","áºž"=>"áº¹","áº¶"=>"áº·","áºŽ"=>"áºµ","áº²"=>"áº³","áº°"=>"áº±","áº®"=>"áº¯","áº¬"=>"áº","áºª"=>"áº«","áºš"=>"áº©",
1052	"áºŠ"=>"áº§","áº€"=>"áº¥","áº¢"=>"áº£","áº "=>"áº¡","á¹ "=>"áº","áº"=>"áº","áº"=>"áº","áº"=>"áº","áº"=>"áº","áº"=>"áº",
1053	"áº"=>"áº","áº"=>"áº","áº"=>"áº","áº"=>"áº
1054	","áº"=>"áº","áº"=>"áº","á¹Ÿ"=>"á¹¿","á¹Œ"=>"á¹œ","á¹º"=>"á¹»","á¹ž"=>"á¹¹",
1055	"á¹¶"=>"á¹·","á¹Ž"=>"á¹µ","á¹²"=>"á¹³","á¹°"=>"á¹±","á¹®"=>"á¹¯","á¹¬"=>"á¹","á¹ª"=>"á¹«","á¹š"=>"á¹©","á¹Š"=>"á¹§","á¹€"=>"á¹¥",
1056	"á¹¢"=>"á¹£","á¹ "=>"á¹¡","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹",
1057	"á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹","á¹"=>"á¹
1058	","á¹"=>"á¹","á¹"=>"á¹","ážŸ"=>"áž¿","ážŒ"=>"ážœ",
1059	"ážº"=>"áž»","ážž"=>"áž¹","áž¶"=>"áž·","ážŽ"=>"ážµ","áž²"=>"áž³","áž°"=>"áž±","áž®"=>"áž¯","áž¬"=>"áž","ážª"=>"áž«","ážš"=>"áž©",
1060	"ážŠ"=>"áž§","áž€"=>"áž¥","áž¢"=>"áž£","áž "=>"áž¡","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž",
1061	"áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž","áž"=>"áž
1062	","áž"=>"áž","áž"=>"áž",
1063	"Õ"=>"Ö","Õ"=>"Ö
1064	","Õ"=>"Ö","Õ"=>"Ö","Õ"=>"Ö","Õ"=>"Ö","Õ"=>"Ö","Õ"=>"Õ¿","Õ"=>"ÕŸ","Õ"=>"Õœ",
1065	"Õ"=>"ÕŒ","Õ"=>"Õ»","Õ"=>"Õº","Õ"=>"Õ¹","Õ"=>"Õž","Õ"=>"Õ·","Õ"=>"Õ¶","Õ
1066	"=>"Õµ","Õ"=>"ÕŽ","Õ"=>"Õ³",
1067	"Õ"=>"Õ²","Õ"=>"Õ±","Õ"=>"Õ°","Ô¿"=>"Õ¯","ÔŸ"=>"Õ®","Ôœ"=>"Õ","ÔŒ"=>"Õ¬","Ô»"=>"Õ«","Ôº"=>"Õª","Ô¹"=>"Õ©",
1068	"Ôž"=>"Õš","Ô·"=>"Õ§","Ô¶"=>"ÕŠ","Ôµ"=>"Õ¥","ÔŽ"=>"Õ€","Ô³"=>"Õ£","Ô²"=>"Õ¢","Ô±"=>"Õ¡","Ô"=>"Ô","Ô"=>"Ô",
1069	"Ô"=>"Ô","Ô"=>"Ô","Ô"=>"Ô","Ô"=>"Ô
1070	","Ô"=>"Ô","Ô"=>"Ô","Óž"=>"Ó¹","ÓŽ"=>"Óµ","Ó²"=>"Ó³","Ó°"=>"Ó±",
1071	"Ó®"=>"Ó¯","Ó¬"=>"Ó","Óª"=>"Ó«","Óš"=>"Ó©","ÓŠ"=>"Ó§","Ó€"=>"Ó¥","Ó¢"=>"Ó£","Ó "=>"Ó¡","Ó"=>"Ó","Ó"=>"Ó",
1072	"Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","Ó"=>"Ó",
1073	"Ó
1074	"=>"Ó","Ó"=>"Ó","Ó"=>"Ó","ÒŸ"=>"Ò¿","ÒŒ"=>"Òœ","Òº"=>"Ò»","Òž"=>"Ò¹","Ò¶"=>"Ò·","ÒŽ"=>"Òµ","Ò²"=>"Ò³",
1075	"Ò°"=>"Ò±","Ò®"=>"Ò¯","Ò¬"=>"Ò","Òª"=>"Ò«","Òš"=>"Ò©","ÒŠ"=>"Ò§","Ò€"=>"Ò¥","Ò¢"=>"Ò£","Ò "=>"Ò¡","Ò"=>"Ò",
1076	"Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò","Ò"=>"Ò",
1077	"Ò"=>"Ò","ÑŸ"=>"Ñ¿","ÑŒ"=>"Ñœ","Ñº"=>"Ñ»","Ñž"=>"Ñ¹","Ñ¶"=>"Ñ·","ÑŽ"=>"Ñµ","Ñ²"=>"Ñ³","Ñ°"=>"Ñ±","Ñ®"=>"Ñ¯",
1078	"Ñ¬"=>"Ñ","Ñª"=>"Ñ«","Ñš"=>"Ñ©","ÑŠ"=>"Ñ§","Ñ€"=>"Ñ¥","Ñ¢"=>"Ñ£","Ñ "=>"Ñ¡","Ð"=>"Ñ","Ð"=>"Ñ","Ð"=>"Ñ",
1079	"Ð"=>"Ñ","Ð"=>"Ñ","Ð"=>"Ñ","Ð"=>"Ñ","Ð"=>"Ñ","Ð"=>"Ñ","Ð"=>"Ñ","Ð
1080	"=>"Ñ","Ð"=>"Ñ","Ð"=>"Ñ",
1081	"Ð"=>"Ñ","Ð"=>"Ñ","Ð"=>"Ñ","Ð¯"=>"Ñ","Ð®"=>"Ñ","Ð"=>"Ñ","Ð¬"=>"Ñ","Ð«"=>"Ñ","Ðª"=>"Ñ","Ð©"=>"Ñ",
1082	"Ðš"=>"Ñ","Ð§"=>"Ñ","ÐŠ"=>"Ñ","Ð¥"=>"Ñ
1083	","Ð€"=>"Ñ","Ð£"=>"Ñ","Ð¢"=>"Ñ","Ð¡"=>"Ñ","Ð "=>"Ñ","Ð"=>"Ð¿",
1084	"Ð"=>"ÐŸ","Ð"=>"Ðœ","Ð"=>"ÐŒ","Ð"=>"Ð»","Ð"=>"Ðº","Ð"=>"Ð¹","Ð"=>"Ðž","Ð"=>"Ð·","Ð"=>"Ð¶","Ð"=>"Ðµ",
1085	"Ð"=>"ÐŽ","Ð"=>"Ð³","Ð"=>"Ð²","Ð"=>"Ð±","Ð"=>"Ð°","Î"=>"Ïµ","Î£"=>"Ï²","Î¡"=>"Ï±","Î"=>"Ï°","Ï®"=>"Ï¯",
1086	"Ï¬"=>"Ï","Ïª"=>"Ï«","Ïš"=>"Ï©","ÏŠ"=>"Ï§","Ï€"=>"Ï¥","Ï¢"=>"Ï£","Ï "=>"Ï¡","Ï"=>"Ï","Ï"=>"Ï","Ï"=>"Ï",
1087	"Ï"=>"Ï","Î "=>"Ï","ÎŠ"=>"Ï","Î"=>"Ï","Î"=>"Ï","Î"=>"Ï","Î"=>"Ï","Î"=>"Ï","Î«"=>"Ï","Îª"=>"Ï",
1088	"Î©"=>"Ï","Îš"=>"Ï","Î§"=>"Ï","ÎŠ"=>"Ï","Î¥"=>"Ï
1089	","Î€"=>"Ï","Î£"=>"Ï","Î£"=>"Ï","Î¡"=>"Ï","Î "=>"Ï",
1090	"Î"=>"Î¿","Î"=>"ÎŸ","Î"=>"Îœ","Î"=>"ÎŒ","Î"=>"Î»","Î"=>"Îº","Î"=>"Î¹","Î"=>"Îž","Î"=>"Î·","Î"=>"Î¶",
1091	"Î"=>"Îµ","Î"=>"ÎŽ","Î"=>"Î³","Î"=>"Î²","Î"=>"Î±","Î"=>"Î¯","Î"=>"Î®","Î"=>"Î","Î"=>"Î¬","Æ·"=>"Ê",
1092	"Æ²"=>"Ê","Æ±"=>"Ê","Æ®"=>"Ê","Æ©"=>"Ê","ÆŠ"=>"Ê","Æ"=>"Éµ","Æ"=>"É²","Æ"=>"É¯","Æ"=>"É©","Æ"=>"Éš",
1093	"Æ"=>"É£","Æ"=>"É","Æ"=>"É","Æ"=>"É","Æ"=>"É","Æ"=>"É","Æ"=>"É","È²"=>"È³","È°"=>"È±","È®"=>"È¯",
1094	"È¬"=>"È","Èª"=>"È«","Èš"=>"È©","ÈŠ"=>"È§","È€"=>"È¥","È¢"=>"È£","È"=>"È","È"=>"È","È"=>"È","È"=>"È",
1095	"È"=>"È","È"=>"È","È"=>"È","È"=>"È","È"=>"È","È"=>"È","È"=>"È","È"=>"È","È"=>"È","È"=>"È
1096	",
1097	"È"=>"È","È"=>"È","ÇŸ"=>"Ç¿","ÇŒ"=>"Çœ","Çº"=>"Ç»","Çž"=>"Ç¹","ÇŽ"=>"Çµ","Ç²"=>"Ç³","Ç®"=>"Ç¯","Ç¬"=>"Ç",
1098	"Çª"=>"Ç«","Çš"=>"Ç©","ÇŠ"=>"Ç§","Ç€"=>"Ç¥","Ç¢"=>"Ç£","Ç "=>"Ç¡","Ç"=>"Ç","Æ"=>"Ç","Ç"=>"Ç","Ç"=>"Ç",
1099	"Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç"=>"Ç","Ç
1100	"=>"Ç","Ç·"=>"Æ¿",
1101	"ÆŒ"=>"Æœ","Æž"=>"Æ¹","Æµ"=>"Æ¶","Æ³"=>"ÆŽ","Æ¯"=>"Æ°","Æ¬"=>"Æ","Æ§"=>"Æš","Æ€"=>"Æ¥","Æ¢"=>"Æ£","Æ "=>"Æ¡",
1102	"È "=>"Æ","Æ"=>"Æ","Ç¶"=>"Æ","Æ"=>"Æ","Æ"=>"Æ","Æ"=>"Æ","Æ"=>"Æ
1103	","Æ"=>"Æ","S"=>"Å¿","Åœ"=>"ÅŸ",
1104	"Å»"=>"ÅŒ","Å¹"=>"Åº","Å¶"=>"Å·","ÅŽ"=>"Åµ","Å²"=>"Å³","Å°"=>"Å±","Å®"=>"Å¯","Å¬"=>"Å","Åª"=>"Å«","Åš"=>"Å©",
1105	"ÅŠ"=>"Å§","Å€"=>"Å¥","Å¢"=>"Å£","Å "=>"Å¡","Å"=>"Å","Å"=>"Å","Å"=>"Å","Å"=>"Å","Å"=>"Å","Å"=>"Å",
1106	"Å"=>"Å","Å"=>"Å","Å"=>"Å","Å"=>"Å","Å"=>"Å","Å"=>"Å","Å
1107	"=>"Å","Å"=>"Å","Å"=>"Å","Ä¿"=>"Å",
1108	"Äœ"=>"ÄŸ","Ä»"=>"ÄŒ","Ä¹"=>"Äº","Ä¶"=>"Ä·","ÄŽ"=>"Äµ","Ä²"=>"Ä³","I"=>"Ä±","Ä®"=>"Ä¯","Ä¬"=>"Ä","Äª"=>"Ä«",
1109	"Äš"=>"Ä©","ÄŠ"=>"Ä§","Ä€"=>"Ä¥","Ä¢"=>"Ä£","Ä "=>"Ä¡","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä",
1110	"Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä","Ä"=>"Ä
1111	","Ä"=>"Ä",
1112	"Ä"=>"Ä","Åž"=>"Ã¿","Ã"=>"ÃŸ","Ã"=>"Ãœ","Ã"=>"ÃŒ","Ã"=>"Ã»","Ã"=>"Ãº","Ã"=>"Ã¹","Ã"=>"Ãž","Ã"=>"Ã¶",
1113	"Ã"=>"Ãµ","Ã"=>"ÃŽ","Ã"=>"Ã³","Ã"=>"Ã²","Ã"=>"Ã±","Ã"=>"Ã°","Ã"=>"Ã¯","Ã"=>"Ã®","Ã"=>"Ã","Ã"=>"Ã¬",
1114	"Ã"=>"Ã«","Ã"=>"Ãª","Ã"=>"Ã©","Ã"=>"Ãš","Ã"=>"Ã§","Ã"=>"ÃŠ","Ã
1115	"=>"Ã¥","Ã"=>"Ã€","Ã"=>"Ã£","Ã"=>"Ã¢",
1116	"Ã"=>"Ã¡","Ã"=>"Ã ","Î"=>"Âµ","Z"=>"z","Y"=>"y","X"=>"x","W"=>"w","V"=>"v","U"=>"u","T"=>"t",
1117	"S"=>"s","R"=>"r","Q"=>"q","P"=>"p","O"=>"o","N"=>"n","M"=>"m","L"=>"l","K"=>"k","J"=>"j",
1118	"I"=>"i","H"=>"h","G"=>"g","F"=>"f","E"=>"e","D"=>"d","C"=>"c","B"=>"b","A"=>"a"
1119	);
1120	}; // end of case lookup tables
1121
1122	/**
1123	* UTF-8 lookup table for lower case accented letters
1124	*
1125	* This lookup table defines replacements from the ASCII-7 range for accented
1126	* characters. These are lower case letters only.
1127	*
1128	* @author Andreas Gohr <[email protected]>
1129	* @see utf8_deaccent()
1130	*/
1131	global $UTF8_LOWER_ACCENTS;
1132	if(empty($UTF8_LOWER_ACCENTS)) $UTF8_LOWER_ACCENTS = array(
1133	'Ã ' => 'a', 'ÃŽ' => 'o', 'Ä' => 'd', 'áž' => 'f', 'Ã«' => 'e', 'Å¡' => 's', 'Æ¡' => 'o',
1134	'Ã' => 'ss', 'Ä' => 'a', 'Å' => 'r', 'È' => 't', 'Å' => 'n', 'Ä' => 'a', 'Ä·' => 'k',
1135	'Å' => 's', 'á»³' => 'y', 'Å' => 'n', 'Äº' => 'l', 'Ä§' => 'h', 'á¹' => 'p', 'Ã³' => 'o',
1136	'Ãº' => 'u', 'Ä' => 'e', 'Ã©' => 'e', 'Ã§' => 'c', 'áº' => 'w', 'Ä' => 'c', 'Ãµ' => 'o',
1137	'á¹¡' => 's', 'Ãž' => 'o', 'Ä£' => 'g', 'Å§' => 't', 'È' => 's', 'Ä' => 'e', 'Ä' => 'c',
1138	'Å' => 's', 'Ã®' => 'i', 'Å±' => 'u', 'Ä' => 'c', 'Ä' => 'e', 'Åµ' => 'w', 'á¹«' => 't',
1139	'Å«' => 'u', 'Ä' => 'c', 'Ã¶' => 'oe', 'Ãš' => 'e', 'Å·' => 'y', 'Ä
1140	' => 'a', 'Å' => 'l',
1141	'Å³' => 'u', 'Å¯' => 'u', 'Å' => 's', 'Ä' => 'g', 'ÄŒ' => 'l', 'Æ' => 'f', 'ÅŸ' => 'z',
1142	'áº' => 'w', 'áž' => 'b', 'Ã¥' => 'a', 'Ã¬' => 'i', 'Ã¯' => 'i', 'áž' => 'd', 'Å¥' => 't',
1143	'Å' => 'r', 'Ã€' => 'ae', 'Ã' => 'i', 'Å' => 'r', 'Ãª' => 'e', 'ÃŒ' => 'ue', 'Ã²' => 'o',
1144	'Ä' => 'e', 'Ã±' => 'n', 'Å' => 'n', 'Ä¥' => 'h', 'Ä' => 'g', 'Ä' => 'd', 'Äµ' => 'j',
1145	'Ã¿' => 'y', 'Å©' => 'u', 'Å' => 'u', 'Æ°' => 'u', 'Å£' => 't', 'Ãœ' => 'y', 'Å' => 'o',
1146	'Ã¢' => 'a', 'ÄŸ' => 'l', 'áº
1147	' => 'w', 'ÅŒ' => 'z', 'Ä«' => 'i', 'Ã£' => 'a', 'Ä¡' => 'g',
1148	'á¹' => 'm', 'Å' => 'o', 'Ä©' => 'i', 'Ã¹' => 'u', 'Ä¯' => 'i', 'Åº' => 'z', 'Ã¡' => 'a',
1149	'Ã»' => 'u', 'ÃŸ' => 'th', 'Ã°' => 'dh', 'ÃŠ' => 'ae', 'Âµ' => 'u', 'Ä' => 'e',
1150	);
1151
1152	/**
1153	* UTF-8 lookup table for upper case accented letters
1154	*
1155	* This lookup table defines replacements from the ASCII-7 range for accented
1156	* characters. These are upper case letters only.
1157	*
1158	* @author Andreas Gohr <[email protected]>
1159	* @see utf8_deaccent()
1160	*/
1161	global $UTF8_UPPER_ACCENTS;
1162	if(empty($UTF8_UPPER_ACCENTS)) $UTF8_UPPER_ACCENTS = array(
1163	'Ã' => 'A', 'Ã' => 'O', 'Ä' => 'D', 'áž' => 'F', 'Ã' => 'E', 'Å ' => 'S', 'Æ ' => 'O',
1164	'Ä' => 'A', 'Å' => 'R', 'È' => 'T', 'Å' => 'N', 'Ä' => 'A', 'Ä¶' => 'K',
1165	'Å' => 'S', 'á»²' => 'Y', 'Å
1166	' => 'N', 'Ä¹' => 'L', 'ÄŠ' => 'H', 'á¹' => 'P', 'Ã' => 'O',
1167	'Ã' => 'U', 'Ä' => 'E', 'Ã' => 'E', 'Ã' => 'C', 'áº' => 'W', 'Ä' => 'C', 'Ã' => 'O',
1168	'á¹ ' => 'S', 'Ã' => 'O', 'Ä¢' => 'G', 'ÅŠ' => 'T', 'È' => 'S', 'Ä' => 'E', 'Ä' => 'C',
1169	'Å' => 'S', 'Ã' => 'I', 'Å°' => 'U', 'Ä' => 'C', 'Ä' => 'E', 'ÅŽ' => 'W', 'á¹ª' => 'T',
1170	'Åª' => 'U', 'Ä' => 'C', 'Ã' => 'Oe', 'Ã' => 'E', 'Å¶' => 'Y', 'Ä' => 'A', 'Å' => 'L',
1171	'Å²' => 'U', 'Å®' => 'U', 'Å' => 'S', 'Ä' => 'G', 'Ä»' => 'L', 'Æ' => 'F', 'Åœ' => 'Z',
1172	'áº' => 'W', 'áž' => 'B', 'Ã
1173	' => 'A', 'Ã' => 'I', 'Ã' => 'I', 'áž' => 'D', 'Å€' => 'T',
1174	'Å' => 'R', 'Ã' => 'Ae', 'Ã' => 'I', 'Å' => 'R', 'Ã' => 'E', 'Ã' => 'Ue', 'Ã' => 'O',
1175	'Ä' => 'E', 'Ã' => 'N', 'Å' => 'N', 'Ä€' => 'H', 'Ä' => 'G', 'Ä' => 'D', 'ÄŽ' => 'J',
1176	'Åž' => 'Y', 'Åš' => 'U', 'Å¬' => 'U', 'Æ¯' => 'U', 'Å¢' => 'T', 'Ã' => 'Y', 'Å' => 'O',
1177	'Ã' => 'A', 'Äœ' => 'L', 'áº' => 'W', 'Å»' => 'Z', 'Äª' => 'I', 'Ã' => 'A', 'Ä ' => 'G',
1178	'á¹' => 'M', 'Å' => 'O', 'Äš' => 'I', 'Ã' => 'U', 'Ä®' => 'I', 'Å¹' => 'Z', 'Ã' => 'A',
1179	'Ã' => 'U', 'Ã' => 'Th', 'Ã' => 'Dh', 'Ã' => 'Ae', 'Ä' => 'E',
1180	);
1181
1182	/**
1183	* UTF-8 array of common special characters
1184	*
1185	* This array should contain all special characters (not a letter or digit)
1186	* defined in the various local charsets - it's not a complete list of non-alphanum
1187	* characters in UTF-8. It's not perfect but should match most cases of special
1188	* chars.
1189	*
1190	* The controlchars 0x00 to 0x19 are _not_ included in this array. The space 0x20 is!
1191	* These chars are _not_ in the array either: _ (0x5f), : 0x3a, . 0x2e, - 0x2d, * 0x2a
1192	*
1193	* @author Andreas Gohr <[email protected]>
1194	* @see utf8_stripspecials()
1195	*/
1196	global $UTF8_SPECIAL_CHARS;
1197	if(empty($UTF8_SPECIAL_CHARS)) $UTF8_SPECIAL_CHARS = array( // assigned only while the global is still empty
1198	0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023, // entries are integer character values; the list starts at 0x1a
1199	0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002b, 0x002c, // 0x2a, 0x2d and 0x2e are deliberately absent
1200	0x002f, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x005b, // 0x3a is deliberately absent
1201	0x005c, 0x005d, 0x005e, 0x0060, 0x007b, 0x007c, 0x007d, 0x007e, // 0x5f is deliberately absent
1202	0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088,
1203	0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 0x0090, 0x0091, 0x0092,
1204	0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c,
1205	0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6,
1206	0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0,
1207	0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba,
1208	0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 0x00d7, 0x00f7, 0x02c7, 0x02d8, 0x02d9,
1209	0x02da, 0x02db, 0x02dc, 0x02dd, 0x0300, 0x0301, 0x0303, 0x0309, 0x0323, 0x0384,
1210	0x0385, 0x0387, 0x03c6, 0x03d1, 0x03d2, 0x03d5, 0x03d6, 0x05b0, 0x05b1,
1211	0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7, 0x05b8, 0x05b9, 0x05bb, 0x05bc,
1212	0x05bd, 0x05be, 0x05bf, 0x05c0, 0x05c1, 0x05c2, 0x05c3, 0x05f3, 0x05f4, 0x060c,
1213	0x061b, 0x061f, 0x0640, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 0x0650, 0x0651,
1214	0x0652, 0x066a, 0x0e3f, 0x200c, 0x200d, 0x200e, 0x200f, 0x2013, 0x2014, 0x2015,
1215	0x2017, 0x2018, 0x2019, 0x201a, 0x201c, 0x201d, 0x201e, 0x2020, 0x2021, 0x2022,
1216	0x2026, 0x2030, 0x2032, 0x2033, 0x2039, 0x203a, 0x2044, 0x20a7, 0x20aa, 0x20ab,
1217	0x20ac, 0x2116, 0x2118, 0x2122, 0x2126, 0x2135, 0x2190, 0x2191, 0x2192, 0x2193,
1218	0x2194, 0x2195, 0x21b5, 0x21d0, 0x21d1, 0x21d2, 0x21d3, 0x21d4, 0x2200, 0x2202,
1219	0x2203, 0x2205, 0x2206, 0x2207, 0x2208, 0x2209, 0x220b, 0x220f, 0x2211, 0x2212,
1220	0x2215, 0x2217, 0x2219, 0x221a, 0x221d, 0x221e, 0x2220, 0x2227, 0x2228, 0x2229,
1221	0x222a, 0x222b, 0x2234, 0x223c, 0x2245, 0x2248, 0x2260, 0x2261, 0x2264, 0x2265,
1222	0x2282, 0x2283, 0x2284, 0x2286, 0x2287, 0x2295, 0x2297, 0x22a5, 0x22c5, 0x2310,
1223	0x2320, 0x2321, 0x2329, 0x232a, 0x2469, 0x2500, 0x2502, 0x250c, 0x2510, 0x2514,
1224	0x2518, 0x251c, 0x2524, 0x252c, 0x2534, 0x253c, 0x2550, 0x2551, 0x2552, 0x2553,
1225	0x2554, 0x2555, 0x2556, 0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d,
1226	0x255e, 0x255f, 0x2560, 0x2561, 0x2562, 0x2563, 0x2564, 0x2565, 0x2566, 0x2567,
1227	0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590,
1228	0x2591, 0x2592, 0x2593, 0x25a0, 0x25b2, 0x25bc, 0x25c6, 0x25ca, 0x25cf, 0x25d7,
1229	0x2605, 0x260e, 0x261b, 0x261e, 0x2660, 0x2663, 0x2665, 0x2666, 0x2701, 0x2702,
1230	0x2703, 0x2704, 0x2706, 0x2707, 0x2708, 0x2709, 0x270c, 0x270d, 0x270e, 0x270f,
1231	0x2710, 0x2711, 0x2712, 0x2713, 0x2714, 0x2715, 0x2716, 0x2717, 0x2718, 0x2719,
1232	0x271a, 0x271b, 0x271c, 0x271d, 0x271e, 0x271f, 0x2720, 0x2721, 0x2722, 0x2723,
1233	0x2724, 0x2725, 0x2726, 0x2727, 0x2729, 0x272a, 0x272b, 0x272c, 0x272d, 0x272e,
1234	0x272f, 0x2730, 0x2731, 0x2732, 0x2733, 0x2734, 0x2735, 0x2736, 0x2737, 0x2738,
1235	0x2739, 0x273a, 0x273b, 0x273c, 0x273d, 0x273e, 0x273f, 0x2740, 0x2741, 0x2742,
1236	0x2743, 0x2744, 0x2745, 0x2746, 0x2747, 0x2748, 0x2749, 0x274a, 0x274b, 0x274d,
1237	0x274f, 0x2750, 0x2751, 0x2752, 0x2756, 0x2758, 0x2759, 0x275a, 0x275b, 0x275c,
1238	0x275d, 0x275e, 0x2761, 0x2762, 0x2763, 0x2764, 0x2765, 0x2766, 0x2767, 0x277f,
1239	0x2789, 0x2793, 0x2794, 0x2798, 0x2799, 0x279a, 0x279b, 0x279c, 0x279d, 0x279e,
1240	0x279f, 0x27a0, 0x27a1, 0x27a2, 0x27a3, 0x27a4, 0x27a5, 0x27a6, 0x27a7, 0x27a8,
1241	0x27a9, 0x27aa, 0x27ab, 0x27ac, 0x27ad, 0x27ae, 0x27af, 0x27b1, 0x27b2, 0x27b3,
1242	0x27b4, 0x27b5, 0x27b6, 0x27b7, 0x27b8, 0x27b9, 0x27ba, 0x27bb, 0x27bc, 0x27bd,
1243	0x27be, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x300c,
1244	0x300d, 0x300e, 0x300f, 0x3010, 0x3011, 0x3012, 0x3014, 0x3015, 0x3016, 0x3017,
1245	0x3018, 0x3019, 0x301a, 0x301b, 0x3036,
1246	0xf6d9, 0xf6da, 0xf6db, 0xf8d7, 0xf8d8, 0xf8d9, 0xf8da, 0xf8db, 0xf8dc,
1247	0xf8dd, 0xf8de, 0xf8df, 0xf8e0, 0xf8e1, 0xf8e2, 0xf8e3, 0xf8e4, 0xf8e5, 0xf8e6,
1248	0xf8e7, 0xf8e8, 0xf8e9, 0xf8ea, 0xf8eb, 0xf8ec, 0xf8ed, 0xf8ee, 0xf8ef, 0xf8f0,
1249	0xf8f1, 0xf8f2, 0xf8f3, 0xf8f4, 0xf8f5, 0xf8f6, 0xf8f7, 0xf8f8, 0xf8f9, 0xf8fa,
1250	0xf8fb, 0xf8fc, 0xf8fd, 0xf8fe, 0xfe7c, 0xfe7d,
1251	0xff01, 0xff02, 0xff03, 0xff04, 0xff05, 0xff06, 0xff07, 0xff08, 0xff09,
1252	0xff09, 0xff0a, 0xff0b, 0xff0c, 0xff0d, 0xff0e, 0xff0f, 0xff1a, 0xff1b, 0xff1c, // NOTE(review): 0xff09 repeats the last entry of the previous row
1253	0xff1d, 0xff1e, 0xff1f, 0xff20, 0xff3b, 0xff3c, 0xff3d, 0xff3e, 0xff40, 0xff5b,
1254	0xff5c, 0xff5d, 0xff5e, 0xff5f, 0xff60, 0xff61, 0xff62, 0xff63, 0xff64, 0xff65,
1255	0xffe0, 0xffe1, 0xffe2, 0xffe3, 0xffe4, 0xffe5, 0xffe6, 0xffe8, 0xffe9, 0xffea,
1256	0xffeb, 0xffec, 0xffed, 0xffee,
1257	0x01d6fc, 0x01d6fd, 0x01d6fe, 0x01d6ff, 0x01d700, 0x01d701, 0x01d702, 0x01d703, // values above 0xffff start here
1258	0x01d704, 0x01d705, 0x01d706, 0x01d707, 0x01d708, 0x01d709, 0x01d70a, 0x01d70b,
1259	0x01d70c, 0x01d70d, 0x01d70e, 0x01d70f, 0x01d710, 0x01d711, 0x01d712, 0x01d713,
1260	0x01d714, 0x01d715, 0x01d716, 0x01d717, 0x01d718, 0x01d719, 0x01d71a, 0x01d71b,
1261	0xc2a0, 0xe28087, 0xe280af, 0xe281a0, 0xefbbbf, // NOTE(review): these match the UTF-8 byte sequences of U+00A0, U+2007, U+202F, U+2060 and U+FEFF rather than code points like the rows above - confirm intent
1262	);
1263
1264	// utf8 version of above data
1265	global $UTF8_SPECIAL_CHARS2;
1266	if(empty($UTF8_SPECIAL_CHARS2)) $UTF8_SPECIAL_CHARS2 =
1267	"\x1A".'
1268
1269
1270	!"#$%&\'()+,/;<=>?@[\]^`{\|}~ÂÂÂÂÂÂ
1271	ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂï¿œ'.
1272	'ï¿œÂÂÂÂÂÂÂÂÂ Â¡Â¢Â£Â€Â¥ÂŠÂ§ÂšÂ©ÂªÂ«Â¬ÂÂ®Â¯Â°Â±Â²Â³ÂŽÂµÂ¶Â·ÂžÂ¹ÂºÂ»ÂŒÂœï¿œ'.
1273	'ï¿œÂ¿ÃÃ·ËËËËËËËÌÌÌÌÌ£ÎÎ
1274	ÎÏÖ°Ö±Ö²Ö³ÖŽÖµÖ¶Ö·ÖžÖ¹Ö»ÖŒÖœÖŸÖ¿ï¿œ'.
1275	'ï¿œ××××³×ŽØØØÙÙÙÙÙÙÙÙÙÙªàž¿âââââââââââââï¿œ'.
1276	'ï¿œï¿œâ â¡â¢âŠâ°â²â³â¹âºââ§âªâ«â¬âââ¢âŠâµâââââââµ'.
1277	'âââââââââ
1278	âââââââââââââââ â§âšï¿œ'.
1279	'ï¿œâªâ«âŽâŒâ
1280	ââ â¡â€â¥ââââââââ¥â
1281	ââ â¡â©âªâ©âï¿œ'.
1282	'ï¿œï¿œââââââ€â¬âŽâŒâââââââââââââââââ '.
1283	'â¡â¢â£â€â¥âŠâ§âšâ©âªâ«â¬âââââââââ â²âŒâââï¿œ'.
1284	'ï¿œâ
1285	ââââ â£â¥âŠââââââââââââââââââï¿œ'.
1286	'ï¿œï¿œââââââââââ â¡â¢â£â€â¥âŠâ§â©âªâ«â¬ââ®â¯â°â±'.
1287	'â²â³âŽâµâ¶â·âžâ¹âºâ»âŒâœâŸâ¿ââââââ
1288	ââââââï¿œ'.
1289	'ï¿œâââââââââââââ¡â¢â£â€â¥âŠâ§â¿ââââââï¿œ'.
1290	'ï¿œï¿œâââââ â¡â¢â£â€â¥âŠâ§âšâ©âªâ«â¬ââ®â¯â±â²â³âŽâµâ¶'.
1291	'â·âžâ¹âºâ»âŒâœâŸ'.
1292	'ãããããããããããããããããããããããã¶'.
1293	'ïïïï£ï£ï£ï£ï£ï£ï£ï£ï£ï£ ï£¡ï£¢ï££ï£€ï£¥ï¿œ'.
1294	'ï¿œï£§ï£šï£©ï£ªï£«ï£¬ï£ï£®ï£¯ï£°ï£±ï£²ï£³ï£Žï£µï£¶ï£·ï£žï£¹ï£ºï£»ï£Œï£œï£Ÿï¹Œï¹œ'.
1295	'ïŒïŒïŒïŒïŒ
1296	ïŒïŒïŒïŒïŒïŒïŒïŒïŒïŒïŒïŒïŒïŒïŒïŒïŒ ïŒ»ïŒŒïŒœïŒŸïœïœïœïœïœ'.
1297	'ïœïœ ïœ¡ïœ¢ïœ£ïœ€ïœ¥ï¿ ï¿¡ï¿¢ï¿£ï¿€ï¿¥ï¿Šï¿šï¿©ï¿ªï¿«ï¿¬ï¿ï¿®'.
1298	'ðŒðœðŸð¿ðððððð
1299	ðððððððððððððððððððððð'.
1300	' ââ¯â ï»¿';
1301
1302	/**
1303	* Romanization lookup table
1304	*
1305	* This lookup table provides a way to transform strings written in a script
1306	* other than those based on Latin letters into plain ASCII.
1307	*
1308	* Please note: this is not a scientific transliteration table. It only works
1309	* one way, from non-Latin to ASCII, and it works by simple character replacement
1310	* only. Peculiarities of each language are not supported.
1311	*
1312	* @author Andreas Gohr <[email protected]>
1313	* @author Vitaly Blokhin <[email protected]>
1314	* @link http://www.uconv.com/translit.htm
1315	* @author Bisqwit <[email protected]>
1316	* @link http://kanjidict.stc.cx/hiragana.php?src=2
1317	* @link http://www.translatum.gr/converter/greek-transliteration.htm
1318	* @link http://en.wikipedia.org/wiki/Royal_Thai_General_System_of_Transcription
1319	* @link http://www.btranslations.com/resources/romanization/korean.asp
1320	* @author Arthit Suriyawongkul <[email protected]>
1321	* @author Denis Scheither <[email protected]>
1322	*/
1323	global $UTF8_ROMANIZATION;
1324	if(empty($UTF8_ROMANIZATION)) $UTF8_ROMANIZATION = array(
1325	// scandinavian - differs from what we do in deaccent
1326	'Ã¥'=>'a','Ã
1327	'=>'A','Ã€'=>'a','Ã'=>'A','Ã¶'=>'o','Ã'=>'O',
1328
1329	//russian cyrillic
1330	'Ð°'=>'a','Ð'=>'A','Ð±'=>'b','Ð'=>'B','Ð²'=>'v','Ð'=>'V','Ð³'=>'g','Ð'=>'G',
1331	'ÐŽ'=>'d','Ð'=>'D','Ðµ'=>'e','Ð'=>'E','Ñ'=>'jo','Ð'=>'Jo','Ð¶'=>'zh','Ð'=>'Zh',
1332	'Ð·'=>'z','Ð'=>'Z','Ðž'=>'i','Ð'=>'I','Ð¹'=>'j','Ð'=>'J','Ðº'=>'k','Ð'=>'K',
1333	'Ð»'=>'l','Ð'=>'L','ÐŒ'=>'m','Ð'=>'M','Ðœ'=>'n','Ð'=>'N','ÐŸ'=>'o','Ð'=>'O',
1334	'Ð¿'=>'p','Ð'=>'P','Ñ'=>'r','Ð '=>'R','Ñ'=>'s','Ð¡'=>'S','Ñ'=>'t','Ð¢'=>'T',
1335	'Ñ'=>'u','Ð£'=>'U','Ñ'=>'f','Ð€'=>'F','Ñ
1336	'=>'x','Ð¥'=>'X','Ñ'=>'c','ÐŠ'=>'C',
1337	'Ñ'=>'ch','Ð§'=>'Ch','Ñ'=>'sh','Ðš'=>'Sh','Ñ'=>'sch','Ð©'=>'Sch','Ñ'=>'',
1338	'Ðª'=>'','Ñ'=>'y','Ð«'=>'Y','Ñ'=>'','Ð¬'=>'','Ñ'=>'eh','Ð'=>'Eh','Ñ'=>'ju',
1339	'Ð®'=>'Ju','Ñ'=>'ja','Ð¯'=>'Ja',
1340	// Ukrainian cyrillic
1341	'Ò'=>'Gh','Ò'=>'gh','Ð'=>'Je','Ñ'=>'je','Ð'=>'I','Ñ'=>'i','Ð'=>'Ji','Ñ'=>'ji',
1342	// Georgian
1343	'á'=>'a','á'=>'b','á'=>'g','á'=>'d','á'=>'e','á'=>'v','á'=>'z','á'=>'th',
1344	'á'=>'i','á'=>'p','á'=>'l','á'=>'m','á'=>'n','á'=>'o','á'=>'p','á'=>'zh',
1345	'á '=>'r','á¡'=>'s','á¢'=>'t','á£'=>'u','á€'=>'ph','á¥'=>'kh','áŠ'=>'gh','á§'=>'q',
1346	'áš'=>'sh','á©'=>'ch','áª'=>'c','á«'=>'dh','á¬'=>'w','á'=>'j','á®'=>'x','á¯'=>'jh',
1347	'á°'=>'xh',
1348	//Sanskrit
1349	'à€
1350	'=>'a','à€'=>'ah','à€'=>'i','à€'=>'ih','à€'=>'u','à€'=>'uh','à€'=>'ry',
1351	'à¥ '=>'ryh','à€'=>'ly','à¥¡'=>'lyh','à€'=>'e','à€'=>'ay','à€'=>'o','à€'=>'aw',
1352	'à€
1353	à€'=>'amh','à€
1354	à€'=>'aq','à€'=>'k','à€'=>'kh','à€'=>'g','à€'=>'gh','à€'=>'nh',
1355	'à€'=>'c','à€'=>'ch','à€'=>'j','à€'=>'jh','à€'=>'ny','à€'=>'tq','à€ '=>'tqh',
1356	'à€¡'=>'dq','à€¢'=>'dqh','à€£'=>'nq','à€€'=>'t','à€¥'=>'th','à€Š'=>'d','à€§'=>'dh',
1357	'à€š'=>'n','à€ª'=>'p','à€«'=>'ph','à€¬'=>'b','à€'=>'bh','à€®'=>'m','à€¯'=>'z','à€°'=>'r',
1358	'à€²'=>'l','à€µ'=>'v','à€¶'=>'sh','à€·'=>'sqh','à€ž'=>'s','à€¹'=>'x',
1359	//Hebrew
1360	'×'=>'a', '×'=>'b','×'=>'g','×'=>'d','×'=>'h','×'=>'v','×'=>'z','×'=>'kh','×'=>'th',
1361	'×'=>'y','×'=>'h','×'=>'k','×'=>'l','×'=>'m','×'=>'m','×'=>'n','× '=>'n',
1362	'×¡'=>'s','×¢'=>'ah','×£'=>'f','×€'=>'p','×¥'=>'c','×Š'=>'c','×§'=>'q','×š'=>'r',
1363	'×©'=>'sh','×ª'=>'t',
1364	//Arabic
1365	'Ø§'=>'a','Øš'=>'b','Øª'=>'t','Ø«'=>'th','Ø¬'=>'g','Ø'=>'xh','Ø®'=>'x','Ø¯'=>'d',
1366	'Ø°'=>'dh','Ø±'=>'r','Ø²'=>'z','Ø³'=>'s','ØŽ'=>'sh','Øµ'=>'s\'','Ø¶'=>'d\'',
1367	'Ø·'=>'t\'','Øž'=>'z\'','Ø¹'=>'y','Øº'=>'gh','Ù'=>'f','Ù'=>'q','Ù'=>'k',
1368	'Ù'=>'l','Ù
1369	'=>'m','Ù'=>'n','Ù'=>'x\'','Ù'=>'u','Ù'=>'i',
1370
1371	// Japanese characters (last update: 2008-05-09)
1372
1373	// Japanese hiragana
1374
1375	// 3 character syllables, っ doubles the consonant after
1376	'ã£ã¡ã'=>'ccha','ã£ã¡ã'=>'cche','ã£ã¡ã'=>'ccho','ã£ã¡ã
1377	'=>'cchu',
1378	'ã£ã³ã'=>'bbya','ã£ã³ã'=>'bbye','ã£ã³ã'=>'bbyi','ã£ã³ã'=>'bbyo','ã£ã³ã
1379	'=>'bbyu',
1380	'ã£ãŽã'=>'ppya','ã£ãŽã'=>'ppye','ã£ãŽã'=>'ppyi','ã£ãŽã'=>'ppyo','ã£ãŽã
1381	'=>'ppyu',
1382	'ã£ã¡ã'=>'ccha','ã£ã¡ã'=>'cche','ã£ã¡'=>'cchi','ã£ã¡ã'=>'ccho','ã£ã¡ã
1383	'=>'cchu',
1384	// 'ã£ã²ã'=>'hya','ã£ã²ã'=>'hye','ã£ã²ã'=>'hyi','ã£ã²ã'=>'hyo','ã£ã²ã
1385	'=>'hyu',
1386	'ã£ãã'=>'kkya','ã£ãã'=>'kkye','ã£ãã'=>'kkyi','ã£ãã'=>'kkyo','ã£ãã
1387	'=>'kkyu',
1388	'ã£ãã'=>'ggya','ã£ãã'=>'ggye','ã£ãã'=>'ggyi','ã£ãã'=>'ggyo','ã£ãã
1389	'=>'ggyu',
1390	'ã£ã¿ã'=>'mmya','ã£ã¿ã'=>'mmye','ã£ã¿ã'=>'mmyi','ã£ã¿ã'=>'mmyo','ã£ã¿ã
1391	'=>'mmyu',
1392	'ã£ã«ã'=>'nnya','ã£ã«ã'=>'nnye','ã£ã«ã'=>'nnyi','ã£ã«ã'=>'nnyo','ã£ã«ã
1393	'=>'nnyu',
1394	'ã£ãã'=>'rrya','ã£ãã'=>'rrye','ã£ãã'=>'rryi','ã£ãã'=>'rryo','ã£ãã
1395	'=>'rryu',
1396	'ã£ãã'=>'ssha','ã£ãã'=>'sshe','ã£ã'=>'sshi','ã£ãã'=>'ssho','ã£ãã
1397	'=>'sshu',
1398
1399	// separate hiragana 'n' ('n' + 'i' != 'ni', normally we would write "kon'nichi wa" but the apostrophe would be converted to _ anyway)
1400	'ãã'=>'n_a','ãã'=>'n_e','ãã'=>'n_i','ãã'=>'n_o','ãã'=>'n_u',
1401	'ãã'=>'n_ya','ãã'=>'n_yo','ãã'=>'n_yu',
1402
1403	// 2 character syllables - normal
1404	'ãµã'=>'fa','ãµã'=>'fe','ãµã'=>'fi','ãµã'=>'fo',
1405	'ã¡ã'=>'cha','ã¡ã'=>'che','ã¡'=>'chi','ã¡ã'=>'cho','ã¡ã
1406	'=>'chu',
1407	'ã²ã'=>'hya','ã²ã'=>'hye','ã²ã'=>'hyi','ã²ã'=>'hyo','ã²ã
1408	'=>'hyu',
1409	'ã³ã'=>'bya','ã³ã'=>'bye','ã³ã'=>'byi','ã³ã'=>'byo','ã³ã
1410	'=>'byu',
1411	'ãŽã'=>'pya','ãŽã'=>'pye','ãŽã'=>'pyi','ãŽã'=>'pyo','ãŽã
1412	'=>'pyu',
1413	'ãã'=>'kya','ãã'=>'kye','ãã'=>'kyi','ãã'=>'kyo','ãã
1414	'=>'kyu',
1415	'ãã'=>'gya','ãã'=>'gye','ãã'=>'gyi','ãã'=>'gyo','ãã
1416	'=>'gyu',
1417	'ã¿ã'=>'mya','ã¿ã'=>'mye','ã¿ã'=>'myi','ã¿ã'=>'myo','ã¿ã
1418	'=>'myu',
1419	'ã«ã'=>'nya','ã«ã'=>'nye','ã«ã'=>'nyi','ã«ã'=>'nyo','ã«ã
1420	'=>'nyu',
1421	'ãã'=>'rya','ãã'=>'rye','ãã'=>'ryi','ãã'=>'ryo','ãã
1422	'=>'ryu',
1423	'ãã'=>'sha','ãã'=>'she','ã'=>'shi','ãã'=>'sho','ãã
1424	'=>'shu',
1425	'ãã'=>'ja','ãã'=>'je','ãã'=>'jo','ãã
1426	'=>'ju',
1427	'ãã'=>'we','ãã'=>'wi',
1428	'ãã'=>'ye',
1429
1430	// 2 character syllables, っ doubles the consonant after
1431	'ã£ã°'=>'bba','ã£ã¹'=>'bbe','ã£ã³'=>'bbi','ã£ãŒ'=>'bbo','ã£ã¶'=>'bbu',
1432	'ã£ã±'=>'ppa','ã£ãº'=>'ppe','ã£ãŽ'=>'ppi','ã£ãœ'=>'ppo','ã£ã·'=>'ppu',
1433	'ã£ã'=>'tta','ã£ãŠ'=>'tte','ã£ã¡'=>'cchi','ã£ãš'=>'tto','ã£ã€'=>'ttsu',
1434	'ã£ã '=>'dda','ã£ã§'=>'dde','ã£ã¢'=>'ddi','ã£ã©'=>'ddo','ã£ã¥'=>'ddu',
1435	'ã£ã'=>'gga','ã£ã'=>'gge','ã£ã'=>'ggi','ã£ã'=>'ggo','ã£ã'=>'ggu',
1436	'ã£ã'=>'kka','ã£ã'=>'kke','ã£ã'=>'kki','ã£ã'=>'kko','ã£ã'=>'kku',
1437	'ã£ãŸ'=>'mma','ã£ã'=>'mme','ã£ã¿'=>'mmi','ã£ã'=>'mmo','ã£ã'=>'mmu',
1438	'ã£ãª'=>'nna','ã£ã'=>'nne','ã£ã«'=>'nni','ã£ã®'=>'nno','ã£ã¬'=>'nnu',
1439	'ã£ã'=>'rra','ã£ã'=>'rre','ã£ã'=>'rri','ã£ã'=>'rro','ã£ã'=>'rru',
1440	'ã£ã'=>'ssa','ã£ã'=>'sse','ã£ã'=>'sshi','ã£ã'=>'sso','ã£ã'=>'ssu',
1441	'ã£ã'=>'zza','ã£ã'=>'zze','ã£ã'=>'jji','ã£ã'=>'zzo','ã£ã'=>'zzu',
1442
1443	// 1 character syllables
1444	'ã'=>'a','ã'=>'e','ã'=>'i','ã'=>'o','ã'=>'u','ã'=>'n',
1445	'ã¯'=>'ha','ãž'=>'he','ã²'=>'hi','ã»'=>'ho','ãµ'=>'fu',
1446	'ã°'=>'ba','ã¹'=>'be','ã³'=>'bi','ãŒ'=>'bo','ã¶'=>'bu',
1447	'ã±'=>'pa','ãº'=>'pe','ãŽ'=>'pi','ãœ'=>'po','ã·'=>'pu',
1448	'ã'=>'ta','ãŠ'=>'te','ã¡'=>'chi','ãš'=>'to','ã€'=>'tsu',
1449	'ã '=>'da','ã§'=>'de','ã¢'=>'di','ã©'=>'do','ã¥'=>'du',
1450	'ã'=>'ga','ã'=>'ge','ã'=>'gi','ã'=>'go','ã'=>'gu',
1451	'ã'=>'ka','ã'=>'ke','ã'=>'ki','ã'=>'ko','ã'=>'ku',
1452	'ãŸ'=>'ma','ã'=>'me','ã¿'=>'mi','ã'=>'mo','ã'=>'mu',
1453	'ãª'=>'na','ã'=>'ne','ã«'=>'ni','ã®'=>'no','ã¬'=>'nu',
1454	'ã'=>'ra','ã'=>'re','ã'=>'ri','ã'=>'ro','ã'=>'ru',
1455	'ã'=>'sa','ã'=>'se','ã'=>'shi','ã'=>'so','ã'=>'su',
1456	'ã'=>'wa','ã'=>'wo',
1457	'ã'=>'za','ã'=>'ze','ã'=>'ji','ã'=>'zo','ã'=>'zu',
1458	'ã'=>'ya','ã'=>'yo','ã'=>'yu',
1459	// old characters
1460	'ã'=>'we','ã'=>'wi',
1461
1462	// convert what's left (probably only kicks in when something's missing above)
1463	// 'ã'=>'a','ã'=>'e','ã'=>'i','ã'=>'o','ã
1464	'=>'u',
1465	// 'ã'=>'ya','ã'=>'yo','ã
1466	'=>'yu',
1467
1468	// never seen one of those (disabled for the moment)
1469	// 'ãŽã'=>'va','ãŽã'=>'ve','ãŽã'=>'vi','ãŽã'=>'vo','ãŽ'=>'vu',
1470	// 'ã§ã'=>'dha','ã§ã'=>'dhe','ã§ã'=>'dhi','ã§ã'=>'dho','ã§ã
1471	'=>'dhu',
1472	// 'ã©ã'=>'dwa','ã©ã'=>'dwe','ã©ã'=>'dwi','ã©ã'=>'dwo','ã©ã
1473	'=>'dwu',
1474	// 'ã¢ã'=>'dya','ã¢ã'=>'dye','ã¢ã'=>'dyi','ã¢ã'=>'dyo','ã¢ã
1475	'=>'dyu',
1476	// 'ãµã'=>'fwa','ãµã'=>'fwe','ãµã'=>'fwi','ãµã'=>'fwo','ãµã
1477	'=>'fwu',
1478	// 'ãµã'=>'fya','ãµã'=>'fye','ãµã'=>'fyi','ãµã'=>'fyo','ãµã
1479	'=>'fyu',
1480	// 'ãã'=>'swa','ãã'=>'swe','ãã'=>'swi','ãã'=>'swo','ãã
1481	'=>'swu',
1482	// 'ãŠã'=>'tha','ãŠã'=>'the','ãŠã'=>'thi','ãŠã'=>'tho','ãŠã
1483	'=>'thu',
1484	// 'ã€ã'=>'tsa','ã€ã'=>'tse','ã€ã'=>'tsi','ã€ã'=>'tso','ã€'=>'tsu',
1485	// 'ãšã'=>'twa','ãšã'=>'twe','ãšã'=>'twi','ãšã'=>'two','ãšã
1486	'=>'twu',
1487	// 'ãŽã'=>'vya','ãŽã'=>'vye','ãŽã'=>'vyi','ãŽã'=>'vyo','ãŽã
1488	'=>'vyu',
1489	// 'ãã'=>'wha','ãã'=>'whe','ãã'=>'whi','ãã'=>'who','ãã
1490	'=>'whu',
1491	// 'ãã'=>'zha','ãã'=>'zhe','ãã'=>'zhi','ãã'=>'zho','ãã
1492	'=>'zhu',
1493	// 'ãã'=>'zya','ãã'=>'zye','ãã'=>'zyi','ãã'=>'zyo','ãã
1494	'=>'zyu',
1495
1496	// 'spare' characters from other romanization systems
1497	// 'ã '=>'da','ã§'=>'de','ã¢'=>'di','ã©'=>'do','ã¥'=>'du',
1498	// 'ã'=>'la','ã'=>'le','ã'=>'li','ã'=>'lo','ã'=>'lu',
1499	// 'ã'=>'sa','ã'=>'se','ã'=>'si','ã'=>'so','ã'=>'su',
1500	// 'ã¡ã'=>'cya','ã¡ã'=>'cye','ã¡ã'=>'cyi','ã¡ã'=>'cyo','ã¡ã
1501	'=>'cyu',
1502	//'ãã'=>'jya','ãã'=>'jye','ãã'=>'jyi','ãã'=>'jyo','ãã
1503	'=>'jyu',
1504	//'ãã'=>'lya','ãã'=>'lye','ãã'=>'lyi','ãã'=>'lyo','ãã
1505	'=>'lyu',
1506	//'ãã'=>'sya','ãã'=>'sye','ãã'=>'syi','ãã'=>'syo','ãã
1507	'=>'syu',
1508	//'ã¡ã'=>'tya','ã¡ã'=>'tye','ã¡ã'=>'tyi','ã¡ã'=>'tyo','ã¡ã
1509	'=>'tyu',
1510	//'ã'=>'ci',,ã'=>'yi','ã¢'=>'dzi',
1511	//'ã£ãã'=>'jja','ã£ãã'=>'jje','ã£ã'=>'jji','ã£ãã'=>'jjo','ã£ãã
1512	'=>'jju',
1513
1514
1515	// Japanese katakana
1516
1517	// 4 character syllables: ッ doubles the consonant after, ー doubles the vowel before (usually written with macron, but we don't want that in our URLs)
1518	'ããã£ãŒ'=>'bbyaa','ããã§ãŒ'=>'bbyee','ããã£ãŒ'=>'bbyii','ããã§ãŒ'=>'bbyoo','ããã¥ãŒ'=>'bbyuu',
1519	'ããã£ãŒ'=>'ppyaa','ããã§ãŒ'=>'ppyee','ããã£ãŒ'=>'ppyii','ããã§ãŒ'=>'ppyoo','ããã¥ãŒ'=>'ppyuu',
1520	'ããã£ãŒ'=>'kkyaa','ããã§ãŒ'=>'kkyee','ããã£ãŒ'=>'kkyii','ããã§ãŒ'=>'kkyoo','ããã¥ãŒ'=>'kkyuu',
1521	'ãã®ã£ãŒ'=>'ggyaa','ãã®ã§ãŒ'=>'ggyee','ãã®ã£ãŒ'=>'ggyii','ãã®ã§ãŒ'=>'ggyoo','ãã®ã¥ãŒ'=>'ggyuu',
1522	'ããã£ãŒ'=>'mmyaa','ããã§ãŒ'=>'mmyee','ããã£ãŒ'=>'mmyii','ããã§ãŒ'=>'mmyoo','ããã¥ãŒ'=>'mmyuu',
1523	'ããã£ãŒ'=>'nnyaa','ããã§ãŒ'=>'nnyee','ããã£ãŒ'=>'nnyii','ããã§ãŒ'=>'nnyoo','ããã¥ãŒ'=>'nnyuu',
1524	'ããªã£ãŒ'=>'rryaa','ããªã§ãŒ'=>'rryee','ããªã£ãŒ'=>'rryii','ããªã§ãŒ'=>'rryoo','ããªã¥ãŒ'=>'rryuu',
1525	'ãã·ã£ãŒ'=>'sshaa','ãã·ã§ãŒ'=>'sshee','ãã·ãŒ'=>'sshii','ãã·ã§ãŒ'=>'sshoo','ãã·ã¥ãŒ'=>'sshuu',
1526	'ããã£ãŒ'=>'cchaa','ããã§ãŒ'=>'cchee','ãããŒ'=>'cchii','ããã§ãŒ'=>'cchoo','ããã¥ãŒ'=>'cchuu',
1527	'ããã£ãŒ'=>'ttii',
1528	'ããã£ãŒ'=>'ddii',
1529
1530	// 3 character syllables - doubled vowels
1531	'ãã¡ãŒ'=>'faa','ãã§ãŒ'=>'fee','ãã£ãŒ'=>'fii','ãã©ãŒ'=>'foo',
1532	'ãã£ãŒ'=>'fyaa','ãã§ãŒ'=>'fyee','ãã£ãŒ'=>'fyii','ãã§ãŒ'=>'fyoo','ãã¥ãŒ'=>'fyuu',
1533	'ãã£ãŒ'=>'hyaa','ãã§ãŒ'=>'hyee','ãã£ãŒ'=>'hyii','ãã§ãŒ'=>'hyoo','ãã¥ãŒ'=>'hyuu',
1534	'ãã£ãŒ'=>'byaa','ãã§ãŒ'=>'byee','ãã£ãŒ'=>'byii','ãã§ãŒ'=>'byoo','ãã¥ãŒ'=>'byuu',
1535	'ãã£ãŒ'=>'pyaa','ãã§ãŒ'=>'pyee','ãã£ãŒ'=>'pyii','ãã§ãŒ'=>'pyoo','ãã¥ãŒ'=>'pyuu',
1536	'ãã£ãŒ'=>'kyaa','ãã§ãŒ'=>'kyee','ãã£ãŒ'=>'kyii','ãã§ãŒ'=>'kyoo','ãã¥ãŒ'=>'kyuu',
1537	'ã®ã£ãŒ'=>'gyaa','ã®ã§ãŒ'=>'gyee','ã®ã£ãŒ'=>'gyii','ã®ã§ãŒ'=>'gyoo','ã®ã¥ãŒ'=>'gyuu',
1538	'ãã£ãŒ'=>'myaa','ãã§ãŒ'=>'myee','ãã£ãŒ'=>'myii','ãã§ãŒ'=>'myoo','ãã¥ãŒ'=>'myuu',
1539	'ãã£ãŒ'=>'nyaa','ãã§ãŒ'=>'nyee','ãã£ãŒ'=>'nyii','ãã§ãŒ'=>'nyoo','ãã¥ãŒ'=>'nyuu',
1540	'ãªã£ãŒ'=>'ryaa','ãªã§ãŒ'=>'ryee','ãªã£ãŒ'=>'ryii','ãªã§ãŒ'=>'ryoo','ãªã¥ãŒ'=>'ryuu',
1541	'ã·ã£ãŒ'=>'shaa','ã·ã§ãŒ'=>'shee','ã·ãŒ'=>'shii','ã·ã§ãŒ'=>'shoo','ã·ã¥ãŒ'=>'shuu',
1542	'ãžã£ãŒ'=>'jaa','ãžã§ãŒ'=>'jee','ãžãŒ'=>'jii','ãžã§ãŒ'=>'joo','ãžã¥ãŒ'=>'juu',
1543	'ã¹ã¡ãŒ'=>'swaa','ã¹ã§ãŒ'=>'swee','ã¹ã£ãŒ'=>'swii','ã¹ã©ãŒ'=>'swoo','ã¹ã¥ãŒ'=>'swuu',
1544	'ãã¡ãŒ'=>'daa','ãã§ãŒ'=>'dee','ãã£ãŒ'=>'dii','ãã©ãŒ'=>'doo','ãã¥ãŒ'=>'duu',
1545	'ãã£ãŒ'=>'chaa','ãã§ãŒ'=>'chee','ããŒ'=>'chii','ãã§ãŒ'=>'choo','ãã¥ãŒ'=>'chuu',
1546	'ãã£ãŒ'=>'dyaa','ãã§ãŒ'=>'dyee','ãã£ãŒ'=>'dyii','ãã§ãŒ'=>'dyoo','ãã¥ãŒ'=>'dyuu',
1547	'ãã£ãŒ'=>'tsaa','ãã§ãŒ'=>'tsee','ãã£ãŒ'=>'tsii','ãã§ãŒ'=>'tsoo','ããŒ'=>'tsuu',
1548	'ãã¡ãŒ'=>'twaa','ãã§ãŒ'=>'twee','ãã£ãŒ'=>'twii','ãã©ãŒ'=>'twoo','ãã¥ãŒ'=>'twuu',
1549	'ãã¡ãŒ'=>'dwaa','ãã§ãŒ'=>'dwee','ãã£ãŒ'=>'dwii','ãã©ãŒ'=>'dwoo','ãã¥ãŒ'=>'dwuu',
1550	'ãŠã¡ãŒ'=>'whaa','ãŠã§ãŒ'=>'whee','ãŠã£ãŒ'=>'whii','ãŠã©ãŒ'=>'whoo','ãŠã¥ãŒ'=>'whuu',
1551	'ãŽã£ãŒ'=>'vyaa','ãŽã§ãŒ'=>'vyee','ãŽã£ãŒ'=>'vyii','ãŽã§ãŒ'=>'vyoo','ãŽã¥ãŒ'=>'vyuu',
1552	'ãŽã¡ãŒ'=>'vaa','ãŽã§ãŒ'=>'vee','ãŽã£ãŒ'=>'vii','ãŽã©ãŒ'=>'voo','ãŽãŒ'=>'vuu',
1553	'ãŠã§ãŒ'=>'wee','ãŠã£ãŒ'=>'wii',
1554	'ã€ã§ãŒ'=>'yee',
1555	'ãã£ãŒ'=>'tii',
1556	'ãã£ãŒ'=>'dii',
1557
1558	// 3 character syllables - doubled consonants
1559	'ããã£'=>'bbya','ããã§'=>'bbye','ããã£'=>'bbyi','ããã§'=>'bbyo','ããã¥'=>'bbyu',
1560	'ããã£'=>'ppya','ããã§'=>'ppye','ããã£'=>'ppyi','ããã§'=>'ppyo','ããã¥'=>'ppyu',
1561	'ããã£'=>'kkya','ããã§'=>'kkye','ããã£'=>'kkyi','ããã§'=>'kkyo','ããã¥'=>'kkyu',
1562	'ãã®ã£'=>'ggya','ãã®ã§'=>'ggye','ãã®ã£'=>'ggyi','ãã®ã§'=>'ggyo','ãã®ã¥'=>'ggyu',
1563	'ããã£'=>'mmya','ããã§'=>'mmye','ããã£'=>'mmyi','ããã§'=>'mmyo','ããã¥'=>'mmyu',
1564	'ããã£'=>'nnya','ããã§'=>'nnye','ããã£'=>'nnyi','ããã§'=>'nnyo','ããã¥'=>'nnyu',
1565	'ããªã£'=>'rrya','ããªã§'=>'rrye','ããªã£'=>'rryi','ããªã§'=>'rryo','ããªã¥'=>'rryu',
1566	'ãã·ã£'=>'ssha','ãã·ã§'=>'sshe','ãã·'=>'sshi','ãã·ã§'=>'ssho','ãã·ã¥'=>'sshu',
1567	'ããã£'=>'ccha','ããã§'=>'cche','ãã'=>'cchi','ããã§'=>'ccho','ããã¥'=>'cchu',
1568	'ããã£'=>'tti',
1569	'ããã£'=>'ddi',
1570
1571	// 3 character syllables - doubled vowel and consonants
1572	'ãããŒ'=>'bbaa','ãããŒ'=>'bbee','ãããŒ'=>'bbii','ãããŒ'=>'bboo','ãããŒ'=>'bbuu',
1573	'ãããŒ'=>'ppaa','ãããŒ'=>'ppee','ãããŒ'=>'ppii','ãããŒ'=>'ppoo','ãããŒ'=>'ppuu',
1574	'ãã±ãŒ'=>'kkee','ãããŒ'=>'kkii','ãã³ãŒ'=>'kkoo','ãã¯ãŒ'=>'kkuu','ãã«ãŒ'=>'kkaa',
1575	'ãã¬ãŒ'=>'ggaa','ãã²ãŒ'=>'ggee','ãã®ãŒ'=>'ggii','ããŽãŒ'=>'ggoo','ãã°ãŒ'=>'gguu',
1576	'ãããŒ'=>'maa','ãã¡ãŒ'=>'mee','ãããŒ'=>'mii','ãã¢ãŒ'=>'moo','ãã ãŒ'=>'muu',
1577	'ãããŒ'=>'nnaa','ãããŒ'=>'nnee','ãããŒ'=>'nnii','ãããŒ'=>'nnoo','ãããŒ'=>'nnuu',
1578	'ãã©ãŒ'=>'rraa','ãã¬ãŒ'=>'rree','ããªãŒ'=>'rrii','ãããŒ'=>'rroo','ãã«ãŒ'=>'rruu',
1579	'ããµãŒ'=>'ssaa','ãã»ãŒ'=>'ssee','ãã·ãŒ'=>'sshii','ããœãŒ'=>'ssoo','ãã¹ãŒ'=>'ssuu',
1580	'ãã¶ãŒ'=>'zzaa','ããŒãŒ'=>'zzee','ããžãŒ'=>'jjii','ããŸãŒ'=>'zzoo','ããºãŒ'=>'zzuu',
1581	'ãã¿ãŒ'=>'ttaa','ãããŒ'=>'ttee','ãããŒ'=>'chii','ãããŒ'=>'ttoo','ãããŒ'=>'ttsuu',
1582	'ãããŒ'=>'ddaa','ãããŒ'=>'ddee','ãããŒ'=>'ddii','ãããŒ'=>'ddoo','ãã
1583	ãŒ'=>'dduu',
1584
1585	// 2 character syllables - normal
1586	'ãã¡'=>'fa','ãã§'=>'fe','ãã£'=>'fi','ãã©'=>'fo','ãã¥'=>'fu',
1587	// 'ãã£'=>'fya','ãã§'=>'fye','ãã£'=>'fyi','ãã§'=>'fyo','ãã¥'=>'fyu',
1588	'ãã£'=>'fa','ãã§'=>'fe','ãã£'=>'fi','ãã§'=>'fo','ãã¥'=>'fu',
1589	'ãã£'=>'hya','ãã§'=>'hye','ãã£'=>'hyi','ãã§'=>'hyo','ãã¥'=>'hyu',
1590	'ãã£'=>'bya','ãã§'=>'bye','ãã£'=>'byi','ãã§'=>'byo','ãã¥'=>'byu',
1591	'ãã£'=>'pya','ãã§'=>'pye','ãã£'=>'pyi','ãã§'=>'pyo','ãã¥'=>'pyu',
1592	'ãã£'=>'kya','ãã§'=>'kye','ãã£'=>'kyi','ãã§'=>'kyo','ãã¥'=>'kyu',
1593	'ã®ã£'=>'gya','ã®ã§'=>'gye','ã®ã£'=>'gyi','ã®ã§'=>'gyo','ã®ã¥'=>'gyu',
1594	'ãã£'=>'mya','ãã§'=>'mye','ãã£'=>'myi','ãã§'=>'myo','ãã¥'=>'myu',
1595	'ãã£'=>'nya','ãã§'=>'nye','ãã£'=>'nyi','ãã§'=>'nyo','ãã¥'=>'nyu',
1596	'ãªã£'=>'rya','ãªã§'=>'rye','ãªã£'=>'ryi','ãªã§'=>'ryo','ãªã¥'=>'ryu',
1597	'ã·ã£'=>'sha','ã·ã§'=>'she','ã·ã§'=>'sho','ã·ã¥'=>'shu',
1598	'ãžã£'=>'ja','ãžã§'=>'je','ãžã§'=>'jo','ãžã¥'=>'ju',
1599	'ã¹ã¡'=>'swa','ã¹ã§'=>'swe','ã¹ã£'=>'swi','ã¹ã©'=>'swo','ã¹ã¥'=>'swu',
1600	'ãã¡'=>'da','ãã§'=>'de','ãã£'=>'di','ãã©'=>'do','ãã¥'=>'du',
1601	'ãã£'=>'cha','ãã§'=>'che','ã'=>'chi','ãã§'=>'cho','ãã¥'=>'chu',
1602	// 'ãã£'=>'dya','ãã§'=>'dye','ãã£'=>'dyi','ãã§'=>'dyo','ãã¥'=>'dyu',
1603	'ãã£'=>'tsa','ãã§'=>'tse','ãã£'=>'tsi','ãã§'=>'tso','ã'=>'tsu',
1604	'ãã¡'=>'twa','ãã§'=>'twe','ãã£'=>'twi','ãã©'=>'two','ãã¥'=>'twu',
1605	'ãã¡'=>'dwa','ãã§'=>'dwe','ãã£'=>'dwi','ãã©'=>'dwo','ãã¥'=>'dwu',
1606	'ãŠã¡'=>'wha','ãŠã§'=>'whe','ãŠã£'=>'whi','ãŠã©'=>'who','ãŠã¥'=>'whu',
1607	'ãŽã£'=>'vya','ãŽã§'=>'vye','ãŽã£'=>'vyi','ãŽã§'=>'vyo','ãŽã¥'=>'vyu',
1608	'ãŽã¡'=>'va','ãŽã§'=>'ve','ãŽã£'=>'vi','ãŽã©'=>'vo','ãŽ'=>'vu',
1609	'ãŠã§'=>'we','ãŠã£'=>'wi',
1610	'ã€ã§'=>'ye',
1611	'ãã£'=>'ti',
1612	'ãã£'=>'di',
1613
1614	// 2 character syllables - doubled vowels
1615	'ã¢ãŒ'=>'aa','ãšãŒ'=>'ee','ã€ãŒ'=>'ii','ãªãŒ'=>'oo','ãŠãŒ'=>'uu',
1616	'ããŒ'=>'daa','ããŒ'=>'dee','ããŒ'=>'dii','ããŒ'=>'doo','ã
1617	ãŒ'=>'duu',
1618	'ããŒ'=>'haa','ããŒ'=>'hee','ããŒ'=>'hii','ããŒ'=>'hoo','ããŒ'=>'fuu',
1619	'ããŒ'=>'baa','ããŒ'=>'bee','ããŒ'=>'bii','ããŒ'=>'boo','ããŒ'=>'buu',
1620	'ããŒ'=>'paa','ããŒ'=>'pee','ããŒ'=>'pii','ããŒ'=>'poo','ããŒ'=>'puu',
1621	'ã±ãŒ'=>'kee','ããŒ'=>'kii','ã³ãŒ'=>'koo','ã¯ãŒ'=>'kuu','ã«ãŒ'=>'kaa',
1622	'ã¬ãŒ'=>'gaa','ã²ãŒ'=>'gee','ã®ãŒ'=>'gii','ãŽãŒ'=>'goo','ã°ãŒ'=>'guu',
1623	'ããŒ'=>'maa','ã¡ãŒ'=>'mee','ããŒ'=>'mii','ã¢ãŒ'=>'moo','ã ãŒ'=>'muu',
1624	'ããŒ'=>'naa','ããŒ'=>'nee','ããŒ'=>'nii','ããŒ'=>'noo','ããŒ'=>'nuu',
1625	'ã©ãŒ'=>'raa','ã¬ãŒ'=>'ree','ãªãŒ'=>'rii','ããŒ'=>'roo','ã«ãŒ'=>'ruu',
1626	'ãµãŒ'=>'saa','ã»ãŒ'=>'see','ã·ãŒ'=>'shii','ãœãŒ'=>'soo','ã¹ãŒ'=>'suu',
1627	'ã¶ãŒ'=>'zaa','ãŒãŒ'=>'zee','ãžãŒ'=>'jii','ãŸãŒ'=>'zoo','ãºãŒ'=>'zuu',
1628	'ã¿ãŒ'=>'taa','ããŒ'=>'tee','ããŒ'=>'chii','ããŒ'=>'too','ããŒ'=>'tsuu',
1629	'ã¯ãŒ'=>'waa','ã²ãŒ'=>'woo',
1630	'ã€ãŒ'=>'yaa','ãšãŒ'=>'yoo','ãŠãŒ'=>'yuu',
1631	'ãµãŒ'=>'kaa','ã¶ãŒ'=>'kee',
1632	// old characters
1633	'ã±ãŒ'=>'wee','ã°ãŒ'=>'wii',
1634
1635	// separate katakana 'n'
1636	'ã³ã¢'=>'n_a','ã³ãš'=>'n_e','ã³ã€'=>'n_i','ã³ãª'=>'n_o','ã³ãŠ'=>'n_u',
1637	'ã³ã€'=>'n_ya','ã³ãš'=>'n_yo','ã³ãŠ'=>'n_yu',
1638
1639	// 2 character syllables - doubled consonants
1640	'ãã'=>'bba','ãã'=>'bbe','ãã'=>'bbi','ãã'=>'bbo','ãã'=>'bbu',
1641	'ãã'=>'ppa','ãã'=>'ppe','ãã'=>'ppi','ãã'=>'ppo','ãã'=>'ppu',
1642	'ãã±'=>'kke','ãã'=>'kki','ãã³'=>'kko','ãã¯'=>'kku','ãã«'=>'kka',
1643	'ãã¬'=>'gga','ãã²'=>'gge','ãã®'=>'ggi','ããŽ'=>'ggo','ãã°'=>'ggu',
1644	'ãã'=>'ma','ãã¡'=>'me','ãã'=>'mi','ãã¢'=>'mo','ãã '=>'mu',
1645	'ãã'=>'nna','ãã'=>'nne','ãã'=>'nni','ãã'=>'nno','ãã'=>'nnu',
1646	'ãã©'=>'rra','ãã¬'=>'rre','ããª'=>'rri','ãã'=>'rro','ãã«'=>'rru',
1647	'ããµ'=>'ssa','ãã»'=>'sse','ãã·'=>'sshi','ããœ'=>'sso','ãã¹'=>'ssu',
1648	'ãã¶'=>'zza','ããŒ'=>'zze','ããž'=>'jji','ããŸ'=>'zzo','ããº'=>'zzu',
1649	'ãã¿'=>'tta','ãã'=>'tte','ãã'=>'cchi','ãã'=>'tto','ãã'=>'ttsu',
1650	'ãã'=>'dda','ãã'=>'dde','ãã'=>'ddi','ãã'=>'ddo','ãã
1651	'=>'ddu',
1652
1653	// 1 character syllables
1654	'ã¢'=>'a','ãš'=>'e','ã€'=>'i','ãª'=>'o','ãŠ'=>'u','ã³'=>'n',
1655	'ã'=>'ha','ã'=>'he','ã'=>'hi','ã'=>'ho','ã'=>'fu',
1656	'ã'=>'ba','ã'=>'be','ã'=>'bi','ã'=>'bo','ã'=>'bu',
1657	'ã'=>'pa','ã'=>'pe','ã'=>'pi','ã'=>'po','ã'=>'pu',
1658	'ã±'=>'ke','ã'=>'ki','ã³'=>'ko','ã¯'=>'ku','ã«'=>'ka',
1659	'ã¬'=>'ga','ã²'=>'ge','ã®'=>'gi','ãŽ'=>'go','ã°'=>'gu',
1660	'ã'=>'ma','ã¡'=>'me','ã'=>'mi','ã¢'=>'mo','ã '=>'mu',
1661	'ã'=>'na','ã'=>'ne','ã'=>'ni','ã'=>'no','ã'=>'nu',
1662	'ã©'=>'ra','ã¬'=>'re','ãª'=>'ri','ã'=>'ro','ã«'=>'ru',
1663	'ãµ'=>'sa','ã»'=>'se','ã·'=>'shi','ãœ'=>'so','ã¹'=>'su',
1664	'ã¶'=>'za','ãŒ'=>'ze','ãž'=>'ji','ãŸ'=>'zo','ãº'=>'zu',
1665	'ã¿'=>'ta','ã'=>'te','ã'=>'chi','ã'=>'to','ã'=>'tsu',
1666	'ã'=>'da','ã'=>'de','ã'=>'di','ã'=>'do','ã
1667	'=>'du',
1668	'ã¯'=>'wa','ã²'=>'wo',
1669	'ã€'=>'ya','ãš'=>'yo','ãŠ'=>'yu',
1670	'ãµ'=>'ka','ã¶'=>'ke',
1671	// old characters
1672	'ã±'=>'we','ã°'=>'wi',
1673
1674	// convert what's left (probably only kicks in when something's missing above)
1675	'ã¡'=>'a','ã§'=>'e','ã£'=>'i','ã©'=>'o','ã¥'=>'u',
1676	'ã£'=>'ya','ã§'=>'yo','ã¥'=>'yu',
1677
1678	// special characters
1679	'ã»'=>'_','ã'=>'_',
1680	'ãŒ'=>'_', // when used with hiragana (seldom), this character would not be converted otherwise
1681
1682	// 'ã©'=>'la','ã¬'=>'le','ãª'=>'li','ã'=>'lo','ã«'=>'lu',
1683	// 'ãã£'=>'cya','ãã§'=>'cye','ãã£'=>'cyi','ãã§'=>'cyo','ãã¥'=>'cyu',
1684	//'ãã£'=>'dha','ãã§'=>'dhe','ãã£'=>'dhi','ãã§'=>'dho','ãã¥'=>'dhu',
1685	// 'ãªã£'=>'lya','ãªã§'=>'lye','ãªã£'=>'lyi','ãªã§'=>'lyo','ãªã¥'=>'lyu',
1686	// 'ãã£'=>'tha','ãã§'=>'the','ãã£'=>'thi','ãã§'=>'tho','ãã¥'=>'thu',
1687	//'ãã¡'=>'fwa','ãã§'=>'fwe','ãã£'=>'fwi','ãã©'=>'fwo','ãã¥'=>'fwu',
1688	//'ãã£'=>'tya','ãã§'=>'tye','ãã£'=>'tyi','ãã§'=>'tyo','ãã¥'=>'tyu',
1689	// 'ãžã£'=>'jya','ãžã§'=>'jye','ãžã£'=>'jyi','ãžã§'=>'jyo','ãžã¥'=>'jyu',
1690	// 'ãžã£'=>'zha','ãžã§'=>'zhe','ãžã£'=>'zhi','ãžã§'=>'zho','ãžã¥'=>'zhu',
1691	//'ãžã£'=>'zya','ãžã§'=>'zye','ãžã£'=>'zyi','ãžã§'=>'zyo','ãžã¥'=>'zyu',
1692	//'ã·ã£'=>'sya','ã·ã§'=>'sye','ã·ã£'=>'syi','ã·ã§'=>'syo','ã·ã¥'=>'syu',
1693	//'ã·'=>'ci','ã'=>'hu',ã·'=>'si','ã'=>'ti','ã'=>'tu','ã€'=>'yi','ã'=>'dzi',
1694
1695	// "Greeklish"
1696	'Î'=>'G','Î'=>'E','Î'=>'Th','Î'=>'L','Î'=>'X','Î '=>'P','Î£'=>'S','ÎŠ'=>'F','Îš'=>'Ps',
1697	'Î³'=>'g','ÎŽ'=>'e','Îž'=>'th','Î»'=>'l','ÎŸ'=>'x','Ï'=>'p','Ï'=>'s','Ï'=>'f','Ï'=>'ps',
1698
1699	// Thai
1700	'àž'=>'k','àž'=>'kh','àž'=>'kh','àž'=>'kh','àž
1701	'=>'kh','àž'=>'kh','àž'=>'ng','àž'=>'ch',
1702	'àž'=>'ch','àž'=>'ch','àž'=>'s','àž'=>'ch','àž'=>'y','àž'=>'d','àž'=>'t','àž'=>'th',
1703	'àž'=>'d','àž'=>'th','àž'=>'n','àž'=>'d','àž'=>'t','àž'=>'th','àž'=>'th','àž'=>'th',
1704	'àž'=>'n','àž'=>'b','àž'=>'p','àž'=>'ph','àž'=>'f','àž'=>'ph','àž'=>'f','àž '=>'ph',
1705	'àž¡'=>'m','àž¢'=>'y','àž£'=>'r','àž€'=>'rue','àž€à¹
1706	'=>'rue','àž¥'=>'l','àžŠ'=>'lue',
1707	'àžŠà¹
1708	'=>'lue','àž§'=>'w','àžš'=>'s','àž©'=>'s','àžª'=>'s','àž«'=>'h','àž¬'=>'l','àž®'=>'h',
1709	'àž°'=>'a','àž±'=>'a','àž£àž£'=>'a','àž²'=>'a','à¹
1710	'=>'a','àž³'=>'am','à¹àž²'=>'am',
1711	'àžŽ'=>'i','àžµ'=>'i','àž¶'=>'ue','àžµ'=>'ue','àžž'=>'u','àž¹'=>'u',
1712	'à¹'=>'e','à¹'=>'ae','à¹'=>'o','àž'=>'o',
1713	'àžµàž¢àž°'=>'ia','àžµàž¢'=>'ia','àž·àžàž°'=>'uea','àž·àž'=>'uea','àž±àž§àž°'=>'ua','àž±àž§'=>'ua',
1714	'à¹'=>'ai','à¹'=>'ai','àž±àž¢'=>'ai','àž²àž¢'=>'ai','àž²àž§'=>'ao',
1715	'àžžàž¢'=>'ui','àžàž¢'=>'oi','àž·àžàž¢'=>'ueai','àž§àž¢'=>'uai',
1716	'àžŽàž§'=>'io','à¹àž§'=>'eo','àžµàž¢àž§'=>'iao',
1717	'à¹'=>'','à¹'=>'','à¹'=>'','à¹'=>'','à¹'=>'',
1718	'à¹'=>'','à¹'=>'','à¹'=>'','àžº'=>'',
1719	'à¹'=>'2','à¹'=>'o','àž¯'=>'-','à¹'=>'-','à¹'=>'-',
1720	'à¹'=>'0','à¹'=>'1','à¹'=>'2','à¹'=>'3','à¹'=>'4',
1721	'à¹'=>'5','à¹'=>'6','à¹'=>'7','à¹'=>'8','à¹'=>'9',
1722
1723	// Korean
1724	'ã±'=>'k','ã
1725	'=>'kh','ã²'=>'kk','ã·'=>'t','ã
1726	'=>'th','ãž'=>'tt','ã
1727	'=>'p',
1728	'ã
1729	'=>'ph','ã
1730	'=>'pp','ã
1731	'=>'c','ã
1732	'=>'ch','ã
1733	'=>'cc','ã
1734
1735	'=>'s','ã
1736	'=>'ss',
1737	'ã
1738	'=>'h','ã
1739	'=>'ng','ãŽ'=>'n','ã¹'=>'l','ã
1740	'=>'m', 'ã
1741	'=>'a','ã
1742	'=>'e','ã
1743	'=>'o',
1744	'ã
1745	'=>'wu','ã
1746	¡'=>'u','ã
1747	£'=>'i','ã
1748	'=>'ay','ã
1749	'=>'ey','ã
1750	'=>'oy','ã
1751	'=>'wa','ã
1752	'=>'we',
1753	'ã
1754	'=>'wi','ã
1755	'=>'way','ã
1756	'=>'wey','ã
1757	¢'=>'uy','ã
1758	'=>'ya','ã
1759	'=>'ye','ã
1760	'=>'oy',
1761	'ã
1762	'=>'yu','ã
1763	'=>'yay','ã
1764	'=>'yey',
1765	);
1766
1767

Note: See TracBrowser for help on using the repository browser.

Download in other formats: