= "\x80" && $c < "\xc0") { // Legal tail bytes are nice. $sequence .= $c; } else { if ($len == 0) { // Premature end of string! Drop a replacement character into // output to represent the invalid UTF-8 sequence. $result .= $unknown; break 2; } else { // Illegal tail byte; abandon the sequence. $result .= $unknown; // Back up and reprocess this byte; it may itself be a legal // ASCII or UTF-8 sequence head. --$i; ++$len; continue 2; } } } while (--$remaining); $n = ord($head); if ($n <= 0xdf) { $ord = ($n - 192) * 64 + (ord($sequence[1]) - 128); } elseif ($n <= 0xef) { $ord = ($n - 224) * 4096 + (ord($sequence[1]) - 128) * 64 + (ord($sequence[2]) - 128); } elseif ($n <= 0xf7) { $ord = ($n - 240) * 262144 + (ord($sequence[1]) - 128) * 4096 + (ord($sequence[2]) - 128) * 64 + (ord($sequence[3]) - 128); } elseif ($n <= 0xfb) { $ord = ($n - 248) * 16777216 + (ord($sequence[1]) - 128) * 262144 + (ord($sequence[2]) - 128) * 4096 + (ord($sequence[3]) - 128) * 64 + (ord($sequence[4]) - 128); } elseif ($n <= 0xfd) { $ord = ($n - 252) * 1073741824 + (ord($sequence[1]) - 128) * 16777216 + (ord($sequence[2]) - 128) * 262144 + (ord($sequence[3]) - 128) * 4096 + (ord($sequence[4]) - 128) * 64 + (ord($sequence[5]) - 128); } $result .= _transliteration_replace($ord, $unknown, $source_langcode); $head = ''; } elseif ($c < "\x80") { // ASCII byte. $result .= $c; $head = ''; } elseif ($c < "\xc0") { // Illegal tail bytes. if ($head == '') { $result .= $unknown; } } else { // Miscellaneous freaks. $result .= $unknown; $head = ''; } } } return $result; } /** * Replaces a Unicode character using the transliteration database. * * @param $ord * An ordinal Unicode character code. * @param $unknown * Replacement string for characters that do not have a suitable ASCII * equivalent. * @param $langcode * Optional ISO 639 language code that denotes the language of the input and * is used to apply language-specific variations. Defaults to the current * display language. 
* @return
*   ASCII replacement character: the mapped string for $ord, or $unknown when
*   no mapping is available.
*/
function _transliteration_replace($ord, $unknown = '?', $langcode = NULL) {
  // Cache of already-loaded tables, kept across calls:
  // $map[bank][language key] => array(low byte => replacement string).
  static $map = array();

  // TODO (GL): resolve a default language when $langcode is NULL. The disabled
  // code read it from the global $language object ($language->language).
  // Until then a NULL language code is normalised to the empty-string key, so
  // NULL is never used as an array offset; unless a data file defines a
  // variant under that key, only the base mappings apply.
  $lang_key = isset($langcode) ? (string) $langcode : '';

  // Each data file covers one bank of 256 code points: the high bits select
  // the file, the low 8 bits select the entry inside it.
  $bank = $ord >> 8;

  if (!isset($map[$bank][$lang_key])) {
    // Pre-initialise the variables the data file is expected to define, so a
    // missing or incomplete file yields an empty (but cacheable) table instead
    // of an undefined-variable warning and a NULL cache entry.
    $base = array();
    $variant = array();

    $file = './resources/transliteration-data/' . sprintf('x%02x', $bank) . '.php';
    if (file_exists($file)) {
      // The include runs in this function's scope and may set $base/$variant.
      include $file;
    }

    if ($lang_key !== 'en' && isset($variant[$lang_key])) {
      // Merge in language specific mappings; with the array union operator
      // the variant entries take precedence over the base entries.
      $map[$bank][$lang_key] = $variant[$lang_key] + $base;
    }
    else {
      $map[$bank][$lang_key] = $base;
    }
  }

  $index = $ord & 255;

  return isset($map[$bank][$lang_key][$index]) ? $map[$bank][$lang_key][$index] : $unknown;
}