Merge branch 'master' of github.com:glls/cops
This commit is contained in:
commit
7f79e02b05
71
base.php
71
base.php
|
@ -340,77 +340,12 @@ function useNormAndUp () {
|
|||
|
||||
/**
 * Reduce a UTF-8 string to a plain-ASCII approximation.
 *
 * Steps, in this order:
 *  1. selected letters are expanded to two ASCII letters (Ä => AE, ñ => ny, ...);
 *     this must happen before decomposition, otherwise Ä would collapse to A;
 *  2. the string is decomposed (Unicode NFD) so that accented letters become
 *     base letter + combining mark (example: Ú => U + ´);
 *  3. combining marks are dropped, letters without a canonical decomposition
 *     (ß, Æ, Ł, Ø, ...) are mapped by hand, and every character that is still
 *     outside the ASCII range is removed.
 *
 * Uses Normalizer (intl extension) and PCRE in UTF-8 mode.
 *
 * @param string $s UTF-8 encoded text
 * @return string the ASCII approximation; the unchanged input is returned when a
 *                step fails (e.g. invalid UTF-8) or when nothing would be left
 */
function normalizeUtf8String( $s)
{
    $original_string = $s;

    // German umlauts and a few other European letters that are written as two
    // ASCII letters instead of merely losing their diacritic.
    $twoLetterMap = array(
        '@\x{00c4}@u' => "AE", // Ä
        '@\x{00d6}@u' => "OE", // Ö
        '@\x{00dc}@u' => "UE", // Ü
        '@\x{00e4}@u' => "ae", // ä
        '@\x{00f6}@u' => "oe", // ö
        '@\x{00fc}@u' => "ue", // ü
        '@\x{00f1}@u' => "ny", // ñ
        '@\x{00ff}@u' => "yu", // ÿ
    );

    // Applied after decomposition. preg_replace() processes the entries one after
    // the other, so the order of this table is significant.
    $afterDecompositionMap = array(
        '@\pM@u'      => "",   // combining marks, i.e. the detached diacritics

        '@\x{00df}@u' => "ss", // ß
        '@\x{00c6}@u' => "AE", // Æ
        '@\x{00e6}@u' => "ae", // æ
        '@\x{0132}@u' => "IJ", // Ĳ
        '@\x{0133}@u' => "ij", // ĳ
        '@\x{0152}@u' => "OE", // Œ
        '@\x{0153}@u' => "oe", // œ

        '@\x{00d0}@u' => "D",  // Ð
        '@\x{0110}@u' => "D",  // Đ
        '@\x{00f0}@u' => "d",  // ð
        '@\x{0111}@u' => "d",  // đ
        '@\x{0126}@u' => "H",  // Ħ
        '@\x{0127}@u' => "h",  // ħ
        '@\x{0131}@u' => "i",  // ı
        '@\x{0138}@u' => "k",  // ĸ
        '@\x{013f}@u' => "L",  // Ŀ
        '@\x{0141}@u' => "L",  // Ł
        '@\x{0140}@u' => "l",  // ŀ
        '@\x{0142}@u' => "l",  // ł
        '@\x{014a}@u' => "N",  // Ŋ
        '@\x{0149}@u' => "n",  // ŉ
        '@\x{014b}@u' => "n",  // ŋ
        '@\x{00d8}@u' => "O",  // Ø
        '@\x{00f8}@u' => "o",  // ø
        '@\x{017f}@u' => "s",  // ſ
        '@\x{00de}@u' => "T",  // Þ
        '@\x{0166}@u' => "T",  // Ŧ
        '@\x{00fe}@u' => "t",  // þ
        '@\x{0167}@u' => "t",  // ŧ

        // Remove everything that is still not ASCII. The upper bound is \x7f:
        // U+0080 is a C1 control character, not an ASCII character.
        '@[^\x00-\x7f]@u' => "",
    );

    // preg_replace() returns null on error (e.g. $s is not valid UTF-8).
    $s = preg_replace( array_keys( $twoLetterMap), array_values( $twoLetterMap), $s );
    if (!is_string( $s))
        return $original_string;

    // Normalizer::normalize() returns false on failure.
    $s = Normalizer::normalize( $s, Normalizer::FORM_D );
    if (!is_string( $s))
        return $original_string;

    $s = preg_replace( array_keys( $afterDecompositionMap), array_values( $afterDecompositionMap), $s );

    // Fall back to the input when a step failed or every character was stripped.
    // Compared against '' explicitly: empty() would also reject the valid result "0".
    if (!is_string( $s) || $s === '')
        return $original_string;

    return $s;
}
|
||||
|
||||
/**
 * Transliterate a string by delegating to _transliteration_process().
 *
 * @param mixed $a value handed unchanged to _transliteration_process()
 * @return mixed whatever _transliteration_process() returns for $a
 */
function transliterate ($a) {
    // include_once: the file is pulled in on the first call only.
    // NOTE(review): _transliteration_process() is presumably defined in
    // transliteration.php - confirm.
    include_once 'transliteration.php';
    return _transliteration_process($a);
}
|
||||
|
||||
/**
 * Transliterate a string and convert the result to upper case.
 *
 * @param mixed $a value handed to transliterate()
 * @return string the result of transliterate($a), upper-cased by mb_strtoupper() using UTF-8
 */
function normAndUp ($a) {
    return mb_strtoupper (transliterate($a), 'UTF-8');
}
|
||||
|
||||
class Link
|
||||
|
|
Loading…
Reference in a new issue