diff --git a/base.php b/base.php
index 544046e..f91c0c5 100644
--- a/base.php
+++ b/base.php
@@ -256,6 +256,111 @@ function addURLParameter($urlParams, $paramName, $paramValue) {
     return $start . http_build_query($params);
 }
 
+/**
+ * Tells whether the normalized (diacritics-insensitive) search can be used.
+ *
+ * True only when the mbstring and intl extensions are loaded, the Normalizer
+ * class exists and $config ['cops_normalized_search'] is set to "1".
+ *
+ * @return bool
+ */
+function useNormAndUp () {
+    global $config;
+    return extension_loaded('mbstring') &&
+        extension_loaded('intl') &&
+        class_exists("Normalizer", false) &&
+        isset($config ['cops_normalized_search']) &&
+        $config ['cops_normalized_search'] == "1";
+}
+
+/**
+ * Transliterates a UTF-8 string to plain ASCII for accent-insensitive matching.
+ *
+ * German umlauts, ñ and ÿ are first expanded to two letters; the string is
+ * then decomposed (NFD) so the combining marks can be dropped, letters without
+ * a decomposition (ß, Æ, Ø, Ł, ...) are mapped by hand and whatever non-ASCII
+ * character remains is removed.
+ *
+ * @param string $s UTF-8 encoded text
+ * @return string the ASCII form, or the unchanged input when the result is
+ *                empty or a step failed
+ */
+function normalizeUtf8String( $s)
+{
+    $original_string = $s;
+
+    // maps German (umlauts) and other European characters onto two characters before just removing diacritics
+    $s = preg_replace( '@\x{00c4}@u' , "AE", $s ); // umlaut Ä => AE
+    $s = preg_replace( '@\x{00d6}@u' , "OE", $s ); // umlaut Ö => OE
+    $s = preg_replace( '@\x{00dc}@u' , "UE", $s ); // umlaut Ü => UE
+    $s = preg_replace( '@\x{00e4}@u' , "ae", $s ); // umlaut ä => ae
+    $s = preg_replace( '@\x{00f6}@u' , "oe", $s ); // umlaut ö => oe
+    $s = preg_replace( '@\x{00fc}@u' , "ue", $s ); // umlaut ü => ue
+    $s = preg_replace( '@\x{00f1}@u' , "ny", $s ); // ñ => ny
+    $s = preg_replace( '@\x{00ff}@u' , "yu", $s ); // ÿ => yu
+
+    // maps special characters (characters with diacritics) on their base-character followed by the diacritical mark
+    // example: Ú => U + ´, á => a + ´
+    $s = Normalizer::normalize( $s, Normalizer::FORM_D );
+
+    $s = preg_replace( '@\pM@u' , "", $s ); // removes diacritics
+
+    $s = preg_replace( '@\x{00df}@u' , "ss", $s ); // maps German ß onto ss
+    $s = preg_replace( '@\x{00c6}@u' , "AE", $s ); // Æ => AE
+    $s = preg_replace( '@\x{00e6}@u' , "ae", $s ); // æ => ae
+    $s = preg_replace( '@\x{0132}@u' , "IJ", $s ); // Ĳ => IJ
+    $s = preg_replace( '@\x{0133}@u' , "ij", $s ); // ĳ => ij
+    $s = preg_replace( '@\x{0152}@u' , "OE", $s ); // Œ => OE
+    $s = preg_replace( '@\x{0153}@u' , "oe", $s ); // œ => oe
+
+    $s = preg_replace( '@\x{00d0}@u' , "D", $s ); // Ð => D
+    $s = preg_replace( '@\x{0110}@u' , "D", $s ); // Đ => D
+    $s = preg_replace( '@\x{00f0}@u' , "d", $s ); // ð => d
+    $s = preg_replace( '@\x{0111}@u' , "d", $s ); // đ => d
+    $s = preg_replace( '@\x{0126}@u' , "H", $s ); // Ħ => H
+    $s = preg_replace( '@\x{0127}@u' , "h", $s ); // ħ => h
+    $s = preg_replace( '@\x{0131}@u' , "i", $s ); // ı => i
+    $s = preg_replace( '@\x{0138}@u' , "k", $s ); // ĸ => k
+    $s = preg_replace( '@\x{013f}@u' , "L", $s ); // Ŀ => L
+    $s = preg_replace( '@\x{0141}@u' , "L", $s ); // Ł => L
+    $s = preg_replace( '@\x{0140}@u' , "l", $s ); // ŀ => l
+    $s = preg_replace( '@\x{0142}@u' , "l", $s ); // ł => l
+    $s = preg_replace( '@\x{014a}@u' , "N", $s ); // Ŋ => N
+    $s = preg_replace( '@\x{0149}@u' , "n", $s ); // ŉ => n
+    $s = preg_replace( '@\x{014b}@u' , "n", $s ); // ŋ => n
+    $s = preg_replace( '@\x{00d8}@u' , "O", $s ); // Ø => O
+    $s = preg_replace( '@\x{00f8}@u' , "o", $s ); // ø => o
+    $s = preg_replace( '@\x{017f}@u' , "s", $s ); // ſ => s
+    $s = preg_replace( '@\x{00de}@u' , "T", $s ); // Þ => T
+    $s = preg_replace( '@\x{0166}@u' , "T", $s ); // Ŧ => T
+    $s = preg_replace( '@\x{00fe}@u' , "t", $s ); // þ => t
+    $s = preg_replace( '@\x{0167}@u' , "t", $s ); // ŧ => t
+
+    // remove all non-ASCII characters (U+0080 and above)
+    $s = preg_replace( '@[^\x00-\x7f]@u' , "", $s );
+
+    // Fall back to the input when a step failed (preg_replace yields NULL and
+    // Normalizer::normalize FALSE on error) or when nothing is left of it.
+    // Compared strictly because empty() would also throw away a result of "0".
+    if (!is_string($s) || $s === "")
+        return $original_string;
+    else
+        return $s;
+}
+
+/**
+ * Normalizes a string to ASCII and upper-cases it.
+ *
+ * Registered on the SQLite connection as the SQL function "normAndUp" and
+ * applied to the search term, so both sides of a comparison are folded alike.
+ *
+ * @param string $a UTF-8 encoded text
+ * @return string
+ */
+function normAndUp ($a) {
+    return mb_strtoupper (normalizeUtf8String($a), 'UTF-8');
+}
+
 class Link
 {
     const OPDS_THUMBNAIL_TYPE = "http://opds-spec.org/image/thumbnail";
@@ -792,29 +897,33 @@ class PageQueryResult extends Page
     private function searchByScope ($scope, $limit = FALSE) {
         $n = $this->n;
         $numberPerPage = NULL;
+        $queryNormedAndUp = $this->query;
+        if (useNormAndUp ()) {
+            $queryNormedAndUp = normAndUp ($this->query);
+        }
         if ($limit) {
             $n = 1;
             $numberPerPage = 5;
         }
         switch ($scope) {
             case self::SCOPE_BOOK :
-                $array = Book::getBooksByStartingLetter ('%' . $this->query, $n, NULL, $numberPerPage);
+                $array = Book::getBooksByStartingLetter ('%' . $queryNormedAndUp, $n, NULL, $numberPerPage);
                 break;
             case self::SCOPE_AUTHOR :
-                $array = Author::getAuthorsForSearch ('%' . $this->query);
+                $array = Author::getAuthorsForSearch ('%' . $queryNormedAndUp);
                 break;
             case self::SCOPE_SERIES :
-                $array = Serie::getAllSeriesByQuery ($this->query);
+                $array = Serie::getAllSeriesByQuery ($queryNormedAndUp);
                 break;
             case self::SCOPE_TAG :
-                $array = Tag::getAllTagsByQuery ($this->query, $n, NULL, $numberPerPage);
+                $array = Tag::getAllTagsByQuery ($queryNormedAndUp, $n, NULL, $numberPerPage);
                 break;
             case self::SCOPE_PUBLISHER :
-                $array = Publisher::getAllPublishersByQuery ($this->query);
+                $array = Publisher::getAllPublishersByQuery ($queryNormedAndUp);
                 break;
             default:
                 $array = Book::getBooksByQuery (
-                    array ("all" => "%" . $this->query . "%"), $n);
+                    array ("all" => "%" . $queryNormedAndUp . "%"), $n);
         }
 
         return $array;
@@ -1138,6 +1247,9 @@ abstract class Base
         try {
             if (is_readable (self::getDbFileName ($database))) {
                 self::$db = new PDO('sqlite:'. self::getDbFileName ($database));
+                if (useNormAndUp ()) {
+                    self::$db->sqliteCreateFunction ('normAndUp', 'normAndUp', 1);
+                }
             } else {
                 self::error ();
             }
@@ -1167,6 +1279,12 @@ abstract class Base
     public static function executeQuery($query, $columns, $filter, $params, $n, $database = NULL, $numberPerPage = NULL) {
         $totalResult = -1;
 
+        if (useNormAndUp ()) {
+            // swap the SQL upper() calls for the registered normAndUp(); word-bounded so that
+            // identifiers merely containing "upper" are left untouched
+            $query = preg_replace('/\bupper\b/', "normAndUp", $query);
+            $columns = preg_replace('/\bupper\b/', "normAndUp", $columns);
+        }
+
         if (is_null ($numberPerPage)) {
             $numberPerPage = getCurrentOption ("max_item_per_page");
         }
diff --git a/checkconfig.php b/checkconfig.php
index 387748a..2a43d22 100644
--- a/checkconfig.php
+++ b/checkconfig.php
@@ -85,6 +85,42 @@
 ?>
+
+

Check if mbstring is properly installed and loaded

+

+ +

+
+
+

Check if intl is properly installed and loaded

+

+ +

+
+
+

Check if Normalizer class is properly installed and loaded

+

+ +

+

Check if the rendering will be done on client side or server side

diff --git a/config_default.php b/config_default.php index ee3340f..d802669 100644 --- a/config_default.php +++ b/config_default.php @@ -258,3 +258,10 @@ * BEWARE : Do not touch this if you're not using password, not using PRS-TX or not using Aldiko. */ $config ['cops_fetch_protect'] = "0"; + + /* + * Normalized search: ignore diacritics and upper-case non-ASCII text (e.g. Cyrillic) correctly, at the cost of slower queries; only takes effect when the mbstring and intl extensions are loaded. + * 1 : Yes (enable) + * 0 : No + */ + $config ['cops_normalized_search'] = "0"; diff --git a/test/baseTest.php b/test/baseTest.php index ad25761..61b26da 100644 --- a/test/baseTest.php +++ b/test/baseTest.php @@ -125,4 +125,8 @@ class BaseTest extends PHPUnit_Framework_TestCase $this->assertTrue (Base::checkDatabaseAvailability ()); } + + public function testNormalizeUtf8String () { + $this->assertEquals ("AAAAAEACEEEEIIIIOEOOOOOEUUUUEYaaaaaeaceeeeiiiioedoooooeuuuueyyuny", normalizeUtf8String ("ÀÁÂÃÄÅÇÈÉÊËÌÍÎÏŒÒÓÔÕÖÙÚÛÜÝàáâãäåçèéêëìíîïœðòóôõöùúûüýÿñ")); + } } \ No newline at end of file diff --git a/test/pageTest.php b/test/pageTest.php index 5afa5d3..8ec0254 100644 --- a/test/pageTest.php +++ b/test/pageTest.php @@ -669,16 +669,21 @@ class PageTest extends PHPUnit_Framework_TestCase $query = "curee"; $qid = NULL; $n = "1"; + $config ['cops_normalized_search'] = "1"; + if (!useNormAndUp ()) { + $this->markTestIncomplete(); + } $currentPage = Page::getPage ($page, $qid, $query, $n); $currentPage->InitializeContent (); - $this->markTestIncomplete(); $this->assertEquals ("Search result for *curee*", $currentPage->title); $this->assertCount (1, $currentPage->entryArray); $this->assertEquals ("Search result for *curee* in books", $currentPage->entryArray [0]->title); $this->assertEquals ("1 book", $currentPage->entryArray [0]->content); $this->assertFalse ($currentPage->ContainsBook ()); + + $config ['cops_normalized_search'] = "0"; } public function testAuthorSearch_ByName ()