Allow normalized search (ignoring diacritics). It should also fix the case-sensitivity problem with non-Latin scripts such as Cyrillic.
That last part is not tested. Disclaimer: slow! Re #49, #48
This commit is contained in:
parent
23fcc4d641
commit
78d42b48e2
101
base.php
101
base.php
|
@ -256,6 +256,83 @@ function addURLParameter($urlParams, $paramName, $paramValue) {
|
||||||
return $start . http_build_query($params);
|
return $start . http_build_query($params);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Tell whether the normalized search (diacritics ignored, upper-casing done by
 * mb_strtoupper) can be used.
 *
 * All of the following must hold: the mbstring and intl extensions are loaded,
 * the Normalizer class exists, and the global configuration option
 * 'cops_normalized_search' is set to "1".
 *
 * @return bool true when every prerequisite is met and the option is enabled
 */
function useNormAndUp () {
    global $config;

    // A configuration that does not define the option counts as "disabled";
    // checking first avoids reading an undefined index.
    if (!isset ($config ['cops_normalized_search'])) {
        return false;
    }

    return extension_loaded ('mbstring') &&
           extension_loaded ('intl') &&
           class_exists ("Normalizer", false) && // false: do not trigger autoloading
           $config ['cops_normalized_search'] == "1";
}
|
||||||
|
|
||||||
|
/**
 * Reduce a UTF-8 string to a plain ASCII approximation.
 *
 * Steps:
 *  1. German umlauts, ñ and ÿ are mapped onto two ASCII letters (Ä => AE, ñ => ny, ...).
 *  2. The string is decomposed (Unicode NFD) and every combining mark is removed,
 *     so that e.g. Ú => U and á => a.
 *  3. Letters that have no decomposition (ß, Æ, Ø, Ł, ...) are mapped by hand.
 *  4. Whatever is still outside the ASCII range is removed.
 *
 * Requires the intl extension (Normalizer class).
 *
 * @param string $s UTF-8 encoded text
 * @return string the normalized text, or the original value untouched when the
 *                normalization failed (e.g. invalid UTF-8) or left nothing
 *                (e.g. the text contains no Latin characters at all)
 */
function normalizeUtf8String( $s)
{
    // Replaced BEFORE the diacritics are stripped, otherwise they would end up as
    // their bare base letter. Keys are the UTF-8 byte sequences of the characters.
    static $digraphs = array (
        "\xC3\x84" => "AE", // U+00C4 umlaut Ä => AE
        "\xC3\x96" => "OE", // U+00D6 umlaut Ö => OE
        "\xC3\x9C" => "UE", // U+00DC umlaut Ü => UE
        "\xC3\xA4" => "ae", // U+00E4 umlaut ä => ae
        "\xC3\xB6" => "oe", // U+00F6 umlaut ö => oe
        "\xC3\xBC" => "ue", // U+00FC umlaut ü => ue
        "\xC3\xB1" => "ny", // U+00F1 ñ => ny
        "\xC3\xBF" => "yu", // U+00FF ÿ => yu
    );

    // Letters that survive the decomposition unchanged and need an explicit mapping.
    static $letters = array (
        "\xC3\x9F" => "ss", // U+00DF German ß => ss
        "\xC3\x86" => "AE", // U+00C6 Æ => AE
        "\xC3\xA6" => "ae", // U+00E6 æ => ae
        "\xC4\xB2" => "IJ", // U+0132 => IJ
        "\xC4\xB3" => "ij", // U+0133 => ij
        "\xC5\x92" => "OE", // U+0152 Œ => OE
        "\xC5\x93" => "oe", // U+0153 œ => oe
        "\xC3\x90" => "D",  // U+00D0 Ð => D
        "\xC4\x90" => "D",  // U+0110 Đ => D
        "\xC3\xB0" => "d",  // U+00F0 ð => d
        "\xC4\x91" => "d",  // U+0111 đ => d
        "\xC4\xA6" => "H",  // U+0126 Ħ => H
        "\xC4\xA7" => "h",  // U+0127 ħ => h
        "\xC4\xB1" => "i",  // U+0131 dotless i => i
        "\xC4\xB8" => "k",  // U+0138 => k
        "\xC4\xBF" => "L",  // U+013F => L
        "\xC5\x81" => "L",  // U+0141 Ł => L
        "\xC5\x80" => "l",  // U+0140 => l
        "\xC5\x82" => "l",  // U+0142 ł => l
        "\xC5\x8A" => "N",  // U+014A => N
        "\xC5\x89" => "n",  // U+0149 => n
        "\xC5\x8B" => "n",  // U+014B => n
        "\xC3\x98" => "O",  // U+00D8 Ø => O
        "\xC3\xB8" => "o",  // U+00F8 ø => o
        "\xC5\xBF" => "s",  // U+017F long s => s
        "\xC3\x9E" => "T",  // U+00DE Þ => T
        "\xC5\xA6" => "T",  // U+0166 Ŧ => T
        "\xC3\xBE" => "t",  // U+00FE þ => t
        "\xC5\xA7" => "t",  // U+0167 ŧ => t
    );

    $original_string = $s;
    $s = (string) $s;
    if ($s === "") {
        return $original_string;
    }

    // Compose first: text stored in decomposed form ("A" followed by a combining
    // diaeresis) must hit the same digraph mapping as the precomposed "Ä".
    $s = Normalizer::normalize( $s, Normalizer::FORM_C );
    if (!is_string ($s)) {
        // normalize() reports a failure (e.g. invalid UTF-8) with a non-string value
        return $original_string;
    }
    $s = strtr ($s, $digraphs);

    // maps special characters (characters with diacritics) on their base-character
    // followed by the diacritical mark, example: Ú => U + acute accent
    $s = Normalizer::normalize( $s, Normalizer::FORM_D );
    if (!is_string ($s)) {
        return $original_string;
    }

    $s = preg_replace( '@\pM@u' , "", $s ); // removes diacritics
    if (!is_string ($s)) {
        // preg_replace() returns NULL on a UTF-8 / PCRE error
        return $original_string;
    }

    $s = strtr ($s, $letters);

    // remove all non-ASCII characters (ASCII ends at 0x7F)
    $s = preg_replace( '@[^\x00-\x7f]@u' , "", $s );

    // Nothing usable left (regular expression error, or text without any Latin
    // character): hand back the input. Compared against "" explicitly because
    // empty() would also reject the valid result "0".
    if (!is_string ($s) || $s === "") {
        return $original_string;
    }

    return $s;
}
|
||||||
|
|
||||||
|
/**
 * Normalize a string with normalizeUtf8String() and convert the result to upper case.
 *
 * @param string $a UTF-8 encoded text
 * @return string the normalized text, upper-cased by mb_strtoupper() in UTF-8 mode
 */
function normAndUp ($a) {
    $normalized = normalizeUtf8String ($a);

    return mb_strtoupper ($normalized, 'UTF-8');
}
|
||||||
|
|
||||||
class Link
|
class Link
|
||||||
{
|
{
|
||||||
const OPDS_THUMBNAIL_TYPE = "http://opds-spec.org/image/thumbnail";
|
const OPDS_THUMBNAIL_TYPE = "http://opds-spec.org/image/thumbnail";
|
||||||
|
@ -792,29 +869,33 @@ class PageQueryResult extends Page
|
||||||
private function searchByScope ($scope, $limit = FALSE) {
|
private function searchByScope ($scope, $limit = FALSE) {
|
||||||
$n = $this->n;
|
$n = $this->n;
|
||||||
$numberPerPage = NULL;
|
$numberPerPage = NULL;
|
||||||
|
$queryNormedAndUp = $this->query;
|
||||||
|
if (useNormAndUp ()) {
|
||||||
|
$queryNormedAndUp = normAndUp ($this->query);
|
||||||
|
}
|
||||||
if ($limit) {
|
if ($limit) {
|
||||||
$n = 1;
|
$n = 1;
|
||||||
$numberPerPage = 5;
|
$numberPerPage = 5;
|
||||||
}
|
}
|
||||||
switch ($scope) {
|
switch ($scope) {
|
||||||
case self::SCOPE_BOOK :
|
case self::SCOPE_BOOK :
|
||||||
$array = Book::getBooksByStartingLetter ('%' . $this->query, $n, NULL, $numberPerPage);
|
$array = Book::getBooksByStartingLetter ('%' . $queryNormedAndUp, $n, NULL, $numberPerPage);
|
||||||
break;
|
break;
|
||||||
case self::SCOPE_AUTHOR :
|
case self::SCOPE_AUTHOR :
|
||||||
$array = Author::getAuthorsForSearch ('%' . $this->query);
|
$array = Author::getAuthorsForSearch ('%' . $queryNormedAndUp);
|
||||||
break;
|
break;
|
||||||
case self::SCOPE_SERIES :
|
case self::SCOPE_SERIES :
|
||||||
$array = Serie::getAllSeriesByQuery ($this->query);
|
$array = Serie::getAllSeriesByQuery ($queryNormedAndUp);
|
||||||
break;
|
break;
|
||||||
case self::SCOPE_TAG :
|
case self::SCOPE_TAG :
|
||||||
$array = Tag::getAllTagsByQuery ($this->query, $n, NULL, $numberPerPage);
|
$array = Tag::getAllTagsByQuery ($queryNormedAndUp, $n, NULL, $numberPerPage);
|
||||||
break;
|
break;
|
||||||
case self::SCOPE_PUBLISHER :
|
case self::SCOPE_PUBLISHER :
|
||||||
$array = Publisher::getAllPublishersByQuery ($this->query);
|
$array = Publisher::getAllPublishersByQuery ($queryNormedAndUp);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
$array = Book::getBooksByQuery (
|
$array = Book::getBooksByQuery (
|
||||||
array ("all" => "%" . $this->query . "%"), $n);
|
array ("all" => "%" . $queryNormedAndUp . "%"), $n);
|
||||||
}
|
}
|
||||||
|
|
||||||
return $array;
|
return $array;
|
||||||
|
@ -1138,6 +1219,9 @@ abstract class Base
|
||||||
try {
|
try {
|
||||||
if (is_readable (self::getDbFileName ($database))) {
|
if (is_readable (self::getDbFileName ($database))) {
|
||||||
self::$db = new PDO('sqlite:'. self::getDbFileName ($database));
|
self::$db = new PDO('sqlite:'. self::getDbFileName ($database));
|
||||||
|
if (useNormAndUp ()) {
|
||||||
|
self::$db->sqliteCreateFunction ('normAndUp', 'normAndUp', 1);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
self::error ();
|
self::error ();
|
||||||
}
|
}
|
||||||
|
@ -1167,6 +1251,11 @@ abstract class Base
|
||||||
public static function executeQuery($query, $columns, $filter, $params, $n, $database = NULL, $numberPerPage = NULL) {
|
public static function executeQuery($query, $columns, $filter, $params, $n, $database = NULL, $numberPerPage = NULL) {
|
||||||
$totalResult = -1;
|
$totalResult = -1;
|
||||||
|
|
||||||
|
if (useNormAndUp ()) {
|
||||||
|
$query = preg_replace("/upper/", "normAndUp", $query);
|
||||||
|
$columns = preg_replace("/upper/", "normAndUp", $columns);
|
||||||
|
}
|
||||||
|
|
||||||
if (is_null ($numberPerPage)) {
|
if (is_null ($numberPerPage)) {
|
||||||
$numberPerPage = getCurrentOption ("max_item_per_page");
|
$numberPerPage = getCurrentOption ("max_item_per_page");
|
||||||
}
|
}
|
||||||
|
|
|
@ -85,6 +85,42 @@
|
||||||
?>
|
?>
|
||||||
</h4>
|
</h4>
|
||||||
</article>
|
</article>
|
||||||
|
<article class="frontpage">
|
||||||
|
<h2>Check if mbstring is properly installed and loaded</h2>
|
||||||
|
<h4>
|
||||||
|
<?php
|
||||||
|
if (extension_loaded('mbstring')) {
|
||||||
|
echo "OK";
|
||||||
|
} else {
|
||||||
|
echo "Please install the php5-mbstring extension and make sure it's enabled";
|
||||||
|
}
|
||||||
|
?>
|
||||||
|
</h4>
|
||||||
|
</article>
|
||||||
|
<article class="frontpage">
|
||||||
|
<h2>Check if intl is properly installed and loaded</h2>
|
||||||
|
<h4>
|
||||||
|
<?php
|
||||||
|
if (extension_loaded('intl')) {
|
||||||
|
echo "OK";
|
||||||
|
} else {
|
||||||
|
echo "Please install the php5-intl extension and make sure it's enabled";
|
||||||
|
}
|
||||||
|
?>
|
||||||
|
</h4>
|
||||||
|
</article>
|
||||||
|
<article class="frontpage">
|
||||||
|
<h2>Check if Normalizer class is properly installed and loaded</h2>
|
||||||
|
<h4>
|
||||||
|
<?php
|
||||||
|
if (class_exists("Normalizer", $autoload = false)) {
|
||||||
|
echo "OK";
|
||||||
|
} else {
|
||||||
|
echo "Please make sure intl is enabled in your php.ini";
|
||||||
|
}
|
||||||
|
?>
|
||||||
|
</h4>
|
||||||
|
</article>
|
||||||
<article class="frontpage">
|
<article class="frontpage">
|
||||||
<h2>Check if the rendering will be done on client side or server side</h2>
|
<h2>Check if the rendering will be done on client side or server side</h2>
|
||||||
<h4>
|
<h4>
|
||||||
|
|
|
@ -258,3 +258,10 @@
|
||||||
* BEWARE : Do not touch this if you're not using password, not using PRS-TX or not using Aldiko.
|
* BEWARE : Do not touch this if you're not using password, not using PRS-TX or not using Aldiko.
|
||||||
*/
|
*/
|
||||||
$config ['cops_fetch_protect'] = "0";
|
$config ['cops_fetch_protect'] = "0";
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Improve the search: diacritics are ignored and upper-casing should also work for non-Latin scripts such as Cyrillic. This makes searching slower.
|
||||||
|
* 1 : Yes (enable)
|
||||||
|
* 0 : No
|
||||||
|
*/
|
||||||
|
$config ['cops_normalized_search'] = "0";
|
||||||
|
|
|
@ -125,4 +125,8 @@ class BaseTest extends PHPUnit_Framework_TestCase
|
||||||
|
|
||||||
$this->assertTrue (Base::checkDatabaseAvailability ());
|
$this->assertTrue (Base::checkDatabaseAvailability ());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * normalizeUtf8String() must turn accented Latin letters into their ASCII
 * counterparts (umlauts, ÿ and ñ become two letters).
 */
public function testNormalizeUtf8String () {
    $accented = "ÀÁÂÃÄÅÇÈÉÊËÌÍÎÏŒÒÓÔÕÖÙÚÛÜÝàáâãäåçèéêëìíîïœðòóôõöùúûüýÿñ";
    $expected = "AAAAAEACEEEEIIIIOEOOOOOEUUUUEYaaaaaeaceeeeiiiioedoooooeuuuueyyuny";

    $this->assertEquals ($expected, normalizeUtf8String ($accented));
}
|
||||||
}
|
}
|
|
@ -669,16 +669,21 @@ class PageTest extends PHPUnit_Framework_TestCase
|
||||||
$query = "curee";
|
$query = "curee";
|
||||||
$qid = NULL;
|
$qid = NULL;
|
||||||
$n = "1";
|
$n = "1";
|
||||||
|
$config ['cops_normalized_search'] = "1";
|
||||||
|
if (!useNormAndUp ()) {
|
||||||
|
$this->markTestIncomplete();
|
||||||
|
}
|
||||||
|
|
||||||
$currentPage = Page::getPage ($page, $qid, $query, $n);
|
$currentPage = Page::getPage ($page, $qid, $query, $n);
|
||||||
$currentPage->InitializeContent ();
|
$currentPage->InitializeContent ();
|
||||||
|
|
||||||
$this->markTestIncomplete();
|
|
||||||
$this->assertEquals ("Search result for *curee*", $currentPage->title);
|
$this->assertEquals ("Search result for *curee*", $currentPage->title);
|
||||||
$this->assertCount (1, $currentPage->entryArray);
|
$this->assertCount (1, $currentPage->entryArray);
|
||||||
$this->assertEquals ("Search result for *curee* in books", $currentPage->entryArray [0]->title);
|
$this->assertEquals ("Search result for *curee* in books", $currentPage->entryArray [0]->title);
|
||||||
$this->assertEquals ("1 book", $currentPage->entryArray [0]->content);
|
$this->assertEquals ("1 book", $currentPage->entryArray [0]->content);
|
||||||
$this->assertFalse ($currentPage->ContainsBook ());
|
$this->assertFalse ($currentPage->ContainsBook ());
|
||||||
|
|
||||||
|
$config ['cops_normalized_search'] = "0";
|
||||||
}
|
}
|
||||||
|
|
||||||
public function testAuthorSearch_ByName ()
|
public function testAuthorSearch_ByName ()
|
||||||
|
|
Loading…
Reference in a new issue