Add fallback for unsupported languages and search in both path and text
This commit is contained in:
parent
06a5cd5aef
commit
db1f5c84a7
|
@ -102,6 +102,7 @@ ELASTICSEARCH_URL = 'http://127.0.0.1:9200'
|
||||||
|
|
||||||
SEARCH_TYPE = 'whoosh'
|
SEARCH_TYPE = 'whoosh'
|
||||||
WHOOSH_INDEX = '/tmp/whoosh'
|
WHOOSH_INDEX = '/tmp/whoosh'
|
||||||
|
WHOOSH_LANGUAGE = 'en'
|
||||||
|
|
||||||
# Get ReCaptcha Keys for your domain here:
|
# Get ReCaptcha Keys for your domain here:
|
||||||
# https://www.google.com/recaptcha/admin#whyrecaptcha
|
# https://www.google.com/recaptcha/admin#whyrecaptcha
|
||||||
|
|
|
@ -7,7 +7,7 @@ def simple(app):
|
||||||
|
|
||||||
|
|
||||||
def whoosh(app):
|
def whoosh(app):
|
||||||
return WhooshSearch(app.config['WHOOSH_INDEX'])
|
return WhooshSearch(app.config['WHOOSH_INDEX'], app.config['WHOOSH_LANGUAGE'])
|
||||||
|
|
||||||
|
|
||||||
def elasticsearch(app):
|
def elasticsearch(app):
|
||||||
|
@ -47,38 +47,46 @@ class SimpleSearch(BaseSearch):
|
||||||
|
|
||||||
|
|
||||||
class WhooshSearch(BaseSearch):
|
class WhooshSearch(BaseSearch):
|
||||||
def __init__(self, index_path):
|
def __init__(self, index_path, language):
|
||||||
from whoosh import index as whoosh_index
|
from whoosh import index as whoosh_index
|
||||||
from whoosh.fields import Schema, TEXT, ID
|
from whoosh.fields import Schema, TEXT, ID
|
||||||
from whoosh import qparser
|
from whoosh import qparser
|
||||||
from whoosh.highlight import UppercaseFormatter
|
from whoosh.highlight import UppercaseFormatter
|
||||||
from whoosh.analysis import LanguageAnalyzer
|
from whoosh.analysis import SimpleAnalyzer, LanguageAnalyzer
|
||||||
|
from whoosh.lang import has_stemmer, has_stopwords
|
||||||
import os.path
|
import os.path
|
||||||
|
|
||||||
self.schema = Schema(path=ID(unique=True, stored=True), body=TEXT(analyzer=LanguageAnalyzer("de")))
|
if not has_stemmer(language) or not has_stopwords(language):
|
||||||
|
print("Language '%s' not supported by Whoosh, falling back to default analyzer." % (language))
|
||||||
|
analyzer = SimpleAnalyzer()
|
||||||
|
else:
|
||||||
|
analyzer = LanguageAnalyzer(language)
|
||||||
|
|
||||||
|
self.schema = Schema(path=ID(unique=True, stored=True), body=TEXT(analyzer=analyzer))
|
||||||
self.formatter = UppercaseFormatter()
|
self.formatter = UppercaseFormatter()
|
||||||
|
|
||||||
|
self.index_path = index_path
|
||||||
if os.path.exists(index_path):
|
if os.path.exists(index_path):
|
||||||
self.search_index = whoosh_index.open_dir(index_path)
|
self.search_index = whoosh_index.open_dir(index_path)
|
||||||
else:
|
else:
|
||||||
os.mkdir(index_path)
|
os.mkdir(index_path)
|
||||||
self.search_index = whoosh_index.create_in(index_path, self.schema)
|
self.search_index = whoosh_index.create_in(index_path, self.schema)
|
||||||
|
|
||||||
self.query_parser = qparser.QueryParser("body", schema=self.schema)
|
self.query_parser = qparser.MultifieldParser(["body", "path"], schema=self.schema)
|
||||||
self.query_parser.add_plugin(qparser.FuzzyTermPlugin())
|
self.query_parser.add_plugin(qparser.FuzzyTermPlugin())
|
||||||
|
|
||||||
def index(self, index, doc_type, id_=None, body=None):
|
def index(self, index, doc_type, id_=None, body=None):
|
||||||
writer = self.search_index.writer()
|
writer = self.search_index.writer()
|
||||||
writer.update_document(path=id_.decode("utf-8"), body=body["content"])
|
writer.update_document(path=id_.decode("utf-8"), body=body["content"].decode("utf-8"))
|
||||||
writer.commit()
|
writer.commit()
|
||||||
|
|
||||||
def index_wiki(self, name, body):
|
def index_wiki(self, name, body):
|
||||||
self.index('wiki', 'page', id_=name, body=body)
|
self.index('wiki', 'page', id_=name, body=body)
|
||||||
|
|
||||||
def delete_index(self, index):
|
def delete_index(self, index):
|
||||||
writer = self.search_index.writer()
|
from whoosh import index as whoosh_index
|
||||||
writer.delete_by_term('path', index)
|
self.search_index.close()
|
||||||
writer.commit()
|
self.search_index = whoosh_index.create_in(self.index_path, schema=self.schema)
|
||||||
|
|
||||||
def wiki(self, query):
|
def wiki(self, query):
|
||||||
if not query:
|
if not query:
|
||||||
|
|
Loading…
Reference in a new issue