Add fallback for non-supported languages & search in path and text

This commit is contained in:
Fabian Schlager 2014-11-21 15:40:40 +01:00
parent 06a5cd5aef
commit db1f5c84a7
2 changed files with 18 additions and 9 deletions

View file

@ -102,6 +102,7 @@ ELASTICSEARCH_URL = 'http://127.0.0.1:9200'
SEARCH_TYPE = 'whoosh' SEARCH_TYPE = 'whoosh'
WHOOSH_INDEX = '/tmp/whoosh' WHOOSH_INDEX = '/tmp/whoosh'
WHOOSH_LANGUAGE = 'en'
# Get ReCaptcha Keys for your domain here: # Get ReCaptcha Keys for your domain here:
# https://www.google.com/recaptcha/admin#whyrecaptcha # https://www.google.com/recaptcha/admin#whyrecaptcha

View file

@ -7,7 +7,7 @@ def simple(app):
def whoosh(app): def whoosh(app):
return WhooshSearch(app.config['WHOOSH_INDEX']) return WhooshSearch(app.config['WHOOSH_INDEX'], app.config['WHOOSH_LANGUAGE'])
def elasticsearch(app): def elasticsearch(app):
@ -47,38 +47,46 @@ class SimpleSearch(BaseSearch):
class WhooshSearch(BaseSearch): class WhooshSearch(BaseSearch):
def __init__(self, index_path): def __init__(self, index_path, language):
from whoosh import index as whoosh_index from whoosh import index as whoosh_index
from whoosh.fields import Schema, TEXT, ID from whoosh.fields import Schema, TEXT, ID
from whoosh import qparser from whoosh import qparser
from whoosh.highlight import UppercaseFormatter from whoosh.highlight import UppercaseFormatter
from whoosh.analysis import LanguageAnalyzer from whoosh.analysis import SimpleAnalyzer, LanguageAnalyzer
from whoosh.lang import has_stemmer, has_stopwords
import os.path import os.path
self.schema = Schema(path=ID(unique=True, stored=True), body=TEXT(analyzer=LanguageAnalyzer("de"))) if not has_stemmer(language) or not has_stopwords(language):
print("Language '%s' not supported by Whoosh, falling back to default analyzer." % (language))
analyzer = SimpleAnalyzer()
else:
analyzer = LanguageAnalyzer(language)
self.schema = Schema(path=ID(unique=True, stored=True), body=TEXT(analyzer=analyzer))
self.formatter = UppercaseFormatter() self.formatter = UppercaseFormatter()
self.index_path = index_path
if os.path.exists(index_path): if os.path.exists(index_path):
self.search_index = whoosh_index.open_dir(index_path) self.search_index = whoosh_index.open_dir(index_path)
else: else:
os.mkdir(index_path) os.mkdir(index_path)
self.search_index = whoosh_index.create_in(index_path, self.schema) self.search_index = whoosh_index.create_in(index_path, self.schema)
self.query_parser = qparser.QueryParser("body", schema=self.schema) self.query_parser = qparser.MultifieldParser(["body", "path"], schema=self.schema)
self.query_parser.add_plugin(qparser.FuzzyTermPlugin()) self.query_parser.add_plugin(qparser.FuzzyTermPlugin())
def index(self, index, doc_type, id_=None, body=None): def index(self, index, doc_type, id_=None, body=None):
writer = self.search_index.writer() writer = self.search_index.writer()
writer.update_document(path=id_.decode("utf-8"), body=body["content"]) writer.update_document(path=id_.decode("utf-8"), body=body["content"].decode("utf-8"))
writer.commit() writer.commit()
def index_wiki(self, name, body): def index_wiki(self, name, body):
self.index('wiki', 'page', id_=name, body=body) self.index('wiki', 'page', id_=name, body=body)
def delete_index(self, index): def delete_index(self, index):
writer = self.search_index.writer() from whoosh import index as whoosh_index
writer.delete_by_term('path', index) self.search_index.close()
writer.commit() self.search_index = whoosh_index.create_in(self.index_path, schema=self.schema)
def wiki(self, query): def wiki(self, query):
if not query: if not query: