Initial version of whoosh based search
This commit is contained in:
parent
e22bd045f9
commit
06a5cd5aef
|
@ -95,11 +95,14 @@ CACHE_REDIS_DB = '0'
|
|||
# CACHE_TYPE = 'memcached'
|
||||
CACHE_MEMCACHED_SERVERS = ['127.0.0.1:11211']
|
||||
|
||||
# NOTE(review): SEARCH_TYPE is assigned again further down ('whoosh'); when
# both assignments are present the later one takes effect. This line looks
# like the pre-change side of the diff -- confirm it is gone from the file.
SEARCH_TYPE = 'simple' # simple is not good for large wikis
|
||||
# SEARCH_TYPE = 'simple' # simple is not good for large wikis
|
||||
|
||||
# SEARCH_TYPE = 'elasticsearch'
|
||||
ELASTICSEARCH_URL = 'http://127.0.0.1:9200'
|
||||
|
||||
SEARCH_TYPE = 'whoosh'
|
||||
# Directory of the Whoosh index; the whoosh() factory passes
# app.config['WHOOSH_INDEX'] to WhooshSearch.
# NOTE(review): /tmp is usually world-writable and may be cleared on reboot;
# presumably a persistent, private path is wanted outside development -- confirm.
WHOOSH_INDEX = '/tmp/whoosh'
|
||||
|
||||
# Get ReCaptcha Keys for your domain here:
|
||||
# https://www.google.com/recaptcha/admin#whyrecaptcha
|
||||
RECAPTCHA_ENABLE = False
|
||||
|
|
|
@ -6,6 +6,10 @@ def simple(app):
|
|||
return SimpleSearch()
|
||||
|
||||
|
||||
def whoosh(app):
    """Create the Whoosh search backend for ``app``.

    Reads the index directory from ``app.config['WHOOSH_INDEX']`` and hands
    it to :class:`WhooshSearch`.
    """
    index_dir = app.config['WHOOSH_INDEX']
    return WhooshSearch(index_dir)
|
||||
|
||||
|
||||
def elasticsearch(app):
    """Create the Elasticsearch search backend for ``app``.

    The Flask extension is imported inside the function, so it is only
    loaded when this factory is actually called.
    """
    from flask.ext.elastic import Elastic

    client = Elastic(app)
    return ElasticSearch(client)
|
||||
|
@ -42,6 +46,65 @@ class SimpleSearch(BaseSearch):
|
|||
pass
|
||||
|
||||
|
||||
class WhooshSearch(BaseSearch):
    """Search backend that keeps wiki pages in a Whoosh index on disk.

    Documents have two fields: ``path`` (unique, stored) identifying the
    page, and ``body`` (analyzed with the German language analyzer, not
    stored) holding the page text.
    """

    def __init__(self, index_path):
        """Open the Whoosh index in ``index_path``, creating it when absent.

        :param index_path: directory that holds (or will hold) the index.
        """
        # Imports are local to the constructor, so whoosh is only imported
        # when a WhooshSearch is actually created.
        import os

        from whoosh import index as whoosh_index
        from whoosh import qparser
        from whoosh.analysis import LanguageAnalyzer
        from whoosh.fields import Schema, TEXT, ID
        from whoosh.highlight import UppercaseFormatter

        self.schema = Schema(
            path=ID(unique=True, stored=True),
            body=TEXT(analyzer=LanguageAnalyzer("de")),
        )
        self.formatter = UppercaseFormatter()

        # Ask whoosh whether an index is really present instead of only
        # checking that the directory exists: an existing but empty
        # directory would make open_dir() raise EmptyIndexError.
        if whoosh_index.exists_in(index_path):
            self.search_index = whoosh_index.open_dir(index_path)
        else:
            # makedirs also creates missing parent directories.
            if not os.path.isdir(index_path):
                os.makedirs(index_path)
            self.search_index = whoosh_index.create_in(index_path, self.schema)

        self.query_parser = qparser.QueryParser("body", schema=self.schema)
        # Enables the "term~N" fuzzy syntax used by wiki().
        self.query_parser.add_plugin(qparser.FuzzyTermPlugin())

    def index(self, index, doc_type, id_=None, body=None):
        """Add or replace the document identified by ``id_``.

        :param index: unused by this backend.
        :param doc_type: unused by this backend.
        :param id_: byte string naming the document; decoded as UTF-8 and
            stored in the unique ``path`` field.
        :param body: mapping whose ``"content"`` entry is the text to index.
        """
        # Prepare the field values before taking the index write lock so a
        # bad argument cannot leave the lock held.
        path = id_.decode("utf-8")
        content = body["content"]

        # As a context manager the writer commits on success and cancels
        # (releasing the write lock) if an exception is raised.
        with self.search_index.writer() as writer:
            writer.update_document(path=path, body=content)

    def index_wiki(self, name, body):
        """Index the wiki page ``name`` with the given ``body`` mapping."""
        self.index('wiki', 'page', id_=name, body=body)

    def delete_index(self, index):
        """Delete every document whose ``path`` field equals ``index``."""
        # Commit on success, cancel and release the write lock on error.
        with self.search_index.writer() as writer:
            writer.delete_by_term('path', index)

    def wiki(self, query):
        """Search page bodies for ``query``.

        :param query: user query string; a falsy value yields no results.
        :returns: list of dicts with ``name`` (the page path) and
            ``content`` (highlighted excerpts of the current page text).
        """
        if not query:
            return []

        # "~2" is the fuzzy-term suffix: allow an edit distance of up to 2.
        # NOTE(review): for multi-word queries the suffix presumably only
        # applies to the last word -- confirm this is intended.
        parsed = self.query_parser.parse("%s~2" % (query,))

        matches = []
        # Hits read their stored fields from the searcher, so they are
        # processed while the searcher is still open.
        with self.search_index.searcher() as searcher:
            results = searcher.search(parsed)
            results.formatter = self.formatter

            for hit in results:
                name = hit["path"]
                # "body" is not stored in the index, so the page text is
                # fetched from the wiki to build the highlighted excerpt.
                # NOTE(review): assumes get_page() always returns a mapping
                # for an indexed path -- verify for deleted pages.
                page_data = g.current_wiki.get_page(name)["data"].decode("utf-8")
                content = hit.highlights('body', text=page_data)
                matches.append(dict(name=name, content=content))

        return matches

    def users(self, query):
        """User search is not implemented for this backend; returns None."""
        pass
|
||||
|
||||
|
||||
class ElasticSearch(BaseSearch):
|
||||
def __init__(self, elastic):
|
||||
self.elastic = elastic
|
||||
|
|
Loading…
Reference in a new issue