diff --git a/realms/config/__init__.py b/realms/config/__init__.py
index d686686..aae052a 100644
--- a/realms/config/__init__.py
+++ b/realms/config/__init__.py
@@ -86,20 +86,23 @@ DB_URI = 'sqlite:////tmp/wiki.db'
 CACHE_TYPE = 'simple'
 
 # Redis
-#CACHE_TYPE = 'redis'
+# CACHE_TYPE = 'redis'
 CACHE_REDIS_HOST = '127.0.0.1'
 CACHE_REDIS_PORT = 6379
 CACHE_REDIS_DB = '0'
 
 # Memcached
-#CACHE_TYPE = 'memcached'
+# CACHE_TYPE = 'memcached'
 CACHE_MEMCACHED_SERVERS = ['127.0.0.1:11211']
 
-SEARCH_TYPE = 'simple'  # simple is not good for large wikis
+# SEARCH_TYPE = 'simple'  # simple is not good for large wikis
 # SEARCH_TYPE = 'elasticsearch'
 
 ELASTICSEARCH_URL = 'http://127.0.0.1:9200'
 
+SEARCH_TYPE = 'whoosh'
+WHOOSH_INDEX = '/tmp/whoosh'
+
 # Get ReCaptcha Keys for your domain here:
 # https://www.google.com/recaptcha/admin#whyrecaptcha
 RECAPTCHA_ENABLE = False
diff --git a/realms/modules/search/models.py b/realms/modules/search/models.py
index a9f019c..87dbedc 100644
--- a/realms/modules/search/models.py
+++ b/realms/modules/search/models.py
@@ -6,6 +6,11 @@ def simple(app):
     return SimpleSearch()
 
 
+def whoosh(app):
+    """Build the Whoosh search backend from ``app.config['WHOOSH_INDEX']``."""
+    return WhooshSearch(app.config['WHOOSH_INDEX'])
+
+
 def elasticsearch(app):
     from flask.ext.elastic import Elastic
     return ElasticSearch(Elastic(app))
@@ -42,6 +47,110 @@ class SimpleSearch(BaseSearch):
         pass
 
 
+class WhooshSearch(BaseSearch):
+    """Search backend that keeps wiki pages in an on-disk Whoosh index."""
+
+    def __init__(self, index_path):
+        """Open the Whoosh index at ``index_path``, creating it if missing.
+
+        :param index_path: Directory that holds (or will hold) the index.
+        """
+        # Imports are local to the constructor, so whoosh is only imported
+        # when a WhooshSearch is actually instantiated.
+        import os
+
+        from whoosh import index as whoosh_index
+        from whoosh import qparser
+        from whoosh.analysis import LanguageAnalyzer
+        from whoosh.fields import Schema, TEXT, ID
+        from whoosh.highlight import UppercaseFormatter
+
+        # ``path`` is the unique, stored key of a page; ``body`` is indexed
+        # with the German ("de") language analyzer.
+        self.schema = Schema(path=ID(unique=True, stored=True),
+                             body=TEXT(analyzer=LanguageAnalyzer("de")))
+        self.formatter = UppercaseFormatter()
+
+        # Ask Whoosh whether an index is present instead of only checking
+        # for the directory: opening an existing but empty directory raises
+        # EmptyIndexError.
+        if whoosh_index.exists_in(index_path):
+            self.search_index = whoosh_index.open_dir(index_path)
+        else:
+            if not os.path.isdir(index_path):
+                # makedirs also creates missing parent directories.
+                os.makedirs(index_path)
+            self.search_index = whoosh_index.create_in(index_path, self.schema)
+
+        self.query_parser = qparser.QueryParser("body", schema=self.schema)
+        # Enables the ``term~N`` fuzzy syntax that wiki() appends to queries.
+        self.query_parser.add_plugin(qparser.FuzzyTermPlugin())
+
+    def index(self, index, doc_type, id_=None, body=None):
+        """Add or replace the indexed document for one page.
+
+        :param index: Unused by this backend.
+        :param doc_type: Unused by this backend.
+        :param id_: Page name used as the unique ``path`` key; UTF-8 byte
+            strings are decoded to text.
+        :param body: Mapping whose ``"content"`` entry is the text to index.
+        :raises ValueError: If ``id_`` is None.
+        """
+        if id_ is None:
+            raise ValueError("id_ is required to index a document")
+        # Decode byte strings only: calling decode() on text that is already
+        # unicode fails for non-ASCII names on Python 2.
+        if isinstance(id_, bytes):
+            id_ = id_.decode("utf-8")
+
+        # Used as a context manager, the writer commits on success and
+        # cancels (releasing the write lock) when an exception is raised.
+        with self.search_index.writer() as writer:
+            writer.update_document(path=id_, body=body["content"])
+
+    def index_wiki(self, name, body):
+        """Index the wiki page ``name`` with the given ``body`` mapping."""
+        self.index('wiki', 'page', id_=name, body=body)
+
+    def delete_index(self, index):
+        """Delete the documents whose ``path`` field equals ``index``."""
+        with self.search_index.writer() as writer:
+            writer.delete_by_term('path', index)
+
+    def wiki(self, query):
+        """Search page bodies and return highlighted matches.
+
+        :param query: User query string; ``~2`` is appended before parsing.
+        :returns: List of ``dict(name=..., content=...)``, one per hit;
+            an empty list when ``query`` is empty.
+        """
+        if not query:
+            return []
+
+        parsed_query = self.query_parser.parse("%s~2" % (query,))
+
+        res = []
+        # Hits are read while the searcher is still open, so the result
+        # list is built inside the ``with`` block.
+        with self.search_index.searcher() as searcher:
+            results = searcher.search(parsed_query)
+            results.formatter = self.formatter
+
+            for hit in results:
+                name = hit["path"]
+                # highlights() is given the page text explicitly, loaded
+                # from the current wiki.
+                page_data = g.current_wiki.get_page(name)["data"].decode("utf-8")
+                content = hit.highlights('body', text=page_data)
+                res.append(dict(name=name, content=content))
+
+        return res
+
+    def users(self, query):
+        """User search is not implemented for this backend; returns None."""
+        pass
+
+
 class ElasticSearch(BaseSearch):
     def __init__(self, elastic):
         self.elastic = elastic