Initial version of Whoosh-based search

This commit is contained in:
Fabian Schlager 2014-11-13 22:07:14 +01:00
parent e22bd045f9
commit 06a5cd5aef
2 changed files with 69 additions and 3 deletions

View file

@ -86,20 +86,23 @@ DB_URI = 'sqlite:////tmp/wiki.db'
CACHE_TYPE = 'simple' CACHE_TYPE = 'simple'
# Redis # Redis
#CACHE_TYPE = 'redis' # CACHE_TYPE = 'redis'
CACHE_REDIS_HOST = '127.0.0.1' CACHE_REDIS_HOST = '127.0.0.1'
CACHE_REDIS_PORT = 6379 CACHE_REDIS_PORT = 6379
CACHE_REDIS_DB = '0' CACHE_REDIS_DB = '0'
# Memcached # Memcached
#CACHE_TYPE = 'memcached' # CACHE_TYPE = 'memcached'
CACHE_MEMCACHED_SERVERS = ['127.0.0.1:11211'] CACHE_MEMCACHED_SERVERS = ['127.0.0.1:11211']
SEARCH_TYPE = 'simple' # simple is not good for large wikis # SEARCH_TYPE = 'simple' # simple is not good for large wikis
# SEARCH_TYPE = 'elasticsearch' # SEARCH_TYPE = 'elasticsearch'
ELASTICSEARCH_URL = 'http://127.0.0.1:9200' ELASTICSEARCH_URL = 'http://127.0.0.1:9200'
# Use the Whoosh search backend.
SEARCH_TYPE = 'whoosh'
# Directory of the Whoosh index; WhooshSearch creates it on start-up when it
# does not exist yet.
WHOOSH_INDEX = '/tmp/whoosh'
# Get ReCaptcha Keys for your domain here: # Get ReCaptcha Keys for your domain here:
# https://www.google.com/recaptcha/admin#whyrecaptcha # https://www.google.com/recaptcha/admin#whyrecaptcha
RECAPTCHA_ENABLE = False RECAPTCHA_ENABLE = False

View file

@ -6,6 +6,10 @@ def simple(app):
return SimpleSearch() return SimpleSearch()
def whoosh(app):
    """Build the Whoosh search backend from the app's ``WHOOSH_INDEX`` setting.

    :param app: Application object exposing a ``config`` mapping.
    :returns: A ``WhooshSearch`` bound to the configured index directory.
    :raises KeyError: If ``WHOOSH_INDEX`` is not configured.
    """
    index_dir = app.config['WHOOSH_INDEX']
    return WhooshSearch(index_dir)
def elasticsearch(app):
    """Build the Elasticsearch search backend bound to ``app``.

    The Flask extension is imported lazily so it is only needed when this
    backend is actually requested.
    """
    from flask.ext.elastic import Elastic
    client = Elastic(app)
    return ElasticSearch(client)
@ -42,6 +46,65 @@ class SimpleSearch(BaseSearch):
pass pass
class WhooshSearch(BaseSearch):
    """Wiki full-text search backed by an on-disk Whoosh index.

    Each wiki page is stored as one document: ``path`` is the unique, stored
    page name and ``body`` is the analysed (not stored) page text.
    """

    def __init__(self, index_path, language="de"):
        """Open the index in ``index_path``, creating it when none exists.

        :param index_path: Directory that holds (or will hold) the index.
        :param language: Language code handed to Whoosh's
            ``LanguageAnalyzer`` for the ``body`` field. Defaults to ``"de"``.
        """
        # Whoosh is imported lazily so it is only required when this backend
        # is actually instantiated.
        from whoosh import index as whoosh_index
        from whoosh.fields import Schema, TEXT, ID
        from whoosh import qparser
        from whoosh.highlight import UppercaseFormatter
        from whoosh.analysis import LanguageAnalyzer
        import os.path

        self.schema = Schema(
            path=ID(unique=True, stored=True),
            body=TEXT(analyzer=LanguageAnalyzer(language))
        )
        self.formatter = UppercaseFormatter()

        # Ask Whoosh whether an index is present instead of only checking
        # that the directory exists: an existing but empty directory would
        # otherwise make open_dir() fail.
        if whoosh_index.exists_in(index_path):
            self.search_index = whoosh_index.open_dir(index_path)
        else:
            # makedirs also creates missing parent directories.
            if not os.path.isdir(index_path):
                os.makedirs(index_path)
            self.search_index = whoosh_index.create_in(index_path, self.schema)

        self.query_parser = qparser.QueryParser("body", schema=self.schema)
        self.query_parser.add_plugin(qparser.FuzzyTermPlugin())

    def index(self, index, doc_type, id_=None, body=None):
        """Add or replace the document identified by ``id_``.

        :param index: Ignored by this backend.
        :param doc_type: Ignored by this backend.
        :param id_: Document key (page name); UTF-8 byte strings are decoded.
        :param body: Mapping whose ``"content"`` entry is the text to index.
        :raises ValueError: If ``id_`` is ``None``.
        """
        if id_ is None:
            raise ValueError("id_ is required to index a document")
        # Decode only byte strings; text ids are passed through unchanged.
        if isinstance(id_, bytes):
            id_ = id_.decode("utf-8")

        # As a context manager the writer commits on success and cancels on
        # error, so the index write lock is always released.
        with self.search_index.writer() as writer:
            writer.update_document(path=id_, body=body["content"])

    def index_wiki(self, name, body):
        """Index the wiki page ``name`` with the given ``body`` mapping."""
        self.index('wiki', 'page', id_=name, body=body)

    def delete_index(self, index):
        """Delete every document whose ``path`` equals ``index``.

        NOTE(review): this removes documents by path term rather than
        dropping a whole index -- confirm this matches what callers expect
        from ``delete_index``.
        """
        with self.search_index.writer() as writer:
            writer.delete_by_term('path', index)

    def wiki(self, query):
        """Search page bodies for ``query``.

        :param query: User query string; a falsy value yields no results.
        :returns: List of ``dict(name=..., content=...)``, where ``content``
            is the highlighted excerpt produced by the configured formatter.
        """
        if not query:
            return []

        # "~2" is the fuzzy-term suffix understood by FuzzyTermPlugin.
        q = self.query_parser.parse("%s~2" % (query,))

        res = []
        with self.search_index.searcher() as s:
            results = s.search(q)
            results.formatter = self.formatter

            for hit in results:
                name = hit["path"]
                # The body field is not stored in the index, so the page text
                # is loaded from the wiki to build the highlighted excerpt.
                # `g` is defined outside this block.
                page_data = g.current_wiki.get_page(name)["data"].decode("utf-8")
                content = hit.highlights('body', text=page_data)
                res.append(dict(name=name, content=content))

        return res

    def users(self, query):
        """User search is not implemented for this backend; returns ``None``."""
        pass
class ElasticSearch(BaseSearch): class ElasticSearch(BaseSearch):
def __init__(self, elastic): def __init__(self, elastic):
self.elastic = elastic self.elastic = elastic