Merge pull request #40 from ghtyrant/master
Add Whoosh as an alternate search backend
This commit is contained in:
commit
334a9be4d0
23
README.md
23
README.md
|
@ -214,8 +214,8 @@ _Don't forget to create your database._
|
||||||
## Search
|
## Search
|
||||||
|
|
||||||
Realms wiki comes with basic search capabilities but it is not recommended
|
Realms wiki comes with basic search capabilities but it is not recommended
|
||||||
for large wikis or if you require more advanced search capabilities. The only
|
for large wikis or if you require more advanced search capabilities. The
|
||||||
backend we currently support is ElasticSearch
|
backends we currently support are ElasticSearch and Whoosh.
|
||||||
|
|
||||||
### Elasticsearch Setup
|
### Elasticsearch Setup
|
||||||
|
|
||||||
|
@ -239,6 +239,25 @@ In your Realms Config, have the following options set:
|
||||||
"SEARCH_TYPE": "elasticsearch"
|
"SEARCH_TYPE": "elasticsearch"
|
||||||
"ELASTICSEARCH_URL": "http://127.0.0.1:9200"
|
"ELASTICSEARCH_URL": "http://127.0.0.1:9200"
|
||||||
|
|
||||||
|
### Whoosh Setup
|
||||||
|
|
||||||
|
Simply install Whoosh into your Python environment, e.g.:
|
||||||
|
|
||||||
|
pip install Whoosh
|
||||||
|
|
||||||
|
**Configuring Whoosh**
|
||||||
|
|
||||||
|
To use Whoosh, set the following in your Realms config:
|
||||||
|
|
||||||
|
"SEARCH_TYPE": "whoosh"
|
||||||
|
"WHOOSH_INDEX": "/path/to/your/whoosh/index"
|
||||||
|
"WHOOSH_LANGUAGE": "en"
|
||||||
|
|
||||||
|
WHOOSH_INDEX must be a path that is readable and writable by the user running Realms. It will be created automatically if it doesn't exist.
|
||||||
|
|
||||||
|
Whoosh is set up to use language optimization, so set WHOOSH_LANGUAGE to the language used in your wiki. For available languages, check whoosh.lang.languages.
|
||||||
|
If your language is not supported, Realms will fall back to a simple text analyzer.
|
||||||
|
|
||||||
## Running
|
## Running
|
||||||
|
|
||||||
realms-wiki start
|
realms-wiki start
|
||||||
|
|
|
@ -86,13 +86,13 @@ DB_URI = 'sqlite:////tmp/wiki.db'
|
||||||
CACHE_TYPE = 'simple'
|
CACHE_TYPE = 'simple'
|
||||||
|
|
||||||
# Redis
|
# Redis
|
||||||
#CACHE_TYPE = 'redis'
|
# CACHE_TYPE = 'redis'
|
||||||
CACHE_REDIS_HOST = '127.0.0.1'
|
CACHE_REDIS_HOST = '127.0.0.1'
|
||||||
CACHE_REDIS_PORT = 6379
|
CACHE_REDIS_PORT = 6379
|
||||||
CACHE_REDIS_DB = '0'
|
CACHE_REDIS_DB = '0'
|
||||||
|
|
||||||
# Memcached
|
# Memcached
|
||||||
#CACHE_TYPE = 'memcached'
|
# CACHE_TYPE = 'memcached'
|
||||||
CACHE_MEMCACHED_SERVERS = ['127.0.0.1:11211']
|
CACHE_MEMCACHED_SERVERS = ['127.0.0.1:11211']
|
||||||
|
|
||||||
SEARCH_TYPE = 'simple' # simple is not good for large wikis
|
SEARCH_TYPE = 'simple' # simple is not good for large wikis
|
||||||
|
@ -100,6 +100,10 @@ SEARCH_TYPE = 'simple' # simple is not good for large wikis
|
||||||
# SEARCH_TYPE = 'elasticsearch'
|
# SEARCH_TYPE = 'elasticsearch'
|
||||||
ELASTICSEARCH_URL = 'http://127.0.0.1:9200'
|
ELASTICSEARCH_URL = 'http://127.0.0.1:9200'
|
||||||
|
|
||||||
|
# SEARCH_TYPE = 'whoosh'
|
||||||
|
WHOOSH_INDEX = '/tmp/whoosh'
|
||||||
|
WHOOSH_LANGUAGE = 'en'
|
||||||
|
|
||||||
# Get ReCaptcha Keys for your domain here:
|
# Get ReCaptcha Keys for your domain here:
|
||||||
# https://www.google.com/recaptcha/admin#whyrecaptcha
|
# https://www.google.com/recaptcha/admin#whyrecaptcha
|
||||||
RECAPTCHA_ENABLE = False
|
RECAPTCHA_ENABLE = False
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
import sys
|
||||||
|
|
||||||
from flask import g, current_app
|
from flask import g, current_app
|
||||||
from realms.lib.util import filename_to_cname
|
from realms.lib.util import filename_to_cname
|
||||||
|
|
||||||
|
@ -6,6 +8,10 @@ def simple(app):
|
||||||
return SimpleSearch()
|
return SimpleSearch()
|
||||||
|
|
||||||
|
|
||||||
|
def whoosh(app):
    """Build the Whoosh search backend from the application's config.

    Reads ``WHOOSH_INDEX`` and ``WHOOSH_LANGUAGE`` from ``app.config`` and
    passes them to ``WhooshSearch``.
    """
    settings = app.config
    index_path = settings['WHOOSH_INDEX']
    language = settings['WHOOSH_LANGUAGE']
    return WhooshSearch(index_path, language)
|
||||||
|
|
||||||
|
|
||||||
def elasticsearch(app):
|
def elasticsearch(app):
|
||||||
from flask.ext.elastic import Elastic
|
from flask.ext.elastic import Elastic
|
||||||
return ElasticSearch(Elastic(app))
|
return ElasticSearch(Elastic(app))
|
||||||
|
@ -42,6 +48,82 @@ class SimpleSearch(BaseSearch):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class WhooshSearch(BaseSearch):
    """Search backend that stores wiki pages in an on-disk Whoosh index.

    Each page is indexed as one document with a unique, stored ``path``
    field and an analyzed ``body`` field.
    """

    def __init__(self, index_path, language):
        """Open the index at ``index_path``, creating it if necessary.

        :param index_path: directory holding the Whoosh index.
        :param language: language code used to pick a language-aware
            analyzer; a plain ``SimpleAnalyzer`` is used when Whoosh has no
            stemmer or no stop words for it.

        Exits the process via ``sys.exit`` if the directory cannot be
        created or an existing index cannot be opened.
        """
        # Imported here rather than at module level, so importing this
        # module does not require Whoosh to be installed.
        import os
        from whoosh import index as whoosh_index
        from whoosh import qparser
        from whoosh.analysis import SimpleAnalyzer, LanguageAnalyzer
        from whoosh.fields import Schema, TEXT, ID
        from whoosh.highlight import UppercaseFormatter
        from whoosh.lang import has_stemmer, has_stopwords

        if has_stemmer(language) and has_stopwords(language):
            analyzer = LanguageAnalyzer(language)
        else:
            # TODO Display a warning?
            analyzer = SimpleAnalyzer()

        self.schema = Schema(path=ID(unique=True, stored=True), body=TEXT(analyzer=analyzer))
        self.formatter = UppercaseFormatter()

        self.index_path = index_path

        if not os.path.exists(index_path):
            try:
                # makedirs (not mkdir) so a configured path whose parent
                # directories are missing is still created.
                os.makedirs(index_path)
            except OSError as e:
                sys.exit("Error creating Whoosh index: %s" % e)

        if whoosh_index.exists_in(index_path):
            try:
                self.search_index = whoosh_index.open_dir(index_path)
            except whoosh_index.IndexError as e:
                sys.exit("Error opening whoosh index: %s" % (e))
        else:
            self.search_index = whoosh_index.create_in(index_path, self.schema)

        # Queries are matched against both the page body and its path.
        self.query_parser = qparser.MultifieldParser(["body", "path"], schema=self.schema)
        self.query_parser.add_plugin(qparser.FuzzyTermPlugin())

    def index(self, index, doc_type, id_=None, body=None):
        """Add or replace the document identified by ``id_``.

        :param index: unused by this backend.
        :param doc_type: unused by this backend.
        :param id_: UTF-8 encoded page name, stored as the document path.
        :param body: mapping whose ``"content"`` entry is the UTF-8 encoded
            page text.
        """
        # Decode before taking the writer so bad input never holds the
        # index write lock.
        path = id_.decode("utf-8")
        content = body["content"].decode("utf-8")

        # As a context manager the writer commits on success and cancels on
        # error, so the write lock is always released.
        with self.search_index.writer() as writer:
            writer.update_document(path=path, body=content)

    def index_wiki(self, name, body):
        """Index a wiki page under ``name``."""
        self.index('wiki', 'page', id_=name, body=body)

    def delete_index(self, index):
        """Discard all indexed documents by recreating an empty index.

        :param index: unused by this backend.
        """
        from whoosh import index as whoosh_index

        self.search_index.close()
        self.search_index = whoosh_index.create_in(self.index_path, schema=self.schema)

    def wiki(self, query):
        """Search wiki pages for ``query``.

        :returns: a list of ``dict(name=..., content=...)``, one per hit,
            where ``content`` is the highlighted excerpt of the page text;
            an empty list when ``query`` is empty.
        """
        if not query:
            return []

        q = self.query_parser.parse(query)

        res = []
        # Hits are only usable while the searcher is open, so they are
        # consumed inside the ``with`` block.
        with self.search_index.searcher() as s:
            results = s.search(q)
            results.formatter = self.formatter

            for hit in results:
                name = hit["path"]
                # The body field is not stored in the index, so the page
                # text is fetched from the wiki to build the highlights.
                page_data = g.current_wiki.get_page(name)["data"].decode("utf-8")
                content = hit.highlights('body', text=page_data)

                res.append(dict(name=name, content=content))

        return res

    def users(self, query):
        """User search is not implemented for this backend; returns None."""
        pass
|
||||||
|
|
||||||
|
|
||||||
class ElasticSearch(BaseSearch):
|
class ElasticSearch(BaseSearch):
|
||||||
def __init__(self, elastic):
|
def __init__(self, elastic):
|
||||||
self.elastic = elastic
|
self.elastic = elastic
|
||||||
|
|
Loading…
Reference in a new issue