2014-11-21 18:11:27 +02:00
|
|
|
import sys
|
|
|
|
|
2014-11-13 01:06:56 +02:00
|
|
|
from flask import g, current_app
|
|
|
|
from realms.lib.util import filename_to_cname
|
2014-11-10 18:54:46 +02:00
|
|
|
|
|
|
|
|
2014-11-13 01:06:56 +02:00
|
|
|
def simple(app):
    """Create the backend selected by ``SEARCH_TYPE = 'simple'``.

    :param app: application being configured; it is not consulted here and
        is accepted only so every backend factory shares one signature.
    :return: a new :class:`SimpleSearch` instance.
    """
    backend = SimpleSearch()
    return backend
|
2014-11-10 18:54:46 +02:00
|
|
|
|
|
|
|
|
2014-11-13 23:07:14 +02:00
|
|
|
def whoosh(app):
    """Create the backend selected by ``SEARCH_TYPE = 'whoosh'``.

    :param app: application whose ``config`` provides ``WHOOSH_INDEX`` and
        ``WHOOSH_LANGUAGE``.
    :return: a new :class:`WhooshSearch` built from those two settings.
    :raises KeyError: if either setting is missing from ``app.config``.
    """
    settings = app.config
    index_path = settings['WHOOSH_INDEX']
    language = settings['WHOOSH_LANGUAGE']
    return WhooshSearch(index_path, language)
|
2014-11-13 23:07:14 +02:00
|
|
|
|
|
|
|
|
2014-11-13 01:06:56 +02:00
|
|
|
def elasticsearch(app):
    """Create the backend selected by ``SEARCH_TYPE = 'elasticsearch'``.

    The Flask-Elastic extension is imported lazily so that it is only
    required when this backend is actually configured.

    :param app: application to bind the ``Elastic`` extension to; its
        ``config`` may provide ``ELASTICSEARCH_FIELDS``.
    :return: a new :class:`ElasticSearch` wrapping ``Elastic(app)``.
    """
    try:
        # Import the extension module directly: the ``flask.ext`` redirect
        # importer no longer exists in current Flask releases.
        from flask_elastic import Elastic
    except ImportError:
        # Legacy import path used originally; kept as a fallback for
        # installations where only the redirect importer resolves it.
        from flask.ext.elastic import Elastic

    # ``.get`` yields None when the setting is absent.
    fields = app.config.get('ELASTICSEARCH_FIELDS')
    return ElasticSearch(Elastic(app), fields)
|
2014-11-13 01:06:56 +02:00
|
|
|
|
|
|
|
|
|
|
|
class Search(object):
    """Facade that forwards attribute access to the configured backend.

    ``init_app`` stores a backend object in ``app.extensions['search']``;
    any attribute not found on this object is then looked up on the backend
    registered for ``current_app``.
    """

    def __init__(self, app=None):
        """Optionally bind to ``app`` right away.

        :param app: application to initialise, or ``None`` to defer the
            call to :meth:`init_app`.
        """
        if app is None:
            return
        self.init_app(app)

    def init_app(self, app):
        """Instantiate the backend named by ``SEARCH_TYPE`` and register it.

        The setting is resolved as a name in this module's global
        namespace and the resulting callable is invoked with ``app``.

        :param app: application providing ``config`` and ``extensions``.
        :raises KeyError: if ``SEARCH_TYPE`` is not configured or does not
            name a module-level object.
        """
        backend_name = app.config['SEARCH_TYPE']
        factory = globals()[backend_name]
        app.extensions['search'] = factory(app)

    def __getattr__(self, item):
        """Delegate unknown attributes to the current app's search backend."""
        backend = current_app.extensions['search']
        return getattr(backend, item)
|
|
|
|
|
|
|
|
|
|
|
|
class BaseSearch(object):
    """Common base class for the search backends defined in this module.

    It adds no behaviour of its own. It derives from ``object`` explicitly
    so that subclasses are new-style classes on Python 2 as well, matching
    :class:`Search`.
    """
    pass
|
|
|
|
|
|
|
|
|
|
|
|
class SimpleSearch(BaseSearch):
    """Index-free backend that matches query words against page names."""

    def wiki(self, query):
        """Return the pages whose name shares a word with ``query``.

        ``query`` is split on whitespace and each page name (as produced by
        ``filename_to_cname``) is split on ``-``; a page matches when the
        two word sets intersect.

        :param query: search string; an empty or ``None`` query yields no
            results, as in the other backends.
        :return: list of ``dict(name=..., content=...)`` where ``content``
            is the ``'data'`` entry of the page.
        """
        if not query:
            return []

        # Build the term set once instead of on every index entry.
        terms = set(query.split())
        if not terms:
            # Whitespace-only query: nothing can match.
            return []

        res = []
        for entry in g.current_wiki.get_index():
            name = filename_to_cname(entry['name'])
            if terms.intersection(name.split('-')):
                page = g.current_wiki.get_page(name)
                res.append(dict(name=name, content=page['data']))
        return res

    def users(self, query):
        """User search is not implemented for this backend; returns None."""
        pass
|
|
|
|
|
|
|
|
|
2014-11-13 23:07:14 +02:00
|
|
|
class WhooshSearch(BaseSearch):
    """Search backend backed by a Whoosh index stored in a directory."""

    def __init__(self, index_path, language):
        """Open the index at ``index_path``, creating it when absent.

        Whoosh is imported here rather than at module level so that it is
        only required when this backend is used.

        :param index_path: directory holding the index; created with
            ``os.mkdir`` if it does not exist.
        :param language: language passed to ``LanguageAnalyzer``; when
            Whoosh has no stemmer or no stop-word list for it, a
            ``SimpleAnalyzer`` is used instead.

        The process exits via ``sys.exit`` if the directory cannot be
        created or an existing index cannot be opened.
        """
        from whoosh import index as whoosh_index
        from whoosh.fields import Schema, TEXT, ID
        from whoosh import qparser
        from whoosh.highlight import UppercaseFormatter
        from whoosh.analysis import SimpleAnalyzer, LanguageAnalyzer
        from whoosh.lang import has_stemmer, has_stopwords
        import os

        if not has_stemmer(language) or not has_stopwords(language):
            # TODO Display a warning?
            analyzer = SimpleAnalyzer()
        else:
            analyzer = LanguageAnalyzer(language)

        self.schema = Schema(path=ID(unique=True, stored=True), body=TEXT(analyzer=analyzer))
        self.formatter = UppercaseFormatter()

        self.index_path = index_path

        if not os.path.exists(index_path):
            try:
                os.mkdir(index_path)
            except OSError as e:
                sys.exit("Error creating Whoosh index: %s" % e)

        if whoosh_index.exists_in(index_path):
            try:
                self.search_index = whoosh_index.open_dir(index_path)
            except whoosh_index.IndexError as e:
                sys.exit("Error opening whoosh index: %s" % (e))
        else:
            self.search_index = whoosh_index.create_in(index_path, self.schema)

        self.query_parser = qparser.MultifieldParser(["body", "path"], schema=self.schema)
        self.query_parser.add_plugin(qparser.FuzzyTermPlugin())

    @staticmethod
    def _to_text(value):
        """Decode UTF-8 byte strings; return any other value unchanged.

        Calling ``.decode`` unconditionally fails on values that are
        already text, so only byte strings are decoded.
        """
        if isinstance(value, bytes):
            return value.decode("utf-8")
        return value

    def index(self, index, doc_type, id_=None, body=None):
        """Add or replace the document stored under ``id_``.

        :param index: unused; accepted for signature parity with
            :class:`ElasticSearch`.
        :param doc_type: unused; accepted for the same reason.
        :param id_: document path (text or UTF-8 bytes); required.
        :param body: mapping whose ``"content"`` entry (text or UTF-8
            bytes) is indexed as the document body.
        :raises ValueError: if ``id_`` is ``None``.
        """
        if id_ is None:
            raise ValueError("id_ is required to index a document")

        # Prepare the values before acquiring the writer so that a bad
        # argument cannot leave the index locked.
        path = self._to_text(id_)
        content = self._to_text(body["content"])

        # As a context manager the writer commits on success and cancels
        # on error, so it is always released.
        with self.search_index.writer() as writer:
            writer.update_document(path=path, body=content)

    def index_wiki(self, name, body):
        """Index a wiki page under ``name``; see :meth:`index`."""
        self.index('wiki', 'page', id_=name, body=body)

    def delete_index(self, index):
        """Discard all indexed documents by recreating the index.

        :param index: unused; the index at ``self.index_path`` is always
            the one that is closed and recreated.
        """
        from whoosh import index as whoosh_index
        self.search_index.close()
        self.search_index = whoosh_index.create_in(self.index_path, schema=self.schema)

    def wiki(self, query):
        """Search indexed pages and return highlighted excerpts.

        :param query: query string handed to the multi-field parser
            (``body`` and ``path``); an empty query yields no results.
        :return: list of ``dict(name=..., content=...)`` where ``name`` is
            the stored path of the hit and ``content`` is the value of
            ``hit.highlights`` for the ``body`` field.
        """
        if not query:
            return []

        q = self.query_parser.parse(query)

        # Hits are only usable while the searcher is open, so the result
        # list is built inside the ``with`` block.
        with self.search_index.searcher() as s:
            results = s.search(q)
            results.formatter = self.formatter

            res = []
            for hit in results:
                name = hit["path"]
                # The page text is fetched from the wiki and passed in
                # explicitly for highlighting.
                page_data = self._to_text(g.current_wiki.get_page(name)["data"])
                content = hit.highlights('body', text=page_data)

                res.append(dict(name=name, content=content))

        return res

    def users(self, query):
        """User search is not implemented for this backend; returns None."""
        pass
|
|
|
|
|
|
|
|
|
2014-11-13 01:06:56 +02:00
|
|
|
class ElasticSearch(BaseSearch):
    """Search backend that delegates to an Elasticsearch client object."""

    def __init__(self, elastic, fields):
        """Remember the client and the fields used for wiki queries.

        :param elastic: client object exposing ``index``, ``search`` and
            ``indices.delete``.
        :param fields: value sent as ``fields`` in the ``multi_match``
            query built by :meth:`wiki`.
        """
        self.elastic = elastic
        self.fields = fields

    def index(self, index, doc_type, id_=None, body=None):
        """Forward a document to the client's ``index`` call.

        :return: whatever the client returns.
        """
        client = self.elastic
        return client.index(index=index, doc_type=doc_type, id=id_, body=body)

    def index_wiki(self, name, body):
        """Index a wiki page in index ``wiki`` with doc type ``page``."""
        self.index(index='wiki', doc_type='page', id_=name, body=body)

    def delete_index(self, index):
        """Delete ``index`` through the client, passing ``ignore=[400, 404]``.

        :return: whatever the client returns.
        """
        indices = self.elastic.indices
        return indices.delete(index=index, ignore=[400, 404])

    def wiki(self, query):
        """Run a ``multi_match`` query against the ``wiki`` index.

        :param query: search string; an empty query yields no results and
            the client is not called.
        :return: list with the ``_source`` entry of every returned hit.
        """
        if query:
            multi_match = {"query": query, "fields": self.fields}
            request = {"query": {"multi_match": multi_match}}
            response = self.elastic.search(index='wiki', body=request)

            sources = []
            for hit in response['hits']['hits']:
                sources.append(hit["_source"])
            return sources
        return []

    def users(self, query):
        """User search is not implemented for this backend; returns None."""
        pass
|