search pass three

This commit is contained in:
Matthew Scragg 2014-11-12 17:06:56 -06:00
parent 13d4be8937
commit 08a4c71c10
9 changed files with 132 additions and 44 deletions

View file

@ -11,14 +11,14 @@ import httplib
import traceback import traceback
import click import click
from flask import Flask, request, render_template, url_for, redirect, g from flask import Flask, request, render_template, url_for, redirect, g
from flask.ext.elastic import Elastic
from flask.ext.cache import Cache from flask.ext.cache import Cache
from flask.ext.login import LoginManager, current_user from flask.ext.login import LoginManager, current_user
from flask.ext.sqlalchemy import SQLAlchemy, declarative_base, Model, _QueryProperty from flask.ext.sqlalchemy import SQLAlchemy
from flask.ext.assets import Environment, Bundle from flask.ext.assets import Environment, Bundle
from werkzeug.routing import BaseConverter from werkzeug.routing import BaseConverter
from werkzeug.exceptions import HTTPException from werkzeug.exceptions import HTTPException
from .modules.search.models import Search
from .lib.util import to_canonical, remove_ext, mkdir_safe, gravatar_url, to_dict from .lib.util import to_canonical, remove_ext, mkdir_safe, gravatar_url, to_dict
from .lib.hook import HookModelMeta from .lib.hook import HookModelMeta
from .lib.util import is_su, in_virtualenv from .lib.util import is_su, in_virtualenv
@ -161,7 +161,7 @@ def create_app(config=None):
db.init_app(app) db.init_app(app)
cache.init_app(app) cache.init_app(app)
assets.init_app(app) assets.init_app(app)
elastic.init_app(app) search.init_app(app)
for status_code in httplib.responses: for status_code in httplib.responses:
if status_code >= 400: if status_code >= 400:
@ -199,7 +199,7 @@ login_manager = LoginManager()
db = SQLAlchemy() db = SQLAlchemy()
cache = Cache() cache = Cache()
assets = Assets() assets = Assets()
elastic = Elastic() search = Search()
assets.register('main.js', assets.register('main.js',
'vendor/jquery/dist/jquery.js', 'vendor/jquery/dist/jquery.js',

View file

@ -95,6 +95,9 @@ CACHE_REDIS_DB = '0'
#CACHE_TYPE = 'memcached' #CACHE_TYPE = 'memcached'
CACHE_MEMCACHED_SERVERS = ['127.0.0.1:11211'] CACHE_MEMCACHED_SERVERS = ['127.0.0.1:11211']
SEARCH_TYPE = 'simple' # simple is not good for large wikis
# SEARCH_TYPE = 'elasticsearch'
ELASTICSEARCH_URL = 'http://127.0.0.1:9200' ELASTICSEARCH_URL = 'http://127.0.0.1:9200'
# Get ReCaptcha Keys for your domain here: # Get ReCaptcha Keys for your domain here:

View file

@ -97,6 +97,25 @@ def to_canonical(s):
s = s.lower() s = s.lower()
return s return s
def cname_to_filename(cname):
    """Build the markdown filename that belongs to a canonical name.

    :param cname: Canonical name
    :return: str -- ``cname`` with the ``.md`` extension appended
    """
    extension = ".md"
    return cname + extension
def filename_to_cname(filename):
    """Derive the canonical name from a filename by dropping its extension.

    .. note::

        The filename is assumed to be in canonical format already; nothing
        besides removing the last extension is done here.
    """
    stem, _extension = os.path.splitext(filename)
    return stem
def gravatar_url(email): def gravatar_url(email):
return "//www.gravatar.com/avatar/" + hashlib.md5(email).hexdigest() return "//www.gravatar.com/avatar/" + hashlib.md5(email).hexdigest()

View file

@ -0,0 +1,36 @@
import click
from realms import create_app, search
from realms.modules.wiki.models import Wiki
from realms.lib.util import filename_to_cname
@click.group(short_help="Search Module")
def cli():
    # Pure container: the group has no behaviour of its own, commands are
    # attached to it through the ``@cli.command()`` decorator.
    # NOTE(review): intentionally no docstring -- click appears to use a
    # function's docstring as its help text, so adding one could change the
    # ``--help`` output; confirm before documenting this with a docstring.
    pass
@cli.command()
def rebuild_index():
    """ Rebuild search index
    """
    app = create_app()

    search_type = app.config.get('SEARCH_TYPE')
    if search_type == 'simple':
        # Simple mode: print a hint and leave without touching any index.
        click.echo("Search type is simple, try using elasticsearch.")
        return

    with app.app_context():
        # Wiki: drop the existing 'wiki' index, then feed every page from
        # the wiki's own index back into the search backend.
        search.delete_index('wiki')
        repo = Wiki(app.config['WIKI_PATH'])
        for item in repo.get_index():
            page = repo.get_page(item['name'])
            cname = filename_to_cname(page['name'])
            # TODO add email?
            document = {
                'name': cname,
                'content': page['data'],
                'message': page['info']['message'],
                'username': page['info']['author'],
                'updated_on': item['mtime'],
                'created_on': item['ctime'],
            }
            search.index_wiki(cname, document)

View file

@ -1,15 +1,21 @@
from realms.modules.wiki.models import Wiki from realms.modules.wiki.models import Wiki
from realms.modules.search.models import Search from realms import search
@Wiki.after('write_page') @Wiki.after('write_page')
def wiki_write_page(name, content, message=None, username=None, email=None, **kwargs): def wiki_write_page(name, content, message=None, username=None, email=None, **kwargs):
if not hasattr(search, 'index_wiki'):
# using simple search or none
return
body = dict(name=name, body = dict(name=name,
content=content, content=content,
message=message, message=message,
email=email, email=email,
username=username) username=username)
return Search.index('wiki', 'page', id_=name, body=body) return search.index_wiki(name, body)
@Wiki.after('rename_page') @Wiki.after('rename_page')

View file

@ -1,26 +1,71 @@
from realms import elastic from flask import g, current_app
from realms.lib.model import HookMixin from realms.lib.util import filename_to_cname
class Search(HookMixin): def simple(app):
return SimpleSearch()
@classmethod
def index(cls, index, doc_type, id_=None, body=None):
return elastic.index(index=index, doc_type=doc_type, id=id_, body=body)
@classmethod def elasticsearch(app):
def wiki(cls, query): from flask.ext.elastic import Elastic
return ElasticSearch(Elastic(app))
class Search(object):
    """Application-bound facade over the configured search backend.

    :meth:`init_app` builds the backend selected by the ``SEARCH_TYPE``
    setting and stores it in ``app.extensions['search']``.  Any attribute
    that is not defined on this class is looked up on the backend stored
    for ``current_app``.
    """

    def __init__(self, app=None):
        """Optionally bind to ``app`` immediately.

        :param app: application object, or ``None`` to call
            :meth:`init_app` later
        """
        if app is not None:
            self.init_app(app)

    def init_app(self, app):
        """Create the configured backend and register it on ``app``.

        :param app: application exposing ``config`` and ``extensions``
        :raises KeyError: if ``SEARCH_TYPE`` does not name a known backend
        """
        # Configurations written before SEARCH_TYPE was introduced do not
        # contain the key; fall back to the simple backend instead of
        # failing during application start-up.
        search_type = app.config.get('SEARCH_TYPE', 'simple')

        # Resolve the factory explicitly so that only real backends can be
        # selected, never an arbitrary module-level name.
        if search_type == 'simple':
            factory = simple
        elif search_type == 'elasticsearch':
            factory = elasticsearch
        else:
            raise KeyError(
                "Unknown SEARCH_TYPE %r (expected 'simple' or "
                "'elasticsearch')" % (search_type,))

        app.extensions['search'] = factory(app)

    def __getattr__(self, item):
        """Forward unknown attributes to the current app's search backend."""
        return getattr(current_app.extensions['search'], item)
class BaseSearch(object):
    """Common base class of the search backends.

    Carries no behaviour itself.  It derives from ``object`` explicitly so
    that it is a new-style class under Python 2 as well, like ``Search``.
    """
    pass
class SimpleSearch(BaseSearch):
    """Search backend that scans the page index of ``g.current_wiki``.

    No separate search index is kept: every query walks the wiki's own
    page list and compares the query with the page names.
    """

    def wiki(self, query):
        """Return the pages whose name contains ``query`` as a whole part.

        A page matches when the query equals one of the ``-`` separated
        parts of its canonical name.  The comparison ignores case and
        surrounding whitespace of the query.

        :param query: search term; ``None`` or an empty/blank string
            yields no results
        :return: list of dicts with the keys ``name`` and ``content``
        """
        # A missing or blank query must not match anything; without this
        # guard an empty string would match names with empty parts
        # (e.g. "a--b").
        term = (query or '').strip().lower()
        if not term:
            return []

        current = g.current_wiki
        res = []
        for entry in current.get_index():
            name = filename_to_cname(entry['name'])
            # Lower-case both sides so that "Home" finds the page "home".
            if term in name.lower().split('-'):
                page = current.get_page(name)
                res.append(dict(name=name, content=page['data']))
        return res

    def users(self, query):
        """Placeholder for user search; not implemented, returns ``None``."""
        pass
class ElasticSearch(BaseSearch):
def __init__(self, elastic):
    """Remember the client object that all other methods delegate to.

    :param elastic: client stored as ``self.elastic``; the methods of this
        class call ``index``, ``search`` and ``indices.delete`` on it
    """
    self.elastic = elastic
def index(self, index, doc_type, id_=None, body=None):
    """Pass one document on to the ``index`` call of the wrapped client.

    :param index: name of the index
    :param doc_type: document type
    :param id_: document id, handed to the client as ``id``
    :param body: the document itself
    :return: the client's return value, unchanged
    """
    request = dict(index=index, doc_type=doc_type, id=id_, body=body)
    return self.elastic.index(**request)
def index_wiki(self, name, body):
    """Index a single wiki page document.

    The document goes into the ``wiki`` index with doc type ``page`` and
    the page name as its id.

    :param name: page name, used as the document id
    :param body: document to store
    :return: whatever :meth:`index` returns for the request
    """
    # Hand the result back instead of dropping it: the wiki ``write_page``
    # hook returns this method's value, which would otherwise always be
    # ``None``.
    return self.index('wiki', 'page', id_=name, body=body)
def delete_index(self, index):
    """Delete an index through the wrapped client.

    :param index: name of the index to delete
    :return: the return value of the client's ``indices.delete`` call
    """
    # NOTE(review): ``ignore`` presumably lists HTTP status codes the
    # client should not treat as errors, so that deleting a missing index
    # does not raise -- confirm against the client library documentation.
    tolerated = [400, 404]
    return self.elastic.indices.delete(index=index, ignore=tolerated)
def wiki(self, query):
if not query: if not query:
return [] return []
res = elastic.search(index='wiki', body={"query": { res = self.elastic.search(index='wiki', body={"query": {
"multi_match": { "multi_match": {
"query": query, "query": query,
"fields": ["name^3", "content"] "fields": ["name"]
}}}) }}})
return [hit["_source"] for hit in res['hits']['hits']] return [hit["_source"] for hit in res['hits']['hits']]
@classmethod def users(self, query):
def users(cls, query): pass
pass

View file

@ -1,10 +1,10 @@
from flask import abort, g, render_template, request, redirect, Blueprint, flash, url_for, current_app from flask import abort, g, render_template, request, redirect, Blueprint, flash, url_for, current_app
from .models import Search from realms import search as search_engine
blueprint = Blueprint('search', __name__) blueprint = Blueprint('search', __name__)
@blueprint.route('/_search') @blueprint.route('/_search')
def search(): def search():
results = Search.wiki(request.args.get('q')) results = search_engine.wiki(request.args.get('q'))
return render_template('search/search.html', results=results) return render_template('search/search.html', results=results)

View file

@ -5,32 +5,11 @@ import gittle.utils
import yaml import yaml
from gittle import Gittle from gittle import Gittle
from dulwich.repo import NotGitRepository from dulwich.repo import NotGitRepository
from realms.lib.util import to_canonical from realms.lib.util import to_canonical, cname_to_filename, filename_to_cname
from realms import cache from realms import cache
from realms.lib.hook import HookMixin from realms.lib.hook import HookMixin
def cname_to_filename(cname):
""" Convert canonical name to filename
:param cname: Canonical name
:return: str -- Filename
"""
return cname + ".md"
def filename_to_cname(filename):
"""Convert filename to canonical name.
.. note::
It's assumed filename is already canonical format
"""
return os.path.splitext(filename)[0]
class PageNotFound(Exception): class PageNotFound(Exception):
pass pass

View file

@ -1,7 +1,7 @@
import json import json
from nose.tools import * from nose.tools import *
from flask import url_for from flask import url_for
from realms.modules.wiki.models import cname_to_filename, filename_to_cname from realms.lib.util import cname_to_filename, filename_to_cname
from realms.lib.test import BaseTest from realms.lib.test import BaseTest