Merge branch 'search'

* search:
  search pass three
  search pass two
  search module, wip
This commit is contained in:
Matthew Scragg 2014-11-12 17:07:38 -06:00
commit fe5d1d52eb
16 changed files with 225 additions and 30 deletions

View file

@ -10,13 +10,28 @@ if [ -d "/vagrant" ]; then
fi
echo "Provisioning..."
if ! type "add-apt-repository" > /dev/null; then
sudo apt-get update
sudo apt-get install -y software-properties-common python-software-properties
fi
# Elastic Search
wget -qO - http://packages.elasticsearch.org/GPG-KEY-elasticsearch | sudo apt-key add -
echo 'deb http://packages.elasticsearch.org/elasticsearch/1.4/debian stable main' | sudo tee /etc/apt/sources.list.d/elastic.list
sudo add-apt-repository -y ppa:chris-lea/node.js
sudo apt-get update
sudo apt-get install -y python build-essential pkg-config git \
python-pip python-virtualenv python-dev zlib1g-dev \
libffi-dev libyaml-dev libssl-dev nodejs
libffi-dev libyaml-dev libssl-dev nodejs openjdk-7-jre-headless elasticsearch
# Create swap file because ES eats up RAM and 14.04 doesn't have swap by default
sudo fallocate -l 1G /swapfile
sudo chmod 600 /swapfile
sudo mkswap /swapfile
sudo swapon /swapfile
# lxml deps
# libxml2-dev libxslt1-dev
@ -63,4 +78,6 @@ sudo mv /tmp/realms-wiki /usr/local/bin
sudo chmod +x /usr/local/bin/realms-wiki
sudo service elasticsearch start
realms-wiki start

View file

@ -19,6 +19,7 @@ from werkzeug.routing import BaseConverter
from werkzeug.exceptions import HTTPException
from sqlalchemy.ext.declarative import declarative_base
from .modules.search.models import Search
from .lib.util import to_canonical, remove_ext, mkdir_safe, gravatar_url, to_dict
from .lib.hook import HookModelMeta, HookMixin
from .lib.util import is_su, in_virtualenv
@ -161,6 +162,7 @@ def create_app(config=None):
db.init_app(app)
cache.init_app(app)
assets.init_app(app)
search.init_app(app)
db.Model = declarative_base(metaclass=HookModelMeta, cls=HookMixin)
@ -199,6 +201,7 @@ login_manager = LoginManager()
db = SQLAlchemy()
cache = Cache()
assets = Assets()
search = Search()
assets.register('main.js',
'vendor/jquery/dist/jquery.js',

View file

@ -95,6 +95,11 @@ CACHE_REDIS_DB = '0'
#CACHE_TYPE = 'memcached'
CACHE_MEMCACHED_SERVERS = ['127.0.0.1:11211']
SEARCH_TYPE = 'simple' # simple is not good for large wikis
# SEARCH_TYPE = 'elasticsearch'
ELASTICSEARCH_URL = 'http://127.0.0.1:9200'
# Get ReCaptcha Keys for your domain here:
# https://www.google.com/recaptcha/admin#whyrecaptcha
RECAPTCHA_ENABLE = False
@ -146,4 +151,4 @@ if ENV != "DEV":
ASSETS_DEBUG = False
SQLALCHEMY_ECHO = False
MODULES = ['wiki', 'auth']
MODULES = ['wiki', 'auth', 'search']

View file

@ -7,12 +7,15 @@ def hook_func(name, fn):
@wraps(fn)
def wrapper(self, *args, **kwargs):
for hook, a, kw in self.__class__._pre_hooks.get(name) or []:
hook(*a, **kw)
hook(*args, **kwargs)
rv = fn(self, *args, **kwargs)
# Attach return value for post hooks
kwargs.update(dict(rv=rv))
for hook, a, kw in self.__class__._post_hooks.get(name) or []:
hook(*a, **kw)
hook(*args, **kwargs)
return rv
return wrapper

View file

@ -97,6 +97,25 @@ def to_canonical(s):
s = s.lower()
return s
def cname_to_filename(cname):
    """Build the page filename for a canonical name.

    :param cname: Canonical name
    :return: str -- ``cname`` with the ``.md`` extension appended
    """
    extension = ".md"
    return cname + extension
def filename_to_cname(filename):
    """Strip the extension from a filename to obtain its canonical name.

    .. note::
       The filename is assumed to already be in canonical format; only the
       final extension is removed.
    """
    root, _extension = os.path.splitext(filename)
    return root
def gravatar_url(email):
    """Return the protocol-relative Gravatar avatar URL for an email address.

    Gravatar keys avatars on the MD5 hex digest of the address after
    surrounding whitespace is trimmed and the address is lowercased, so the
    input is normalized before hashing. Without that, ``" Foo@Example.com "``
    and ``"foo@example.com"`` would produce different URLs.

    :param email: Email address, as text or bytes
    :return: str -- URL of the form ``//www.gravatar.com/avatar/<md5 hex>``
    """
    normalized = email.strip().lower()
    # hashlib only accepts bytes; encode text explicitly so non-ASCII
    # addresses hash instead of raising.
    if not isinstance(normalized, bytes):
        normalized = normalized.encode("utf-8")
    return "//www.gravatar.com/avatar/" + hashlib.md5(normalized).hexdigest()

View file

View file

@ -0,0 +1,36 @@
import click
from realms import create_app, search
from realms.modules.wiki.models import Wiki
from realms.lib.util import filename_to_cname
# Click command group for the search module; commands attach to it through
# the ``@cli.command()`` decorator. The body is intentionally empty.
@click.group(short_help="Search Module")
def cli():
    pass
@cli.command()
def rebuild_index():
    """ Rebuild search index
    """
    app = create_app()

    # Nothing to rebuild when the configured backend is the simple one.
    if app.config.get('SEARCH_TYPE') == 'simple':
        click.echo("Search type is simple, try using elasticsearch.")
        return

    with app.app_context():
        # Wiki: drop the existing 'wiki' index, then re-index every page.
        search.delete_index('wiki')
        wiki = Wiki(app.config['WIKI_PATH'])
        for entry in wiki.get_index():
            page = wiki.get_page(entry['name'])
            cname = filename_to_cname(page['name'])
            # TODO add email?
            document = {
                'name': cname,
                'content': page['data'],
                'message': page['info']['message'],
                'username': page['info']['author'],
                'updated_on': entry['mtime'],
                'created_on': entry['ctime'],
            }
            search.index_wiki(cname, document)

View file

@ -0,0 +1,23 @@
from realms.modules.wiki.models import Wiki
from realms import search
@Wiki.after('write_page')
def wiki_write_page(name, content, message=None, username=None, email=None, **kwargs):
    """Index a page in the search backend; registered as a 'write_page' after-hook.

    Extra keyword arguments are accepted and ignored.

    :return: Whatever ``search.index_wiki`` returns, or None when the active
        backend has no ``index_wiki``.
    """
    if hasattr(search, 'index_wiki'):
        document = {
            'name': name,
            'content': content,
            'message': message,
            'email': email,
            'username': username,
        }
        return search.index_wiki(name, document)

    # using simple search or none
    return None
@Wiki.after('rename_page')
def wiki_rename_page(*args, **kwargs):
    # Placeholder hook registered for 'rename_page'; it currently does nothing.
    # NOTE(review): presumably the search index should be updated here so a
    # renamed page is not left indexed under its old name -- TODO confirm.
    pass

View file

@ -0,0 +1,71 @@
from flask import g, current_app
from realms.lib.util import filename_to_cname
def simple(app):
    """Factory for the 'simple' search backend.

    :param app: Flask application (accepted but not used)
    :return: A new :class:`SimpleSearch` instance
    """
    backend = SimpleSearch()
    return backend
def elasticsearch(app):
    """Factory for the 'elasticsearch' search backend.

    Flask-Elastic is imported inside the function, so the import only
    happens when this factory is actually called.

    :param app: Flask application passed to ``Elastic``
    :return: An :class:`ElasticSearch` wrapping the Flask-Elastic client
    """
    from flask.ext.elastic import Elastic
    client = Elastic(app)
    return ElasticSearch(client)
class Search(object):
    """Flask extension facade that delegates to the configured search backend.

    ``init_app`` builds the backend selected by ``app.config['SEARCH_TYPE']``
    and stores it in ``app.extensions['search']``. Attribute access on this
    object that is not satisfied normally is forwarded to the backend of the
    current application.
    """

    # Names of the module-level factory functions selectable via SEARCH_TYPE.
    _BACKENDS = ('simple', 'elasticsearch')

    def __init__(self, app=None):
        """Optionally bind to ``app`` immediately.

        :param app: Flask application, or None to call ``init_app`` later
        """
        if app is not None:
            self.init_app(app)

    def init_app(self, app):
        """Create the configured backend and register it on ``app``.

        :param app: Flask application providing ``config`` and ``extensions``
        :raises KeyError: if ``SEARCH_TYPE`` is missing from the config
        :raises ValueError: if ``SEARCH_TYPE`` names an unknown backend
        """
        search_type = app.config['SEARCH_TYPE']
        # The factory is looked up by name in module globals, so the name must
        # be validated first: an arbitrary value could resolve to any global
        # (e.g. 'Search' would recurse into this constructor forever).
        if search_type not in self._BACKENDS:
            raise ValueError(
                "Unknown SEARCH_TYPE %r, expected one of: %s"
                % (search_type, ", ".join(self._BACKENDS)))
        factory = globals()[search_type]
        app.extensions['search'] = factory(app)

    def __getattr__(self, item):
        # Only invoked for attributes not found on this object; forwarded to
        # the backend registered on the current application.
        return getattr(current_app.extensions['search'], item)
class BaseSearch():
    """Common base class for search backends; it defines no behavior itself."""
class SimpleSearch(BaseSearch):
    """Search backend that keeps no index and scans page names per query."""

    def wiki(self, query):
        """Find wiki pages whose name contains one of the query's words.

        Page names are split on ``-`` and compared case-insensitively with
        the whitespace-separated words of ``query`` (and with the query as a
        whole). An empty or missing query yields no results without scanning
        the wiki, matching ``ElasticSearch.wiki``.

        :param query: Search string, may be None or empty
        :return: list of dicts with ``name`` and ``content`` keys
        """
        if not query:
            return []

        lowered = query.lower()
        terms = set(lowered.split())
        # Keep the whole query as a term too, so anything that matched as an
        # exact name segment still matches.
        terms.add(lowered)

        res = []
        for entry in g.current_wiki.get_index():
            name = filename_to_cname(entry['name'])
            words = set(word.lower() for word in name.split('-'))
            if terms & words:
                page = g.current_wiki.get_page(name)
                res.append(dict(name=name, content=page['data']))
        return res

    def users(self, query):
        # Not implemented yet; returns None.
        pass
class ElasticSearch(BaseSearch):
    """Search backend that delegates to an Elasticsearch client."""

    def __init__(self, elastic):
        """
        :param elastic: Client object exposing ``index``, ``search`` and
            ``indices.delete`` (the Flask-Elastic instance built by the
            ``elasticsearch`` factory)
        """
        self.elastic = elastic

    def index(self, index, doc_type, id_=None, body=None):
        """Index a document and return the client's response.

        :param index: Index name
        :param doc_type: Document type
        :param id_: Document id, or None
        :param body: Document body
        """
        return self.elastic.index(index=index, doc_type=doc_type, id=id_, body=body)

    def index_wiki(self, name, body):
        """Index a wiki page in the ``wiki`` index with doc type ``page``.

        The client's response is returned, as ``index`` and ``delete_index``
        do, so callers that return this method's result get something useful.

        :param name: Page name, used as the document id
        :param body: Document body
        """
        return self.index('wiki', 'page', id_=name, body=body)

    def delete_index(self, index):
        """Delete an index and return the client's response.

        HTTP 400 and 404 responses are ignored via the ``ignore`` argument.
        """
        return self.elastic.indices.delete(index=index, ignore=[400, 404])

    def wiki(self, query):
        """Search the ``wiki`` index by page name.

        :param query: Search string; a falsy value returns an empty list
        :return: list of the ``_source`` documents of the hits
        """
        if not query:
            return []

        res = self.elastic.search(index='wiki', body={"query": {
            "multi_match": {
                "query": query,
                "fields": ["name"]
            }}})

        return [hit["_source"] for hit in res['hits']['hits']]

    def users(self, query):
        # Not implemented yet; returns None.
        pass

View file

@ -0,0 +1,10 @@
from flask import abort, g, render_template, request, redirect, Blueprint, flash, url_for, current_app
from realms import search as search_engine
blueprint = Blueprint('search', __name__)
@blueprint.route('/_search')
def search():
    """Render the results page for the ``q`` query-string parameter."""
    query = request.args.get('q')
    return render_template('search/search.html', results=search_engine.wiki(query))

View file

@ -5,32 +5,11 @@ import gittle.utils
import yaml
from gittle import Gittle
from dulwich.repo import NotGitRepository
from realms.lib.util import to_canonical
from realms.lib.util import to_canonical, cname_to_filename, filename_to_cname
from realms import cache
from realms.lib.hook import HookMixin
def cname_to_filename(cname):
""" Convert canonical name to filename
:param cname: Canonical name
:return: str -- Filename
"""
return cname + ".md"
def filename_to_cname(filename):
"""Convert filename to canonical name.
.. note::
It's assumed filename is already canonical format
"""
return os.path.splitext(filename)[0]
class PageNotFound(Exception):
pass
@ -124,7 +103,6 @@ class Wiki(HookMixin):
return ret
def rename_page(self, old_name, new_name, username=None, email=None, message=None):
"""Rename page.

View file

@ -1,7 +1,7 @@
import json
from nose.tools import *
from flask import url_for
from realms.modules.wiki.models import cname_to_filename, filename_to_cname
from realms.lib.util import cname_to_filename, filename_to_cname
from realms.lib.test import BaseTest

View file

@ -51,6 +51,10 @@
border-radius: 0;
}
.navbar .form-control {
max-height: 33px;
}
.checkbox-cell {
width: 4em;
padding: 0.3em;

View file

@ -49,7 +49,15 @@
<li><a href="{{ url_for('wiki.history', name=name) }}"><i class="fa fa-clock-o"></i> History</a></li>
{% endif %}
</ul>
<ul class="nav navbar-nav navbar-right">
<li>
<form class="navbar-form" role="search" action="{{ url_for('search.search') }}">
<div class="form-group">
<input name="q" type="text" class="form-control" placeholder="Search">
</div>
</form>
</li>
{% if current_user.is_authenticated() %}
<li class="dropdown user-avatar">
<a href="#" class="dropdown-toggle" data-toggle="dropdown">

View file

@ -0,0 +1,17 @@
{% extends 'layout.html' %}
{% block body %}
{# Search results page: `results` is iterated as a sequence of mappings with
   'name' and 'content' keys. Each hit links to its wiki page and shows the
   first 100 characters of the content as a preview. #}
{% if results %}
<div class="list-group">
{% for r in results %}
<a href="{{ url_for('wiki.page', name=r['name']) }}" class="list-group-item">
<h4 class="list-group-item-heading">{{ r['name'] }}</h4>
<p class="list-group-item-text">
{{ r['content'][:100] }}
</p>
</a>
{% endfor %}
</div>
{% else %}
No Results Found
{% endif %}
{% endblock %}

View file

@ -26,6 +26,7 @@ setup(name='realms-wiki',
'Flask==0.10.1',
'Flask-Assets==0.10',
'Flask-Cache==0.13.1',
'Flask-Elastic==0.2',
'Flask-Login==0.2.11',
'Flask-SQLAlchemy==2.0',
'Flask-WTF==0.10.2',