Initial version of whoosh based search
This commit is contained in:
		
							parent
							
								
									e22bd045f9
								
							
						
					
					
						commit
						06a5cd5aef
					
				
					 2 changed files with 69 additions and 3 deletions
				
			
		|  | @ -86,20 +86,23 @@ DB_URI = 'sqlite:////tmp/wiki.db' | |||
| CACHE_TYPE = 'simple' | ||||
| 
 | ||||
| # Redis | ||||
| #CACHE_TYPE = 'redis' | ||||
| # CACHE_TYPE = 'redis' | ||||
| CACHE_REDIS_HOST = '127.0.0.1' | ||||
| CACHE_REDIS_PORT = 6379 | ||||
| CACHE_REDIS_DB = '0' | ||||
| 
 | ||||
| # Memcached | ||||
| #CACHE_TYPE = 'memcached' | ||||
| # CACHE_TYPE = 'memcached' | ||||
| CACHE_MEMCACHED_SERVERS = ['127.0.0.1:11211'] | ||||
| 
 | ||||
| SEARCH_TYPE = 'simple'  # simple is not good for large wikis | ||||
| # SEARCH_TYPE = 'simple'  # simple is not good for large wikis | ||||
| 
 | ||||
| # SEARCH_TYPE = 'elasticsearch' | ||||
| ELASTICSEARCH_URL = 'http://127.0.0.1:9200' | ||||
| 
 | ||||
| SEARCH_TYPE = 'whoosh' | ||||
| WHOOSH_INDEX = '/tmp/whoosh' | ||||
| 
 | ||||
| # Get ReCaptcha Keys for your domain here: | ||||
| # https://www.google.com/recaptcha/admin#whyrecaptcha | ||||
| RECAPTCHA_ENABLE = False | ||||
|  |  | |||
|  | @ -6,6 +6,10 @@ def simple(app): | |||
|     return SimpleSearch() | ||||
| 
 | ||||
| 
 | ||||
def whoosh(app):
    """Create the Whoosh search backend for ``app``.

    The index directory is taken from ``app.config['WHOOSH_INDEX']`` (plain
    item access, so the key must be present) and handed to ``WhooshSearch``.
    """
    index_dir = app.config['WHOOSH_INDEX']
    return WhooshSearch(index_dir)
| 
 | ||||
| 
 | ||||
def elasticsearch(app):
    """Create the Elasticsearch search backend for ``app``.

    Wraps ``app`` in the Flask-Elastic extension object and passes that to
    ``ElasticSearch``.
    """
    # Imported inside the function, so the extension module is only loaded
    # when this factory is actually called.
    try:
        # Direct module name; the ``flask.ext`` redirect namespace was
        # deprecated and later removed from Flask.
        from flask_elastic import Elastic
    except ImportError:
        # Fallback for old installations that only expose the extension
        # through the legacy namespace.
        from flask.ext.elastic import Elastic
    return ElasticSearch(Elastic(app))
|  | @ -42,6 +46,65 @@ class SimpleSearch(BaseSearch): | |||
|         pass | ||||
| 
 | ||||
| 
 | ||||
class WhooshSearch(BaseSearch):
    """Wiki page search backed by an on-disk Whoosh index.

    Each page is one document with two fields: ``path`` (the page name,
    unique and stored) and ``body`` (the analyzed page text, not stored).
    """

    def __init__(self, index_path, language="de"):
        """Open the index in ``index_path``, creating it if necessary.

        :param index_path: directory that holds (or will hold) the index.
        :param language: language code passed to Whoosh's
            ``LanguageAnalyzer`` for the ``body`` field. Defaults to
            ``"de"``, the value that was previously hard-coded.
        """
        from whoosh import index as whoosh_index
        from whoosh.fields import Schema, TEXT, ID
        from whoosh import qparser
        from whoosh.highlight import UppercaseFormatter
        from whoosh.analysis import LanguageAnalyzer
        import os

        self.schema = Schema(
            path=ID(unique=True, stored=True),
            body=TEXT(analyzer=LanguageAnalyzer(language)),
        )
        self.formatter = UppercaseFormatter()

        # makedirs (not mkdir) so nested index paths work as well.
        if not os.path.isdir(index_path):
            os.makedirs(index_path)

        # Ask Whoosh whether an index is actually present: a directory that
        # exists but is empty would make open_dir() fail.
        if whoosh_index.exists_in(index_path):
            self.search_index = whoosh_index.open_dir(index_path)
        else:
            self.search_index = whoosh_index.create_in(index_path, self.schema)

        self.query_parser = qparser.QueryParser("body", schema=self.schema)
        # Enables the ``term~N`` fuzzy syntax used by wiki().
        self.query_parser.add_plugin(qparser.FuzzyTermPlugin())

    @staticmethod
    def _to_text(value):
        """Return ``value`` as text, decoding UTF-8 when it is a byte string."""
        if isinstance(value, bytes):
            return value.decode("utf-8")
        return value

    def index(self, index, doc_type, id_=None, body=None):
        """Add or replace the document identified by ``id_``.

        :param index: unused by this backend.
        :param doc_type: unused by this backend.
        :param id_: page name (byte string or text); stored in ``path``.
        :param body: mapping whose ``"content"`` entry is the text to index.
        :raises ValueError: if ``id_`` or ``body`` is missing.
        """
        if id_ is None or body is None:
            raise ValueError("index() requires both id_ and body")

        # The writer context manager commits on success and cancels on
        # error, so a failed update does not leave the write lock held.
        with self.search_index.writer() as writer:
            writer.update_document(
                path=self._to_text(id_),
                body=self._to_text(body["content"]),
            )

    def index_wiki(self, name, body):
        """Index the wiki page ``name`` with the given ``body`` mapping."""
        self.index('wiki', 'page', id_=name, body=body)

    def delete_index(self, index):
        """Delete every document whose ``path`` term equals ``index``.

        NOTE(review): despite the method name this removes matching
        documents, not the whole index -- confirm against the callers.
        """
        with self.search_index.writer() as writer:
            writer.delete_by_term('path', index)

    def wiki(self, query):
        """Search page bodies for ``query``.

        :param query: user query string; a falsy value yields no results.
        :returns: list of ``{"name": ..., "content": ...}`` dicts, where
            ``content`` is the highlighted excerpt for the hit.
        """
        if not query:
            return []

        # "~2" is the fuzzy-term suffix (maximum edit distance 2); appended
        # to the raw query string it attaches to the final term.
        q = self.query_parser.parse("%s~2" % (query,))

        with self.search_index.searcher() as s:
            results = s.search(q)
            results.formatter = self.formatter

            res = []
            for hit in results:
                name = hit["path"]
                # ``body`` is not a stored field, so the page text is fetched
                # again and passed to highlights() explicitly.
                # NOTE(review): ``g`` is not defined in this block --
                # presumably flask.g imported at module level; confirm.
                page_data = self._to_text(g.current_wiki.get_page(name)["data"])
                content = hit.highlights('body', text=page_data)
                res.append(dict(name=name, content=content))

            return res

    def users(self, query):
        """User search is not implemented for this backend; returns None."""
        pass
| 
 | ||||
| 
 | ||||
| class ElasticSearch(BaseSearch): | ||||
|     def __init__(self, elastic): | ||||
|         self.elastic = elastic | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue