realms-wiki/realms/modules/wiki/models.py

337 lines
9.8 KiB
Python
Raw Normal View History

2013-10-02 07:32:53 +03:00
import os
import re
2013-10-08 22:47:49 +03:00
import lxml.html
2013-12-03 22:09:57 +02:00
from lxml.html.clean import Cleaner
2013-10-03 17:58:07 +03:00
import ghdiff
2013-10-15 23:32:17 +03:00
import gittle.utils
2014-09-07 19:54:51 +03:00
import yaml
from gittle import Gittle
2013-10-05 00:42:45 +03:00
from dulwich.repo import NotGitRepository
2013-11-08 20:20:40 +02:00
from werkzeug.utils import escape, unescape
2014-08-30 18:06:12 +03:00
from realms.lib.util import to_canonical
from realms import cache
2014-10-09 06:42:29 +03:00
from realms.lib.hook import HookMixin
2013-09-29 00:33:00 +03:00
2014-10-09 23:47:12 +03:00
def cname_to_filename(cname):
    """Convert a canonical page name to its on-disk filename.

    The name is lower-cased and given a ``.md`` extension.

    :param cname: Canonical name
    :return: str -- Filename
    """
    lowered = cname.lower()
    return lowered + ".md"
def filename_to_cname(filename):
    """Convert a filename to its canonical page name by dropping the extension.

    .. note::

        It's assumed the filename is already in canonical format; only the
        final extension is stripped, no other normalisation is done.

    :param filename: Filename, e.g. ``home.md``
    :return: str -- Canonical name
    """
    root, _ext = os.path.splitext(filename)
    return root
class PageNotFound(Exception):
    """Raised when a requested page (or page revision) does not exist."""
    pass
2014-10-09 06:42:29 +03:00
class Wiki(HookMixin):
    """Git-backed wiki.

    Pages are stored as ``<cname>.md`` files inside the git repository at
    ``path``; every write, rename and delete is recorded as a commit.
    """

    path = None
    base_path = '/'
    default_ref = 'master'
    default_committer_name = 'Anon'
    default_committer_email = 'anon@anon.anon'
    index_page = 'home'
    gittle = None
    repo = None

    def __init__(self, path):
        """Open the repository at ``path``, initialising a new one if needed.

        :param path: Filesystem path of the wiki repository.
        """
        try:
            self.gittle = Gittle(path)
        except NotGitRepository:
            self.gittle = Gittle.init(path)

        # Dulwich repo
        self.repo = self.gittle.repo

        self.path = path

    def __repr__(self):
        return "Wiki: %s" % self.path

    def revert_page(self, name, commit_sha, message, username):
        """Revert page to passed commit sha1

        The content of the page at ``commit_sha`` is written back as a new
        commit; history is not rewritten.

        :param name: Name of page to revert.
        :param commit_sha: Commit Sha1 to revert to.
        :param message: Commit message. When empty, the message of the
            commit being reverted to is reused.
        :param username: Committer name.
        :return: Git commit sha1
        :raises PageNotFound: If the page does not exist at ``commit_sha``.
        """
        page = self.get_page(name, commit_sha)
        if not page:
            raise PageNotFound()

        if not message:
            commit_info = gittle.utils.git.commit_info(self.gittle[commit_sha.encode('latin-1')])
            message = commit_info['message']

        return self.write_page(name, page['data'], message=message, username=username)

    def write_page(self, name, content, message=None, create=False, username=None, email=None):
        """Write page to git repo

        :param name: Name of page.
        :param content: Content of page.
        :param message: Commit message. Defaults to ``"Updated <name>"``.
        :param create: Perform git add operation?
        :param username: Commit Name. Defaults to ``default_committer_name``.
        :param email: Commit Email. Defaults to ``default_committer_email``.
        :return: Git commit sha1.
        """
        cname = to_canonical(name)
        filename = cname_to_filename(cname)

        content = self.clean(content)

        with open(os.path.join(self.path, filename), 'w') as f:
            f.write(content)

        if create:
            self.gittle.add(filename)

        if not message:
            message = "Updated %s" % name

        if not username:
            username = self.default_committer_name

        if not email:
            email = self.default_committer_email

        ret = self.gittle.commit(name=username,
                                 email=email,
                                 message=message,
                                 files=[filename])

        # Drop any cached copy so the next read sees the new revision
        cache.delete(cname)

        return ret

    def clean(self, content):
        """Clean any HTML, this might not be necessary.

        The content is run through lxml's ``Cleaner``; the text inside
        triple-backtick fences is HTML-escaped beforehand and unescaped
        afterwards so that code samples survive the round trip.

        :param content: Content of page.
        :return: str
        """
        # Both callbacks must always return a string: when a ``re.sub``
        # callback returns None the match is replaced with nothing, which
        # would silently delete empty code fences.
        def escape_repl(m):
            return "```" + escape(m.group(1)) + "```"

        def unescape_repl(m):
            return "```" + unescape(m.group(1)) + "```"

        # prevents p tag from being added, we remove this later
        content = '<div>' + content + '</div>'
        content = re.sub(r"```(.*?)```", escape_repl, content, flags=re.DOTALL)

        tree = lxml.html.fromstring(content)

        cleaner = Cleaner(remove_unknown_tags=False,
                          kill_tags={'style'},
                          safe_attrs_only=False)
        tree = cleaner.clean_html(tree)

        content = lxml.html.tostring(tree, encoding='utf-8', method='html')

        # remove added div tags: len('<div>') == 5, len('</div>') == 6
        content = content[5:-6]

        # FIXME this is for block quotes, doesn't work for double ">"
        content = re.sub(r"(\n&gt;)", "\n>", content)
        content = re.sub(r"(^&gt;)", ">", content)

        # Handlebars partial ">"
        content = re.sub(r"\{\{&gt;(.*?)\}\}", r'{{>\1}}', content)

        # Handlebars, allow {{}} inside HTML links
        content = content.replace("%7B", "{")
        content = content.replace("%7D", "}")

        content = re.sub(r"```(.*?)```", unescape_repl, content, flags=re.DOTALL)

        return content

    def rename_page(self, old_name, new_name, user=None):
        """Rename page.

        :param old_name: Page that will be renamed.
        :param new_name: New name of page.
        :param user: User object if any. Its ``username`` / ``email``
            attributes are used for the commit when present.
        :return: str -- Commit sha1, or None if ``old_name`` is not in the index.
        """
        old_filename, new_filename = map(cname_to_filename, [old_name, new_name])

        if old_filename not in self.gittle.index:
            # old doesn't exist
            return None

        # If new_filename already exists it is overwritten, but that is ok,
        # it's git!

        os.rename(os.path.join(self.path, old_filename), os.path.join(self.path, new_filename))

        self.gittle.add(new_filename)
        self.gittle.rm(old_filename)

        commit = self.gittle.commit(name=getattr(user, 'username', self.default_committer_name),
                                    email=getattr(user, 'email', self.default_committer_email),
                                    message="Moved %s to %s" % (old_name, new_name),
                                    files=[old_filename, new_filename])

        # Cache entries are keyed by page name (see get_page / write_page),
        # not by filename.
        cache.delete_many(old_name, new_name)

        return commit

    def delete_page(self, name, user=None):
        """Delete page.

        :param name: Page that will be deleted
        :param user: User object if any
        :return: str -- Commit sha1
        """
        # NOTE(review): ``name`` is handed to git unchanged, so it presumably
        # has to be the repository-relative filename -- confirm against callers.
        self.gittle.rm(name)
        commit = self.gittle.commit(name=getattr(user, 'username', self.default_committer_name),
                                    email=getattr(user, 'email', self.default_committer_email),
                                    message="Deleted %s" % name,
                                    files=[name])

        # get_page keys the cache by page name; also invalidate the
        # extension-stripped form in case ``name`` was given as a filename.
        cache.delete_many(name, filename_to_cname(name))

        return commit

    def get_page(self, name, sha='HEAD'):
        """Get page data, partials, commit info.

        :param name: Name of page.
        :param sha: Commit sha.
        :return: dict -- Page data with ``partials`` and ``info`` keys added,
            or None when the page or the commit does not exist.
        """
        # The cache is keyed by page name only, so a cached entry can only
        # stand in for the latest revision, never for a specific commit.
        if sha == 'HEAD':
            cached = cache.get(name)
            if cached:
                return cached

        filename = cname_to_filename(name).encode('latin-1')
        sha = sha.encode('latin-1')

        try:
            data = self.gittle.get_commit_files(sha, paths=[filename]).get(filename)
            if not data:
                return None

            partials = {}
            if data.get('data'):
                meta = self.get_meta(data['data'])
                # The YAML header may legally parse to a scalar or a list;
                # only a mapping can carry an ``import`` entry.
                if isinstance(meta, dict):
                    for partial_name in meta.get('import') or []:
                        partials[partial_name] = self.get_page(partial_name)
            data['partials'] = partials

            history = self.get_history(name, limit=1)
            data['info'] = history[0] if history else None

            return data

        except KeyError:
            # HEAD doesn't exist yet
            return None

    def get_meta(self, content):
        """Get metadata from page if any.

        Metadata is a YAML block that starts the page with ``---`` and runs
        up to the first following line that begins with ``---`` or ``...``.

        :param content: Page content
        :return: dict -- Parsed metadata, ``{'error': <message>}`` when the
            YAML cannot be parsed, or None when the page has no metadata block.
        """
        if not content.startswith("---"):
            return None

        meta_end = re.search(r"\n(\.{3}|-{3})", content)
        if not meta_end:
            return None

        try:
            return yaml.safe_load(content[0:meta_end.start()])
        except Exception as e:
            # Deliberately best-effort: a broken header must not break the page
            return {'error': str(e)}

    def compare(self, name, old_sha, new_sha):
        """Compare two revisions of the same page.

        :param name: Name of page.
        :param old_sha: Older sha.
        :param new_sha: Newer sha.
        :return: str - Raw markup with styles
        :raises PageNotFound: If the page is missing at either revision.
        """
        # TODO: This could be effectively done in the browser
        old = self.get_page(name, sha=old_sha)
        new = self.get_page(name, sha=new_sha)
        if not old or not new:
            raise PageNotFound()
        return ghdiff.diff(old['data'], new['data'])

    def get_index(self):
        """Get repo index of head.

        :return: list -- List of dicts, one per indexed file
        """
        rv = []
        index = self.repo.open_index()
        for name in index:
            entry = index[name]
            rv.append(dict(name=filename_to_cname(name),
                           filename=name,
                           ctime=entry.ctime[0],
                           mtime=entry.mtime[0],
                           sha=entry.sha,
                           size=entry.size))

        return rv

    def get_history(self, name, limit=100):
        """Get page history.

        :param name: Name of page.
        :param limit: Limit history size.
        :return: list -- List of dicts
        """
        if not len(self.repo.open_index()):
            # Index is empty, no commits
            return []

        file_path = cname_to_filename(name)
        versions = []

        walker = self.repo.get_walker(paths=[file_path], max_entries=limit)
        for entry in walker:
            change_type = None
            for change in entry.changes():
                if change.old.path == file_path:
                    change_type = change.type
                elif change.new.path == file_path:
                    change_type = change.type

            # Author has the form "Name <email>"; take everything before the
            # first "<" so that a missing or repeated "<" cannot raise.
            author_name = entry.commit.author.partition('<')[0]

            versions.append(dict(
                author=author_name.strip(),
                time=entry.commit.author_time,
                message=entry.commit.message,
                sha=entry.commit.id,
                type=change_type))

        return versions