From e3508e365f72a12608badcae997862d43bbad0f8 Mon Sep 17 00:00:00 2001 From: Matthew Scragg Date: Tue, 8 Dec 2015 17:15:11 -0600 Subject: [PATCH] allow utf8 in page names --- realms/lib/util.py | 16 ++++++++++------ realms/modules/wiki/models.py | 2 +- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/realms/lib/util.py b/realms/lib/util.py index 5eed026..2acb402 100644 --- a/realms/lib/util.py +++ b/realms/lib/util.py @@ -86,14 +86,18 @@ def to_canonical(s): Double space -> single dash Double dash -> single dash Remove all non alphanumeric and dash - Limit to first 64 chars + Limit to first 128 chars """ - s = s.encode('ascii', 'ignore') + reserved_chars = "&$+,/:;=?@#" + unsafe_chars = "?<>[]{}|\^~%" + + s = s.encode('utf8') s = str(s) - s = re.sub(r"\s\s*", "-", s) - s = re.sub(r"\-\-+", "-", s) - s = re.sub(r"[^a-zA-Z0-9\-]", "", s) - s = s[:64] + s = s.strip() + s = re.sub(r"\s", "-", s) + s = re.sub(r"[" + re.escape(reserved_chars) + "]", "", s) + s = re.sub(r"[" + re.escape(unsafe_chars) + "]", "", s) + s = s[:128] return s diff --git a/realms/modules/wiki/models.py b/realms/modules/wiki/models.py index 27b0852..4a2a8e7 100644 --- a/realms/modules/wiki/models.py +++ b/realms/modules/wiki/models.py @@ -183,7 +183,7 @@ class Wiki(HookMixin): return cached # commit = gittle.utils.git.commit_info(self.repo[sha]) - filename = cname_to_filename(name).encode('latin-1') + filename = cname_to_filename(name).encode('utf8') sha = sha.encode('latin-1') try: