Merge pull request #87 from gazpachoking/subdir_tweaks

Tweak the rules for to_canonical to better handle subdirs
2015-12-10 20:59:29 -06:00 · 2015-12-10 20:59:29 -06:00 · efbaf9e217
commit efbaf9e217
parent 93df0e081a 57067a9f95
2 changed files with 13 additions and 12 deletions
--- a/realms/lib/util.py
+++ b/realms/lib/util.py
@@ -83,24 +83,25 @@ def clean_url(url):
 def to_canonical(s):
    """
-    Remove leading/trailing whitespace
+    Remove leading/trailing whitespace (from all path components)
-    Remove leading underscores and backslashes "/"
+    Remove leading underscores and slashes "/"
    Convert spaces to dashes "-"
-    Limit to first 128 chars
+    Limit path components to 63 chars and total size to 436 chars
    """
    reserved_chars = "&$+,:;=?@#"
    unsafe_chars = "?<>[]{}|\^~%"
-    s = s.encode('utf8')
+    s = s.encode("utf8")
-    s = str(s)
+    s = re.sub(r"\s+", " ", s)
-    s = s.strip()
+    s = s.lstrip("_/ ")
-    s = s.lstrip('_')
-    s = s.lstrip('/')
-    s = re.sub(r"\s", "-", s)
-    s = re.sub(r"/+", "/", s)
    s = re.sub(r"[" + re.escape(reserved_chars) + "]", "", s)
    s = re.sub(r"[" + re.escape(unsafe_chars) + "]", "", s)
-    s = s[:128]
+    # Strip leading/trailing spaces from path components, replace internal spaces
+    # with '-', and truncate to 63 characters.
+    parts = (part.strip().replace(" ", "-")[:63] for part in s.split("/"))
+    # Join any non-empty path components back together
+    s = "/".join(filter(None, parts))
+    s = s[:436]
    return s
--- a/realms/modules/search/models.py
+++ b/realms/modules/search/models.py
@@ -40,7 +40,7 @@ class SimpleSearch(BaseSearch):
        res = []
        for entry in g.current_wiki.get_index():
            name = filename_to_cname(entry['name'])
-            if set(query.split()).intersection(name.split('-')):
+            if set(query.split()).intersection(name.replace('/', '-').split('-')):
                page = g.current_wiki.get_page(name)
                res.append(dict(name=name, content=page['data']))
        return res