def to_canonical(s):
    """
    Convert a page name or path into its canonical form.

    Collapse each run of whitespace into a single space
    Remove leading underscores, slashes "/" and spaces
    Remove reserved and unsafe URL characters
    Remove leading/trailing whitespace (from all path components)
    Convert spaces to dashes "-"
    Drop empty path components (so repeated slashes collapse)
    Limit path components to 63 chars and total size to 436 chars

    :param s: name to canonicalize, as text or as UTF-8 encoded bytes
    :return: the canonical name as a native ``str``; it never starts or
        ends with "/" and may be empty
    """
    reserved_chars = "&$+,:;=?@#"
    # The backslash is spelled "\\" because "\^" is an invalid escape sequence.
    unsafe_chars = "?<>[]{}|\\^~%"

    # Work on text rather than encoded bytes: the regexes below use text
    # patterns, and slicing encoded bytes could cut a multi-byte UTF-8
    # character in half.
    if isinstance(s, bytes):
        s = s.decode("utf8")

    s = re.sub(r"\s+", " ", s)
    s = s.lstrip("_/ ")
    s = re.sub("[" + re.escape(reserved_chars + unsafe_chars) + "]", "", s)

    # Strip leading/trailing spaces from path components, replace internal
    # spaces with '-', and truncate to 63 characters.
    parts = (part.strip().replace(" ", "-")[:63] for part in s.split("/"))
    # Join any non-empty path components back together.
    s = "/".join(filter(None, parts))
    # The total-length cut can land right after a separator; drop it so the
    # result never ends with an empty path component.
    s = s[:436].rstrip("/")

    # On Python 2 the text above is ``unicode``; hand back the native byte
    # ``str`` there, as the function did before. No-op on Python 3.
    if not isinstance(s, str):
        s = s.encode("utf8")
    return s