Tweak the rules for to_canonical to better handle subdirs.
This commit is contained in:
parent
93df0e081a
commit
6d62d6b8b4
|
@@ -83,24 +83,25 @@ def clean_url(url):
|
||||||
|
|
||||||
def to_canonical(s):
|
def to_canonical(s):
|
||||||
"""
|
"""
|
||||||
Remove leading/trailing whitespace
|
Remove leading/trailing whitespace (from all path components)
|
||||||
Remove leading underscores and backslashes "/"
|
Remove leading underscores and slashes "/"
|
||||||
Convert spaces to dashes "-"
|
Convert spaces to dashes "-"
|
||||||
Limit to first 128 chars
|
Limit path components to 63 chars and total size to 436 chars
|
||||||
"""
|
"""
|
||||||
reserved_chars = "&$+,:;=?@#"
|
reserved_chars = "&$+,:;=?@#"
|
||||||
unsafe_chars = "?<>[]{}|\^~%"
|
unsafe_chars = "?<>[]{}|\^~%"
|
||||||
|
|
||||||
s = s.encode('utf8')
|
s = s.encode("utf8")
|
||||||
s = str(s)
|
s = re.sub(r"\s+", " ", s)
|
||||||
s = s.strip()
|
s = s.lstrip("_/ ")
|
||||||
s = s.lstrip('_')
|
|
||||||
s = s.lstrip('/')
|
|
||||||
s = re.sub(r"\s", "-", s)
|
|
||||||
s = re.sub(r"/+", "/", s)
|
|
||||||
s = re.sub(r"[" + re.escape(reserved_chars) + "]", "", s)
|
s = re.sub(r"[" + re.escape(reserved_chars) + "]", "", s)
|
||||||
s = re.sub(r"[" + re.escape(unsafe_chars) + "]", "", s)
|
s = re.sub(r"[" + re.escape(unsafe_chars) + "]", "", s)
|
||||||
s = s[:128]
|
# Strip leading/trailing spaces from path components, replace internal spaces
|
||||||
|
# with '-', and truncate to 63 characters.
|
||||||
|
parts = (part.strip().replace(" ", "-")[:63] for part in s.split("/"))
|
||||||
|
# Join any non-empty path components back together
|
||||||
|
s = "/".join(filter(None, parts))
|
||||||
|
s = s[:436]
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue