Allow UTF-8 in page names
This commit is contained in:
parent
4b55ea0009
commit
e3508e365f
2 changed files with 11 additions and 7 deletions
|
@@ -86,14 +86,18 @@ def to_canonical(s):
|
|||
Double space -> single dash
|
||||
Double dash -> single dash
|
||||
Remove all non alphanumeric and dash
|
||||
Limit to first 64 chars
|
||||
Limit to first 128 chars
|
||||
"""
|
||||
s = s.encode('ascii', 'ignore')
|
||||
reserved_chars = "&$+,/:;=?@#"
|
||||
unsafe_chars = "?<>[]{}|\^~%"
|
||||
|
||||
s = s.encode('utf8')
|
||||
s = str(s)
|
||||
s = re.sub(r"\s\s*", "-", s)
|
||||
s = re.sub(r"\-\-+", "-", s)
|
||||
s = re.sub(r"[^a-zA-Z0-9\-]", "", s)
|
||||
s = s[:64]
|
||||
s = s.strip()
|
||||
s = re.sub(r"\s", "-", s)
|
||||
s = re.sub(r"[" + re.escape(reserved_chars) + "]", "", s)
|
||||
s = re.sub(r"[" + re.escape(unsafe_chars) + "]", "", s)
|
||||
s = s[:128]
|
||||
return s
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue