Spaces:
Runtime error
Runtime error
| import re | |
| from typing import Dict | |
# Matches one character whose code point is in 0-31 (the C0 controls, which
# include tab, newline and carriage return) or in 127-159 (DEL plus the C1
# controls).  The character class is assembled from the literal characters
# themselves; none of them is a regex metacharacter inside "[...]".
non_printing_characters_re: re.Pattern = re.compile(
    "["
    + "".join(chr(code_point) for code_point in (*range(0, 32), *range(127, 160)))
    + "]"
)

# Matches one digit.  No re.ASCII flag is passed, so on str input "\d" also
# matches non-ASCII decimal digits, not only 0-9.
digits_re: re.Pattern = re.compile(r"\d")
# Lookup table from a single non-ASCII punctuation character to the ASCII text
# that should replace it.  A replacement is not always one character long:
# the em dash becomes " - ", the ellipsis "...", the fullwidth full stop ". ".
#
# Keys from the Unicode "Halfwidth and Fullwidth Forms" block are written as
# \uXXXX escapes on purpose.  They look almost identical to their ASCII
# counterparts, and if a width-folding tool rewrites them to ASCII the table
# degrades into identity mappings plus two harmful ones ("1" -> '"' and
# "." -> ". "), which would corrupt ordinary ASCII text.
unicode_punctuation: Dict[str, str] = {
    "\uff0c": ",",  # FULLWIDTH COMMA
    "。": ".",
    "、": ",",
    "„": '"',
    "”": '"',
    "“": '"',
    "«": '"',
    "»": '"',
    # FULLWIDTH DIGIT ONE.  NOTE(review): mapping a digit to a double quote
    # looks odd; presumably inherited from the Moses punctuation normalizer
    # rules -- confirm before changing the value.
    "\uff11": '"',
    "」": '"',
    "「": '"',
    "《": '"',
    "》": '"',
    "´": "'",
    "∶": ":",
    "\uff1a": ":",  # FULLWIDTH COLON
    "\uff1f": "?",  # FULLWIDTH QUESTION MARK
    "\uff01": "!",  # FULLWIDTH EXCLAMATION MARK
    "\uff08": "(",  # FULLWIDTH LEFT PARENTHESIS
    "\uff09": ")",  # FULLWIDTH RIGHT PARENTHESIS
    "\uff1b": ";",  # FULLWIDTH SEMICOLON
    "–": "-",
    "—": " - ",
    "\uff0e": ". ",  # FULLWIDTH FULL STOP
    "\uff5e": "~",  # FULLWIDTH TILDE
    "’": "'",
    "…": "...",
    "━": "-",
    "〈": "<",
    "〉": ">",
    "【": "[",
    "】": "]",
    "\uff05": "%",  # FULLWIDTH PERCENT SIGN
    "►": "-",
}
# Bundle of this module's normalization resources, keyed by the name of the
# module-level object each entry refers to.
normalization = dict(
    non_printing_characters_re=non_printing_characters_re,
    digits_re=digits_re,
    unicode_punctuation=unicode_punctuation,
)