Spaces:
Running
Running
| # -*- coding: utf-8 -*- | |
| # @Time : 2021/12/2 5:41 p.m. | |
| # @Author : JianingWang | |
| # @File : common.py | |
def is_chinese_char(cp):
    """Return True when the integer codepoint ``cp`` is a CJK ideograph.

    "Chinese character" here means any codepoint inside one of the CJK
    ideograph ranges listed below. Hangul, Hiragana and Katakana live in
    other Unicode blocks and are deliberately not matched: those scripts
    are written as space-separated words, so they need no special handling.
    """
    # Inclusive (first, last) codepoint bounds.
    cjk_ranges = (
        (0x4E00, 0x9FFF),    # CJK Unified Ideographs
        (0x3400, 0x4DBF),    # Extension A
        (0x20000, 0x2A6DF),  # Extension B
        (0x2A700, 0x2B73F),  # Extension C
        (0x2B740, 0x2B81F),  # Extension D
        (0x2B820, 0x2CEAF),  # Extension E
        (0xF900, 0xFAFF),    # CJK Compatibility Ideographs
        (0x2F800, 0x2FA1F),  # CJK Compatibility Ideographs Supplement
    )
    return any(first <= cp <= last for first, last in cjk_ranges)
def is_chinese(word: str):
    """Return 1 if every character of ``word`` is a CJK ideograph, else 0.

    Example inputs: "180" gives 0, while "身高" and "神" give 1. An empty
    string has no non-CJK character and therefore yields 1.
    """
    # int() keeps the 0/1 integer return values rather than a bool.
    return int(all(is_chinese_char(ord(symbol)) for symbol in word))