Spaces:
Build error
Build error
| import re | |
| from pygments.lexers.python import PythonLexer | |
def tokenize_code(code):
    """Lex ``code`` as Python source and return the post-processed tokens.

    The token stream produced by ``PythonLexer.get_tokens`` is passed to
    ``process_pygments_tokens`` and that function's result is returned as-is.
    """
    raw_stream = PythonLexer().get_tokens(code)
    return process_pygments_tokens(raw_stream)
def process_pygments_tokens(tokens):
    """Reduce a Pygments-style token stream to a flat list of token texts.

    Args:
        tokens: Iterable of pairs whose first item is a token type (compared
            through ``str()``) and whose second item is the token text.

    Returns:
        A list of token texts with whitespace tokens removed and every
        consecutive ``'"'``, ``'STR'``, ``'"'`` run collapsed into the single
        text ``'"STR"'``.
    """
    texts = []
    for token in tokens:
        kind = str(token[0])
        # Drop dedicated whitespace tokens, and plain Text tokens whose text
        # begins with whitespace (re.match anchors only at the start).
        if kind == 'Token.Text.Whitespace' or (
            kind == 'Token.Text' and re.match(r'\s+', token[1])
        ):
            continue
        texts.append(token[1])

    merged = []
    i = 0
    while i < len(texts):
        # A slice is safe at the end of the list, so the last tokens go through
        # the same check as the rest and a trailing '"', 'STR', '"' triple is
        # emitted exactly once instead of being appended again as a tail.
        if texts[i:i + 3] == ['"', 'STR', '"']:
            merged.append('"STR"')
            i += 3
        else:
            merged.append(texts[i])
            i += 1
    return merged