Spaces:
Sleeping
Sleeping
| import re | |
| from services.model_visitor import ModelVisitor | |
class IbmExtractCodeblock(ModelVisitor):
    """Visitor that extracts the code portion from a raw model response.

    Two response layouts are recognised:

    * the text following a ``### Output: `` marker, up to the first
      ``<|endoftext|>`` token or, if there is none, the end of the string;
    * the contents of a Markdown fenced block: three backticks, an optional
      language tag, a newline, the code, a newline, three backticks.
    """

    # Compiled once at class-definition time instead of on every call.
    #
    # Alternative 1 -- '### Output: ' followed by a lazy capture (group 1)
    #   that stops at the first '<|endoftext|>' or at the end of the input.
    # Alternative 2 -- a fenced block: '```', an optional run of word
    #   characters (the language tag), a newline, a lazy capture (group 2),
    #   a newline, then the closing '```'.
    #
    # re.DOTALL lets '.' in group 2 span multiple lines.
    _CODE_BLOCK_RE = re.compile(
        r'(?:### Output: ([\s\S]*?))(?:\<\|endoftext\|\>|\Z)|```(?:\w+)?\n(.*?)\n```',
        re.DOTALL,
    )

    def visit(self, _, data):
        """Return the code extracted from ``data``.

        Args:
            _: Ignored by this visitor (unused in the body).
            data (str): Raw text to scan for code.

        Returns:
            str: The extracted code; see ``_get_code_block``.
        """
        return self._get_code_block(data)

    def _get_code_block(self, data):
        """Extract every recognised code snippet from ``data``.

        Args:
            data (str): Raw text that may contain a ``### Output: `` section
                and/or Markdown fenced code blocks.

        Returns:
            str: Each snippet with surrounding whitespace stripped, joined by
            a single newline in order of appearance. An empty string is
            returned when ``data`` is empty/``None`` or nothing matches.
        """
        # Nothing to scan; also avoids the TypeError the regex engine would
        # raise for None.
        if not data:
            return ''

        snippets = []
        # With two capture groups, findall yields (group1, group2) tuples; the
        # group belonging to the alternative that did not match is ''.
        for output_body, fenced_body in self._CODE_BLOCK_RE.findall(data):
            snippet = (output_body or fenced_body).strip()
            # Skip whitespace-only captures so they do not add blank lines.
            if snippet:
                snippets.append(snippet)

        # Newline-separated so that adjacent snippets are not fused into one
        # line (e.g. 'a = 1' and 'b = 2' must not become 'a = 1b = 2').
        return '\n'.join(snippets)