Spaces:
Running
Running
| """A script from https://github.com/zhopto3/morpho-baseline defining | |
| the edit tree class learned from the pledari grond dictionaries""" | |
| from difflib import SequenceMatcher | |
class EditTreeNode(object):
    """One node of an edit tree that rewrites one string into another.

    The shape of ``val`` decides what kind of node this is:

    * ``(str, str)`` -- a *replace* leaf: a word equal to the first string is
      rewritten to the second string.
    * ``(int, int)`` -- a *split* node: the integers are the lengths of the
      prefix and of the suffix of the word, which are handed to ``left`` and
      ``right`` respectively; the characters in between are copied unchanged.
    """

    def __init__(self, val):
        self.left = None
        self.right = None
        self.val = val

    def apply(self, word):
        """Apply the learned edit rules to ``word``.

        The original code was changed so that it lemmatizes rather than
        completes a paradigm.

        Returns the rewritten string, or ``-1`` when the rules stored in this
        tree do not fit ``word``.
        """
        assert isinstance(word, str)
        head = self.val[0]

        if isinstance(head, str):  # replace leaf
            return self.val[1] if word == head else -1

        if isinstance(head, int):  # split node
            assert isinstance(self.left, EditTreeNode)
            assert isinstance(self.right, EditTreeNode)
            prefix_len, suffix_len = head, self.val[1]
            # A word shorter than prefix + suffix cannot be split this way.
            # Without this guard the two slices overlap (the suffix start
            # index falls before the prefix end, or goes negative), so the
            # same characters would be rewritten by both subtrees.
            if prefix_len + suffix_len > len(word):
                return -1
            suffix_start = len(word) - suffix_len
            new_prefix = self.left.apply(word[:prefix_len])
            new_suffix = self.right.apply(word[suffix_start:])
            if new_prefix == -1 or new_suffix == -1:
                return -1
            return new_prefix + word[prefix_len:suffix_start] + new_suffix

        # ``val`` is neither a replace nor a split rule: nothing to apply.
        return None

    def __str__(self):
        """Render the tree with one node per line; children follow their
        parent, each line prefixed by one extra space per level."""
        if self.left is None:  # leaf
            return str(self.val)
        lines = [str(self.val)]
        for child in (self.left, self.right):
            lines.extend(" " + line for line in str(child).split("\n"))
        return "\n".join(lines).strip()

    def __hash__(self):
        # Hash the textual rendering so that trees comparing equal hash alike.
        return hash(self.__str__())

    def __eq__(self, other):
        """Two nodes are equal when their values and both subtrees are equal."""
        if not isinstance(other, EditTreeNode):
            return False
        return (
            self.left == other.left
            and self.right == other.right
            and self.val == other.val
        )
def longestSubstring(str1, str2):
    """Get the longest common substring of two strings.

    Returns a tuple ``(start1, start2, size)``: the substring starts at index
    ``start1`` in ``str1`` and at ``start2`` in ``str2`` and is ``size``
    characters long. When the strings share nothing, ``size`` is 0.
    """
    # autojunk must be off: with the default heuristic, once str2 has 200 or
    # more characters, frequently occurring characters are dropped from the
    # match index and the real longest common substring can be missed.
    matcher = SequenceMatcher(None, str1, str2, autojunk=False)
    found = matcher.find_longest_match(0, len(str1), 0, len(str2))
    return (found.a, found.b, found.size)
def editTree(str1, str2):
    """Build the edit tree that describes how ``str1`` maps onto ``str2``.

    The longest common substring is kept as is and the pieces to its left and
    to its right are handled recursively. When the two strings share no
    characters, a leaf holding the pair ``(str1, str2)`` is produced.
    Returns ``None`` if either argument is ``None``.
    """
    if str1 is None or str2 is None:
        return None

    start1, start2, length = longestSubstring(str1, str2)
    if not length:
        # Nothing in common: a plain replacement rule.
        return EditTreeNode((str1, str2))

    end1 = start1 + length
    end2 = start2 + length
    # The split node stores how many characters of str1 lie before and after
    # the shared substring.
    split = EditTreeNode((start1, len(str1) - end1))
    split.left = editTree(str1[:start1], str2[:start2])
    split.right = editTree(str1[end1:], str2[end2:])
    return split