| import re | |
class Sentence(object):
    """A piece of text paired with its whitespace-separated tokens."""

    def __init__(self, text):
        """Keep the raw text and tokenize it.

        Args:
            text: The sentence as a single string.
        """
        self.text = text
        # split() without a separator breaks on runs of whitespace and
        # never produces empty tokens.
        words = text.split()
        self.tokens = words

    def __len__(self):
        """Return how many tokens the sentence contains."""
        token_count = len(self.tokens)
        return token_count
class Sentences(object):
    """The newline-separated pieces of a text, held as a list of strings."""

    def __init__(self, text):
        """Split ``text`` on newline characters and keep the pieces.

        Args:
            text: The raw string; each ``"\\n"``-delimited piece becomes
                one entry of ``self.sents``.
        """
        # Stored as a list, not a generator: a generator has no len()
        # (so __len__ would raise TypeError) and can be consumed only
        # once. Splitting builds the whole list anyway, so nothing is
        # lost by keeping it.
        self.sents = text.split("\n")

    def __iter__(self):
        """Iterate over the stored pieces in their original order."""
        return iter(self.sents)

    def __len__(self):
        """Return the number of newline-separated pieces."""
        return len(self.sents)
class SentenceParser(object):
    """
    Callable that wraps a text string in a ``Sentences`` object.

    NOTE(review): this class was previously described as iterating over
    the text column of a dataframe; nothing here touches a dataframe, so
    that presumably describes how callers apply it -- confirm.
    """

    def __init__(self):
        """Create a parser whose ``sents`` attribute starts as ``None``."""
        # Never reassigned within this class.
        self.sents = None

    def __call__(self, text):
        """Build and return a ``Sentences`` for ``text``.

        Args:
            text: The raw string handed straight to ``Sentences``.
        """
        parsed = Sentences(text)
        return parsed