Spaces:
Running
Running
| """ | |
| Mostly a TaxonomicTree class that implements a taxonomy and some helpers for easily | |
| walking and looking in the tree. | |
| A tree is an arrangement of TaxonomicNodes. | |
| """ | |
| import itertools | |
| import json | |
| class TaxonomicNode: | |
| __slots__ = ("name", "index", "root", "_children") | |
| def __init__(self, name, index, root): | |
| self.name = name | |
| self.index = index | |
| self.root = root | |
| self._children = {} | |
| def add(self, name): | |
| added = 0 | |
| if not name: | |
| return added | |
| first, rest = name[0], name[1:] | |
| if first not in self._children: | |
| self._children[first] = TaxonomicNode(first, self.root.size, self.root) | |
| self.root.size += 1 | |
| self._children[first].add(rest) | |
| def children(self, name): | |
| if not name: | |
| return set((child.name, child.index) for child in self._children.values()) | |
| first, rest = name[0], name[1:] | |
| if first not in self._children: | |
| return set() | |
| return self._children[first].children(rest) | |
| def descendants(self, prefix=None): | |
| """Iterates over all values in the subtree that match prefix.""" | |
| if not prefix: | |
| yield (self.name,), self.index | |
| for child in self._children.values(): | |
| for name, i in child.descendants(): | |
| yield (self.name, *name), i | |
| return | |
| first, rest = prefix[0], prefix[1:] | |
| if first not in self._children: | |
| return | |
| for name, i in self._children[first].descendants(rest): | |
| yield (self.name, *name), i | |
| def values(self): | |
| """Iterates over all (name, i) pairs in the tree.""" | |
| yield (self.name,), self.index | |
| for child in self._children.values(): | |
| for name, index in child.values(): | |
| yield (self.name, *name), index | |
| def from_dict(cls, dct, root): | |
| node = cls(dct["name"], dct["index"], root) | |
| node._children = { | |
| child["name"]: cls.from_dict(child, root) for child in dct["children"] | |
| } | |
| return node | |
| class TaxonomicTree: | |
| """ | |
| Efficient structure for finding taxonomic names and their descendants. | |
| Also returns an integer index i for each possible name. | |
| """ | |
| def __init__(self): | |
| self.kingdoms = {} | |
| self.size = 0 | |
| def add(self, name: list[str]): | |
| if not name: | |
| return | |
| first, rest = name[0], name[1:] | |
| if first not in self.kingdoms: | |
| self.kingdoms[first] = TaxonomicNode(first, self.size, self) | |
| self.size += 1 | |
| self.kingdoms[first].add(rest) | |
| def children(self, name=None): | |
| if not name: | |
| return set( | |
| (kingdom.name, kingdom.index) for kingdom in self.kingdoms.values() | |
| ) | |
| first, rest = name[0], name[1:] | |
| if first not in self.kingdoms: | |
| return set() | |
| return self.kingdoms[first].children(rest) | |
| def descendants(self, prefix=None): | |
| """Iterates over all values in the tree that match prefix.""" | |
| if not prefix: | |
| # Give them all the subnodes | |
| for kingdom in self.kingdoms.values(): | |
| yield from kingdom.descendants() | |
| return | |
| first, rest = prefix[0], prefix[1:] | |
| if first not in self.kingdoms: | |
| return | |
| yield from self.kingdoms[first].descendants(rest) | |
| def values(self): | |
| """Iterates over all (name, i) pairs in the tree.""" | |
| for kingdom in self.kingdoms.values(): | |
| yield from kingdom.values() | |
| def __len__(self): | |
| return self.size | |
| def from_dict(cls, dct): | |
| tree = cls() | |
| tree.kingdoms = { | |
| kingdom["name"]: TaxonomicNode.from_dict(kingdom, tree) | |
| for kingdom in dct["kingdoms"] | |
| } | |
| tree.size = dct["size"] | |
| return tree | |
| class TaxonomicJsonEncoder(json.JSONEncoder): | |
| def default(self, obj): | |
| if isinstance(obj, TaxonomicNode): | |
| return { | |
| "name": obj.name, | |
| "index": obj.index, | |
| "children": list(obj._children.values()), | |
| } | |
| elif isinstance(obj, TaxonomicTree): | |
| return { | |
| "kingdoms": list(obj.kingdoms.values()), | |
| "size": obj.size, | |
| } | |
| else: | |
| super().default(self, obj) | |
| def batched(iterable, n): | |
| # batched('ABCDEFG', 3) --> ABC DEF G | |
| if n < 1: | |
| raise ValueError("n must be at least one") | |
| it = iter(iterable) | |
| while batch := tuple(itertools.islice(it, n)): | |
| yield zip(*batch) | |