Spaces:
Sleeping
Sleeping
| """Define mlbee.""" | |
| # pylint: disable=invalid-name | |
| from typing import List | |
| from cmat2aset310 import cmat2aset | |
| from logzero import logger | |
| from st_mlbee.gen_cmat import gen_cmat | |
def st_mlbee(
    text1: List[str],
    text2: List[str],
    eps: float = 10,
    min_samples: int = 6,
    # show_plot: bool = True,
):
    """Align multilingual texts.

    Builds a matrix from the two texts with ``gen_cmat`` and converts it
    to an alignment set with ``cmat2aset``. The intermediate matrix and the
    resulting alignment set are also stored on the function object as
    ``st_mlbee.cmat`` and ``st_mlbee.aset`` so callers can inspect them
    after the call.

    Args:
        text1: list of strings
        text2: list of strings
        eps: epsilon, forwarded unchanged to ``cmat2aset``
        min_samples: minimum number of points to be considered as a
            cluster, forwarded unchanged to ``cmat2aset``

    Note:
        A former ``show_plot`` option was moved to cmat2html's
        ``show_plot`` and is no longer accepted here.

    Returns:
        The alignment set produced by ``cmat2aset``.

    Raises:
        ValueError: if either input contains no non-blank string.
        Exception: any error raised by ``gen_cmat`` or ``cmat2aset`` is
            logged with its traceback and re-raised unchanged.
    """
    # Both sides need at least one non-blank entry; any() stops at the
    # first hit instead of materializing filtered copies of the inputs.
    has_text1 = any(elm.strip() for elm in text1)
    has_text2 = any(elm.strip() for elm in text2)
    if not (has_text1 and has_text2):
        logger.warning("One or both inputs are empty")
        # ValueError subclasses Exception, so existing handlers that catch
        # Exception keep working while new callers can be more specific.
        raise ValueError("Nothing to do...exiting")

    try:
        cmat = gen_cmat(text1, text2)
    except Exception as exc:
        logger.exception(exc)
        raise
    # Expose the matrix on the function object for later inspection.
    st_mlbee.cmat = cmat

    try:
        aset = cmat2aset(cmat, eps=eps, min_samples=min_samples)
    except Exception as exc:
        logger.exception(exc)
        raise
    # Expose the alignment set on the function object as well.
    st_mlbee.aset = aset

    # TODO(review): decide whether to return paired texts instead of aset.
    return aset