import json
from pathlib import Path
import pickle
import sys

from jiwer import cer

import lemmatizer.edittree as edittree

BASE_DIR = Path(__file__).parent


class Analyzer:
    """A class to obtain lemmas, unimorph analysis, and de_translations for Romansh tokens"""

    def __init__(self, idiom: str, in_voc: set,learned_et: bool = True):

        self.idiom = idiom

        self.learned_et = learned_et

        assert self.idiom in [
            "rm-rumgr",
            "rm-surmiran",
            "rm-sursilv",
            "rm-sutsilv",
            "rm-puter",
            "rm-vallader",
        ]

        json_path = BASE_DIR / "lemma_tables" / f"{self.idiom}.json"

        with open(json_path, "r", encoding="utf-8") as f:
            self.dict = json.load(f)

        lem = []
        for v in self.dict.values():
            lem += v["lemma"]

        self.lemma = lem
        if self.learned_et:
            self.edit_trees = []

            for pos in "noun", "adj", "verb":
                et_path = BASE_DIR / "edit_trees" / f"{self.idiom}" / f"{pos}" / "et.txt"
                sys.modules["edittree"] = edittree
                with open(et_path, "rb") as f:
                    self.edit_trees += pickle.load(f)

        self.in_voc = in_voc

        other_de_path = BASE_DIR / "other_de" / f"{self.idiom}.json"

        with open(other_de_path, "r", encoding="utf-8") as f:
            self.other_de = json.load(f)

    def get_lemma(self, tok: str):
        """Obtain lemma through table look up; backs off
        to unsupervised edit tree rules if no lemma found
        """
        tok = tok.lower().strip()
        entry = self.dict.get(tok)
        if entry:
            return entry["lemma"]

        # Check if there's a lemma from the edit trees
        if self.learned_et:
            et_out = self._et_lemma(tok)
            if et_out:
                return [et_out]

        # Assume the token is a lemma
        return [tok] if tok in self.in_voc else [None]

    def _et_lemma(self, tok: str):
        candidates = []

        for et_pack in self.edit_trees:
            et = et_pack["et"]
            out = et.apply(tok)

            if out != -1:
                candidates.append(out)

        strong = [c for c in candidates if c in self.lemma]

        if len(strong) > 1:
            # Choose the candidate with the lowest edit distance to the tok:
            dist = {}
            for c in strong:
                dist[c] = cer(tok, c)
            out = min(dist, key=dist.get)
            return out if out in self.in_voc else None

        return strong[0] if strong and strong[0] in self.in_voc else None

    def get_unimorph(self, tok: str):
        """Obtain Unimorph annotation for N, V, and ADJ
        in the Pledari Grond Dict"""
        tok = tok.lower().strip()
        entry = self.dict.get(tok)
        if entry:
            return entry["unimorph"]

        return [None]

    def get_de(self, tok: str):
        """Obtain the German word corresponding to Romansh terms in the Pledari Grond Dict"""
        tok = tok.lower().strip()
        entry = self.dict.get(tok)
        if entry:
            return entry["DStichwort"]
        if tok in self.other_de:
            #Check the rest of the de_translations provided by the pledari grond dict
            return self.other_de[tok]
        return [None]