# File size: 1,430 Bytes

from transformers import Pipeline, AutoModelForSequenceClassification,AutoTokenizer
import torch
from transformers.pipelines import PIPELINE_REGISTRY
class TBCP(Pipeline):
    """Text-classification pipeline returning the best label and raw logits.

    Each input is tokenized, passed through ``self.model``, and reduced to a
    dict of the form ``{"label": "Label_<index>", "logits": [...]}``.
    """

    def __init__(self, **kwargs):
        """Build the pipeline and load its tokenizer.

        Args:
            **kwargs: Forwarded unchanged to ``Pipeline.__init__``. Must
                contain a ``"tokenizer"`` entry, which is handed to
                ``AutoTokenizer.from_pretrained``.

        Raises:
            KeyError: If ``"tokenizer"`` is not among the keyword arguments.
        """
        super().__init__(**kwargs)
        # NOTE(review): presumably "tokenizer" is a model id or path rather
        # than a ready tokenizer object -- confirm against the callers.
        self.tokenizer = AutoTokenizer.from_pretrained(kwargs["tokenizer"])

    def _sanitize_parameters(self, **kwargs):
        """Split call-time keyword arguments between the pipeline stages.

        Only ``top_k`` is recognised; it is routed to ``postprocess``. Any
        other keyword argument is ignored.

        Returns:
            A 3-tuple of dicts. The first two are always empty; the third
            holds ``top_k`` when the caller supplied it.
        """
        postprocess_kwargs = {}
        # Test for the key that is actually read. The previous check looked
        # for "text_pair" and then indexed "top_k", which raised KeyError
        # when only "text_pair" was given and dropped "top_k" otherwise.
        if "top_k" in kwargs:
            postprocess_kwargs["top_k"] = kwargs["top_k"]
        return {}, {}, postprocess_kwargs

    def preprocess(self, text):
        """Tokenize ``text`` and return the encoding as PyTorch tensors."""
        return self.tokenizer(text, return_tensors="pt")

    def _forward(self, model_inputs):
        """Run the model on the tokenized inputs and return its output."""
        return self.model(**model_inputs)

    def postprocess(self, model_outputs, top_k=None):
        """Turn the model output into a label and a plain list of logits.

        Args:
            model_outputs: Object exposing a ``logits`` tensor.
            top_k: Accepted for interface compatibility; currently unused.

        Returns:
            ``{"label": "Label_<index>", "logits": <logits as nested list>}``.
        """
        logits = model_outputs.logits
        probabilities = torch.nn.functional.softmax(logits, dim=-1)

        # argmax() without ``dim`` indexes the flattened tensor, so this is
        # the class index only when there is a single row of logits.
        # NOTE(review): assumes one input per call -- confirm.
        best_class = probabilities.argmax().item()
        label = f"Label_{best_class}"
        return {"label": label, "logits": logits.squeeze().tolist()}
# Register TBCP in the transformers pipeline registry under the task name
# "TunBERT-classifier", with AutoModelForSequenceClassification as the
# PyTorch model class.
PIPELINE_REGISTRY.register_pipeline(
    "TunBERT-classifier",
    pipeline_class=TBCP,
    pt_model=AutoModelForSequenceClassification,
    default={"pt": ("not-lain/TunBERT", "main")},
    type="text",  # current support type: text, audio, image, multimodal
)
