Update evaluator.py
Browse files- evaluator.py +26 -24
 
    	
        evaluator.py
    CHANGED
    
    | 
         @@ -1,39 +1,41 @@ 
     | 
|
| 
         | 
|
| 1 | 
         
             
            import torch
         
     | 
| 2 | 
         
            -
            from torchmetrics import BLEUScore 
     | 
| 
         | 
|
| 3 | 
         
             
            from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
         
     | 
| 4 | 
         | 
| 5 | 
         
             
            class CodeEvaluator:
         
     | 
| 6 | 
         
            -
                def __init__(self, model_name):
         
     | 
| 7 | 
         
             
                    self.tokenizer = AutoTokenizer.from_pretrained(model_name)
         
     | 
| 8 | 
         
             
                    self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
         
     | 
| 9 | 
         
             
                    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         
     | 
| 10 | 
         
             
                    self.model.to(self.device)
         
     | 
| 11 | 
         
            -
                    self.bleu = BLEUScore()
         
     | 
| 12 | 
         
            -
                    self. 
     | 
| 13 | 
         | 
| 14 | 
         
             
                def evaluate(self, nl_input, target_code):
         
     | 
| 15 | 
         
            -
                     
     | 
| 16 | 
         
            -
                     
     | 
| 17 | 
         
            -
                         
     | 
| 18 | 
         
            -
             
     | 
| 19 | 
         
            -
             
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 20 | 
         | 
| 21 | 
         
            -
             
     | 
| 22 | 
         
            -
             
     | 
| 23 | 
         
            -
                    return bleu_score, meteor_score
         
     | 
| 24 | 
         | 
| 25 | 
         
            -
             
     | 
| 26 | 
         
            -
                model_name = "S-Dreamer/PyCodeT5"
         
     | 
| 27 | 
         
            -
                evaluator = CodeEvaluator(model_name)
         
     | 
| 28 | 
         | 
| 29 | 
         
            -
             
     | 
| 30 | 
         
            -
                 
     | 
| 31 | 
         
            -
             
     | 
| 32 | 
         
            -
             
     | 
| 33 | 
         
            -
                return 1
         
     | 
| 34 | 
         
            -
              else:
         
     | 
| 35 | 
         
            -
                return n * factorial(n-1)
         
     | 
| 36 | 
         
             
            """
         
     | 
| 37 | 
         
            -
                bleu_score,  
     | 
| 38 | 
         
             
                print(f"BLEU score: {bleu_score}")
         
     | 
| 39 | 
         
            -
                print(f" 
     | 
| 
         | 
|
| 1 | 
         
            +
            # evaluator.py
         
     | 
| 2 | 
         
             
            import torch
         
     | 
| 3 | 
         
            +
            from torchmetrics.text.bleu import BLEUScore
         
     | 
| 4 | 
         
            +
            from torchmetrics.text.rouge import ROUGEScore
         
     | 
| 5 | 
         
             
            from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
         
     | 
| 6 | 
         | 
| 7 | 
         
             
            class CodeEvaluator:
                """Generate code from a natural-language prompt and score it.

                The evaluator loads a seq2seq model and its tokenizer, generates code
                for a prompt with beam search, and compares the generated text with a
                reference snippet using BLEU and ROUGE from torchmetrics.
                """

                def __init__(self, model_name="S-Dreamer/PyCodeT5"):
                    """Load the tokenizer, model and metrics.

                    Args:
                        model_name: Identifier passed to ``from_pretrained`` for both
                            the tokenizer and the seq2seq model.
                    """
                    self.tokenizer = AutoTokenizer.from_pretrained(model_name)
                    self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
                    # Run on the GPU when one is available, otherwise on the CPU.
                    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
                    self.model.to(self.device)
                    # Metrics are moved to the same device as the model.
                    self.bleu = BLEUScore(n_gram=4).to(self.device)
                    self.rouge = ROUGEScore().to(self.device)

                def evaluate(self, nl_input, target_code, *, max_length=512, num_beams=5):
                    """Generate code for ``nl_input`` and score it against ``target_code``.

                    Args:
                        nl_input: Natural-language description given to the model.
                        target_code: Reference code the generation is compared with.
                        max_length: ``max_length`` forwarded to ``model.generate``.
                        num_beams: ``num_beams`` forwarded to ``model.generate``.

                    Returns:
                        A ``(bleu_score, rouge_score)`` tuple holding whatever the
                        BLEU and ROUGE metric objects return for this single pair.
                    """
                    self.model.eval()  # Set model to evaluation mode
                    with torch.no_grad():  # Disable gradient calculations
                        inputs = self.tokenizer(nl_input, return_tensors="pt").to(self.device)
                        outputs = self.model.generate(
                            **inputs,
                            max_length=max_length,
                            num_beams=num_beams,
                            early_stopping=True,
                        )
                        generated_code = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

                        # BLEU is a corpus-level metric: it takes a sequence of
                        # predictions and, per prediction, a sequence of references.
                        # Bare strings would be iterated character by character.
                        bleu_score = self.bleu([generated_code], [[target_code]])
                        rouge_score = self.rouge(generated_code, target_code)

                    return bleu_score, rouge_score
         
     | 
| 
         | 
|
| 
         | 
|
| 32 | 
         | 
| 33 | 
         
            +
            if __name__ == "__main__":
                # Smoke run: generate code for one prompt with the default model and
                # print both similarity scores against a hand-written reference.
                prompt = "Write a Python function to reverse a string."
                reference = "def reverse_string(s):\n    return s[::-1]\n"
                bleu_score, rouge_score = CodeEvaluator().evaluate(prompt, reference)
                print(f"BLEU score: {bleu_score}")
                print(f"ROUGE score: {rouge_score}")
         
     |