Spaces:
Sleeping
Sleeping
| import logging | |
| import os | |
| import nltk | |
| import torch | |
| from nltk.corpus import stopwords | |
| from nltk.stem import WordNetLemmatizer | |
| from nltk.tokenize import word_tokenize | |
| from sentence_transformers import SentenceTransformer, util | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
# Module-level logger, named after this module per logging convention.
logger = logging.getLogger(__name__)

# Download necessary NLTK resources at import time.
# NOTE(review): these run on every import and hit the network when the data
# is missing — consider nltk.download(..., quiet=True) or a one-time setup
# step; newer NLTK versions may also require 'punkt_tab' — TODO confirm.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

# Initialize NLP tools shared by the functions below.
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words("english"))
# Cue words used by contains_negation(); "n't" matches NLTK's tokenization
# of contractions (e.g. "can't" -> ["ca", "n't"]).
negation_words = {"not", "never", "no", "none", "cannot", "n't"}

# SBERT bi-encoder used for cosine-similarity scoring.
sbert_model = SentenceTransformer("all-MiniLM-L6-v2")

# Cross-Encoder for joint (pairwise) contextual scoring.
# NOTE(review): loading both models at import time is slow and memory-heavy;
# presumably acceptable for a long-lived service — verify against deployment.
cross_encoder_model = AutoModelForSequenceClassification.from_pretrained("cross-encoder/stsb-roberta-large")
cross_encoder_tokenizer = AutoTokenizer.from_pretrained("cross-encoder/stsb-roberta-large")
| # ------------------------------- | |
| # Preprocessing & Negation | |
| # ------------------------------- | |
def preprocess_text(text: str):
    """Normalize *text* for embedding: lowercase, tokenize, drop English
    stopwords, lemmatize the survivors, and rejoin with single spaces."""
    raw_tokens = word_tokenize(text.lower())
    kept = []
    for token in raw_tokens:
        if token in stop_words:
            continue  # stopwords carry little semantic signal for similarity
        kept.append(lemmatizer.lemmatize(token))
    return " ".join(kept)
def contains_negation(text: str):
    """Return True if *text* contains any negation cue word (see
    ``negation_words``); tokenization matches preprocess_text's."""
    # isdisjoint is the set-level form of "any token is a negation word".
    return not negation_words.isdisjoint(word_tokenize(text.lower()))
| # ------------------------------- | |
| # Evaluation Function | |
| # ------------------------------- | |
def evaluate_answer(student_ans: str, teacher_ans: str):
    """
    Score a student's answer against the teacher's reference answer.

    Combines an SBERT bi-encoder cosine similarity (40%) with a
    cross-encoder relevance score (60%), then applies a one-time 50%
    penalty when exactly one of the two answers contains a negation cue.

    Args:
        student_ans: The student's free-text answer (may be None/empty).
        teacher_ans: The reference answer (may be None/empty).

    Returns:
        tuple[float, dict]: (score in 0-100, breakdown dict). On empty
        input or internal failure the score is 0.0 and the dict carries a
        "reason" or "error" key instead of the score breakdown.
    """
    student = (student_ans or "").strip()
    teacher = (teacher_ans or "").strip()
    if not student:
        return 0.0, {"reason": "Empty answer"}
    if not teacher:
        # Encoding an empty reference would produce a meaningless similarity.
        return 0.0, {"reason": "Empty teacher answer"}
    try:
        # Normalize both answers the same way before scoring.
        student_clean = preprocess_text(student)
        teacher_clean = preprocess_text(teacher)

        # SBERT bi-encoder cosine similarity (approximately 0..1).
        emb_student = sbert_model.encode(student_clean, convert_to_tensor=True)
        emb_teacher = sbert_model.encode(teacher_clean, convert_to_tensor=True)
        sbert_score = util.pytorch_cos_sim(emb_student, emb_teacher).item()

        # Cross-encoder: joint encoding of the pair; sigmoid squashes the
        # regression logit into 0..1.
        inputs = cross_encoder_tokenizer(student_clean, teacher_clean, return_tensors="pt", truncation=True)
        with torch.no_grad():
            logits = cross_encoder_model(**inputs).logits
        cross_score = torch.sigmoid(logits).item()

        # Negation handling on the RAW text (preprocessing strips stopwords
        # such as "not", so the cue must be detected before cleaning).
        # BUGFIX: the penalty was previously applied twice — once to each
        # component score and again to the blended final, quartering the
        # result instead of halving it. Apply it exactly once to the blend,
        # and report the raw component scores in the breakdown.
        if contains_negation(student) != contains_negation(teacher):
            negation_penalty = 0.5
        else:
            negation_penalty = 0.0

        # Weighted blend, then the single penalty application.
        final = 0.4 * sbert_score + 0.6 * cross_score
        final *= (1.0 - negation_penalty)
        final_pct = round(final * 100, 2)

        breakdown = {
            "sbert_score": round(sbert_score * 100, 2),
            "cross_score": round(cross_score * 100, 2),
            "negation_penalty": negation_penalty,
            "final_pct": final_pct,
        }
        return final_pct, breakdown
    except Exception as e:
        # Model/tokenizer failures must not crash callers; degrade to 0
        # with the error surfaced in the breakdown dict.
        logger.exception("Evaluation failed")
        return 0.0, {"error": str(e)}