# Provenance (Hugging Face upload page residue): sailajaai — "Upload 25 files",
# commit 7688281 (verified), raw / history / blame view, 3.52 kB.
import logging
import os
import nltk
import torch
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from sentence_transformers import SentenceTransformer, util
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Module-level logger; configuration is left to the application.
logger = logging.getLogger(__name__)
# Download necessary NLTK resources (tokenizer models, stopword list, WordNet).
# NOTE(review): these run at import time and may hit the network on first use.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
# Initialize NLP tools shared by the functions below.
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words("english"))
# Words treated as negations; a mismatch in negation between student and
# teacher answers triggers a score penalty in evaluate_answer().
negation_words = {"not", "never", "no", "none", "cannot", "n't"}
# SBERT Model for Similarity (bi-encoder: embeds each text independently).
sbert_model = SentenceTransformer("all-MiniLM-L6-v2")
# Cross-Encoder for Contextual Understanding (scores the sentence pair jointly).
cross_encoder_model = AutoModelForSequenceClassification.from_pretrained("cross-encoder/stsb-roberta-large")
cross_encoder_tokenizer = AutoTokenizer.from_pretrained("cross-encoder/stsb-roberta-large")
# -------------------------------
# Preprocessing & Negation
# -------------------------------
def preprocess_text(text: str):
    """Lowercase and tokenize *text*, drop stopwords, lemmatize the rest.

    Returns the surviving lemmas joined into a single space-separated string.
    """
    kept = []
    for token in word_tokenize(text.lower()):
        if token in stop_words:
            continue  # skip stopwords entirely
        kept.append(lemmatizer.lemmatize(token))
    return " ".join(kept)
def contains_negation(text: str):
    """Return True if *text* contains at least one negation word ("not", "n't", ...)."""
    found = word_tokenize(text.lower())
    # True exactly when the token set intersects the negation vocabulary.
    return not negation_words.isdisjoint(found)
# -------------------------------
# Evaluation Function
# -------------------------------
def evaluate_answer(student_ans: str, teacher_ans: str):
    """Score a student's answer against the teacher's reference answer.

    Combines SBERT cosine similarity (weight 0.4) with a cross-encoder
    score (weight 0.6), then applies a one-time 50% penalty when exactly
    one of the two answers contains a negation word.

    Args:
        student_ans: Student's free-text answer; None/empty is tolerated.
        teacher_ans: Reference answer; None/empty is tolerated.

    Returns:
        (score, breakdown): score is a float in 0-100 rounded to 2 decimals;
        breakdown is a dict with "sbert_score", "cross_score" (both 0-100),
        "negation_penalty", and "final_pct". An empty student answer yields
        (0.0, {"reason": "Empty answer"}); any internal failure is logged
        and yields (0.0, {"error": <message>}).
    """
    student = (student_ans or "").strip()
    teacher = (teacher_ans or "").strip()
    if not student:
        return 0.0, {"reason": "Empty answer"}
    try:
        # Preprocess both answers identically so scores are comparable.
        student_clean = preprocess_text(student)
        teacher_clean = preprocess_text(teacher)
        # SBERT bi-encoder cosine similarity, roughly in 0..1.
        emb_student = sbert_model.encode(student_clean, convert_to_tensor=True)
        emb_teacher = sbert_model.encode(teacher_clean, convert_to_tensor=True)
        sbert_score = util.pytorch_cos_sim(emb_student, emb_teacher).item()
        # Cross-encoder joint score, squashed to 0..1 with a sigmoid.
        inputs = cross_encoder_tokenizer(student_clean, teacher_clean, return_tensors="pt", truncation=True)
        with torch.no_grad():
            logits = cross_encoder_model(**inputs).logits
        cross_score = torch.sigmoid(logits).item()
        # Negation handling. BUG FIX: the original halved both component
        # scores AND multiplied the weighted sum by (1 - 0.5) again, so a
        # negation mismatch cost 75% instead of the intended 50%. The
        # penalty is now applied exactly once, to the final weighted score,
        # and the breakdown reports the raw (unpenalized) component scores.
        if contains_negation(student) != contains_negation(teacher):
            negation_penalty = 0.5
        else:
            negation_penalty = 0.0
        # Weighted final score with the penalty applied once.
        final = (0.4 * sbert_score + 0.6 * cross_score) * (1.0 - negation_penalty)
        final_pct = round(final * 100, 2)
        breakdown = {
            "sbert_score": round(sbert_score * 100, 2),
            "cross_score": round(cross_score * 100, 2),
            "negation_penalty": negation_penalty,
            "final_pct": final_pct,
        }
        return final_pct, breakdown
    except Exception as e:
        # Best-effort scoring: never let an evaluation failure crash the caller.
        logger.exception("Evaluation failed")
        return 0.0, {"error": str(e)}