from fastapi import FastAPI
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import pickle

import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Initialize FastAPI
app = FastAPI()

# Load GRU model and tokenizer
gru_model = tf.keras.models.load_model('hs_gru.h5')
with open('tokenizerpkl_gru.pkl', 'rb') as f:
    gru_tokenizer = pickle.load(f)
gru_maxlen = 100  # must match the sequence length used when the GRU was trained
# Load RoBERTa hate speech model
roberta_model_name = "facebook/roberta-hate-speech-dynabench-r4-target"
roberta_tokenizer = AutoTokenizer.from_pretrained(roberta_model_name)
if roberta_tokenizer.pad_token is None:
    roberta_tokenizer.add_special_tokens({'pad_token': '[PAD]'})
roberta_model = AutoModelForSequenceClassification.from_pretrained(roberta_model_name)
roberta_model.resize_token_embeddings(len(roberta_tokenizer))
# Load ToxiGen RoBERTa model
toxigen_model_name = "tomh/toxigen_roberta"
toxigen_tokenizer = AutoTokenizer.from_pretrained(toxigen_model_name)
if toxigen_tokenizer.pad_token is None:
    toxigen_tokenizer.add_special_tokens({'pad_token': '[PAD]'})
toxigen_model = AutoModelForSequenceClassification.from_pretrained(toxigen_model_name)
toxigen_model.resize_token_embeddings(len(toxigen_tokenizer))
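
# NOTE: resize_token_embeddings only changes the embedding matrix if a pad
# token was actually added above; RoBERTa checkpoints already ship with a
# <pad> token, so in practice both calls are effectively no-ops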
# Enable CORS so the frontend can call the API from any origin
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Mount static directory
# app.mount("/static", StaticFiles(directory="static"), name="static")

# Pydantic input model
class TextInput(BaseModel):
    text: str

# Serve the frontend page
@app.get("/", response_class=HTMLResponse)
def read_root():
    with open("index.html", "r") as f:
        return f.read()

# Health-check endpoint
@app.get("/health")
def health_check():
    return {"message": "Hate Speech Detection API is running!"}

@app.post("/predict")
def predict_ensemble(input: TextInput):
    try:
        text = input.text

        # ----- GRU prediction -----
        seq = gru_tokenizer.texts_to_sequences([text])
        padded = pad_sequences(seq, maxlen=gru_maxlen, padding='post')
        gru_prob = float(gru_model.predict(padded)[0][0])
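        # predict() returns an array of shape (1, 1); assuming the GRU head is
        # a single sigmoid unit, [0][0] is the probability of the hate class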

        # ----- RoBERTa prediction -----
        inputs_roberta = roberta_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
        with torch.no_grad():
            logits_roberta = roberta_model(**inputs_roberta).logits
        probs_roberta = torch.nn.functional.softmax(logits_roberta, dim=1)
        roberta_prob = float(probs_roberta[0][1].item())
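        # class index 1 is the "hate" label for this checkpoint
        # (id2label: 0 = nothate, 1 = hate)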

        # ----- ToxiGen RoBERTa prediction -----
        inputs_toxigen = toxigen_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
        with torch.no_grad():
            logits_toxigen = toxigen_model(**inputs_toxigen).logits
        probs_toxigen = torch.nn.functional.softmax(logits_toxigen, dim=1)
        toxigen_prob = float(probs_toxigen[0][1].item())
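        # index 1 is assumed to be the toxic class here as well; the checkpoint
        # exposes generic LABEL_0/LABEL_1 names, so this ordering is an assumption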

        # ----- Weighted ensemble: 0.3 * GRU + 0.4 * RoBERTa + 0.3 * ToxiGen -----
        final_score = (0.3 * gru_prob) + (0.4 * roberta_prob) + (0.3 * toxigen_prob)
        label = "Hate Speech" if final_score > 0.5 else "Not Hate Speech"

        return {
            "gru_prob": round(gru_prob, 4),
            "roberta_prob": round(roberta_prob, 4),
            "toxigen_prob": round(toxigen_prob, 4),
            "final_score": round(final_score, 4),
            "prediction": label
        }
    except Exception as e:
        print(f"Error during prediction: {str(e)}")
        return JSONResponse(status_code=500, content={"detail": str(e)})
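
# --- Usage sketch ---
# Assuming this file is saved as app.py and served locally on port 8000,
# the server can be started with:
#   uvicorn app:app --host 0.0.0.0 --port 8000
# and the ensemble endpoint exercised with, e.g.:
#   import requests
#   resp = requests.post("http://localhost:8000/predict",
#                        json={"text": "example input"})
#   print(resp.json())  # gru_prob, roberta_prob, toxigen_prob, final_score, prediction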