Spaces:

aaryan24
/

Hate-Speech-Detector

Running

App Files Files Community

aaryan24 committed on Jun 14

Commit

0cc2ad1

verified ·

1 Parent(s): bf1acc0

Upload 7 files

Browse files

Files changed (8) hide show

.gitattributes +1 -0
Dockerfile +34 -0
app.py +103 -0
hs_gru.h5 +3 -0
hs_gru.keras +3 -0
index.html +194 -0
requirements.txt +7 -0
tokenizerpkl_gru.pkl +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+hs_gru.keras filter=lfs diff=lfs merge=lfs -text

Dockerfile ADDED Viewed

	@@ -0,0 +1,34 @@

+# Image for the Hate Speech Detector app: installs the Python dependencies,
+# copies the app code and model files into /code, and serves app.py with uvicorn.
+FROM python:3.10-slim
+# Set environment variables for Hugging Face cache
+# (both variables point at the same directory, which is created further down)
+# NOTE(review): newer transformers releases flag TRANSFORMERS_CACHE as
+# deprecated in favour of HF_HOME - confirm whether it is still needed.
+ENV HF_HOME=/code/hf_cache
+ENV TRANSFORMERS_CACHE=/code/hf_cache
+# Skip writing .pyc files and keep stdout/stderr unbuffered
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+# Create working directory
+WORKDIR /code
+# System dependencies (the apt package lists are removed in the same layer)
+# NOTE(review): nothing in app.py visibly uses git or wget, and build-essential
+# is only needed if a dependency compiles from source - confirm before trimming.
+RUN apt-get update && apt-get install -y \
+    git \
+    wget \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+# Install Python packages
+# (requirements.txt is copied on its own, before the rest of the code)
+COPY requirements.txt .
+RUN pip install --no-cache-dir --upgrade pip \
+ && pip install --no-cache-dir -r requirements.txt
+# Copy app code (app.py, index.html and the model/tokenizer files)
+COPY . .
+# Create model cache directory (the path HF_HOME / TRANSFORMERS_CACHE point to)
+# NOTE(review): the directory is created by the build user; verify that the
+# runtime user can write to it, since app.py downloads models at startup.
+RUN mkdir -p /code/hf_cache
+# Expose FastAPI port (must match the --port passed to uvicorn below)
+EXPOSE 7860
+# Start the app
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

app.py ADDED Viewed

	@@ -0,0 +1,103 @@

+from fastapi import FastAPI, Request, HTTPException
+from fastapi.responses import HTMLResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+import tensorflow as tf
+import pickle
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+from fastapi.responses import JSONResponse
+# Initialize FastAPI
+app = FastAPI()
+# Load GRU model and tokenizer
+gru_model = tf.keras.models.load_model('hs_gru.h5')
+with open('tokenizerpkl_gru.pkl', 'rb') as f:
+    gru_tokenizer = pickle.load(f)
+gru_maxlen = 100
+# Load RoBERTa model
+# Load RoBERTa model
+roberta_model_name = "facebook/roberta-hate-speech-dynabench-r4-target"
+roberta_tokenizer = AutoTokenizer.from_pretrained(roberta_model_name)
+if roberta_tokenizer.pad_token is None:
+    roberta_tokenizer.add_special_tokens({'pad_token': '[PAD]'})
+roberta_model = AutoModelForSequenceClassification.from_pretrained(roberta_model_name)
+roberta_model.resize_token_embeddings(len(roberta_tokenizer))
+#load toxigen-hatebert model
+toxigen_model_name = "tomh/toxigen_roberta"
+toxigen_tokenizer = AutoTokenizer.from_pretrained(toxigen_model_name)
+if toxigen_tokenizer.pad_token is None:
+    toxigen_tokenizer.add_special_tokens({'pad_token': '[PAD]'})
+toxigen_model = AutoModelForSequenceClassification.from_pretrained(toxigen_model_name)
+toxigen_model.resize_token_embeddings(len(toxigen_tokenizer))
+# Enable CORS
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Mount static directory
+# app.mount("/static", StaticFiles(directory="static"), name="static")
+# Pydantic input model: the JSON request body accepted by POST /predict
+class TextInput(BaseModel):
+    # Raw text to classify
+    text: str
+@app.get("/", response_class=HTMLResponse)
+def read_root():
+    with open("index.html", "r") as f:
+        return f.read()
+@app.get("/health")
+def health_check():
+    return {"message": "Hate Speech Detection API is running!"}
+@app.post("/predict")
+def predict_ensemble(input: TextInput):
+    try:
+        text = input.text
+        # print(f"Received input: {input.text}")
+        # ----- GRU Prediction -----
+        seq = gru_tokenizer.texts_to_sequences([text])
+        padded = pad_sequences(seq, maxlen=gru_maxlen, padding='post')
+        gru_prob = float(gru_model.predict(padded)[0][0])
+        # ----- RoBERTa Prediction -----
+        inputs_roberta = roberta_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+        with torch.no_grad():
+            logits_roberta = roberta_model(**inputs_roberta).logits
+            probs_roberta = torch.nn.functional.softmax(logits_roberta, dim=1)
+            roberta_prob = float(probs_roberta[0][1].item())
+        # -----toxigen -hatebert Prediction -----
+        inputs_toxigen = toxigen_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+        with torch.no_grad():
+            logits_toxigen = toxigen_model(**inputs_toxigen).logits
+            probs_toxigen = torch.nn.functional.softmax(logits_toxigen, dim=1)
+            toxigen_prob = float(probs_toxigen[0][1].item())
+        # ----- Weighted Ensemble -----
+        final_score = (0.3 * gru_prob) + (0.4 * roberta_prob) + (0.3 * toxigen_prob)
+        label = "Hate Speech" if final_score > 0.5 else "Not Hate Speech"
+        return {
+            # "text": text,
+            "gru_prob": round(gru_prob, 4),
+            "roberta_prob": round(roberta_prob, 4),
+            "toxigen_prob": round(toxigen_prob, 4),
+            "final_score": round(final_score, 4),
+            "prediction": label
+        }
+    except Exception as e:
+        print(f"Error during prediction: {str(e)}")
+        return JSONResponse(status_code=500, content={"detail": str(e)})

hs_gru.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d64d7840832a93d75389d7e15450bcf1914b6b5be1b23dd55543deb131c11528
+size 26368672

hs_gru.keras ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7699a8f4fbe4194d72813727483e91cccb8b3ccdcf7d234a7811bff043c0d4e4
+size 26367562

index.html ADDED Viewed

	@@ -0,0 +1,194 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8">
+  <title>Hate Speech Detection</title>
+  <style>
+    body {
+      font-family: Arial, sans-serif;
+      background: #f9f9f9;
+      color: #000;
+      margin: 0;
+      padding: 20px;
+      display: flex;
+      flex-direction: column;
+      align-items: center;
+    }
+    body.dark-mode {
+      background: #272626;
+      color: #fff;
+    }
+    h1 {
+      font-size: 40px;
+      margin-bottom: 10px;
+    }
+    .subtext {
+      font-size: 16px;
+      margin-bottom: 40px;
+      text-align: center;
+      max-width: 800px;
+    }
+    .container {
+      display: flex;
+      justify-content: center;
+      gap: 40px;
+      width: 100%;
+      max-width: 1000px;
+    }
+    .left-panel, .right-panel {
+      flex: 1;
+      display: flex;
+      flex-direction: column;
+    }
+    textarea {
+      width: 100%;
+      height: 140px;
+      padding: 10px;
+      font-size: 16px;
+    }
+    .buttons {
+      display: flex;
+      justify-content: space-between;
+      width: 100%;
+      margin-top: 10px;
+    }
+    .buttons button {
+      width: 49.5%;
+      padding: 12px;
+      font-size: 16px;
+      border: none;
+      border-radius: 4px;
+      cursor: pointer;
+    }
+    .submit-btn {
+      background: #4CAF50;
+      color: white;
+    }
+    .clear-btn {
+      background: #45a049;
+      color: white;
+    }
+    .share-btn {
+      width: 100%;
+      padding: 14px;
+      font-size: 16px;
+      background: #007BFF;
+      color: white;
+      border: none;
+      border-radius: 4px;
+      margin-top: auto;
+    }
+    .output {
+      background: white;
+      padding: 15px;
+      border-radius: 8px;
+      box-shadow: 0 0 10px rgba(0,0,0,0.1);
+      font-size: 16px;
+      min-height: 120px;
+      margin-bottom: 20px;
+    }
+    .dark-mode .output {
+      background: #1e1e1e;
+      color: #fff;
+    }
+    .mode-toggle {
+      margin-top: 60px;
+      font-size: 14px;
+    }
+  </style>
+</head>
+<body>
+  <h1 id="page-title">Hate Speech Detection</h1>
+  <p class="subtext">
+    This tool uses 3 models (GloVe-based Deep Learning (GRU), RoBERTa (Transformer), ToxiGen-HateBERT (Transformer))
+    to classify hate speech using an ensemble method.
+  </p>
+  <div class="container">
+    <div class="left-panel">
+      <form id="hs-form">
+        <textarea id="text-input" placeholder="Enter text here..."></textarea>
+        <div class="buttons">
+          <button type="button" class="clear-btn" onclick="clearText()">Clear Text</button>
+          <button type="submit" class="submit-btn">Submit</button>
+        </div>
+      </form>
+    </div>
+    <div class="right-panel">
+      <div class="output" id="output" style="display: none;"></div>
+      <button class="share-btn" onclick="shareText()">Share via Link</button>
+    </div>
+  </div>
+  <div class="mode-toggle">
+    <label><input type="checkbox" id="mode-toggle"> Dark Mode</label>
+  </div>
+  <script>
+    const outputDiv = document.getElementById("output");
+    const pageTitle = document.getElementById("page-title");
+    document.getElementById("hs-form").addEventListener("submit", async function (e) {
+      e.preventDefault();
+      const text = document.getElementById("text-input").value;
+      const response = await fetch("/predict", {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({ text: text }),
+      });
+      const result = await response.json();
+      outputDiv.style.display = "block";
+      outputDiv.innerHTML = `
+        <strong>Model 1:</strong> ${result.gru_prob}<br>
+        <strong>Model 2:</strong> ${result.roberta_prob}<br>
+        <strong>Model 3:</strong> ${result.toxigen_prob}<br>
+        <strong>Prediction:</strong> ${result.prediction}
+      `;
+    });
+    function clearText() {
+      document.getElementById("text-input").value = "";
+      outputDiv.style.display = "none";
+      outputDiv.innerHTML = "";
+    }
+    function shareText() {
+      const text = document.getElementById("text-input").value;
+      const shareUrl = `${window.location.href}?text=${encodeURIComponent(text)}`;
+      navigator.clipboard.writeText(shareUrl);
+      alert("Sharable link copied to clipboard!");
+    }
+    document.getElementById("mode-toggle").addEventListener("change", function () {
+      document.body.classList.toggle("dark-mode", this.checked);
+      pageTitle.style.color = this.checked ? "white" : "black";
+    });
+    // On load: apply shared text if present
+    window.onload = function () {
+      const urlParams = new URLSearchParams(window.location.search);
+      const sharedText = urlParams.get("text");
+      if (sharedText) {
+        document.getElementById("text-input").value = decodeURIComponent(sharedText);
+      }
+    };
+  </script>
+</body>
+</html>

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+fastapi
+uvicorn
+transformers
+torch
+tensorflow
+pydantic
+python-multipart

tokenizerpkl_gru.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f933c6a038133f7d49d3cda5e5e7a1c930813dafdde2ddd0f6dfc0037d2c8108
+size 5651170