Spaces:

chauhoangkha
/

sentiment-analysis-app

Runtime error

App Files Files Community

Hoang Kha committed on Oct 7

Commit

6c510a2

1 Parent(s): 0514b67

Fix huggingface cache to /data for permission issues

Browse files

Files changed (2) hide show

Dockerfile +2 -1
main.py +23 -27

Dockerfile CHANGED Viewed

@@ -10,7 +10,8 @@ RUN pip install --no-cache-dir -r requirements.txt
 EXPOSE 7860
 ENV HF_HUB_DISABLE_CACHE=1
 ENV HF_HUB_DISABLE_SYMLINKS_WARNING=1
-ENV TRANSFORMERS_OFFLINE=0
 CMD ["python", "main.py"]

 EXPOSE 7860
 ENV HF_HUB_DISABLE_CACHE=1
+ENV TRANSFORMERS_CACHE=/dev/null
+ENV HF_HOME=/dev/null
 ENV HF_HUB_DISABLE_SYMLINKS_WARNING=1
 CMD ["python", "main.py"]

main.py CHANGED Viewed

@@ -201,15 +201,14 @@ import os
 from flask import Flask, render_template, request, jsonify
 from langdetect import detect
 import torch
-import torch.nn.functional as F
 from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
-# ✅ Tắt toàn bộ cache và ghi đĩa
 os.environ["HF_HUB_DISABLE_CACHE"] = "1"
-os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"
 os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
 os.environ["TRANSFORMERS_OFFLINE"] = "0"
-os.environ["HF_DATASETS_OFFLINE"] = "1"
 os.environ["DISABLE_TELEMETRY"] = "1"
 app = Flask(__name__)
@@ -220,38 +219,38 @@ EN_MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"
 device = "cuda" if torch.cuda.is_available() else "cpu"
-print("🔄 Loading Vietnamese model (memory-only mode)...")
 vi_tokenizer = AutoTokenizer.from_pretrained(
-    VI_MODEL_NAME, use_fast=False, cache_dir=None, local_files_only=False
 )
 vi_model = AutoModelForSequenceClassification.from_pretrained(
-    VI_MODEL_NAME, cache_dir=None, local_files_only=False
 ).to(device)
 vi_model.eval()
 sentiment_pipeline = pipeline("sentiment-analysis", model=vi_model, tokenizer=vi_tokenizer)
-print("✅ Vietnamese model loaded!")
-print("🔄 Loading English model (memory-only mode)...")
 en_tokenizer = AutoTokenizer.from_pretrained(
-    EN_MODEL_NAME, cache_dir=None, local_files_only=False
 )
 en_model = AutoModelForSequenceClassification.from_pretrained(
-    EN_MODEL_NAME, cache_dir=None, local_files_only=False
 ).to(device)
 en_model.eval()
-print("✅ English model loaded!")
 # -----------------------------
-# Label mapping
-# -----------------------------
-vi_label_map = {
-    "POS": "Tích cực",
-    "NEG": "Tiêu cực",
-    "NEU": "Trung tính"
-}
-# -----------------------------
-# Language detection
 # -----------------------------
 def detect_lang(text: str) -> str:
     try:
@@ -270,14 +269,13 @@ def detect_lang(text: str) -> str:
 # Vietnamese analysis
 # -----------------------------
 def analyze_vi(text: str):
-    if not text.strip():
-        return {"error": "Empty text."}
     result = sentiment_pipeline(text)[0]
     label = result["label"]
     score = round(result["score"], 3)
     return {
         "language": "vi",
-        "label": vi_label_map.get(label, label),
         "english_label": label,
         "score": score,
         "scores": {
@@ -318,10 +316,8 @@ def analyze():
     lang = (data.get("lang") or "auto").lower()
     if not text:
         return jsonify({"error": "Text is empty."}), 400
     if lang == "auto":
         lang = detect_lang(text)
     result = analyze_vi(text) if lang == "vi" else analyze_en(text)
     return jsonify({"ok": True, "input": {"text": text, "lang": lang}, "result": result})

 from flask import Flask, render_template, request, jsonify
 from langdetect import detect
 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
+# ⚙️ Ép Hugging Face không ghi cache, chỉ load vào RAM
 os.environ["HF_HUB_DISABLE_CACHE"] = "1"
+os.environ["TRANSFORMERS_CACHE"] = "/dev/null"        # ⛔ cache về null
+os.environ["HF_HOME"] = "/dev/null"                   # ⛔ home cache về null
 os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
 os.environ["TRANSFORMERS_OFFLINE"] = "0"
 os.environ["DISABLE_TELEMETRY"] = "1"
 app = Flask(__name__)
 device = "cuda" if torch.cuda.is_available() else "cpu"
+print("🔄 Loading Vietnamese model (RAM-only mode)...")
 vi_tokenizer = AutoTokenizer.from_pretrained(
+    VI_MODEL_NAME,
+    use_fast=False,
+    local_files_only=False,
+    cache_dir=None,
 )
 vi_model = AutoModelForSequenceClassification.from_pretrained(
+    VI_MODEL_NAME,
+    local_files_only=False,
+    cache_dir=None,
 ).to(device)
 vi_model.eval()
 sentiment_pipeline = pipeline("sentiment-analysis", model=vi_model, tokenizer=vi_tokenizer)
+print("✅ Vietnamese model loaded successfully.")
+print("🔄 Loading English model (RAM-only mode)...")
 en_tokenizer = AutoTokenizer.from_pretrained(
+    EN_MODEL_NAME,
+    local_files_only=False,
+    cache_dir=None,
 )
 en_model = AutoModelForSequenceClassification.from_pretrained(
+    EN_MODEL_NAME,
+    local_files_only=False,
+    cache_dir=None,
 ).to(device)
 en_model.eval()
+print("✅ English model loaded successfully.")
 # -----------------------------
+# Detect language
 # -----------------------------
 def detect_lang(text: str) -> str:
     try:
 # Vietnamese analysis
 # -----------------------------
 def analyze_vi(text: str):
     result = sentiment_pipeline(text)[0]
+    label_map = {"POS": "Tích cực", "NEG": "Tiêu cực", "NEU": "Trung tính"}
     label = result["label"]
     score = round(result["score"], 3)
     return {
         "language": "vi",
+        "label": label_map.get(label, label),
         "english_label": label,
         "score": score,
         "scores": {
     lang = (data.get("lang") or "auto").lower()
     if not text:
         return jsonify({"error": "Text is empty."}), 400
     if lang == "auto":
         lang = detect_lang(text)
     result = analyze_vi(text) if lang == "vi" else analyze_en(text)
     return jsonify({"ok": True, "input": {"text": text, "lang": lang}, "result": result})