Spaces:
Runtime error
Runtime error
Hoang Kha
committed on
Commit
·
6c510a2
1
Parent(s):
0514b67
Fix huggingface cache to /data for permission issues
Browse files
- Dockerfile +2 -1
- main.py +23 -27
Dockerfile
CHANGED
|
@@ -10,7 +10,8 @@ RUN pip install --no-cache-dir -r requirements.txt
|
|
| 10 |
EXPOSE 7860
|
| 11 |
|
| 12 |
ENV HF_HUB_DISABLE_CACHE=1
|
|
|
|
|
|
|
| 13 |
ENV HF_HUB_DISABLE_SYMLINKS_WARNING=1
|
| 14 |
-
ENV TRANSFORMERS_OFFLINE=0
|
| 15 |
|
| 16 |
CMD ["python", "main.py"]
|
|
|
|
| 10 |
EXPOSE 7860
|
| 11 |
|
| 12 |
ENV HF_HUB_DISABLE_CACHE=1
|
| 13 |
+
ENV TRANSFORMERS_CACHE=/dev/null
|
| 14 |
+
ENV HF_HOME=/dev/null
|
| 15 |
ENV HF_HUB_DISABLE_SYMLINKS_WARNING=1
|
|
|
|
| 16 |
|
| 17 |
CMD ["python", "main.py"]
|
main.py
CHANGED
|
@@ -201,15 +201,14 @@ import os
|
|
| 201 |
from flask import Flask, render_template, request, jsonify
|
| 202 |
from langdetect import detect
|
| 203 |
import torch
|
| 204 |
-
import torch.nn.functional as F
|
| 205 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
|
| 206 |
|
| 207 |
-
#
|
| 208 |
os.environ["HF_HUB_DISABLE_CACHE"] = "1"
|
| 209 |
-
os.environ["
|
|
|
|
| 210 |
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
|
| 211 |
os.environ["TRANSFORMERS_OFFLINE"] = "0"
|
| 212 |
-
os.environ["HF_DATASETS_OFFLINE"] = "1"
|
| 213 |
os.environ["DISABLE_TELEMETRY"] = "1"
|
| 214 |
|
| 215 |
app = Flask(__name__)
|
|
@@ -220,38 +219,38 @@ EN_MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"
|
|
| 220 |
|
| 221 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 222 |
|
| 223 |
-
print("🔄 Loading Vietnamese model (
|
| 224 |
vi_tokenizer = AutoTokenizer.from_pretrained(
|
| 225 |
-
VI_MODEL_NAME,
|
|
|
|
|
|
|
|
|
|
| 226 |
)
|
| 227 |
vi_model = AutoModelForSequenceClassification.from_pretrained(
|
| 228 |
-
VI_MODEL_NAME,
|
|
|
|
|
|
|
| 229 |
).to(device)
|
| 230 |
vi_model.eval()
|
| 231 |
sentiment_pipeline = pipeline("sentiment-analysis", model=vi_model, tokenizer=vi_tokenizer)
|
| 232 |
-
print("✅ Vietnamese model loaded
|
| 233 |
|
| 234 |
-
print("🔄 Loading English model (
|
| 235 |
en_tokenizer = AutoTokenizer.from_pretrained(
|
| 236 |
-
EN_MODEL_NAME,
|
|
|
|
|
|
|
| 237 |
)
|
| 238 |
en_model = AutoModelForSequenceClassification.from_pretrained(
|
| 239 |
-
EN_MODEL_NAME,
|
|
|
|
|
|
|
| 240 |
).to(device)
|
| 241 |
en_model.eval()
|
| 242 |
-
print("✅ English model loaded
|
| 243 |
|
| 244 |
# -----------------------------
|
| 245 |
-
#
|
| 246 |
-
# -----------------------------
|
| 247 |
-
vi_label_map = {
|
| 248 |
-
"POS": "Tích cực",
|
| 249 |
-
"NEG": "Tiêu cực",
|
| 250 |
-
"NEU": "Trung tính"
|
| 251 |
-
}
|
| 252 |
-
|
| 253 |
-
# -----------------------------
|
| 254 |
-
# Language detection
|
| 255 |
# -----------------------------
|
| 256 |
def detect_lang(text: str) -> str:
|
| 257 |
try:
|
|
@@ -270,14 +269,13 @@ def detect_lang(text: str) -> str:
|
|
| 270 |
# Vietnamese analysis
|
| 271 |
# -----------------------------
|
| 272 |
def analyze_vi(text: str):
|
| 273 |
-
if not text.strip():
|
| 274 |
-
return {"error": "Empty text."}
|
| 275 |
result = sentiment_pipeline(text)[0]
|
|
|
|
| 276 |
label = result["label"]
|
| 277 |
score = round(result["score"], 3)
|
| 278 |
return {
|
| 279 |
"language": "vi",
|
| 280 |
-
"label":
|
| 281 |
"english_label": label,
|
| 282 |
"score": score,
|
| 283 |
"scores": {
|
|
@@ -318,10 +316,8 @@ def analyze():
|
|
| 318 |
lang = (data.get("lang") or "auto").lower()
|
| 319 |
if not text:
|
| 320 |
return jsonify({"error": "Text is empty."}), 400
|
| 321 |
-
|
| 322 |
if lang == "auto":
|
| 323 |
lang = detect_lang(text)
|
| 324 |
-
|
| 325 |
result = analyze_vi(text) if lang == "vi" else analyze_en(text)
|
| 326 |
return jsonify({"ok": True, "input": {"text": text, "lang": lang}, "result": result})
|
| 327 |
|
|
|
|
| 201 |
from flask import Flask, render_template, request, jsonify
|
| 202 |
from langdetect import detect
|
| 203 |
import torch
|
|
|
|
| 204 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
|
| 205 |
|
| 206 |
+
# ⚙️ Ép Hugging Face không ghi cache, chỉ load vào RAM
|
| 207 |
os.environ["HF_HUB_DISABLE_CACHE"] = "1"
|
| 208 |
+
os.environ["TRANSFORMERS_CACHE"] = "/dev/null" # ⛔ cache về null
|
| 209 |
+
os.environ["HF_HOME"] = "/dev/null" # ⛔ home cache về null
|
| 210 |
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
|
| 211 |
os.environ["TRANSFORMERS_OFFLINE"] = "0"
|
|
|
|
| 212 |
os.environ["DISABLE_TELEMETRY"] = "1"
|
| 213 |
|
| 214 |
app = Flask(__name__)
|
|
|
|
| 219 |
|
| 220 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 221 |
|
| 222 |
+
print("🔄 Loading Vietnamese model (RAM-only mode)...")
|
| 223 |
vi_tokenizer = AutoTokenizer.from_pretrained(
|
| 224 |
+
VI_MODEL_NAME,
|
| 225 |
+
use_fast=False,
|
| 226 |
+
local_files_only=False,
|
| 227 |
+
cache_dir=None,
|
| 228 |
)
|
| 229 |
vi_model = AutoModelForSequenceClassification.from_pretrained(
|
| 230 |
+
VI_MODEL_NAME,
|
| 231 |
+
local_files_only=False,
|
| 232 |
+
cache_dir=None,
|
| 233 |
).to(device)
|
| 234 |
vi_model.eval()
|
| 235 |
sentiment_pipeline = pipeline("sentiment-analysis", model=vi_model, tokenizer=vi_tokenizer)
|
| 236 |
+
print("✅ Vietnamese model loaded successfully.")
|
| 237 |
|
| 238 |
+
print("🔄 Loading English model (RAM-only mode)...")
|
| 239 |
en_tokenizer = AutoTokenizer.from_pretrained(
|
| 240 |
+
EN_MODEL_NAME,
|
| 241 |
+
local_files_only=False,
|
| 242 |
+
cache_dir=None,
|
| 243 |
)
|
| 244 |
en_model = AutoModelForSequenceClassification.from_pretrained(
|
| 245 |
+
EN_MODEL_NAME,
|
| 246 |
+
local_files_only=False,
|
| 247 |
+
cache_dir=None,
|
| 248 |
).to(device)
|
| 249 |
en_model.eval()
|
| 250 |
+
print("✅ English model loaded successfully.")
|
| 251 |
|
| 252 |
# -----------------------------
|
| 253 |
+
# Detect language
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
# -----------------------------
|
| 255 |
def detect_lang(text: str) -> str:
|
| 256 |
try:
|
|
|
|
| 269 |
# Vietnamese analysis
|
| 270 |
# -----------------------------
|
| 271 |
def analyze_vi(text: str):
|
|
|
|
|
|
|
| 272 |
result = sentiment_pipeline(text)[0]
|
| 273 |
+
label_map = {"POS": "Tích cực", "NEG": "Tiêu cực", "NEU": "Trung tính"}
|
| 274 |
label = result["label"]
|
| 275 |
score = round(result["score"], 3)
|
| 276 |
return {
|
| 277 |
"language": "vi",
|
| 278 |
+
"label": label_map.get(label, label),
|
| 279 |
"english_label": label,
|
| 280 |
"score": score,
|
| 281 |
"scores": {
|
|
|
|
| 316 |
lang = (data.get("lang") or "auto").lower()
|
| 317 |
if not text:
|
| 318 |
return jsonify({"error": "Text is empty."}), 400
|
|
|
|
| 319 |
if lang == "auto":
|
| 320 |
lang = detect_lang(text)
|
|
|
|
| 321 |
result = analyze_vi(text) if lang == "vi" else analyze_en(text)
|
| 322 |
return jsonify({"ok": True, "input": {"text": text, "lang": lang}, "result": result})
|
| 323 |
|