Spaces:
Runtime error
Runtime error
| # import os | |
| # from flask import Flask, render_template, request, jsonify | |
| # from langdetect import detect | |
| # import torch | |
| # import torch.nn.functional as F | |
| # from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline | |
| # os.environ["HF_HOME"] = "/data/huggingface" | |
| # os.environ["TRANSFORMERS_CACHE"] = "/data/huggingface" | |
| # os.makedirs("/data/huggingface", exist_ok=True) | |
| # os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1" | |
| # os.environ["TRANSFORMERS_OFFLINE"] = "0" | |
| # os.environ["HF_HUB_DISABLE_CACHE"] = "1" | |
| # app = Flask(__name__) | |
| # # --------- Models ---------- | |
| # VI_MODEL_NAME = "wonrax/phobert-base-vietnamese-sentiment" | |
| # EN_MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english" | |
| # device = "cuda" if torch.cuda.is_available() else "cpu" | |
| # # Vietnamese model | |
| # # vi_tokenizer = AutoTokenizer.from_pretrained(VI_MODEL_NAME, use_fast=False) | |
| # # vi_model = AutoModelForSequenceClassification.from_pretrained(VI_MODEL_NAME).to(device) | |
| # # vi_model.eval() | |
| # # vi_tokenizer = AutoTokenizer.from_pretrained(VI_MODEL_NAME, use_fast=False) | |
| # # vi_model = AutoModelForSequenceClassification.from_pretrained(VI_MODEL_NAME) | |
| # # vi_model.eval() | |
| # # sentiment_pipeline = pipeline("sentiment-analysis", model=vi_model, tokenizer=vi_tokenizer) | |
| # # # English model | |
| # # en_tokenizer = AutoTokenizer.from_pretrained(EN_MODEL_NAME) | |
| # # en_model = AutoModelForSequenceClassification.from_pretrained(EN_MODEL_NAME).to(device) | |
| # # en_model.eval() | |
| # print("Loading Vietnamese model from Hugging Face Hub (no cache)...") | |
| # vi_tokenizer = AutoTokenizer.from_pretrained(VI_MODEL_NAME, use_fast=False, local_files_only=False) | |
| # vi_model = AutoModelForSequenceClassification.from_pretrained(VI_MODEL_NAME, local_files_only=False) | |
| # vi_model.eval() | |
| # sentiment_pipeline = pipeline("sentiment-analysis", model=vi_model, tokenizer=vi_tokenizer) | |
| # print("Loading English model from Hugging Face Hub (no cache)...") | |
| # en_tokenizer = AutoTokenizer.from_pretrained(EN_MODEL_NAME, local_files_only=False) | |
| # en_model = AutoModelForSequenceClassification.from_pretrained(EN_MODEL_NAME, local_files_only=False) | |
| # en_model.eval() | |
| # # Label mapping cho PhoBERT | |
| # vi_label_map = { | |
| # 0: ("NEGATIVE", "Tiêu cực"), | |
| # 1: ("NEUTRAL", "Trung tính"), | |
| # 2: ("POSITIVE", "Tích cực") | |
| # } | |
| # # Label mapping cho tiếng Anh | |
| # en_label_map = { | |
| # 0: ("NEGATIVE", "Negative"), | |
| # 1: ("POSITIVE", "Positive") | |
| # } | |
| # # ----------------------------- | |
| # # Ngôn ngữ nhận diện | |
| # # ----------------------------- | |
| # def detect_lang(text: str) -> str: | |
| # try: | |
| # lang = detect(text) | |
| # if lang.startswith("vi"): | |
| # return "vi" | |
| # elif lang.startswith("en"): | |
| # return "en" | |
| # else: | |
| # if any(ch in text for ch in "ăâđêôơưáàạảãấầậẩẫắằặẳẵéèẹẻẽếềệểễóòọỏõốồộổỗớờợởỡíìịỉĩúùụủũứừựửữýỳỵỷỹ"): | |
| # return "vi" | |
| # return "en" | |
| # except Exception: | |
| # if any(ch in text for ch in "ăâđêôơưáàạảãấầậẩẫắằặẳẵéèẹẻẽếềệểễóòọỏõốồộổỗớờợởỡíìịỉĩúùụủũứừựửữýỳỵỷỹ"): | |
| # return "vi" | |
| # return "en" | |
| # # ----------------------------- | |
| # # Phân tích tiếng Việt (PhoBERT) | |
| # # ----------------------------- | |
| # # def analyze_vi(text: str): | |
| # # inputs = vi_tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device) | |
| # # with torch.no_grad(): | |
| # # outputs = vi_model(**inputs) | |
| # # logits = outputs.logits.squeeze(0) | |
| # # probs = torch.softmax(logits, dim=-1) | |
| # # label_idx = int(torch.argmax(probs).item()) | |
| # # eng_label, vi_label = vi_label_map[label_idx] | |
| # # confidence = float(probs[label_idx].item()) | |
| # # scores = { | |
| # # vi_label_map[i][1]: round(float(probs[i].item()), 3) for i in range(3) | |
| # # } | |
| # # return { | |
| # # "language": "vi", | |
| # # "label": vi_label, | |
| # # "english_label": eng_label, | |
| # # "score": round(confidence, 3), | |
| # # "scores": scores | |
| # # } | |
| # def analyze_vi(text: str): | |
| # if not text.strip(): | |
| # return {"error": "Text is empty."} | |
| # # Dùng pipeline của transformers | |
| # result = sentiment_pipeline(text)[0] | |
| # label = result["label"] | |
| # score = round(result["score"], 3) | |
| # # Map nhãn tiếng Việt | |
| # label_map = { | |
| # "POS": "Tích cực", | |
| # "NEG": "Tiêu cực", | |
| # "NEU": "Trung tính" | |
| # } | |
| # vi_label = label_map.get(label, label) | |
| # # Trả kết quả tương thích với frontend | |
| # return { | |
| # "language": "vi", | |
| # "label": vi_label, | |
| # "english_label": label, # Giữ nhãn gốc POS/NEG/NEU | |
| # "score": score, | |
| # "scores": { | |
| # "Tích cực": score if label == "POS" else 0.0, | |
| # "Trung tính": score if label == "NEU" else 0.0, | |
| # "Tiêu cực": score if label == "NEG" else 0.0 | |
| # } | |
| # } | |
| # # ----------------------------- | |
| # # Phân tích tiếng Anh | |
| # # ----------------------------- | |
| # def analyze_en(text: str): | |
| # inputs = en_tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device) | |
| # with torch.no_grad(): | |
| # outputs = en_model(**inputs) | |
| # logits = outputs.logits.squeeze(0) | |
| # probs = torch.softmax(logits, dim=-1) | |
| # label_idx = int(torch.argmax(probs).item()) | |
| # eng_label, vi_label = en_label_map[label_idx] | |
| # confidence = float(probs[label_idx].item()) | |
| # scores = { | |
| # en_label_map[i][1]: round(float(probs[i].item()), 3) for i in range(2) | |
| # } | |
| # return { | |
| # "language": "en", | |
| # "label": vi_label, # Giữ English, có thể đổi sang tiếng Việt nếu muốn | |
| # "english_label": eng_label, | |
| # "score": round(confidence, 3), | |
| # "scores": scores | |
| # } | |
| # # ----------------------------- | |
| # # Flask routes | |
| # # ----------------------------- | |
| # @app.route("/", methods=["GET"]) | |
| # def home(): | |
| # return render_template("index.html") | |
| # @app.route("/analyze", methods=["POST"]) | |
| # def analyze(): | |
| # data = request.get_json(force=True) | |
| # text = (data.get("text") or "").strip() | |
| # lang = (data.get("lang") or "auto").lower() | |
| # if not text: | |
| # return jsonify({"error": "Text is empty."}), 400 | |
| # if lang == "auto": | |
| # lang = detect_lang(text) | |
| # if lang == "vi": | |
| # result = analyze_vi(text) | |
| # else: | |
| # result = analyze_en(text) | |
| # return jsonify({ | |
| # "ok": True, | |
| # "input": {"text": text, "lang": lang}, | |
| # "result": result | |
| # }) | |
| # if __name__ == "__main__": | |
| # port = int(os.environ.get("PORT", 7860)) | |
| # app.run(host="0.0.0.0", port=port) | |
import os
import tempfile

import torch
from flask import Flask, render_template, request, jsonify
from langdetect import detect

# Hugging Face configuration.
#
# These variables are read when `transformers` (and the hub client it pulls
# in) is first imported, so they must be exported BEFORE that import;
# assigning them afterwards has no effect on the already-imported library.
#
# The cache location also has to be a real, writable directory: model files
# are downloaded to disk before they are loaded, so a path such as
# "/dev/null" cannot serve as a cache. An HF_HOME that the deployment already
# provides is respected; otherwise a directory under the system temp dir is
# used.
_HF_CACHE_DIR = os.environ.get("HF_HOME") or os.path.join(
    tempfile.gettempdir(), "huggingface"
)
os.environ["HF_HOME"] = _HF_CACHE_DIR
# NOTE(review): HF_HUB_DISABLE_CACHE does not appear among the documented
# huggingface_hub environment variables; kept as-is, but confirm it does
# anything before relying on it.
os.environ["HF_HUB_DISABLE_CACHE"] = "1"
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
os.environ["TRANSFORMERS_OFFLINE"] = "0"
os.environ["DISABLE_TELEMETRY"] = "1"

# Imported last on purpose so the settings above are visible to it.
from transformers import (  # noqa: E402
    AutoTokenizer,
    AutoModelForSequenceClassification,
    pipeline,
)
app = Flask(__name__)

# --------- Models ----------
VI_MODEL_NAME = "wonrax/phobert-base-vietnamese-sentiment"
EN_MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"
device = "cuda" if torch.cuda.is_available() else "cpu"


def _load_classifier(model_name, **tokenizer_kwargs):
    """Load a sequence-classification model and its tokenizer.

    Args:
        model_name: Hub identifier of the checkpoint to load.
        **tokenizer_kwargs: Extra keyword arguments forwarded to
            ``AutoTokenizer.from_pretrained`` (e.g. ``use_fast=False``).

    Returns:
        A ``(tokenizer, model)`` tuple. The model has been moved to the
        module-level ``device`` and switched to eval mode.
    """
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        local_files_only=False,
        cache_dir=None,
        **tokenizer_kwargs,
    )
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        local_files_only=False,
        cache_dir=None,
    ).to(device)
    model.eval()
    return tokenizer, model


print("🔄 Loading Vietnamese model (RAM-only mode)...")
vi_tokenizer, vi_model = _load_classifier(VI_MODEL_NAME, use_fast=False)
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model=vi_model,
    tokenizer=vi_tokenizer,
    # The model already lives on `device`; tell the pipeline explicitly so
    # the tensors it builds are placed on the same device. Depending on the
    # transformers release, an unspecified device may default to CPU. The
    # integer form (-1 = CPU, 0 = first GPU) is used for broad compatibility.
    device=0 if device == "cuda" else -1,
)
print("✅ Vietnamese model loaded successfully.")

print("🔄 Loading English model (RAM-only mode)...")
en_tokenizer, en_model = _load_classifier(EN_MODEL_NAME)
print("✅ English model loaded successfully.")
| # ----------------------------- | |
| # Detect language | |
| # ----------------------------- | |
# Lowercase letters carrying diacritics used in Vietnamese. Stored as a set
# so the fallback check below is a constant-time lookup per character.
# NOTE(review): several of these (e.g. "é", "à", "ô") also occur in other
# Latin-script languages, so the fallback is a heuristic, not a guarantee.
_VI_DIACRITICS = frozenset(
    "ăâđêôơưáàạảãấầậẩẫắằặẳẵéèẹẻẽếềệểễóòọỏõốồộổỗớờợởỡíìịỉĩúùụủũứừựửữýỳỵỷỹ"
)


def detect_lang(text: str) -> str:
    """Classify *text* as Vietnamese (``"vi"``) or English (``"en"``).

    The language detector is tried first. If it fails, or reports a language
    other than Vietnamese/English, the text is scanned for Vietnamese
    diacritics; anything without them is treated as English.

    Args:
        text: The raw user input.

    Returns:
        ``"vi"`` or ``"en"`` — never any other value.
    """
    import unicodedata

    try:
        lang = detect(text)
    except Exception:
        # Deliberate best-effort: any detector failure (e.g. text with no
        # usable features) falls through to the diacritic heuristic.
        lang = ""

    if lang.startswith("vi"):
        return "vi"
    if lang.startswith("en"):
        return "en"

    # Compose combining marks (NFC) and lowercase first, so decomposed input
    # ("o" + combining circumflex) and upper-case text are matched as well.
    normalized = unicodedata.normalize("NFC", text).lower()
    if any(ch in _VI_DIACRITICS for ch in normalized):
        return "vi"
    return "en"
| # ----------------------------- | |
| # Vietnamese analysis | |
| # ----------------------------- | |
# Maps the Vietnamese model's raw labels to the strings shown to the user.
_VI_LABEL_NAMES = {"POS": "Tích cực", "NEG": "Tiêu cực", "NEU": "Trung tính"}

# Token budget for the Vietnamese encoder.
# NOTE(review): 256 is the limit published for PhoBERT-base; confirm it also
# holds for the fine-tuned checkpoint in use.
_VI_MAX_TOKENS = 256


def analyze_vi(text: str):
    """Run Vietnamese sentiment analysis on *text*.

    Args:
        text: Non-empty Vietnamese text.

    Returns:
        A dict with keys ``language`` (always ``"vi"``), ``label``
        (Vietnamese display label, or the raw label if it is unknown),
        ``english_label`` (raw model label), ``score`` (confidence of the
        top label rounded to 3 decimals) and ``scores`` (per-label mapping
        in which only the top label carries its score; the others are 0.0).
    """
    # Truncate explicitly: without it, input longer than the encoder's
    # position limit raises inside the model instead of being clipped.
    prediction = sentiment_pipeline(
        text, truncation=True, max_length=_VI_MAX_TOKENS
    )[0]
    label = prediction["label"]
    score = round(prediction["score"], 3)
    return {
        "language": "vi",
        "label": _VI_LABEL_NAMES.get(label, label),
        "english_label": label,
        "score": score,
        "scores": {
            "Tích cực": score if label == "POS" else 0.0,
            "Trung tính": score if label == "NEU" else 0.0,
            "Tiêu cực": score if label == "NEG" else 0.0,
        },
    }
| # ----------------------------- | |
| # English analysis | |
| # ----------------------------- | |
# Display names for the English model's output classes, by class index.
_EN_LABELS = ("Negative", "Positive")


def analyze_en(text: str):
    """Run English sentiment analysis on *text*.

    Args:
        text: Non-empty English text.

    Returns:
        A dict with keys ``language`` (always ``"en"``), ``label`` (display
        label), ``english_label`` (upper-case label, provided so the result
        has the same keys as ``analyze_vi``), ``score`` (probability of the
        top class rounded to 3 decimals) and ``scores`` (probability of every
        class, rounded to 3 decimals).
    """
    inputs = en_tokenizer(
        text, return_tensors="pt", truncation=True, padding=True
    ).to(device)
    with torch.no_grad():
        # A single text is encoded, so drop the leading batch dimension.
        logits = en_model(**inputs).logits.squeeze(0)
    probs = torch.softmax(logits, dim=-1)
    top = int(torch.argmax(probs))
    return {
        "language": "en",
        "label": _EN_LABELS[top],
        "english_label": _EN_LABELS[top].upper(),
        "score": round(float(probs[top]), 3),
        "scores": {
            name: round(float(probs[i]), 3)
            for i, name in enumerate(_EN_LABELS)
        },
    }
| # ----------------------------- | |
| # Flask routes | |
| # ----------------------------- | |
@app.route("/", methods=["GET"])
def home():
    """Serve the single-page UI (``index.html``).

    The route decorator is required: without it the function is defined but
    never registered with the Flask app, so ``GET /`` would return 404.
    """
    return render_template("index.html")
@app.route("/analyze", methods=["POST"])
def analyze():
    """Analyze the sentiment of the text posted as JSON.

    Expected body: ``{"text": <str>, "lang": "auto" | "vi" | "en"}``.
    ``lang`` is optional and defaults to ``"auto"`` (language detection);
    any value other than ``"vi"`` is handled by the English model.

    Returns:
        On success, JSON ``{"ok": True, "input": {...}, "result": {...}}``.
        On a malformed body or empty text, JSON ``{"error": ...}`` with
        HTTP status 400.
    """
    # silent=True yields None for unparsable JSON instead of raising, so
    # every bad body is reported through the same JSON error shape.
    payload = request.get_json(force=True, silent=True)
    if not isinstance(payload, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400

    raw_text = payload.get("text")
    if raw_text is not None and not isinstance(raw_text, str):
        return jsonify({"error": "Field 'text' must be a string."}), 400
    text = (raw_text or "").strip()
    if not text:
        return jsonify({"error": "Text is empty."}), 400

    raw_lang = payload.get("lang")
    # A missing, empty or non-string "lang" falls back to auto-detection.
    lang = raw_lang.lower() if isinstance(raw_lang, str) and raw_lang else "auto"
    if lang == "auto":
        lang = detect_lang(text)

    result = analyze_vi(text) if lang == "vi" else analyze_en(text)
    return jsonify(
        {"ok": True, "input": {"text": text, "lang": lang}, "result": result}
    )
if __name__ == "__main__":
    # Script entry point: listen on every interface, on the port given by
    # the PORT environment variable (7860 when it is not set).
    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))