Spaces:

sirochild
/

mari

Sleeping

App Files Files Community

sirochild committed on Jul 24

Commit

ebc78dc

verified ·

1 Parent(s): 5a7c79c

Upload 4 files

Browse files

Files changed (2) hide show

app.py +25 -47
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -30,51 +30,16 @@ try:
     model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
     print(f"モデルファイルのダウンロード完了: {model_path}")
-    # Hugging Face Spaceでの実行時はメモリ効率の良い設定を使用
-    if os.getenv("SPACE_ID"):
-        print("Hugging Face Space環境を検出しました。メモリ効率の良い設定を使用します。")
-        try:
-            # まずGPUを使用してみる
-            swallow_model = Llama(
-                model_path=model_path,
-                n_ctx=2048,  # コンテキスト長
-                n_gpu_layers=-1,  # 可能な限りGPUを使用
-                n_threads=4,  # スレッド数を制限
-                verbose=False  # デバッグ出力を無効化
-            )
-            print("GPUを使用してモデルをロードしました")
-        except Exception as gpu_error:
-            print(f"GPUでのロードに失敗しました: {gpu_error}")
-            print("CPUモードでモデルをロードします")
-            # GPUが使用できない場合はCPUのみで実行
-            swallow_model = Llama(
-                model_path=model_path,
-                n_ctx=2048,  # コンテキスト長
-                n_gpu_layers=0,  # GPUを使用しない
-                n_threads=4,  # スレッド数を制限
-                verbose=False  # デバッグ出力を無効化
-            )
-    else:
-        # ローカル環境での実行時の設定
-        try:
-            # GPUを使用
-            swallow_model = Llama(
-                model_path=model_path,
-                n_ctx=4096,  # より長いコンテキスト長
-                n_gpu_layers=-1,  # 可能な限りGPUを使用
-                verbose=True  # デバッグ出力を有効化
-            )
-            print("GPUを使用してモデルをロードしました")
-        except Exception as gpu_error:
-            print(f"GPUでのロードに失敗しました: {gpu_error}")
-            print("CPUモードでモデルをロードします")
-            # GPUが使用できない場合はCPUのみで実行
-            swallow_model = Llama(
-                model_path=model_path,
-                n_ctx=4096,  # より長いコンテキスト長
-                n_gpu_layers=0,  # GPUを使用しない
-                verbose=True  # デバッグ出力を有効化
-            )
     print("Swallowモデルのロード完了")
     tokenizer = None  # llama-cppではtokenizerは不要
 except Exception as e:
@@ -448,8 +413,21 @@ def respond(message, chat_history, affection, history, scene_params):
     return "", chat_history, new_affection, stage_name, new_affection, new_history, final_scene_params, background_html
 # カスタムCSSを読み込む
-with open("style.css", "r") as f:
-    custom_css = f.read()
 # Gradio 5.x用のシンプルなテーマ設定
 custom_theme = gr.themes.Soft(

     model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
     print(f"モデルファイルのダウンロード完了: {model_path}")
+    # 最も安全な設定でモデルをロード（CPUのみ）
+    print("CPUモードでモデルをロードします")
+    swallow_model = Llama(
+        model_path=model_path,
+        n_ctx=2048,  # コンテキスト長
+        n_gpu_layers=0,  # GPUを使用しない
+        n_threads=4,  # スレッド数を制限
+        verbose=True  # デバッグ出力を有効化
+    )
+    print("モデルのロード完了")
     print("Swallowモデルのロード完了")
     tokenizer = None  # llama-cppではtokenizerは不要
 except Exception as e:
     return "", chat_history, new_affection, stage_name, new_affection, new_history, final_scene_params, background_html
 # カスタムCSSを読み込む
+try:
+    with open("style.css", "r") as f:
+        custom_css = f.read()
+except FileNotFoundError:
+    print("style.cssファイルが見つかりません。デフォルトのスタイルを使用します。")
+    custom_css = """
+    /* デフォルトのスタイル */
+    .chatbot {
+        background-color: rgba(255, 255, 255, 0.7) !important;
+        border-radius: 12px !important;
+        padding: 15px !important;
+        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1) !important;
+        margin-bottom: 20px !important;
+    }
+    """
 # Gradio 5.x用のシンプルなテーマ設定
 custom_theme = gr.themes.Soft(

requirements.txt CHANGED Viewed

@@ -6,4 +6,5 @@ fugashi
 unidic_lite
 transformers>=4.34.0
 protobuf>=3.20.0
-llama-cpp-python==0.2.19+cpuavx2

 unidic_lite
 transformers>=4.34.0
 protobuf>=3.20.0
+# CPUのみのバージョンを使用 (build env must set: CMAKE_ARGS="-DLLAMA_CUBLAS=OFF" FORCE_CMAKE=1 — pip cannot parse env assignments on a requirement line)
+llama-cpp-python==0.2.19