Spaces:

Luigi
/

ZeroGPU-LLM-Inference

Running

App Files Files Community

Luigi committed on Apr 10

Commit

37ee1f3

1 Parent(s): 3190ad6

improve storage management

Browse files

Files changed (1) hide show

app.py +29 -16

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 import os
 import gc
 # Available models
 MODELS = {
@@ -16,10 +17,10 @@ MODELS = {
         "filename": "gemma-3-4b-it-Q5_K_M.gguf",
         "description": "Gemma 3 4B IT (Q5_K_M)"
     },
-    "Phi-4-mini-Instruct (Q5_K_M)": {
         "repo_id": "unsloth/Phi-4-mini-instruct-GGUF",
-        "filename": "Phi-4-mini-instruct-Q5_K_M.gguf",
-        "description": "Phi-4 Mini Instruct (Q5_K_M)"
     },
 }
@@ -37,30 +38,42 @@ with st.sidebar:
 selected_model = MODELS[selected_model_name]
 model_path = os.path.join("models", selected_model["filename"])
-# Initialize model cache state
 if "model_name" not in st.session_state:
     st.session_state.model_name = None
 if "llm" not in st.session_state:
     st.session_state.llm = None
-# Download model if needed
-if not os.path.exists(model_path):
-    hf_hub_download(
-        repo_id=selected_model["repo_id"],
-        filename=selected_model["filename"],
-        local_dir="./models",
-        local_dir_use_symlinks=False,
-    )
-# Load model only if it changed
 if st.session_state.model_name != selected_model_name:
     if st.session_state.llm is not None:
-        # Clean up old model to free memory
         del st.session_state.llm
         gc.collect()
     st.session_state.llm = Llama(
         model_path=model_path,
-        n_ctx=1024,  # Reduced for RAM safety
         n_threads=2,
         n_threads_batch=2,
         n_batch=4,

 from huggingface_hub import hf_hub_download
 import os
 import gc
+import shutil
 # Available models
 MODELS = {
         "filename": "gemma-3-4b-it-Q5_K_M.gguf",
         "description": "Gemma 3 4B IT (Q5_K_M)"
     },
+    "Phi-4-mini-Instruct (Q4_K_M)": {
         "repo_id": "unsloth/Phi-4-mini-instruct-GGUF",
+        "filename": "Phi-4-mini-instruct-Q4_K_M.gguf",
+        "description": "Phi-4 Mini Instruct (Q4_K_M)"
     },
 }
 selected_model = MODELS[selected_model_name]
 model_path = os.path.join("models", selected_model["filename"])
+# Make sure models dir exists
+os.makedirs("models", exist_ok=True)
+# Clear old models if new one isn't present
+if not os.path.exists(model_path):
+    for file in os.listdir("models"):
+        if file.endswith(".gguf"):
+            try:
+                os.remove(os.path.join("models", file))
+            except Exception as e:
+                st.warning(f"Failed to delete {file}: {e}")
+    # Download the selected model
+    with st.spinner(f"Downloading {selected_model['filename']}..."):
+        hf_hub_download(
+            repo_id=selected_model["repo_id"],
+            filename=selected_model["filename"],
+            local_dir="./models",
+            local_dir_use_symlinks=False,
+        )
+# Init state
 if "model_name" not in st.session_state:
     st.session_state.model_name = None
 if "llm" not in st.session_state:
     st.session_state.llm = None
+# Load model if changed
 if st.session_state.model_name != selected_model_name:
     if st.session_state.llm is not None:
         del st.session_state.llm
         gc.collect()
     st.session_state.llm = Llama(
         model_path=model_path,
+        n_ctx=1024,
         n_threads=2,
         n_threads_batch=2,
         n_batch=4,