ZeroGPU-LLM-Inference

Running

App Files Community

Luigi committed on Apr 10

Commit

0813164

1 Parent(s): 37ee1f3

improve model management

Browse files

Files changed (1) hide show

app.py +44 -27

app.py CHANGED Viewed

@@ -12,10 +12,10 @@ MODELS = {
         "filename": "qwen2.5-7b-instruct-q2_k.gguf",
         "description": "Qwen2.5-7B Instruct (Q2_K)"
     },
-    "Gemma-3-4B-IT (Q5_K_M)": {
         "repo_id": "unsloth/gemma-3-4b-it-GGUF",
-        "filename": "gemma-3-4b-it-Q5_K_M.gguf",
-        "description": "Gemma 3 4B IT (Q5_K_M)"
     },
     "Phi-4-mini-Instruct (Q4_K_M)": {
         "repo_id": "unsloth/Phi-4-mini-instruct-GGUF",
@@ -38,19 +38,37 @@ with st.sidebar:
 selected_model = MODELS[selected_model_name]
 model_path = os.path.join("models", selected_model["filename"])
 # Make sure models dir exists
 os.makedirs("models", exist_ok=True)
-# Clear old models if new one isn't present
-if not os.path.exists(model_path):
-    for file in os.listdir("models"):
-        if file.endswith(".gguf"):
             try:
-                os.remove(os.path.join("models", file))
             except Exception as e:
-                st.warning(f"Failed to delete {file}: {e}")
-    # Download the selected model
     with st.spinner(f"Downloading {selected_model['filename']}..."):
         hf_hub_download(
             repo_id=selected_model["repo_id"],
@@ -59,29 +77,28 @@ if not os.path.exists(model_path):
             local_dir_use_symlinks=False,
         )
-# Init state
-if "model_name" not in st.session_state:
-    st.session_state.model_name = None
-if "llm" not in st.session_state:
-    st.session_state.llm = None
 # Load model if changed
 if st.session_state.model_name != selected_model_name:
     if st.session_state.llm is not None:
         del st.session_state.llm
         gc.collect()
-    st.session_state.llm = Llama(
-        model_path=model_path,
-        n_ctx=1024,
-        n_threads=2,
-        n_threads_batch=2,
-        n_batch=4,
-        n_gpu_layers=0,
-        use_mlock=False,
-        use_mmap=True,
-        verbose=False,
-    )
     st.session_state.model_name = selected_model_name
 llm = st.session_state.llm

         "filename": "qwen2.5-7b-instruct-q2_k.gguf",
         "description": "Qwen2.5-7B Instruct (Q2_K)"
     },
+    "Gemma-3-4B-IT (Q4_K_M)": {
         "repo_id": "unsloth/gemma-3-4b-it-GGUF",
+        "filename": "gemma-3-4b-it-Q4_K_M.gguf",
+        "description": "Gemma 3 4B IT (Q4_K_M)"
     },
     "Phi-4-mini-Instruct (Q4_K_M)": {
         "repo_id": "unsloth/Phi-4-mini-instruct-GGUF",
 selected_model = MODELS[selected_model_name]
 model_path = os.path.join("models", selected_model["filename"])
+# Init state
+if "model_name" not in st.session_state:
+    st.session_state.model_name = None
+if "llm" not in st.session_state:
+    st.session_state.llm = None
 # Make sure models dir exists
 os.makedirs("models", exist_ok=True)
+# If the selected model file does not exist or is invalid, clean up and re-download
+def validate_or_download_model():
+    if not os.path.exists(model_path):
+        cleanup_old_models()
+        download_model()
+        return
+    try:
+        _ = Llama(model_path=model_path, n_ctx=16, n_threads=1)  # dummy check
+    except Exception as e:
+        st.warning(f"Model file was invalid or corrupt: {e}\nRedownloading...")
+        cleanup_old_models()
+        download_model()
+def cleanup_old_models():
+    for f in os.listdir("models"):
+        if f.endswith(".gguf") and f != selected_model["filename"]:
             try:
+                os.remove(os.path.join("models", f))
             except Exception as e:
+                st.warning(f"Couldn't delete old model {f}: {e}")
+def download_model():
     with st.spinner(f"Downloading {selected_model['filename']}..."):
         hf_hub_download(
             repo_id=selected_model["repo_id"],
             local_dir_use_symlinks=False,
         )
+validate_or_download_model()
 # Load model if changed
 if st.session_state.model_name != selected_model_name:
     if st.session_state.llm is not None:
         del st.session_state.llm
         gc.collect()
+    try:
+        st.session_state.llm = Llama(
+            model_path=model_path,
+            n_ctx=1024,
+            n_threads=2,
+            n_threads_batch=2,
+            n_batch=4,
+            n_gpu_layers=0,
+            use_mlock=False,
+            use_mmap=True,
+            verbose=False,
+        )
+    except Exception as e:
+        st.error(f"Failed to load model: {e}")
+        st.stop()
     st.session_state.model_name = selected_model_name
 llm = st.session_state.llm