Spaces:

Luigi
/

SmolVLM2-on-llama.cpp

Runtime error

Luigi committed on Jun 16

Commit

65b3c3a

1 Parent(s): c1d8038

minor update then add todos

Files changed (1) hide show

app.py CHANGED Viewed

@@ -43,10 +43,10 @@ class SmolVLM2ChatHandler(Llava15ChatHandler):
 # —————————————————————————————————————————
 # 2) Model & CLIP files — download if missing
-MODEL_FILE = "SmolVLM2-500M-Video-Instruct.Q8_0.gguf"
-CLIP_FILE  = "mmproj-SmolVLM2-500M-Video-Instruct-Q8_0.gguf"
-MODEL_REPO = "mradermacher/SmolVLM2-500M-Video-Instruct-GGUF"
-CLIP_REPO  = "ggml-org/SmolVLM2-500M-Video-Instruct-GGUF"
 def ensure_models():
     logging.debug("Ensuring model files are present...")
@@ -125,9 +125,7 @@ def caption_frame(frame):
         # stateless completion call
         logging.debug("Resetting LLM and clearing cache.")
-        llm.chat_handler = SmolVLM2ChatHandler(clip_model_path=CLIP_FILE, verbose=False)
-        llm.reset()                           # reset n_tokens back to 0
-        llm._ctx.kv_cache_clear()            # clear any cached key/values
         logging.debug("Sending chat completion request...")
         resp = llm.create_chat_completion(
             messages=messages,
@@ -162,3 +160,8 @@ with demo:
 if __name__ == "__main__":
     logging.debug("Launching Gradio demo...")
     demo.launch()

 # —————————————————————————————————————————
 # 2) Model & CLIP files — download if missing
+MODEL_FILE = "SmolVLM2-2.2B-Instruct.IQ4_XS.gguf"
+CLIP_FILE  = "mmproj-SmolVLM2-2.2B-Instruct-Q8_0.gguf"
+MODEL_REPO = "mradermacher/SmolVLM2-2.2B-Instruct-GGUF"
+CLIP_REPO  = "ggml-org/SmolVLM2-2.2B-Instruct-GGUF"
 def ensure_models():
     logging.debug("Ensuring model files are present...")
         # stateless completion call
         logging.debug("Resetting LLM and clearing cache.")
+        llm.chat_handler.__init__(clip_model_path=CLIP_FILE, verbose=False)
         logging.debug("Sending chat completion request...")
         resp = llm.create_chat_completion(
             messages=messages,
 if __name__ == "__main__":
     logging.debug("Launching Gradio demo...")
     demo.launch()
+# todos:
+# 1. add a selectable list of models: smolvlm2 256m, 500m, 2.2b at various precisions
+# 2. customizable interval
+# 3. customizable system and user prompts