Spaces:

Luigi
/

SmolVLM2-on-llama.cpp

Runtime error

App Files Files Community

Luigi committed on Jun 13

Commit

36dacc6

1 Parent(s): d05da82

add debug messages

Browse files

Files changed (1) hide show

app.py +42 -5

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import gradio as gr
 import cv2
 import tempfile
@@ -8,6 +9,13 @@ from llama_cpp import Llama
 from llama_cpp.llama_chat_format import Llava15ChatHandler
 from termcolor import cprint
 # —————————————————————————————————————————
 # 1) Inline definition & registration of SmolVLM2ChatHandler
 class SmolVLM2ChatHandler(Llava15ChatHandler):
@@ -41,35 +49,58 @@ MODEL_REPO = "mradermacher/SmolVLM2-500M-Video-Instruct-GGUF"
 CLIP_REPO  = "ggml-org/SmolVLM2-500M-Video-Instruct-GGUF"
 def ensure_models():
     if not os.path.exists(MODEL_FILE):
         path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
         os.symlink(path, MODEL_FILE)
     if not os.path.exists(CLIP_FILE):
         path = hf_hub_download(repo_id=CLIP_REPO, filename=CLIP_FILE)
         os.symlink(path, CLIP_FILE)
 ensure_models()
 def load_llm():
     handler = SmolVLM2ChatHandler(clip_model_path=CLIP_FILE, verbose=False)
-    return Llama(
         model_path=MODEL_FILE,
         chat_handler=handler,
         n_ctx=8192,
         verbose=False,
     )
 llm = load_llm()
 # —————————————————————————————————————————
 # 4) Captioning helper (stateless prompt)
 def caption_frame(frame):
     # make a writable copy
     frame = frame.copy()
     # save frame to temporary file for URI
     with tempfile.NamedTemporaryFile(suffix='.jpg') as f:
-        cv2.imwrite(f.name, frame)
         uri = Path(f.name).absolute().as_uri()
         # build a single prompt string
         messages = [
@@ -89,20 +120,25 @@ def caption_frame(frame):
                 ],
             },
         ]
         # stateless completion call
         llm.chat_handler = SmolVLM2ChatHandler(clip_model_path=CLIP_FILE, verbose=False)
         llm.reset()                           # reset n_tokens back to 0
-        llm._ctx.kv_cache_clear()      # clear any cached key/values
         resp = llm.create_chat_completion(
-            messages = messages,
             max_tokens=256,
             temperature=0.1,
             stop=["<end_of_utterance>"],
         )
     # extract caption
-    caption = (resp.get("choices", [])[0]['message'].get("content", "") or "").strip()
     return caption
 # —————————————————————————————————————————
@@ -123,4 +159,5 @@ with demo:
     )
 if __name__ == "__main__":
     demo.launch()

+import logging
 import gradio as gr
 import cv2
 import tempfile
 from llama_cpp.llama_chat_format import Llava15ChatHandler
 from termcolor import cprint
+# Configure logging
+logging.basicConfig(
+    level=logging.DEBUG,
+    format='[%(asctime)s] %(levelname)s: %(message)s',
+    datefmt='%Y-%m-%d %H:%M:%S'
+)
 # —————————————————————————————————————————
 # 1) Inline definition & registration of SmolVLM2ChatHandler
 class SmolVLM2ChatHandler(Llava15ChatHandler):
 CLIP_REPO  = "ggml-org/SmolVLM2-500M-Video-Instruct-GGUF"
 def ensure_models():
+    logging.debug("Ensuring model files are present...")
     if not os.path.exists(MODEL_FILE):
+        logging.info(f"Downloading model file {MODEL_FILE} from {MODEL_REPO}...")
         path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
         os.symlink(path, MODEL_FILE)
+        logging.info(f"Created symlink: {path} -> {MODEL_FILE}")
+    else:
+        logging.debug(f"Model file {MODEL_FILE} already exists.")
     if not os.path.exists(CLIP_FILE):
+        logging.info(f"Downloading CLIP file {CLIP_FILE} from {CLIP_REPO}...")
         path = hf_hub_download(repo_id=CLIP_REPO, filename=CLIP_FILE)
         os.symlink(path, CLIP_FILE)
+        logging.info(f"Created symlink: {path} -> {CLIP_FILE}")
+    else:
+        logging.debug(f"CLIP file {CLIP_FILE} already exists.")
 ensure_models()
 def load_llm():
+    logging.debug("Loading Llama model with SmolVLM2ChatHandler...")
     handler = SmolVLM2ChatHandler(clip_model_path=CLIP_FILE, verbose=False)
+    llm = Llama(
         model_path=MODEL_FILE,
         chat_handler=handler,
         n_ctx=8192,
         verbose=False,
     )
+    logging.info("Llama model loaded successfully.")
+    return llm
 llm = load_llm()
 # —————————————————————————————————————————
 # 4) Captioning helper (stateless prompt)
 def caption_frame(frame):
+    logging.debug("caption_frame called.")
     # make a writable copy
     frame = frame.copy()
+    logging.debug(f"Frame shape: {frame.shape}, dtype: {frame.dtype}")
     # save frame to temporary file for URI
     with tempfile.NamedTemporaryFile(suffix='.jpg') as f:
+        success = cv2.imwrite(f.name, frame)
+        if not success:
+            logging.error(f"Failed to write frame to {f.name}")
+        else:
+            logging.debug(f"Frame written to temp file: {f.name}")
         uri = Path(f.name).absolute().as_uri()
+        logging.debug(f"Frame URI: {uri}")
         # build a single prompt string
         messages = [
                 ],
             },
         ]
+        logging.debug(f"Constructed messages: {messages}")
         # stateless completion call
+        logging.debug("Resetting LLM and clearing cache.")
         llm.chat_handler = SmolVLM2ChatHandler(clip_model_path=CLIP_FILE, verbose=False)
         llm.reset()                           # reset n_tokens back to 0
+        llm._ctx.kv_cache_clear()            # clear any cached key/values
+        logging.debug("Sending chat completion request...")
         resp = llm.create_chat_completion(
+            messages=messages,
             max_tokens=256,
             temperature=0.1,
             stop=["<end_of_utterance>"],
         )
+        logging.debug(f"LLM raw response: {resp}")
     # extract caption
+    caption = (resp.get("choices", [])[0]["message"].get("content", "") or "").strip()
+    logging.debug(f"Extracted caption: {caption}")
     return caption
 # —————————————————————————————————————————
     )
 if __name__ == "__main__":
+    logging.debug("Launching Gradio demo...")
     demo.launch()