inject verbose message to debug window
app.py CHANGED
@@ -9,6 +9,8 @@ from llama_cpp import Llama
 from llama_cpp.llama_chat_format import Llava15ChatHandler
 import base64
 import gc
+import io
+from contextlib import redirect_stdout, redirect_stderr
 
 # ----------------------------------------
 # Model configurations: per-size prefixes and repos
@@ -157,12 +159,20 @@ def caption_frame(frame, size, model_file, clip_file, interval_ms, sys_prompt, u
     debug_msgs.append(f"[{timestamp}] CPU count = {os.cpu_count()}")
 
     t_start = time.time()
-    resp = model_cache['llm'].create_chat_completion(
-        messages=messages,
-        max_tokens=128,
-        temperature=0.1,
-        stop=["<end_of_utterance>"]
-    )
+    # right before you call the Llama API:
+    buf = io.StringIO()
+    with redirect_stdout(buf), redirect_stderr(buf):
+        resp = model_cache['llm'].create_chat_completion(
+            messages=messages,
+            max_tokens=128,
+            temperature=0.1,
+            stop=["<end_of_utterance>"]
+        )
+    # grab every line the Llama client printed
+    for line in buf.getvalue().splitlines():
+        timestamp = time.strftime('%H:%M:%S')
+        debug_msgs.append(f"[{timestamp}] {line}")
+
     elapsed = (time.time() - t_start) * 1000
     timestamp = time.strftime('%H:%M:%S')
     debug_msgs.append(f"[{timestamp}] LLM response in {elapsed:.1f} ms")
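
For reference, the pattern this commit applies is plain contextlib stream redirection: anything the Python side prints to sys.stdout or sys.stderr while the model call runs is buffered and then relayed to the debug log. Below is a minimal, self-contained sketch of that pattern; run_model is a hypothetical stand-in for the create_chat_completion call in app.py.

import io
import time
from contextlib import redirect_stdout, redirect_stderr

def run_model():
    # Hypothetical stand-in for the llama_cpp call; it prints a verbose
    # line the same way a chatty client would on Python-level stdout.
    print("llama verbose: example timing line")
    return {"choices": [{"message": {"content": "a caption"}}]}

debug_msgs = []
buf = io.StringIO()
with redirect_stdout(buf), redirect_stderr(buf):
    resp = run_model()

# Relay each captured line into the debug log, as the diff above does.
for line in buf.getvalue().splitlines():
    debug_msgs.append(f"[{time.strftime('%H:%M:%S')}] {line}")

One caveat on this design choice: redirect_stdout/redirect_stderr swap only the Python-level sys.stdout/sys.stderr objects, so any output that native code writes directly to the process file descriptors is not captured by this approach.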