support reasoning tag
README.md CHANGED

@@ -8,7 +8,7 @@ sdk_version: 1.44.1
 app_file: app.py
 pinned: false
 license: apache-2.0
-short_description: Run GGUF models
+short_description: Run GGUF models with llama.cpp
 ---
 
 This Streamlit app enables **chat-based inference** on various GGUF models using `llama.cpp` and `llama-cpp-python`.
@@ -26,6 +26,8 @@ This Streamlit app enables **chat-based inference** on various GGUF models using
 - Model selection in the sidebar
 - Customizable system prompt and generation parameters
 - Chat-style UI with streaming responses
+- **Markdown output rendering** for readable, styled output
+- **DeepSeek-compatible `<think>` tag handling** — shows model reasoning in a collapsible expander
 
 ### 🧠 Memory-Safe Design (for HuggingFace Spaces):
 - Loads only **one model at a time** to prevent memory bloat
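For context on the README's "one model at a time" note, here is a minimal sketch of that single-model pattern. It is illustrative only: the names `load_single_model` and `_llm` are assumptions and do not come from app.py, which only shows that `gc` is imported.

```python
import gc
from llama_cpp import Llama

_llm = None  # the one model kept in memory (hypothetical module-level handle)

def load_single_model(gguf_path: str) -> Llama:
    """Illustrative sketch: drop any previously loaded model before loading a new one."""
    global _llm
    if _llm is not None:
        del _llm          # release the old llama.cpp context
        _llm = None
        gc.collect()      # encourage Python to return the memory before reallocating
    _llm = Llama(model_path=gguf_path, n_ctx=2048)
    return _llm
```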
app.py CHANGED

@@ -4,6 +4,7 @@ from huggingface_hub import hf_hub_download
 import os
 import gc
 import shutil
+import re
 
 # Available models
 MODELS = {
@@ -184,6 +185,13 @@ if user_input:
         if "choices" in chunk:
             delta = chunk["choices"][0]["delta"].get("content", "")
             full_response += delta
-
+            visible = re.sub(r"<think>.*?</think>", "", full_response, flags=re.DOTALL)
+            response_area.markdown(visible)
 
     st.session_state.chat_history.append({"role": "assistant", "content": full_response})
+
+    thinking = re.findall(r"<think>(.*?)</think>", full_response, flags=re.DOTALL)
+    if thinking:
+        with st.expander("🧠 Model's Internal Reasoning"):
+            for t in thinking:
+                st.markdown(t.strip())
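To make the intent of the new regex handling concrete, here is a small self-contained sketch of how the two patterns from this diff split a DeepSeek-style response into visible text and reasoning. The sample string is invented for illustration; only the regexes come from the change.

```python
import re

# Example response in the DeepSeek <think> format (invented sample)
full_response = "<think>User asked for 2+2; that is 4.</think>The answer is 4."

# Same pattern as the diff: strip reasoning from what the chat bubble renders...
visible = re.sub(r"<think>.*?</think>", "", full_response, flags=re.DOTALL)
# ...and collect the reasoning separately for the expander
thinking = re.findall(r"<think>(.*?)</think>", full_response, flags=re.DOTALL)

print(visible)   # -> "The answer is 4."
print(thinking)  # -> ["User asked for 2+2; that is 4."]
```

Because the substitution uses `re.DOTALL` with a non-greedy `.*?`, multi-line reasoning blocks are removed cleanly, and partial `<think>` content that arrives mid-stream simply stays visible until the closing tag appears.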