fix: prevent self-talking issue by using tokenizer chat_template formatting
- Updated prompt construction to use `tokenizer.apply_chat_template()` for chat-tuned models (sketched below)
- Ensured backward compatibility with non-chat models by falling back to manual prompt formatting
- Resolves issue where models would echo both user and assistant messages due to incorrect flat-text prompts
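For reference, the mechanism behind the fix can be sketched as follows; the model name and messages are illustrative placeholders, not the Space's actual configuration. A chat-template prompt wraps each turn in the model's own role delimiters and ends with an open assistant turn, so generation starts inside the assistant slot instead of continuing a flat `User:`/`Assistant:` transcript (which is what let the model write both sides of the conversation).

```python
# Minimal sketch of what apply_chat_template produces, using a placeholder
# chat-tuned model; the exact template text depends on the model.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")  # placeholder model
messages = [
    {"role": "system", "content": "You are a concise assistant."},
    {"role": "user", "content": "Hello!"},
]
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
# Roughly: "<|system|>\nYou are a concise assistant.</s>\n<|user|>\nHello!</s>\n<|assistant|>\n"
```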
app.py CHANGED
```diff
@@ -95,22 +95,21 @@ def retrieve_context(query, max_results=6, max_chars=600):
     except Exception:
         return []
 
-    [removed lines 98-112: previous manual flat-text prompt construction; body not rendered in this extract]
-    return prompt
+def format_conversation(history, system_prompt, tokenizer):
+    if hasattr(tokenizer, "chat_template") and tokenizer.chat_template:
+        messages = [{"role": "system", "content": system_prompt.strip()}] + history
+        return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    else:
+        # Fallback for base LMs without chat template
+        prompt = system_prompt.strip() + "\n"
+        for msg in history:
+            if msg['role'] == 'user':
+                prompt += "User: " + msg['content'].strip() + "\n"
+            elif msg['role'] == 'assistant':
+                prompt += "Assistant: " + msg['content'].strip() + "\n"
+        if not prompt.strip().endswith("Assistant:"):
+            prompt += "Assistant: "
+        return prompt
 
 @spaces.GPU(duration=60)
 def chat_response(user_msg, chat_history, system_prompt,
@@ -166,9 +165,8 @@ def chat_response(user_msg, chat_history, system_prompt,
     else:
         enriched = system_prompt
 
-    prompt = format_conversation(history, enriched)
-
     pipe = load_pipeline(model_name)
+    prompt = format_conversation(history, enriched, pipe.tokenizer)
     streamer = TextIteratorStreamer(pipe.tokenizer,
                                     skip_prompt=True,
                                     skip_special_tokens=True)
```
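A quick way to exercise both branches of the new helper locally, assuming `format_conversation` as committed above is in scope; the tokenizers named below are stand-ins, not models the Space necessarily serves. Note also the call-order change in the second hunk: the pipeline is now loaded before the prompt is built, because the tokenizer is needed for formatting.

```python
from transformers import AutoTokenizer

# Assumes format_conversation from app.py (as committed above) is in scope.
history = [
    {"role": "user", "content": "What is the capital of France?"},
    {"role": "assistant", "content": "Paris."},
    {"role": "user", "content": "And of Italy?"},
]

# Chat-tuned tokenizer: takes the apply_chat_template branch (placeholder model).
chat_tok = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
print(format_conversation(history, "Answer concisely.", chat_tok))

# Tokenizer without a chat template (e.g. gpt2): takes the manual User:/Assistant: fallback.
base_tok = AutoTokenizer.from_pretrained("gpt2")
print(format_conversation(history, "Answer concisely.", base_tok))
```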