Spaces: Running on Zero
adds special-token stripping with fallback
app.py CHANGED
@@ -220,19 +220,49 @@ def generate_response(message, history, system_message, max_tokens, temperature,
         eos_token_id=tokenizer.eos_token_id,
         # cache_implementation="static"
     )
-    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+    # First decode WITH special tokens to find markers
+    response_with_tokens = tokenizer.decode(output_ids[0], skip_special_tokens=False)

-    # Debug: Print the full raw response
-    logger.info(f"=== FULL RAW RESPONSE DEBUG ===")
-    logger.info(f"Raw response length: {len(response)}")
-    logger.info(f"Raw response: {repr(response)}")
-    logger.info(f"Full prompt length: {len(full_prompt)}")
-    logger.info(f"Full prompt: {repr(full_prompt)}")
+    # Debug: Print the full raw response with tokens
+    logger.info(f"=== FULL RAW RESPONSE WITH TOKENS DEBUG ===")
+    logger.info(f"Raw response with tokens length: {len(response_with_tokens)}")
+    logger.info(f"Raw response with tokens: {repr(response_with_tokens)}")

-    assistant_response = response[len(full_prompt):].strip()
+    # More robust response extraction - look for assistant marker
+    logger.info(f"Looking for assistant marker in response...")
+    if "<|im_start|>assistant" in response_with_tokens:
+        logger.info(f"Found assistant marker in response")
+        # Find the start of assistant response
+        assistant_start = response_with_tokens.find("<|im_start|>assistant")
+        logger.info(f"Assistant marker found at position: {assistant_start}")
+        if assistant_start != -1:
+            # Find the end of the assistant marker
+            marker_end = response_with_tokens.find("\n", assistant_start)
+            logger.info(f"Marker end found at position: {marker_end}")
+            if marker_end != -1:
+                assistant_response = response_with_tokens[marker_end + 1:].strip()
+                logger.info(f"Using marker-based extraction")
+            else:
+                assistant_response = response_with_tokens[assistant_start + len("<|im_start|>assistant"):].strip()
+                logger.info(f"Using fallback marker extraction")
+        else:
+            # Fallback to prompt-based extraction
+            response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+            assistant_response = response[len(full_prompt):].strip()
+            logger.info(f"Using prompt-based extraction (marker not found)")
+    else:
+        # Fallback to original method
+        logger.info(f"No assistant marker found, using prompt-based extraction")
+        response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+        assistant_response = response[len(full_prompt):].strip()
+
+    # Clean up any remaining special tokens
+    assistant_response = re.sub(r'<\|im_start\|>.*?<\|im_end\|>', '', assistant_response, flags=re.DOTALL)
+    assistant_response = re.sub(r'<\|im_start\|>', '', assistant_response)
+    assistant_response = re.sub(r'<\|im_end\|>', '', assistant_response)

-    # Debug: Print the extracted assistant response
-    logger.info(f"=== EXTRACTED ASSISTANT RESPONSE DEBUG ===")
+    # Debug: Print the extracted assistant response after cleanup
+    logger.info(f"=== EXTRACTED ASSISTANT RESPONSE AFTER CLEANUP DEBUG ===")
     logger.info(f"Extracted response length: {len(assistant_response)}")
     logger.info(f"Extracted response: {repr(assistant_response)}")

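For reference, the marker-based extraction and token cleanup added in this commit can be exercised on their own. The sketch below pulls the same logic into a standalone helper, assuming the ChatML-style <|im_start|> / <|im_end|> markers the diff targets; the name extract_assistant_response and the sample strings are illustrative and are not part of app.py.

import re

# Illustrative helper mirroring the commit's extraction path; not part of app.py.
IM_START = "<|im_start|>"
IM_END = "<|im_end|>"

def extract_assistant_response(response_with_tokens, response_plain, full_prompt):
    """Extract the assistant turn from a decoded generation (sketch)."""
    marker = IM_START + "assistant"
    if marker in response_with_tokens:
        start = response_with_tokens.find(marker)
        newline = response_with_tokens.find("\n", start)
        if newline != -1:
            # Marker-based extraction: take everything after the assistant header line
            text = response_with_tokens[newline + 1:]
        else:
            # Fallback marker extraction: marker present but no trailing newline
            text = response_with_tokens[start + len(marker):]
    else:
        # Prompt-based fallback: slice the plain decode by prompt length
        text = response_plain[len(full_prompt):]

    # Strip any special tokens that survived, mirroring the regex cleanup in the commit
    text = re.sub(r'<\|im_start\|>.*?<\|im_end\|>', '', text, flags=re.DOTALL)
    text = text.replace(IM_START, '').replace(IM_END, '')
    return text.strip()

if __name__ == "__main__":
    decoded = (
        "<|im_start|>user\nHello<|im_end|>\n"
        "<|im_start|>assistant\nHi there!<|im_end|>"
    )
    print(extract_assistant_response(decoded, response_plain="", full_prompt=""))
    # -> Hi there!

Decoding the output twice, once with skip_special_tokens=False for the marker search and once with skip_special_tokens=True for the prompt-length fallback, is what lets the new path see the chat template's delimiters while keeping the original extraction as a safety net.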