Spaces:
Running
Running
Use the actual parameter count, instead of model-name string matching, for the AOT compilation decision
Browse files
app.py
CHANGED
|
@@ -453,37 +453,10 @@ def chat_response(user_msg, chat_history, system_prompt,
|
|
| 453 |
|
| 454 |
pipe = load_pipeline(model_name)
|
| 455 |
|
| 456 |
-
#
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
model_size = 30
|
| 461 |
-
elif '20B' in model_name:
|
| 462 |
-
model_size = 20
|
| 463 |
-
elif '15B' in model_name or '14B' in model_name:
|
| 464 |
-
model_size = 15
|
| 465 |
-
elif '4B' in model_name or '3B' in model_name:
|
| 466 |
-
model_size = 4
|
| 467 |
-
elif '2B' in model_name or '1.7B' in model_name:
|
| 468 |
-
model_size = 2
|
| 469 |
-
elif '1.5B' in model_name or '1.2B' in model_name or '1.1B' in model_name:
|
| 470 |
-
model_size = 1.5
|
| 471 |
-
elif '1B' in model_name:
|
| 472 |
-
model_size = 1
|
| 473 |
-
elif '700M' in model_name or '600M' in model_name:
|
| 474 |
-
model_size = 0.7
|
| 475 |
-
elif '500M' in model_name:
|
| 476 |
-
model_size = 0.5
|
| 477 |
-
elif '360M' in model_name or '350M' in model_name:
|
| 478 |
-
model_size = 0.35
|
| 479 |
-
elif '270M' in model_name:
|
| 480 |
-
model_size = 0.27
|
| 481 |
-
elif '135M' in model_name:
|
| 482 |
-
model_size = 0.135
|
| 483 |
-
else:
|
| 484 |
-
model_size = 4 # default
|
| 485 |
-
|
| 486 |
-
use_aot = model_size >= 2 # Only compile models >= 2B parameters
|
| 487 |
|
| 488 |
if use_aot:
|
| 489 |
try:
|
|
@@ -510,11 +483,11 @@ def chat_response(user_msg, chat_history, system_prompt,
|
|
| 510 |
)
|
| 511 |
compiled = spaces.aoti_compile(exported)
|
| 512 |
spaces.aoti_apply(compiled, pipe.model)
|
| 513 |
-
print(f"AOT compilation successful for {model_name}")
|
| 514 |
except Exception as e:
|
| 515 |
print(f"AOT compilation failed for {model_name}: {e}")
|
| 516 |
else:
|
| 517 |
-
print(f"Skipping AOT compilation for small model {model_name}")
|
| 518 |
|
| 519 |
prompt = format_conversation(history, enriched, pipe.tokenizer)
|
| 520 |
prompt_debug = f"\n\n--- Prompt Preview ---\n```\n{prompt}\n```"
|
|
|
|
| 453 |
|
| 454 |
pipe = load_pipeline(model_name)
|
| 455 |
|
| 456 |
+
# Determine actual model size for AOT decision
|
| 457 |
+
actual_params = sum(p.numel() for p in pipe.model.parameters())
|
| 458 |
+
model_size_b = actual_params / 1e9 # Convert to billions
|
| 459 |
+
use_aot = model_size_b >= 2 # Only compile models >= 2B parameters
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 460 |
|
| 461 |
if use_aot:
|
| 462 |
try:
|
|
|
|
| 483 |
)
|
| 484 |
compiled = spaces.aoti_compile(exported)
|
| 485 |
spaces.aoti_apply(compiled, pipe.model)
|
| 486 |
+
print(f"AOT compilation successful for {model_name} ({model_size_b:.1f}B parameters)")
|
| 487 |
except Exception as e:
|
| 488 |
print(f"AOT compilation failed for {model_name}: {e}")
|
| 489 |
else:
|
| 490 |
+
print(f"Skipping AOT compilation for small model {model_name} ({model_size_b:.1f}B parameters)")
|
| 491 |
|
| 492 |
prompt = format_conversation(history, enriched, pipe.tokenizer)
|
| 493 |
prompt_debug = f"\n\n--- Prompt Preview ---\n```\n{prompt}\n```"
|