Luigi committed on
Commit
e3e334f
·
1 Parent(s): 4500f92

Use actual parameter count for AOT decision instead of string matching

Browse files
Files changed (1) hide show
  1. app.py +6 -33
app.py CHANGED
@@ -453,37 +453,10 @@ def chat_response(user_msg, chat_history, system_prompt,
453
 
454
  pipe = load_pipeline(model_name)
455
 
456
- # AOT compilation for performance optimization (only for larger models)
457
- # Estimate model size
458
- model_size = 0
459
- if '30B' in model_name or '32B' in model_name:
460
- model_size = 30
461
- elif '20B' in model_name:
462
- model_size = 20
463
- elif '15B' in model_name or '14B' in model_name:
464
- model_size = 15
465
- elif '4B' in model_name or '3B' in model_name:
466
- model_size = 4
467
- elif '2B' in model_name or '1.7B' in model_name:
468
- model_size = 2
469
- elif '1.5B' in model_name or '1.2B' in model_name or '1.1B' in model_name:
470
- model_size = 1.5
471
- elif '1B' in model_name:
472
- model_size = 1
473
- elif '700M' in model_name or '600M' in model_name:
474
- model_size = 0.7
475
- elif '500M' in model_name:
476
- model_size = 0.5
477
- elif '360M' in model_name or '350M' in model_name:
478
- model_size = 0.35
479
- elif '270M' in model_name:
480
- model_size = 0.27
481
- elif '135M' in model_name:
482
- model_size = 0.135
483
- else:
484
- model_size = 4 # default
485
-
486
- use_aot = model_size >= 2 # Only compile models >= 2B parameters
487
 
488
  if use_aot:
489
  try:
@@ -510,11 +483,11 @@ def chat_response(user_msg, chat_history, system_prompt,
510
  )
511
  compiled = spaces.aoti_compile(exported)
512
  spaces.aoti_apply(compiled, pipe.model)
513
- print(f"AOT compilation successful for {model_name}")
514
  except Exception as e:
515
  print(f"AOT compilation failed for {model_name}: {e}")
516
  else:
517
- print(f"Skipping AOT compilation for small model {model_name}")
518
 
519
  prompt = format_conversation(history, enriched, pipe.tokenizer)
520
  prompt_debug = f"\n\n--- Prompt Preview ---\n```\n{prompt}\n```"
 
453
 
454
  pipe = load_pipeline(model_name)
455
 
456
+ # Determine actual model size for AOT decision
457
+ actual_params = sum(p.numel() for p in pipe.model.parameters())
458
+ model_size_b = actual_params / 1e9 # Convert to billions
459
+ use_aot = model_size_b >= 2 # Only compile models >= 2B parameters
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
460
 
461
  if use_aot:
462
  try:
 
483
  )
484
  compiled = spaces.aoti_compile(exported)
485
  spaces.aoti_apply(compiled, pipe.model)
486
+ print(f"AOT compilation successful for {model_name} ({model_size_b:.1f}B parameters)")
487
  except Exception as e:
488
  print(f"AOT compilation failed for {model_name}: {e}")
489
  else:
490
+ print(f"Skipping AOT compilation for small model {model_name} ({model_size_b:.1f}B parameters)")
491
 
492
  prompt = format_conversation(history, enriched, pipe.tokenizer)
493
  prompt_debug = f"\n\n--- Prompt Preview ---\n```\n{prompt}\n```"