""" Test rapide de comparaison des modèles pour les tâches MCP Focus sur les tests les plus importants """ import sys import os import json import time # Ajouter le chemin pour les imports sys.path.append(os.path.dirname(os.path.abspath(__file__))) def test_model(model_path, model_name): """Test rapide d'un modèle""" try: from llama_cpp import Llama print(f"🔄 Test de {model_name}...") # Initialiser avec des paramètres plus rapides llm = Llama( model_path=model_path, n_ctx=1024, n_threads=1, verbose=False ) # Test simple de compréhension MCP prompt = """Tu es un assistant IA pour un jeu RTS via MCP. Outils: get_game_state(), move_units(unit_ids, target_x, target_y) Commande: "Montre-moi l'état du jeu" Réponds avec JSON: {{"tool": "nom_outil", "args": {{}}}}""" start_time = time.time() response = llm( prompt, max_tokens=100, temperature=0.1, stop=["", "<|im_end|>"] ) response_time = time.time() - start_time response_text = response['choices'][0]['text'].strip() # Analyser la réponse score = 0 # Vérifier JSON try: json.loads(response_text) score += 3 except: pass # Vérifier outil correct if "get_game_state" in response_text: score += 4 # Vérifier structure if "tool" in response_text: score += 2 # Vérifier cohérence if "game" in response_text.lower(): score += 1 score = min(score, 10) print(f"✅ {model_name}: {score}/10 | Temps: {response_time:.2f}s") print(f" Réponse: {response_text[:100]}...") return { 'name': model_name, 'score': score, 'time': response_time, 'response': response_text } except Exception as e: print(f"❌ {model_name}: Erreur - {e}") return { 'name': model_name, 'score': 0, 'time': 0, 'error': str(e) } def main(): """Test rapide comparatif""" print("🚀 TEST RAPIDE COMPARATIF MCP") print("=" * 50) # Modèles à tester models = [ { 'name': 'Qwen2.5-0.5B', 'path': 'qwen2.5-0.5b-instruct-q4_0.gguf' }, { 'name': 'Qwen3-0.6B', 'path': 'Qwen3-0.6B-Q8_0.gguf' }, { 'name': 'Gemma-3-1B', 'path': 'google_gemma-3-1b-it-qat-Q4_0.gguf' } ] results = [] for model in models: if os.path.exists(model['path']): result = test_model(model['path'], model['name']) results.append(result) else: print(f"❌ Fichier non trouvé: {model['path']}") # Résultats print("\n" + "=" * 50) print("📊 RÉSULTATS COMPARATIFS") print("=" * 50) # Classement sorted_results = sorted(results, key=lambda x: x['score'], reverse=True) print(f"\n🏆 CLASSEMENT:") for i, result in enumerate(sorted_results, 1): if 'error' not in result: print(f" {i}. {result['name']}: {result['score']}/10 ({result['time']:.2f}s)") else: print(f" {i}. 
{result['name']}: ÉCHEC") # Analyse successful_results = [r for r in results if 'error' not in r and r['score'] > 0] if successful_results: best_model = successful_results[0] print(f"\n🎯 MEILLEUR MODÈLE: {best_model['name']}") print(f" Score: {best_model['score']}/10") print(f" Temps: {best_model['time']:.2f}s") # Recommandations if best_model['score'] >= 7: print(f"\n✅ RECOMMANDATION: {best_model['name']} est EXCELLENT pour MCP") elif best_model['score'] >= 5: print(f"\n👍 RECOMMANDATION: {best_model['name']} est BON pour MCP") else: print(f"\n⚠️ RECOMMANDATION: {best_model['name']} est LIMITÉ pour MCP") # Performance vs taille print(f"\n⚖️ PERFORMANCE:") for result in successful_results: efficiency = result['score'] / result['time'] if result['time'] > 0 else 0 file_size = os.path.getsize([m['path'] for m in models if m['name'] == result['name']][0]) / (1024*1024) print(f" {result['name']}: {efficiency:.2f} score/s | {file_size:.0f} MB") # Sauvegarder with open("quick_model_comparison.json", "w", encoding="utf-8") as f: json.dump({ 'results': results, 'ranking': sorted_results, 'best_model': successful_results[0]['name'] if successful_results else None }, f, indent=2, ensure_ascii=False) print(f"\n📄 Résultats sauvegardés dans: quick_model_comparison.json") if __name__ == "__main__": main()
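
# A minimal sketch of how the saved results could be inspected afterwards
# (assumption: the script above has already been run and has written
# quick_model_comparison.json with the 'results', 'ranking', and 'best_model'
# keys produced by main()):
#
#   import json
#   with open("quick_model_comparison.json", encoding="utf-8") as f:
#       data = json.load(f)
#   print(data["best_model"])               # name of the top-ranked model, or None
#   for entry in data["ranking"]:           # entries sorted by score, best first
#       print(entry["name"], entry.get("score", 0))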