#!/usr/bin/env python3
"""
Test the Q8_0 models to see whether the higher-quality quantization
resolves the problems seen with the earlier builds.
"""
import os
import time
import json
import re


def test_q8_models():
    """Test the Q8_0 models with different prompt formats."""
    print("🚀 Q8_0 MODEL TEST - IMPROVED FORMATS")
    print("=" * 70)

    # Downloaded Q8_0 models
    models = [
        {
            'name': 'MCP-Instruct-v1-Q8',
            'path': 'mcp-instruct-v1.Q8_0.gguf',
            'format': 'mcp_structured',
            'n_ctx': 4096
        },
        {
            'name': 'MCPR-L-3B-Exa-Q8',
            'path': 'mcprl-3b-exa.Q8_0.gguf',
            'format': 'instruct',
            'n_ctx': 4096
        },
        {
            'name': 'Gemma-3n-E2B-it-Q8',
            'path': 'google_gemma-3n-E2B-it-Q8_0.gguf',
            'format': 'code',
            'n_ctx': 4096
        }
    ]

    # Optimized prompt templates. They contain literal JSON braces, so the
    # command is substituted with str.replace() below; str.format() would
    # try to interpret {"tool": ...} as format fields and raise.
    prompt_templates = {
        'mcp_structured': '''<|im_start|>system
You are an MCP assistant for an RTS game. Respond with ONLY JSON format: {"tool": "tool_name", "args": {}}

Available tools:
- get_game_state(): Get current game state
- move_units(unit_ids, target_x, target_y): Move units to coordinates
- attack_unit(attacker_ids, target_id): Attack a specific unit
- build_building(building_type, position_x, position_y): Construct a building

Always respond with valid JSON, no additional text.<|im_end|>
<|im_start|>user
{command}<|im_end|>
<|im_start|>assistant
''',
        'instruct': '''[INST] You are an MCP assistant. Respond with ONLY JSON format: {"tool": "tool_name", "args": {}}

Available tools:
- get_game_state(): Get current game state
- move_units(unit_ids, target_x, target_y): Move units to coordinates
- attack_unit(attacker_ids, target_id): Attack a specific unit
- build_building(building_type, position_x, position_y): Construct a building

User command: {command}

Respond with only JSON: [/INST]
''',
        'code': '''// MCP assistant for RTS game
// Tools: get_game_state(), move_units(), attack_unit(), build_building()
// Command: {command}
// Output JSON only: {"tool": "tool_name", "args": {}}
'''
    }

    test_command = "show game state"

    for model in models:
        print(f"\n🧪 TEST: {model['name']}")
        print("-" * 50)

        if not os.path.exists(model['path']):
            print(f"   ❌ File not found: {model['path']}")
            continue

        file_size_mb = os.path.getsize(model['path']) / (1024 * 1024)
        print(f"   📏 Size: {file_size_mb:.1f} MB")
        print(f"   🔧 Format: {model['format']}")
        print(f"   🧠 Context: {model['n_ctx']} tokens")

        try:
            from llama_cpp import Llama

            llm = Llama(
                model_path=model['path'],
                n_ctx=model['n_ctx'],
                n_threads=1,
                verbose=False,
                n_gpu_layers=0
            )

            # str.replace() instead of str.format(); see note on the
            # templates above.
            prompt = prompt_templates[model['format']].replace('{command}', test_command)
            print(f"   📝 Prompt (start): {prompt[:80]}...")

            start_time = time.time()

            try:
                response = llm(
                    prompt,
                    max_tokens=100,
                    temperature=0.3,
                    stop=["</s>", "<|im_end|>", "```", "\n\n"]
                )

                response_time = time.time() - start_time

                # Handle the response (compatible with different llama-cpp versions)
                if isinstance(response, dict) and 'choices' in response:
                    response_text = response['choices'][0]['text'].strip()
                elif hasattr(response, '__iter__') and not isinstance(response, str):
                    # Handle streamed responses
                    response_text = ""
                    for chunk in response:
                        if isinstance(chunk, dict) and 'choices' in chunk:
                            response_text += chunk['choices'][0]['text']
                    response_text = response_text.strip()
                else:
                    response_text = str(response)

                print(f"   ⏱️ Time: {response_time:.2f}s")
                print(f"   📄 Response: {response_text[:200]}")

                # Validate JSON
                try:
                    parsed = json.loads(response_text)
                    print(f"   ✅ VALID JSON: {json.dumps(parsed, indent=2)}")

                    # Assess relevance
                    if 'tool' in parsed:
                        print(f"   🎯 Tool identified: {parsed['tool']}")
                    if 'args' in parsed:
                        print(f"   📋 Arguments: {parsed['args']}")
                except json.JSONDecodeError:
                    print("   ❌ INVALID JSON")

                    # Try to salvage a JSON object from the surrounding text
                    # (the regex handles at most one level of nesting)
                    json_match = re.search(
                        r'\{[^{}]*\{[^{}]*\}[^{}]*\}|\{[^{}]*\}',
                        response_text
                    )
                    if json_match:
                        try:
                            extracted_json = json.loads(json_match.group())
                            print(f"   🔍 Extracted JSON: "
                                  f"{json.dumps(extracted_json, indent=2)}")
                        except json.JSONDecodeError:
                            print("   ❌ Could not extract valid JSON")

            except Exception as e:
                print(f"   ❌ Generation error: {e}")

        except Exception as e:
            print(f"   ❌ Load error: {e}")

    print(f"\n{'=' * 70}")
    print("🎯 COMPARISON WITH PREVIOUS VERSIONS")
    print("=" * 70)

    print("\n📊 PREVIOUS VERSION RESULTS:")
    print("   • MCP-Instruct-v1 (Q4_K_M): technical error (llama_decode=-1)")
    print("   • MCPR-L-3B-Exa (Q2_K): corrupted text with stray special characters")
    print("   • Gemma-3n-E2B-it (IQ2_XXS): empty responses, no output")

    print("\n📈 EXPECTATIONS FOR Q8_0:")
    print("   • Better quantization quality")
    print("   • Less text corruption")
    print("   • More coherent responses")
    print("   • Valid JSON output becomes achievable")


if __name__ == "__main__":
    test_q8_models()
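

# ---------------------------------------------------------------------------
# Follow-up sketch (not called by the test above): if the Q8_0 models still
# emit malformed JSON, llama-cpp-python can constrain decoding itself via the
# OpenAI-style "JSON mode" of create_chat_completion(). This is a minimal
# sketch, assuming a llama-cpp-python version recent enough to support
# response_format={"type": "json_object"} and a GGUF file whose metadata
# carries a usable chat template; test_json_mode and its arguments are
# hypothetical names, and model_path is whichever file listed above you
# want to probe.
# ---------------------------------------------------------------------------
def test_json_mode(model_path: str, command: str = "show game state"):
    """Constrain generation to syntactically valid JSON via JSON mode."""
    from llama_cpp import Llama

    llm = Llama(
        model_path=model_path,
        n_ctx=4096,
        n_threads=1,
        verbose=False,
        n_gpu_layers=0
    )

    response = llm.create_chat_completion(
        messages=[
            {
                "role": "system",
                "content": (
                    'You are an MCP assistant for an RTS game. Respond with '
                    'ONLY JSON format: {"tool": "tool_name", "args": {}}'
                )
            },
            {"role": "user", "content": command}
        ],
        # Grammar-constrained sampling: the model can only emit valid JSON
        response_format={"type": "json_object"},
        max_tokens=100,
        temperature=0.3
    )

    text = response['choices'][0]['message']['content']
    # JSON mode guarantees syntax, not schema: the presence and shape of
    # "tool" and "args" still need to be checked by the caller.
    return json.loads(text)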