#!/usr/bin/env python3
"""
Test the Q8_0 models to see whether the better quantization resolves the earlier problems
"""
import sys
import os
import re
import time
import json


def test_q8_models():
    """Test the Q8_0 models with different prompt formats"""
    print("🚀 Q8_0 MODEL TEST - IMPROVED FORMATS")
    print("=" * 70)

    # Downloaded Q8_0 models
    models = [
        {
            'name': 'MCP-Instruct-v1-Q8',
            'path': 'mcp-instruct-v1.Q8_0.gguf',
            'format': 'mcp_structured',
            'n_ctx': 4096
        },
        {
            'name': 'MCPR-L-3B-Exa-Q8',
            'path': 'mcprl-3b-exa.Q8_0.gguf',
            'format': 'instruct',
            'n_ctx': 4096
        },
        {
            'name': 'Gemma-3n-E2B-it-Q8',
            'path': 'google_gemma-3n-E2B-it-Q8_0.gguf',
            'format': 'code',
            'n_ctx': 4096
        }
    ]
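
    # The .gguf files are expected in the current working directory; the
    # os.path.exists() check in the loop below skips any model that is missing.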

    # Optimized prompt templates
    prompt_templates = {
        'mcp_structured': '''<|im_start|>system
You are an MCP assistant for an RTS game. Respond with ONLY JSON format:
{"tool": "tool_name", "args": {}}
Available tools:
- get_game_state(): Get current game state
- move_units(unit_ids, target_x, target_y): Move units to coordinates
- attack_unit(attacker_ids, target_id): Attack a specific unit
- build_building(building_type, position_x, position_y): Construct a building
Always respond with valid JSON, no additional text.<|im_end|>
<|im_start|>user
{command}<|im_end|>
<|im_start|>assistant
''',
        'instruct': '''[INST] You are an MCP assistant. Respond with ONLY JSON format:
{"tool": "tool_name", "args": {}}
Available tools:
- get_game_state(): Get current game state
- move_units(unit_ids, target_x, target_y): Move units to coordinates
- attack_unit(attacker_ids, target_id): Attack a specific unit
- build_building(building_type, position_x, position_y): Construct a building
User command: {command}
Respond with only JSON: [/INST]
''',
        'code': '''// MCP assistant for RTS game
// Tools: get_game_state(), move_units(), attack_unit(), build_building()
// Command: {command}
// Output JSON only:
{"tool": "tool_name", "args": {}}'''
    }
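
    # NOTE: the templates above contain literal JSON braces, so str.format()
    # would try to parse {"tool": ...} as replacement fields and raise a
    # KeyError; the loop below therefore fills in {command} with str.replace().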

    test_command = "show game state"

    for model in models:
        print(f"\n🧪 TEST: {model['name']}")
        print("-" * 50)

        if not os.path.exists(model['path']):
            print(f" ❌ File not found: {model['path']}")
            continue

        file_size_mb = os.path.getsize(model['path']) / (1024 * 1024)
        print(f" 📏 Size: {file_size_mb:.1f} MB")
        print(f" 🔧 Format: {model['format']}")
        print(f" 🧠 Context: {model['n_ctx']} tokens")

        try:
            from llama_cpp import Llama
            llm = Llama(
                model_path=model['path'],
                n_ctx=model['n_ctx'],
                n_threads=1,
                verbose=False,
                n_gpu_layers=0  # CPU only
            )

            # str.replace() keeps the literal JSON braces intact (see NOTE above)
            prompt = prompt_templates[model['format']].replace('{command}', test_command)
            print(f" 📝 Prompt (start): {prompt[:80]}...")

            start_time = time.time()
            try:
                response = llm(
                    prompt,
                    max_tokens=100,
                    temperature=0.3,
                    stop=["</s>", "<|im_end|>", "```", "\n\n"]
                )
                response_time = time.time() - start_time

                # Handle the response (compatible with different llama-cpp versions)
                if isinstance(response, dict) and 'choices' in response:
                    response_text = response['choices'][0]['text'].strip()
                elif hasattr(response, '__iter__') and not isinstance(response, str):
                    # Handle streamed responses
                    response_text = ""
                    for chunk in response:
                        if isinstance(chunk, dict) and 'choices' in chunk:
                            response_text += chunk['choices'][0]['text']
                else:
                    response_text = str(response)

                print(f" ⏱️ Time: {response_time:.2f}s")
                print(f" 📄 Response: {response_text[:200]}")

                # Check for valid JSON
                try:
                    parsed = json.loads(response_text)
                    print(f" ✅ VALID JSON: {json.dumps(parsed, indent=2)}")
                    # Assess relevance
                    if 'tool' in parsed:
                        print(f" 🎯 Tool identified: {parsed['tool']}")
                    if 'args' in parsed:
                        print(f" 📋 Arguments: {parsed['args']}")
                except json.JSONDecodeError:
                    print(" ❌ INVALID JSON")
                    # Try to extract a JSON object (handles one level of nesting)
                    json_match = re.search(r'\{[^{}]*\{[^{}]*\}[^{}]*\}|\{[^{}]*\}', response_text)
                    if json_match:
                        try:
                            extracted_json = json.loads(json_match.group())
                            print(f" 🔍 Extracted JSON: {json.dumps(extracted_json, indent=2)}")
                        except json.JSONDecodeError:
                            print(" ❌ Could not extract valid JSON")
            except Exception as e:
                print(f" ❌ Generation error: {e}")
        except Exception as e:
            print(f" ❌ Load error: {e}")

    print(f"\n{'=' * 70}")
    print("🎯 COMPARISON WITH PREVIOUS VERSIONS")
    print("=" * 70)
    print("\n📊 RESULTS FROM PREVIOUS VERSIONS:")
    print(" • MCP-Instruct-v1 (Q4_K_M): technical error (llama_decode=-1)")
    print(" • MCPR L-3B-Exa (Q2_K): corrupted text with special characters")
    print(" • Gemma-3n-E2B-it (IQ2_XXS): empty responses, no output")
    print("\n📈 EXPECTATIONS FOR Q8_0:")
    print(" • Better quantization quality")
    print(" • Less text corruption")
    print(" • More coherent responses")
    print(" • Valid JSON possible")


if __name__ == "__main__":
    test_q8_models()
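
# To run (a minimal sketch, assuming llama-cpp-python is installed and the
# three .gguf files listed in `models` sit in the working directory):
#
#   pip install llama-cpp-python
#   python test_q8_models.py   # hypothetical filename for this script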