#!/usr/bin/env python3
"""
Test des modèles Q8_0 pour voir si la meilleure quantisation résout les problèmes
"""
import sys
import os
import time
import json
import re
def test_q8_models():
"""Tester les modèles Q8_0 avec différents formats de prompt"""
print("🚀 TEST DES MODÈLES Q8_0 - FORMATS AMÉLIORÉS")
print("=" * 70)
# Modèles Q8_0 téléchargés
models = [
{
'name': 'MCP-Instruct-v1-Q8',
'path': 'mcp-instruct-v1.Q8_0.gguf',
'format': 'mcp_structured',
'n_ctx': 4096
},
{
'name': 'MCPR-L-3B-Exa-Q8',
'path': 'mcprl-3b-exa.Q8_0.gguf',
'format': 'instruct',
'n_ctx': 4096
},
{
'name': 'Gemma-3n-E2B-it-Q8',
'path': 'google_gemma-3n-E2B-it-Q8_0.gguf',
'format': 'code',
'n_ctx': 4096
}
]
    # Optimized prompt formats
prompt_templates = {
'mcp_structured': '''<|im_start|>system
You are an MCP assistant for an RTS game. Respond with ONLY JSON format:
{"tool": "tool_name", "args": {}}
Available tools:
- get_game_state(): Get current game state
- move_units(unit_ids, target_x, target_y): Move units to coordinates
- attack_unit(attacker_ids, target_id): Attack a specific unit
- build_building(building_type, position_x, position_y): Construct a building
Always respond with valid JSON, no additional text.<|im_end|>
<|im_start|>user
{command}<|im_end|>
<|im_start|>assistant
''',
'instruct': '''[INST] You are an MCP assistant. Respond with ONLY JSON format:
{"tool": "tool_name", "args": {}}
Available tools:
- get_game_state(): Get current game state
- move_units(unit_ids, target_x, target_y): Move units to coordinates
- attack_unit(attacker_ids, target_id): Attack a specific unit
- build_building(building_type, position_x, position_y): Construct a building
User command: {command}
Respond with only JSON: [/INST]
''',
'code': '''// MCP assistant for RTS game
// Tools: get_game_state(), move_units(), attack_unit(), build_building()
// Command: {command}
// Output JSON only:
{"tool": "tool_name", "args": {}}'''
}
test_command = "show game state"
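    # For this command, a well-behaved model should produce something like the
    # following (illustrative target only, the script does not assert it):
    #   {"tool": "get_game_state", "args": {}}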
for model in models:
print(f"\n🧪 TEST: {model['name']}")
print("-" * 50)
if not os.path.exists(model['path']):
print(f" ❌ Fichier non trouvé: {model['path']}")
continue
file_size_mb = os.path.getsize(model['path']) / (1024*1024)
print(f" 📏 Taille: {file_size_mb:.1f} MB")
print(f" 🔧 Format: {model['format']}")
print(f" 🧠 Contexte: {model['n_ctx']} tokens")
try:
from llama_cpp import Llama
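            # CPU-only load: n_gpu_layers=0 disables GPU offload in llama-cpp-python;
            # raising n_threads would speed up generation on multi-core machines.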
llm = Llama(
model_path=model['path'],
n_ctx=model['n_ctx'],
n_threads=1,
verbose=False,
n_gpu_layers=0
)
            # The templates contain literal JSON braces, which str.format() would
            # parse as replacement fields, so substitute the command via replace().
            prompt = prompt_templates[model['format']].replace('{command}', test_command)
            print(f" 📝 Prompt (start): {prompt[:80]}...")
start_time = time.time()
try:
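                # Stop sequences cover the common end-of-turn markers so generation
                # halts right after the JSON; the "\n\n" stop assumes the expected
                # JSON fits on a single line.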
response = llm(
prompt,
max_tokens=100,
temperature=0.3,
stop=["</s>", "<|im_end|>", "```", "\n\n"]
)
response_time = time.time() - start_time
                # Handle the response (compatible across llama-cpp-python versions)
if isinstance(response, dict) and 'choices' in response:
response_text = response['choices'][0]['text'].strip()
elif hasattr(response, '__iter__') and not isinstance(response, str):
                    # Handle streamed responses
response_text = ""
for chunk in response:
if isinstance(chunk, dict) and 'choices' in chunk:
response_text += chunk['choices'][0]['text']
else:
response_text = str(response)
print(f" ⏱️ Temps: {response_time:.2f}s")
print(f" 📄 Réponse: {response_text[:200]}")
# Vérifier JSON
try:
parsed = json.loads(response_text)
print(f" ✅ JSON VALIDE: {json.dumps(parsed, indent=2)}")
# Évaluer la pertinence
if 'tool' in parsed:
print(f" 🎯 Outil identifié: {parsed['tool']}")
if 'args' in parsed:
print(f" 📋 Arguments: {parsed['args']}")
                except json.JSONDecodeError:
                    print(f" ❌ INVALID JSON")
                    # Try to salvage a JSON object embedded in the raw text: the
                    # pattern matches a flat {...} object or one with a single
                    # nested level, e.g. {"tool": "move_units", "args": {}}.
                    json_match = re.search(r'\{[^{}]*\{[^{}]*\}[^{}]*\}|\{[^{}]*\}', response_text)
                    if json_match:
                        try:
                            extracted_json = json.loads(json_match.group())
                            print(f" 🔍 Extracted JSON: {json.dumps(extracted_json, indent=2)}")
                        except json.JSONDecodeError:
                            print(f" ❌ Could not extract valid JSON")
except Exception as e:
print(f" ❌ Erreur génération: {e}")
except Exception as e:
print(f" ❌ Erreur chargement: {e}")
print(f"\n{'='*70}")
print("🎯 COMPARAISON AVEC ANCIENNES VERSIONS")
print("=" * 70)
print("\n📊 RÉSULTATS ANCIENNES VERSIONS:")
print(" • MCP-Instruct-v1 (Q4_K_M): Erreur technique (llama_decode=-1)")
print(" • MCPR L-3B-Exa (Q2_K): Texte corrompu avec caractères spéciaux")
print(" • Gemma-3n-E2B-it (IQ2_XXS): Réponses vides, pas de sortie")
print("\n📈 ATTENTES POUR Q8_0:")
print(" • Meilleure qualité de quantisation")
print(" • Moins de corruption de texte")
print(" • Réponses plus cohérentes")
print(" • JSON valide possible")
if __name__ == "__main__":
test_q8_models()