#!/usr/bin/env python3
"""
Test des modèles Q8_0 pour voir si la meilleure quantisation résout les problèmes
"""
import json
import os
import re
import sys
import time
def test_q8_models():
    """Exercise the Q8_0-quantized GGUF models with per-model prompt formats.

    For each configured model this loads the GGUF file via llama-cpp-python,
    renders the matching prompt template with a fixed test command, runs one
    completion, and reports whether the output is (or contains) valid JSON.
    Models whose file is missing are reported and skipped. All results go to
    stdout; returns None.
    """
    print("🚀 TEST DES MODÈLES Q8_0 - FORMATS AMÉLIORÉS")
    print("=" * 70)

    # Locally downloaded Q8_0 models under test.
    models = [
        {
            'name': 'MCP-Instruct-v1-Q8',
            'path': 'mcp-instruct-v1.Q8_0.gguf',
            'format': 'mcp_structured',
            'n_ctx': 4096
        },
        {
            'name': 'MCPR-L-3B-Exa-Q8',
            'path': 'mcprl-3b-exa.Q8_0.gguf',
            'format': 'instruct',
            'n_ctx': 4096
        },
        {
            'name': 'Gemma-3n-E2B-it-Q8',
            'path': 'google_gemma-3n-E2B-it-Q8_0.gguf',
            'format': 'code',
            'n_ctx': 4096
        }
    ]

    # Optimized prompt templates, keyed by each model's 'format' field.
    # They intentionally contain literal JSON braces, so they must NOT be
    # rendered with str.format() (see the substitution below).
    prompt_templates = {
        'mcp_structured': '''<|im_start|>system
You are an MCP assistant for an RTS game. Respond with ONLY JSON format:
{"tool": "tool_name", "args": {}}
Available tools:
- get_game_state(): Get current game state
- move_units(unit_ids, target_x, target_y): Move units to coordinates
- attack_unit(attacker_ids, target_id): Attack a specific unit
- build_building(building_type, position_x, position_y): Construct a building
Always respond with valid JSON, no additional text.<|im_end|>
<|im_start|>user
{command}<|im_end|>
<|im_start|>assistant
''',
        'instruct': '''[INST] You are an MCP assistant. Respond with ONLY JSON format:
{"tool": "tool_name", "args": {}}
Available tools:
- get_game_state(): Get current game state
- move_units(unit_ids, target_x, target_y): Move units to coordinates
- attack_unit(attacker_ids, target_id): Attack a specific unit
- build_building(building_type, position_x, position_y): Construct a building
User command: {command}
Respond with only JSON: [/INST]
''',
        'code': '''// MCP assistant for RTS game
// Tools: get_game_state(), move_units(), attack_unit(), build_building()
// Command: {command}
// Output JSON only:
{"tool": "tool_name", "args": {}}'''
    }

    test_command = "show game state"

    for model in models:
        print(f"\n🧪 TEST: {model['name']}")
        print("-" * 50)

        if not os.path.exists(model['path']):
            print(f" ❌ Fichier non trouvé: {model['path']}")
            continue

        file_size_mb = os.path.getsize(model['path']) / (1024 * 1024)
        print(f" 📏 Taille: {file_size_mb:.1f} MB")
        print(f" 🔧 Format: {model['format']}")
        print(f" 🧠 Contexte: {model['n_ctx']} tokens")

        try:
            # Imported lazily: llama_cpp is optional and only needed once a
            # model file is actually present.
            from llama_cpp import Llama
            llm = Llama(
                model_path=model['path'],
                n_ctx=model['n_ctx'],
                n_threads=1,
                verbose=False,
                n_gpu_layers=0
            )

            # BUGFIX: str.format(command=...) raised KeyError here because
            # the templates contain literal JSON braces ({"tool": ...}).
            # Substitute the {command} placeholder with a plain replacement.
            prompt = prompt_templates[model['format']].replace('{command}', test_command)
            print(f" 📝 Prompt (début): {prompt[:80]}...")

            start_time = time.time()
            try:
                response = llm(
                    prompt,
                    max_tokens=100,
                    temperature=0.3,
                    stop=["</s>", "<|im_end|>", "```", "\n\n"]
                )
                response_time = time.time() - start_time

                # Normalize the response: llama-cpp-python versions return a
                # dict with 'choices', a stream of chunks, or something else.
                if isinstance(response, dict) and 'choices' in response:
                    response_text = response['choices'][0]['text'].strip()
                elif hasattr(response, '__iter__') and not isinstance(response, str):
                    # Streamed response: concatenate the per-chunk texts.
                    response_text = ""
                    for chunk in response:
                        if isinstance(chunk, dict) and 'choices' in chunk:
                            response_text += chunk['choices'][0]['text']
                else:
                    response_text = str(response)

                print(f" ⏱️ Temps: {response_time:.2f}s")
                print(f" 📄 Réponse: {response_text[:200]}")

                # Check whether the model produced valid JSON.
                try:
                    parsed = json.loads(response_text)
                    print(f" ✅ JSON VALIDE: {json.dumps(parsed, indent=2)}")
                    # Evaluate relevance of the structured answer.
                    if 'tool' in parsed:
                        print(f" 🎯 Outil identifié: {parsed['tool']}")
                    if 'args' in parsed:
                        print(f" 📋 Arguments: {parsed['args']}")
                except json.JSONDecodeError:
                    print(" ❌ JSON INVALIDE")
                    # Fallback: try to extract an embedded JSON object
                    # (at most one level of nesting) from the raw text.
                    json_match = re.search(r'\{[^{}]*\{[^{}]*\}[^{}]*\}|\{[^{}]*\}', response_text)
                    if json_match:
                        try:
                            extracted_json = json.loads(json_match.group())
                            print(f" 🔍 JSON extrait: {json.dumps(extracted_json, indent=2)}")
                        except json.JSONDecodeError:
                            # Narrowed from a bare 'except': only a JSON
                            # parse failure is expected here.
                            print(" ❌ Impossible d'extraire JSON valide")
            except Exception as e:
                print(f" ❌ Erreur génération: {e}")
        except Exception as e:
            print(f" ❌ Erreur chargement: {e}")

    print(f"\n{'='*70}")
    print("🎯 COMPARAISON AVEC ANCIENNES VERSIONS")
    print("=" * 70)
    print("\n📊 RÉSULTATS ANCIENNES VERSIONS:")
    print(" • MCP-Instruct-v1 (Q4_K_M): Erreur technique (llama_decode=-1)")
    print(" • MCPR L-3B-Exa (Q2_K): Texte corrompu avec caractères spéciaux")
    print(" • Gemma-3n-E2B-it (IQ2_XXS): Réponses vides, pas de sortie")
    print("\n📈 ATTENTES POUR Q8_0:")
    print(" • Meilleure qualité de quantisation")
    print(" • Moins de corruption de texte")
    print(" • Réponses plus cohérentes")
    print(" • JSON valide possible")
if __name__ == "__main__":
test_q8_models() |