# rts-commander — tests/scripts/test_improved_mcp_formats.py
# Commit d28c36c (Luigi): "Organize project structure: move test scripts to
# tests/scripts and documentation to docs/reports"
"""
Test amélioré avec formats de prompts optimisés pour modèles spécialisés MCP
"""
import json
import os
import re
import sys
import time

from llama_cpp import Llama
def test_mcp_formats(model_path, model_name):
    """Try several prompt formats against one MCP model and report JSON validity.

    For each candidate prompt template, formats a fixed test command into it,
    runs a short generation, and checks whether the completion is (or contains)
    valid JSON. Results are printed to the console; nothing is returned.

    Args:
        model_path: Filesystem path to the GGUF model file.
        model_name: Human-readable label, used only in console output.
    """
    print(f"\n🧪 TEST AVEC FORMATS OPTIMISÉS: {model_name}")
    print("=" * 60)

    if not os.path.exists(model_path):
        print(f"❌ Fichier modèle non trouvé: {model_path}")
        return

    try:
        # Larger context window for MCP-style prompts; CPU only (n_gpu_layers=0).
        # NOTE(review): temperature/top_p are sampling parameters and only take
        # effect on the generation call below — recent llama-cpp-python ignores
        # unknown __init__ kwargs; kept here to preserve original behavior.
        llm = Llama(
            model_path=model_path,
            n_ctx=2048,
            n_threads=1,
            verbose=False,
            n_gpu_layers=0,
            temperature=0.3,
            top_p=0.9,
        )

        test_command = "show game state"

        # Candidate prompt formats based on common MCP / instruct conventions.
        prompt_formats = [
            {
                'name': 'Format MCP structuré complet',
                'template': '''[INST] You are an MCP (Model Context Protocol) assistant for an RTS game. Your task is to respond ONLY with JSON format.
Available tools:
- get_game_state(): Returns current game state information
- move_units(unit_ids, target_x, target_y): Move specified units to coordinates
- attack_unit(attacker_ids, target_id): Attack target unit with attackers
- build_building(building_type, position_x, position_y): Construct building at position
User command: "{command}"
Respond with ONLY JSON in this format:
{{"tool": "tool_name", "args": {{<parameters>}}}}
Do not include any explanations, comments, or additional text. Only JSON. [/INST]'''
            },
            {
                'name': 'Format MCP instruct simple',
                'template': '''<|system|>
You are an MCP assistant for an RTS game. Respond with ONLY JSON.
Available tools: get_game_state, move_units, attack_unit, build_building
</|system|>
<|user|>
{command}
</|user|>
<|assistant|>'''
            },
            {
                'name': 'Format JSON strict',
                'template': '''Your role: MCP assistant for RTS game
Tools: get_game_state(), move_units(), attack_unit(), build_building()
Command: {command}
JSON Response:'''
            },
            {
                'name': 'Format conversation MCP',
                'template': '''User: {command}
Assistant: {{"tool": "get_game_state", "args": {{}}}}'''
            },
            {
                'name': 'Format code-oriented',
                'template': '''// MCP assistant for RTS game
// Available functions:
// - get_game_state()
// - move_units(unit_ids, target_x, target_y)
// - attack_unit(attacker_ids, target_id)
// - build_building(building_type, position_x, position_y)
// User: {command}
// Response (JSON only):'''
            }
        ]

        # Hoisted loop invariant: matches a one-level-nested JSON object first,
        # then falls back to a flat object.
        json_pattern = re.compile(r'\{[^{}]*\{[^{}]*\}[^{}]*\}|\{[^{}]*\}')

        for i, format_info in enumerate(prompt_formats, 1):
            print(f"\n{i}. {format_info['name']}:")
            prompt = format_info['template'].format(command=test_command)

            try:
                response = llm(
                    prompt,
                    max_tokens=100,
                    temperature=0.3,
                    stop=["</s>", "<|im_end|>", "```", "\n\n", "User:"]
                )

                # llama-cpp-python returns a dict, but tolerate object-style
                # responses too (original behavior: attribute check first).
                if hasattr(response, 'choices') and len(response.choices) > 0:
                    response_text = response.choices[0].text.strip()
                elif isinstance(response, dict) and 'choices' in response:
                    response_text = response['choices'][0]['text'].strip()
                else:
                    response_text = str(response)

                print(f" Prompt (début): {prompt[:80]}...")
                print(f" Réponse: {response_text[:150]}")

                # First, try the whole completion as JSON; on failure, try to
                # extract an embedded JSON object.
                try:
                    parsed = json.loads(response_text)
                    print(f" ✅ JSON VALIDE: {parsed}")
                except json.JSONDecodeError:
                    json_match = json_pattern.search(response_text)
                    if json_match:
                        try:
                            parsed = json.loads(json_match.group())
                            print(f" ✅ JSON EXTRACTÉ: {parsed}")
                        except json.JSONDecodeError:
                            # Narrowed from bare `except:` — only decode
                            # failures mean "invalid JSON" here.
                            print(f" ❌ JSON INVALIDE")
                    else:
                        print(f" ❌ PAS DE JSON DÉTECTÉ")

            except Exception as e:
                print(f" ❌ Erreur: {e}")

    except Exception as e:
        print(f"❌ Erreur de chargement: {e}")
def test_model_capabilities(model_path, model_name):
    """Smoke-test basic generation for one GGUF model.

    Loads the model on CPU, feeds it a few short probe prompts, and prints
    each prompt alongside the (truncated-by-max_tokens) completion. Purely
    console output; returns None.

    Args:
        model_path: Filesystem path to the GGUF model file.
        model_name: Human-readable label, used only in console output.
    """
    print(f"\n🔍 TEST DE CAPACITÉS: {model_name}")
    print("=" * 60)

    if not os.path.exists(model_path):
        print(f"❌ Fichier modèle non trouvé: {model_path}")
        return

    # Guard clause: a model that fails to load ends the test immediately.
    try:
        model = Llama(
            model_path=model_path,
            n_ctx=2048,
            n_threads=1,
            verbose=False,
            n_gpu_layers=0,
        )
    except Exception as load_error:
        print(f"❌ Erreur de chargement: {load_error}")
        return

    # Short, varied probes: greeting, question, JSON request, raw JSON echo.
    probes = (
        "Hello",
        "What is your name?",
        "Write JSON",
        "{\"test\": \"value\"}",
    )

    for probe in probes:
        try:
            result = model(probe, max_tokens=20, temperature=0.1)
            completion = result['choices'][0]['text'].strip()
            print(f" '{probe}' → '{completion}'")
        except Exception as gen_error:
            print(f" '{probe}' → Erreur: {gen_error}")
def main():
    """Run capability and prompt-format tests over the known problem models."""
    print("🚀 TEST AMÉLIORÉ DES MODÈLES SPÉCIALISÉS MCP")
    print("=" * 70)

    # (label, gguf path, category) — category is informational only.
    candidates = [
        {'name': 'MCP-Instruct-v1',
         'path': 'mcp-instruct-v1.Q4_K_M.gguf',
         'type': 'mcp_specialized'},
        {'name': 'MCPR L-3B-Exa',
         'path': 'mcprl-3b-exa.Q2_K.gguf',
         'type': 'mcp_specialized'},
        {'name': 'Gemma-3n-E2B-it',
         'path': 'gemma-3n-E2B-it-UD-IQ2_XXS.gguf',
         'type': 'mcp_specialized'},
        {'name': 'Qwen2.5-Coder-0.5B (référence)',
         'path': 'qwen2.5-coder-0.5b-instruct-q4_0.gguf',
         'type': 'code_specialized'},
    ]

    for candidate in candidates:
        gguf_path = candidate['path']
        if not os.path.exists(gguf_path):
            print(f"\n❌ Modèle non trouvé: {gguf_path}")
            continue
        # Basic generation first, then the prompt-format sweep.
        test_model_capabilities(gguf_path, candidate['name'])
        test_mcp_formats(gguf_path, candidate['name'])

    print("\n" + "=" * 70)
    print("📊 RÉCAPITULATIF DES PROBLÈMES:")
    print("""
1. **MCP-Instruct-v1**: Erreur technique (llama_decode=-1) - probablement incompatible
2. **MCPR L-3B-Exa**: Génération de texte corrompu - mauvaise quantisation ou format
3. **Gemma-3n-E2B-it**: Réponses vides - format de prompt incompatible
4. **Modèles code-spécialisés**: Fonctionnent mieux car habitués au JSON
""")
# Script entry point: run the full test sweep when executed directly.
if __name__ == "__main__":
    main()