Spaces:

Luigi
/

rts-commander

Sleeping

App Files Files Community

rts-commander / tests /scripts /investigate_mcp_failures.py

Luigi

Organize project structure: move test scripts to tests/scripts and documentation to docs/reports

d28c36c about 1 month ago

raw

history blame contribute delete

6.85 kB

	"""
	Investigation détaillée des échecs des modèles spécialisés MCP
	Analyse pourquoi MCP-Instruct-v1, Gemma-3n-E2B-it et MCPR L-3B-Exa échouent
	"""

	import sys
	import os
	import json
	import time
	from llama_cpp import Llama

	def _extract_response_text(response):
	    """Return the generated text carried by a completion result.

	    Supports attribute-style results (``response.choices[0].text``) and
	    dict-style results (``response['choices'][0]['text']``). When no choice
	    is available, the string form of ``response`` is returned instead.
	    """
	    choices = getattr(response, 'choices', None)
	    if choices:
	        first = choices[0]
	        return first.text if hasattr(first, 'text') else str(first)
	    if isinstance(response, dict) and response.get('choices'):
	        first = response['choices'][0]
	        return first.get('text', str(first))
	    return str(response)


	def _load_model(model_path, n_ctx):
	    """Load a GGUF model with one thread, no GPU layers and quiet logging."""
	    return Llama(
	        model_path=model_path,
	        n_ctx=n_ctx,
	        n_threads=1,
	        verbose=False,
	        n_gpu_layers=0,
	    )


	def analyze_model_failures():
	    """Investigate why the specialised MCP models fail and print a report.

	    For each known-problematic model, print its size, observed issues and
	    working hypothesis; if its file exists, try to load it and generate a
	    short completion for the prompt "Hello". Then, if the reference model
	    file exists, send it an MCP-style prompt and report whether the answer
	    parses as JSON. Finally print fixed conclusions, an action plan and the
	    list of model files to download.

	    Model paths are relative, so they are resolved against the current
	    working directory.

	    Returns:
	        None. Everything is written to standard output.
	    """
	    print("🔍 INVESTIGATION DÉTAILLÉE DES ÉCHECS MCP")
	    print("=" * 70)

	    # Problematic models with their observed symptoms and hypotheses.
	    problematic_models = [
	        {
	            'name': 'MCP-Instruct-v1',
	            'path': 'mcp-instruct-v1.Q4_K_M.gguf',
	            'size_mb': 697.0,
	            'issues': ['llama_decode returned -1'],
	            'hypothesis': 'Incompatibilité technique avec llama-cpp-python',
	        },
	        {
	            'name': 'MCPR L-3B-Exa',
	            'path': 'mcprl-3b-exa.Q2_K.gguf',
	            'size_mb': 1215.7,
	            'issues': ['texte corrompu', 'caractères spéciaux', 'sortie incohérente'],
	            'hypothesis': 'Quantisation Q2_K trop agressive pour ce modèle',
	        },
	        {
	            'name': 'Gemma-3n-E2B-it',
	            'path': 'gemma-3n-E2B-it-UD-IQ2_XXS.gguf',
	            'size_mb': 1958.3,
	            'issues': ['réponses vides', 'pas de sortie'],
	            'hypothesis': 'Format de prompt incompatible avec le modèle',
	        },
	    ]

	    # Detailed technical analysis, one model at a time.
	    print("\n📊 ANALYSE TECHNIQUE DÉTAILLÉE:")

	    for model in problematic_models:
	        print(f"\n🧩 {model['name']}:")
	        print(f" 📏 Taille: {model['size_mb']:.1f} MB")
	        print(f" 🔧 Problèmes: {', '.join(model['issues'])}")
	        print(f" 💭 Hypothèse: {model['hypothesis']}")

	        if not os.path.exists(model['path']):
	            print(" ❌ Fichier non trouvé")
	            continue

	        # Basic loading test. Any failure is reported rather than raised:
	        # this is a diagnostic script and must keep going to the next model.
	        print(" 🔄 Test de chargement...")
	        try:
	            llm = _load_model(model['path'], n_ctx=2048)
	        except Exception as e:
	            print(f" ❌ Erreur chargement: {e}")
	            continue
	        print(" ✅ Chargement réussi")

	        # Very simple generation test.
	        print(" 🧪 Test de génération basique...")
	        try:
	            response = llm("Hello", max_tokens=10, temperature=0.1)
	            response_text = _extract_response_text(response)
	            print(f" 📝 Réponse: '{response_text.strip()}'")
	        except Exception as e:
	            print(f" ❌ Erreur génération: {e}")
	        finally:
	            # Drop the reference so this model can be released before the
	            # next (possibly larger) one is loaded.
	            del llm

	    # Comparison with a model that is known to work.
	    print("\n🔍 COMPARAISON AVEC MODÈLE FONCTIONNEL:")

	    working_model = 'qwen2.5-coder-0.5b-instruct-q4_0.gguf'
	    if os.path.exists(working_model):
	        print("\n✅ Qwen2.5-Coder-0.5B (fonctionne):")

	        # MCP-style prompt. The JSON example must itself be well-formed,
	        # otherwise the model is shown an invalid target format.
	        prompt = (
	            'You are an AI assistant for an RTS game using MCP (Model Context Protocol).\n'
	            '\n'
	            'Available tools:\n'
	            '- get_game_state()\n'
	            '- move_units(unit_ids, target_x, target_y)\n'
	            '- attack_unit(attacker_ids, target_id)\n'
	            '- build_building(building_type, position_x, position_y)\n'
	            '\n'
	            'User command: "show game state"\n'
	            '\n'
	            'Respond with JSON only: {"tool": "tool_name", "args": {}}'
	        )

	        try:
	            llm = _load_model(working_model, n_ctx=1024)
	            response = llm(prompt, max_tokens=50, temperature=0.1)
	            response_text = _extract_response_text(response)

	            print(f" 📝 Réponse: {response_text[:100]}...")

	            # Check whether the whole answer parses as JSON.
	            try:
	                json.loads(response_text)
	            except json.JSONDecodeError:
	                print(" ❌ JSON invalide")
	            else:
	                print(" ✅ JSON valide")

	        except Exception as e:
	            print(f" ❌ Erreur: {e}")

	    # Conclusions and recommendations (static text).
	    print("\n🎯 CONCLUSIONS ET RECOMMANDATIONS:")
	    print("=" * 70)

	    print("\n1. MCP-INSTRUCT-V1 (Q4_K_M):")
	    print(" ❌ Problème: Erreur technique 'llama_decode returned -1'")
	    print(" 💡 Solution: Essayer version Q8_0 ou vérifier compatibilité llama-cpp-python")

	    print("\n2. MCPR L-3B-EXA (Q2_K):")
	    print(" ❌ Problème: Texte corrompu avec caractères spéciaux")
	    print(" 💡 Solution: Quantisation Q2_K trop agressive, essayer Q4_K_M ou Q8_0")

	    print("\n3. GEMMA-3N-E2B-IT (IQ2_XXS):")
	    print(" ❌ Problème: Réponses vides, modèle ne répond pas")
	    print(" 💡 Solution: Quantisation IQ2_XXS extrême, essayer version moins compressée")

	    print("\n4. POURQUOI LES MODÈLES CODE FONCTIONNENT MIEUX:")
	    print(" ✅ Habitués au format JSON et aux structures de données")
	    print(" ✅ Meilleure compréhension des formats structurés")
	    print(" ✅ Formation sur du code et des données techniques")

	    print("\n🚀 PLAN D'ACTION:")
	    print(" 1. Télécharger versions Q8_0 des modèles problématiques")
	    print(" 2. Tester avec formats de prompts MCP spécifiques")
	    print(" 3. Augmenter contexte (n_ctx) pour modèles plus grands")
	    print(" 4. Utiliser température légèrement plus élevée (0.3)")

	    print("\n📋 MODÈLES À TÉLÉCHARGER:")
	    print(" • mcp-instruct-v1.Q8_0.gguf")
	    print(" • mcprl-3b-exa.Q8_0.gguf")
	    print(" • google_gemma-3n-E2B-it-Q8_0.gguf")

	# Script entry point: run the investigation only when executed directly,
	# not when this module is imported.
	if __name__ == "__main__":
	    analyze_model_failures()