"""
Detailed investigation of the failures of the specialised MCP models.
Analyses why MCP-Instruct-v1, Gemma-3n-E2B-it and MCPR L-3B-Exa fail.
"""

import sys
import os
import json
import time
from llama_cpp import Llama

# Prompt sent to the known-working model during the comparison run.
# NOTE(review): the JSON example on the last line closes three braces but only
# opens two. The text is kept verbatim here — confirm whether that is intended
# before changing what the model is shown.
_MCP_COMPARISON_PROMPT = '''You are an AI assistant for an RTS game using MCP (Model Context Protocol).

Available tools:
- get_game_state()
- move_units(unit_ids, target_x, target_y)
- attack_unit(attacker_ids, target_id)
- build_building(building_type, position_x, position_y)

User command: "show game state"

Respond with JSON only: {"tool": "tool_name", "args": {}}}'''


def _extract_response_text(response: object) -> str:
    """Return the text of the first choice of a completion result.

    Supports attribute-style results (``response.choices[0].text``) and
    dict-style results (``response['choices'][0]['text']``). Any other shape,
    including a result whose list of choices is empty, falls back to
    ``str(response)`` so the caller always receives a string.
    """
    attr_choices = getattr(response, 'choices', None)
    if attr_choices:
        first = attr_choices[0]
        return str(first.text) if hasattr(first, 'text') else str(first)

    if isinstance(response, dict) and response.get('choices'):
        first = response['choices'][0]
        text = first.get('text') if isinstance(first, dict) else None
        # A missing (or None) text field degrades to the choice's repr.
        return str(first) if text is None else str(text)

    return str(response)


def _is_valid_json(text: str) -> bool:
    """Return True when *text* parses as a JSON document, False otherwise."""
    try:
        json.loads(text)
    except (TypeError, ValueError):
        # json.JSONDecodeError is a ValueError subclass; catching only these
        # lets KeyboardInterrupt / SystemExit propagate.
        return False
    return True


def _load_model(model_path: str, n_ctx: int):
    """Create a ``Llama`` instance for *model_path* with the script's settings.

    The settings are a single thread, non-verbose, ``n_gpu_layers=0``, and
    the given context size.
    """
    return Llama(
        model_path=model_path,
        n_ctx=n_ctx,
        n_threads=1,
        verbose=False,
        n_gpu_layers=0,
    )


def _probe_problematic_model(model: dict) -> None:
    """Print the known facts about one failing model, then try to load and run it.

    *model* is a dict with the keys ``name``, ``path``, ``size_mb``,
    ``issues`` (list of strings) and ``hypothesis``. When the model file is
    missing only the summary and a "not found" line are printed.

    The model object is local to this helper, so no reference to it outlives
    the call and it can be reclaimed before the next model is loaded.
    """
    print(f"\n🧩 {model['name']}:")
    print(f"   📏 Taille: {model['size_mb']:.1f} MB")
    print(f"   🔧 Problèmes: {', '.join(model['issues'])}")
    print(f"   💭 Hypothèse: {model['hypothesis']}")

    if not os.path.exists(model['path']):
        print("   ❌ Fichier non trouvé")
        return

    # Basic load test. Any failure is reported rather than raised: this is a
    # diagnostic script whose purpose is to surface whatever goes wrong.
    print("   🔄 Test de chargement...")
    try:
        llm = _load_model(model['path'], n_ctx=2048)
    except Exception as e:
        print(f"   ❌ Erreur chargement: {e}")
        return
    print("   ✅ Chargement réussi")

    # Minimal generation test with a trivial prompt.
    print("   🧪 Test de génération basique...")
    try:
        response = llm("Hello", max_tokens=10, temperature=0.1)
        response_text = _extract_response_text(response)
        print(f"   📝 Réponse: '{response_text.strip()}'")
    except Exception as e:
        print(f"   ❌ Erreur génération: {e}")


def _compare_with_working_model(model_path: str) -> None:
    """Run the MCP prompt on the reference model and report whether it answers with JSON.

    Does nothing when *model_path* does not exist. Load or generation errors
    are printed, not raised.
    """
    if not os.path.exists(model_path):
        return

    print("\n✅ Qwen2.5-Coder-0.5B (fonctionne):")
    try:
        llm = _load_model(model_path, n_ctx=1024)
        response = llm(_MCP_COMPARISON_PROMPT, max_tokens=50, temperature=0.1)
        response_text = _extract_response_text(response)
    except Exception as e:
        print(f"   ❌ Erreur: {e}")
        return

    print(f"   📝 Réponse: {response_text[:100]}...")
    if _is_valid_json(response_text):
        print("   ✅ JSON valide")
    else:
        print("   ❌ JSON invalide")


def _print_conclusions() -> None:
    """Print the hard-coded conclusions, action plan and download list."""
    print("\n🎯 CONCLUSIONS ET RECOMMANDATIONS:")
    print("=" * 70)

    print("\n1. MCP-INSTRUCT-V1 (Q4_K_M):")
    print("   ❌ Problème: Erreur technique 'llama_decode returned -1'")
    print("   💡 Solution: Essayer version Q8_0 ou vérifier compatibilité llama-cpp-python")

    print("\n2. MCPR L-3B-EXA (Q2_K):")
    print("   ❌ Problème: Texte corrompu avec caractères spéciaux")
    print("   💡 Solution: Quantisation Q2_K trop agressive, essayer Q4_K_M ou Q8_0")

    print("\n3. GEMMA-3N-E2B-IT (IQ2_XXS):")
    print("   ❌ Problème: Réponses vides, modèle ne répond pas")
    print("   💡 Solution: Quantisation IQ2_XXS extrême, essayer version moins compressée")

    print("\n4. POURQUOI LES MODÈLES CODE FONCTIONNENT MIEUX:")
    print("   ✅ Habitués au format JSON et aux structures de données")
    print("   ✅ Meilleure compréhension des formats structurés")
    print("   ✅ Formation sur du code et des données techniques")

    print("\n🚀 PLAN D'ACTION:")
    print("   1. Télécharger versions Q8_0 des modèles problématiques")
    print("   2. Tester avec formats de prompts MCP spécifiques")
    print("   3. Augmenter contexte (n_ctx) pour modèles plus grands")
    print("   4. Utiliser température légèrement plus élevée (0.3)")

    print("\n📋 MODÈLES À TÉLÉCHARGER:")
    print("   • mcp-instruct-v1.Q8_0.gguf")
    print("   • mcprl-3b-exa.Q8_0.gguf")
    print("   • google_gemma-3n-E2B-it-Q8_0.gguf")


def analyze_model_failures() -> None:
    """Investigate in detail why the specialised MCP models fail.

    For each known-problematic model this prints its size, observed issues
    and working hypothesis, then (if the GGUF file exists in the current
    directory) tries to load it and generate a few tokens. It then runs an
    MCP-style prompt on a known-working model, if present, and checks whether
    the reply is valid JSON. Finally it prints fixed conclusions and
    recommendations. All results go to standard output; nothing is returned.
    """
    print("🔍 INVESTIGATION DÉTAILLÉE DES ÉCHECS MCP")
    print("=" * 70)

    # Problematic models together with the observed symptoms and hypothesis.
    problematic_models = [
        {
            'name': 'MCP-Instruct-v1',
            'path': 'mcp-instruct-v1.Q4_K_M.gguf',
            'size_mb': 697.0,
            'issues': ['llama_decode returned -1'],
            'hypothesis': 'Incompatibilité technique avec llama-cpp-python',
        },
        {
            'name': 'MCPR L-3B-Exa',
            'path': 'mcprl-3b-exa.Q2_K.gguf',
            'size_mb': 1215.7,
            'issues': ['texte corrompu', 'caractères spéciaux', 'sortie incohérente'],
            'hypothesis': 'Quantisation Q2_K trop agressive pour ce modèle',
        },
        {
            'name': 'Gemma-3n-E2B-it',
            'path': 'gemma-3n-E2B-it-UD-IQ2_XXS.gguf',
            'size_mb': 1958.3,
            'issues': ['réponses vides', 'pas de sortie'],
            'hypothesis': 'Format de prompt incompatible avec le modèle',
        },
    ]

    # Detailed technical analysis, one model at a time.
    print("\n📊 ANALYSE TECHNIQUE DÉTAILLÉE:")
    for model in problematic_models:
        _probe_problematic_model(model)

    # Comparison against a model that is known to work.
    print("\n🔍 COMPARAISON AVEC MODÈLE FONCTIONNEL:")
    _compare_with_working_model('qwen2.5-coder-0.5b-instruct-q4_0.gguf')

    # Conclusions and recommendations.
    _print_conclusions()

# Run the investigation only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    analyze_model_failures()