"""
Diagnostic des échecs des modèles spécialisés MCP
Analyse pourquoi les modèles plus grands et spécialisés échouent
"""

import sys
import os

# Models known to misbehave, with the symptoms observed for each one.
_PROBLEMATIC_MODELS = [
    {
        'name': 'MCP-Instruct-v1',
        'path': 'mcp-instruct-v1.Q4_K_M.gguf',
        'issues': ['llama_decode returned -1'],
    },
    {
        'name': 'MCPR L-3B-Exa',
        'path': 'mcprl-3b-exa.Q2_K.gguf',
        'issues': ['texte corrompu', 'caractères spéciaux', 'sortie incohérente'],
    },
    {
        'name': 'Gemma-3n-E2B-it',
        'path': 'gemma-3n-E2B-it-UD-IQ2_XXS.gguf',
        'issues': ['réponses vides', 'pas de sortie'],
    },
]

# Model used for the live prompt-format test (the one that emits corrupted text).
_TEST_MODEL = 'mcprl-3b-exa.Q2_K.gguf'

# Alternative prompt layouts to try. Each template is rendered with
# str.format(command=...), so literal braces are doubled.
_PROMPT_FORMATS = [
    {
        'name': 'Format actuel',
        'template': '''You are an AI assistant for an RTS game using MCP (Model Context Protocol).

Available tools:
- get_game_state()
- move_units(unit_ids, target_x, target_y)
- attack_unit(attacker_ids, target_id)
- build_building(building_type, position_x, position_y)

User command: "{command}"

Respond with JSON only: {{"tool": "tool_name", "args": {{}}}}'''
    },
    {
        'name': 'Format MCP structuré',
        'template': '''<|im_start|>system
You are an AI assistant for an RTS game using the Model Context Protocol (MCP). You must respond with only JSON following this exact format:
{{"tool": "tool_name", "args": {{<arguments>}}}}

Available tools:
- get_game_state(): Get current game state
- move_units(unit_ids, target_x, target_y): Move units to coordinates
- attack_unit(attacker_ids, target_id): Attack a specific unit  
- build_building(building_type, position_x, position_y): Construct a building

Always respond with valid JSON, no additional text.<|im_end|>
<|im_start|>user
{command}<|im_end|>
<|im_start|>assistant
'''
    },
    {
        'name': 'Format simple JSON',
        'template': '''User: {command}

Tools: get_game_state, move_units, attack_unit, build_building

Response (JSON only):'''
    },
]


def _print_model_inventory(models):
    """Print the file size and the recorded symptoms of each model in *models*."""
    print("\n📋 ANALYSE DES PROBLÈMES IDENTIFIÉS:")

    for model in models:
        print(f"\n🧩 {model['name']}:")

        # A single stat call both detects a missing file and yields its size,
        # avoiding the exists()/getsize() race.
        try:
            size_bytes = os.path.getsize(model['path'])
        except OSError:
            print(f"   ❌ Fichier modèle non trouvé: {model['path']}")
            continue

        print(f"   📏 Taille: {size_bytes / (1024 * 1024):.1f} MB")

        print("   🔧 Problèmes détectés:")
        for issue in model['issues']:
            print(f"      • {issue}")


def _print_potential_causes():
    """Print the static list of hypotheses explaining the failures."""
    print("\n🔍 CAUSES POTENTIELLES DES ÉCHECS:")
    print("   1. FORMAT DE PROMPT INADÉQUAT")
    print("      • Les modèles spécialisés MCP peuvent attendre un format de prompt différent")
    print("      • Format actuel trop simple vs format de formation MCP spécialisé")
    print("      • Besoin de tokens spéciaux ou de formatage MCP spécifique")

    print("\n   2. PROBLÈMES DE CHARGEMENT DU MODÈLE")
    print("      • llama_decode=-1: Erreur de décodeur LLAMA")
    print("      • Modèles potentiellement incompatibles avec llama-cpp-python")
    print("      • Paramètres d'initialisation incorrects")

    print("\n   3. CONTEXTE ET TOKENS SPÉCIAUX")
    print("      • Les modèles MCP peuvent nécessiter des tokens de début/fin spécifiques")
    print("      • Format d'entrée différent de l'attente du modèle")
    print("      • Besoin de format de conversation MCP structuré")

    print("\n   4. QUANTISATION PROBLÉMATIQUE")
    print("      • Q2_K pour MCPR L-3B-Exa peut être trop agressif")
    print("      • IQ2_XXS pour Gemma-3n-E2B-it: très forte compression")
    print("      • Perte de capacité avec quantisation extrême")


def _run_prompt_format_tests(model_path, prompt_formats, test_command="show game state"):
    """Load *model_path* and print its answer to *test_command* for each prompt format.

    Every failure (missing file, llama-cpp-python not installed, model load
    error, inference error) is reported on stdout and never propagated, so the
    rest of the diagnostic can still run.
    """
    import json

    print("\n🚀 TEST DE DIAGNOSTIC AVEC PROMPTS ALTERNATIFS")
    print("=" * 70)

    if not os.path.exists(model_path):
        # Report the skip explicitly instead of silently printing nothing.
        print(f"\n   ❌ Fichier modèle non trouvé: {model_path} (test ignoré)")
        return

    print(f"\n🧪 Test avec {model_path}...")

    # Imported lazily and guarded: the dependency is only needed for this live
    # test, and its absence must not abort the whole report.
    try:
        from llama_cpp import Llama
    except ImportError as e:
        print(f"   ❌ llama-cpp-python indisponible: {e}")
        return

    # Broad catch is deliberate: this is a diagnostic boundary and any load
    # failure should be shown to the user rather than crash the script.
    try:
        llm = Llama(
            model_path=model_path,
            n_ctx=1024,
            n_threads=1,
            verbose=False,
            n_gpu_layers=0,
        )
    except Exception as e:
        print(f"   ❌ Impossible de charger le modèle: {e}")
        return

    for format_info in prompt_formats:
        print(f"\n   📝 Format: {format_info['name']}")

        # One failing format must not prevent the remaining ones from running.
        try:
            prompt = format_info['template'].format(command=test_command)
            response = llm(
                prompt,
                max_tokens=50,
                temperature=0.1,
                stop=["</s>", "<|im_end|>", "```"],
            )
            response_text = response['choices'][0]['text'].strip()
        except Exception as e:
            print(f"      ❌ Erreur: {e}")
            continue

        print(f"      Prompt (premiers 100 caractères): {prompt[:100]}...")
        print(f"      Réponse: {response_text[:200]}")

        # Only a parse failure means "invalid JSON"; a bare except here would
        # also swallow KeyboardInterrupt/SystemExit.
        try:
            json.loads(response_text)
        except json.JSONDecodeError:
            print("      ❌ JSON INVALIDE")
        else:
            print("      ✅ JSON VALIDE!")


def _print_recommendations():
    """Print the static list of suggested remediation steps."""
    print("\n💡 RECOMMANDATIONS POUR CORRIGER:")
    print("   1. Essayer différentes versions de quantisation (Q4_K_M, Q8_0)")
    print("   2. Utiliser des formats de prompt structurés avec tokens MCP")
    print("   3. Augmenter la température (0.3-0.5) pour plus de variabilité")
    print("   4. Tester avec plus de tokens max (200-300)")
    print("   5. Vérifier la compatibilité avec llama-cpp-python")
    print("   6. Utiliser des modèles de taille similaire mais avec moins de compression")


def diagnose_model_failures() -> None:
    """Print a diagnostic report about the failing specialized MCP models.

    The report, written to stdout, consists of:

    1. the size on disk and the recorded symptoms of each problematic model
       (model paths are resolved relative to the current working directory);
    2. a list of potential causes;
    3. a live test of several prompt formats against one model, when both the
       model file and llama-cpp-python are available;
    4. a list of recommendations.

    The function returns nothing. Missing files, a missing llama-cpp-python
    install, and model load or inference errors are reported in the output
    rather than raised, so the recommendations are always printed.
    """
    print("🔍 DIAGNOSTIC DES ÉCHECS DES MODÈLES SPÉCIALISÉS MCP")
    print("=" * 70)

    _print_model_inventory(_PROBLEMATIC_MODELS)
    _print_potential_causes()
    _run_prompt_format_tests(_TEST_MODEL, _PROMPT_FORMATS)
    _print_recommendations()

# Run the diagnostic only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    diagnose_model_failures()