#!/usr/bin/env python3
"""
Test the Q8_0 models to see whether the higher-quality quantization
resolves the problems seen with the earlier builds.
"""
import os
import time
import json
import re


def test_q8_models():
    """Test the Q8_0 models with different prompt formats."""
    print("🚀 Q8_0 MODEL TEST - IMPROVED FORMATS")
    print("=" * 70)

    # Downloaded Q8_0 models
    models = [
        {
            'name': 'MCP-Instruct-v1-Q8',
            'path': 'mcp-instruct-v1.Q8_0.gguf',
            'format': 'mcp_structured',
            'n_ctx': 4096
        },
        {
            'name': 'MCPR-L-3B-Exa-Q8',
            'path': 'mcprl-3b-exa.Q8_0.gguf',
            'format': 'instruct',
            'n_ctx': 4096
        },
        {
            'name': 'Gemma-3n-E2B-it-Q8',
            'path': 'google_gemma-3n-E2B-it-Q8_0.gguf',
            'format': 'code',
            'n_ctx': 4096
        }
    ]

    # Optimized prompt templates. They contain literal JSON braces, so the
    # command is substituted with str.replace() below; str.format() would
    # try to interpret {"tool": ...} as format fields and raise.
    prompt_templates = {
        'mcp_structured': '''<|im_start|>system
You are an MCP assistant for an RTS game. Respond with ONLY JSON format: {"tool": "tool_name", "args": {}}

Available tools:
- get_game_state(): Get current game state
- move_units(unit_ids, target_x, target_y): Move units to coordinates
- attack_unit(attacker_ids, target_id): Attack a specific unit
- build_building(building_type, position_x, position_y): Construct a building

Always respond with valid JSON, no additional text.<|im_end|>
<|im_start|>user
{command}<|im_end|>
<|im_start|>assistant
''',
        'instruct': '''[INST] You are an MCP assistant. Respond with ONLY JSON format: {"tool": "tool_name", "args": {}}

Available tools:
- get_game_state(): Get current game state
- move_units(unit_ids, target_x, target_y): Move units to coordinates
- attack_unit(attacker_ids, target_id): Attack a specific unit
- build_building(building_type, position_x, position_y): Construct a building

User command: {command}

Respond with only JSON: [/INST]
''',
        'code': '''// MCP assistant for RTS game
// Tools: get_game_state(), move_units(), attack_unit(), build_building()
// Command: {command}
// Output JSON only: {"tool": "tool_name", "args": {}}
'''
    }

    test_command = "show game state"

    for model in models:
        print(f"\n🧪 TEST: {model['name']}")
        print("-" * 50)

        if not os.path.exists(model['path']):
            print(f"   ❌ File not found: {model['path']}")
            continue

        file_size_mb = os.path.getsize(model['path']) / (1024 * 1024)
        print(f"   📏 Size: {file_size_mb:.1f} MB")
        print(f"   🔧 Format: {model['format']}")
        print(f"   🧠 Context: {model['n_ctx']} tokens")

        try:
            from llama_cpp import Llama

            llm = Llama(
                model_path=model['path'],
                n_ctx=model['n_ctx'],
                n_threads=1,
                verbose=False,
                n_gpu_layers=0
            )

            # str.replace() instead of str.format(); see note on the
            # templates above.
            prompt = prompt_templates[model['format']].replace('{command}', test_command)
            print(f"   📝 Prompt (start): {prompt[:80]}...")

            start_time = time.time()

            try:
                response = llm(
                    prompt,
                    max_tokens=100,
                    temperature=0.3,
                    stop=["</s>", "<|im_end|>", "```", "\n\n"]
                )

                response_time = time.time() - start_time

                # Handle the response (compatible with different llama-cpp versions)
                if isinstance(response, dict) and 'choices' in response:
                    response_text = response['choices'][0]['text'].strip()
                elif hasattr(response, '__iter__') and not isinstance(response, str):
                    # Handle streamed responses
                    response_text = ""
                    for chunk in response:
                        if isinstance(chunk, dict) and 'choices' in chunk:
                            response_text += chunk['choices'][0]['text']
                    response_text = response_text.strip()
                else:
                    response_text = str(response)

                print(f"   ⏱️ Time: {response_time:.2f}s")
                print(f"   📄 Response: {response_text[:200]}")

                # Validate JSON
                try:
                    parsed = json.loads(response_text)
                    print(f"   ✅ VALID JSON: {json.dumps(parsed, indent=2)}")

                    # Assess relevance
                    if 'tool' in parsed:
                        print(f"   🎯 Tool identified: {parsed['tool']}")
                    if 'args' in parsed:
                        print(f"   📋 Arguments: {parsed['args']}")
                except json.JSONDecodeError:
                    print("   ❌ INVALID JSON")

                    # Try to salvage a JSON object from the surrounding text
                    # (the regex handles at most one level of nesting)
                    json_match = re.search(
                        r'\{[^{}]*\{[^{}]*\}[^{}]*\}|\{[^{}]*\}',
                        response_text
                    )
                    if json_match:
                        try:
                            extracted_json = json.loads(json_match.group())
                            print(f"   🔍 Extracted JSON: "
                                  f"{json.dumps(extracted_json, indent=2)}")
                        except json.JSONDecodeError:
                            print("   ❌ Could not extract valid JSON")

            except Exception as e:
                print(f"   ❌ Generation error: {e}")

        except Exception as e:
            print(f"   ❌ Load error: {e}")

    print(f"\n{'=' * 70}")
    print("🎯 COMPARISON WITH PREVIOUS VERSIONS")
    print("=" * 70)

    print("\n📊 PREVIOUS VERSION RESULTS:")
    print("   • MCP-Instruct-v1 (Q4_K_M): technical error (llama_decode=-1)")
    print("   • MCPR-L-3B-Exa (Q2_K): corrupted text with stray special characters")
    print("   • Gemma-3n-E2B-it (IQ2_XXS): empty responses, no output")

    print("\n📈 EXPECTATIONS FOR Q8_0:")
    print("   • Better quantization quality")
    print("   • Less text corruption")
    print("   • More coherent responses")
    print("   • Valid JSON output becomes achievable")


if __name__ == "__main__":
    test_q8_models()
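

# ---------------------------------------------------------------------------
# Follow-up sketch (not called by the test above): if the Q8_0 models still
# emit malformed JSON, llama-cpp-python can constrain decoding itself via the
# OpenAI-style "JSON mode" of create_chat_completion(). This is a minimal
# sketch, assuming a llama-cpp-python version recent enough to support
# response_format={"type": "json_object"} and a GGUF file whose metadata
# carries a usable chat template; test_json_mode and its arguments are
# hypothetical names, and model_path is whichever file listed above you
# want to probe.
# ---------------------------------------------------------------------------
def test_json_mode(model_path: str, command: str = "show game state"):
    """Constrain generation to syntactically valid JSON via JSON mode."""
    from llama_cpp import Llama

    llm = Llama(
        model_path=model_path,
        n_ctx=4096,
        n_threads=1,
        verbose=False,
        n_gpu_layers=0
    )

    response = llm.create_chat_completion(
        messages=[
            {
                "role": "system",
                "content": (
                    'You are an MCP assistant for an RTS game. Respond with '
                    'ONLY JSON format: {"tool": "tool_name", "args": {}}'
                )
            },
            {"role": "user", "content": command}
        ],
        # Grammar-constrained sampling: the model can only emit valid JSON
        response_format={"type": "json_object"},
        max_tokens=100,
        temperature=0.3
    )

    text = response['choices'][0]['message']['content']
    # JSON mode guarantees syntax, not schema: the presence and shape of
    # "tool" and "args" still need to be checked by the caller.
    return json.loads(text)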