Spaces:

Luigi
/

rts-commander

Sleeping

File size: 7,831 Bytes

d28c36c

"""
Test amélioré avec formats de prompts optimisés pour modèles spécialisés MCP
"""

import sys
import os
import json
import time
from llama_cpp import Llama

def test_mcp_formats(model_path, model_name):
    """Tester différents formats de prompt pour les modèles MCP"""
    
    print(f"\n🧪 TEST AVEC FORMATS OPTIMISÉS: {model_name}")
    print("=" * 60)
    
    if not os.path.exists(model_path):
        print(f"❌ Fichier modèle non trouvé: {model_path}")
        return
    
    try:
        # Initialiser avec différents paramètres pour modèles MCP
        llm = Llama(
            model_path=model_path,
            n_ctx=2048,  # Plus grand contexte pour modèles MCP
            n_threads=1,
            verbose=False,
            n_gpu_layers=0,
            temperature=0.3,  # Température légèrement plus élevée
            top_p=0.9
        )
        
        test_command = "show game state"
        
        # Formats de prompt optimisés basés sur les standards MCP
        prompt_formats = [
            {
                'name': 'Format MCP structuré complet',
                'template': '''[INST] You are an MCP (Model Context Protocol) assistant for an RTS game. Your task is to respond ONLY with JSON format.

Available tools:
- get_game_state(): Returns current game state information
- move_units(unit_ids, target_x, target_y): Move specified units to coordinates
- attack_unit(attacker_ids, target_id): Attack target unit with attackers
- build_building(building_type, position_x, position_y): Construct building at position

User command: "{command}"

Respond with ONLY JSON in this format:
{{"tool": "tool_name", "args": {{<parameters>}}}}

Do not include any explanations, comments, or additional text. Only JSON. [/INST]'''
            },
            {
                'name': 'Format MCP instruct simple',
                'template': '''<|system|>
You are an MCP assistant for an RTS game. Respond with ONLY JSON.
Available tools: get_game_state, move_units, attack_unit, build_building
</|system|>
<|user|>
{command}
</|user|>
<|assistant|>'''
            },
            {
                'name': 'Format JSON strict',
                'template': '''Your role: MCP assistant for RTS game
Tools: get_game_state(), move_units(), attack_unit(), build_building()
Command: {command}

JSON Response:'''
            },
            {
                'name': 'Format conversation MCP',
                'template': '''User: {command}

Assistant: {{"tool": "get_game_state", "args": {{}}}}'''
            },
            {
                'name': 'Format code-oriented',
                'template': '''// MCP assistant for RTS game
// Available functions:
// - get_game_state()
// - move_units(unit_ids, target_x, target_y) 
// - attack_unit(attacker_ids, target_id)
// - build_building(building_type, position_x, position_y)

// User: {command}
// Response (JSON only):'''
            }
        ]
        
        for i, format_info in enumerate(prompt_formats, 1):
            print(f"\n{i}. {format_info['name']}:")
            
            prompt = format_info['template'].format(command=test_command)
            
            try:
                # Essayer avec différents paramètres
                response = llm(
                    prompt,
                    max_tokens=100,
                    temperature=0.3,
                    stop=["</s>", "<|im_end|>", "```", "\n\n", "User:"]
                )
                
                # Extraire la réponse selon le format retourné
                if hasattr(response, 'choices') and len(response.choices) > 0:
                    response_text = response.choices[0].text.strip()
                elif isinstance(response, dict) and 'choices' in response:
                    response_text = response['choices'][0]['text'].strip()
                else:
                    response_text = str(response)
                
                print(f"   Prompt (début): {prompt[:80]}...")
                print(f"   Réponse: {response_text[:150]}")
                
                # Vérifier JSON
                try:
                    parsed = json.loads(response_text)
                    print(f"   ✅ JSON VALIDE: {parsed}")
                except json.JSONDecodeError:
                    # Essayer d'extraire JSON
                    import re
                    json_match = re.search(r'\{[^{}]*\{[^{}]*\}[^{}]*\}|\{[^{}]*\}', response_text)
                    if json_match:
                        try:
                            parsed = json.loads(json_match.group())
                            print(f"   ✅ JSON EXTRACTÉ: {parsed}")
                        except:
                            print(f"   ❌ JSON INVALIDE")
                    else:
                        print(f"   ❌ PAS DE JSON DÉTECTÉ")
                        
            except Exception as e:
                print(f"   ❌ Erreur: {e}")
                
    except Exception as e:
        print(f"❌ Erreur de chargement: {e}")

def test_model_capabilities(model_path, model_name):
    """Tester les capacités de base du modèle"""
    
    print(f"\n🔍 TEST DE CAPACITÉS: {model_name}")
    print("=" * 60)
    
    if not os.path.exists(model_path):
        print(f"❌ Fichier modèle non trouvé: {model_path}")
        return
    
    try:
        llm = Llama(
            model_path=model_path,
            n_ctx=2048,
            n_threads=1,
            verbose=False,
            n_gpu_layers=0
        )
        
        # Test de génération de base
        test_prompts = [
            "Hello",
            "What is your name?",
            "Write JSON",
            "{\"test\": \"value\"}"
        ]
        
        for prompt in test_prompts:
            try:
                response = llm(
                    prompt,
                    max_tokens=20,
                    temperature=0.1
                )
                response_text = response['choices'][0]['text'].strip()
                print(f"   '{prompt}' → '{response_text}'")
            except Exception as e:
                print(f"   '{prompt}' → Erreur: {e}")
                
    except Exception as e:
        print(f"❌ Erreur de chargement: {e}")

def main():
    """Run the capability and prompt-format tests on each known model file.

    Model files are looked up by relative path; a missing file is reported
    and skipped. A fixed summary of known problems is printed at the end.
    """
    wide_rule = "=" * 70

    print("🚀 TEST AMÉLIORÉ DES MODÈLES SPÉCIALISÉS MCP")
    print(wide_rule)

    candidates = [
        {
            'name': 'MCP-Instruct-v1',
            'path': 'mcp-instruct-v1.Q4_K_M.gguf',
            'type': 'mcp_specialized',
        },
        {
            'name': 'MCPR L-3B-Exa',
            'path': 'mcprl-3b-exa.Q2_K.gguf',
            'type': 'mcp_specialized',
        },
        {
            'name': 'Gemma-3n-E2B-it',
            'path': 'gemma-3n-E2B-it-UD-IQ2_XXS.gguf',
            'type': 'mcp_specialized',
        },
        {
            'name': 'Qwen2.5-Coder-0.5B (référence)',
            'path': 'qwen2.5-coder-0.5b-instruct-q4_0.gguf',
            'type': 'code_specialized',
        },
    ]

    for entry in candidates:
        gguf_path = entry['path']
        display_name = entry['name']

        # Skip models that are not present in the working directory.
        if not os.path.exists(gguf_path):
            print(f"\n❌ Modèle non trouvé: {gguf_path}")
            continue

        test_model_capabilities(gguf_path, display_name)
        test_mcp_formats(gguf_path, display_name)

    # Static recap text; it is not derived from the runs above.
    recap = """
1. **MCP-Instruct-v1**: Erreur technique (llama_decode=-1) - probablement incompatible
2. **MCPR L-3B-Exa**: Génération de texte corrompu - mauvaise quantisation ou format
3. **Gemma-3n-E2B-it**: Réponses vides - format de prompt incompatible
4. **Modèles code-spécialisés**: Fonctionnent mieux car habitués au JSON
    """

    print("\n" + wide_rule)
    print("📊 RÉCAPITULATIF DES PROBLÈMES:")
    print(recap)

# Run the tests only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()