#!/usr/bin/env python3
"""
Test the Q8_0 models to check whether the better quantization resolves the earlier problems
"""

import os
import time
import json
import re

def test_q8_models():
    """Tester les modèles Q8_0 avec différents formats de prompt"""
    
    print("🚀 TEST DES MODÈLES Q8_0 - FORMATS AMÉLIORÉS")
    print("=" * 70)
    
    # Downloaded Q8_0 models
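    # Each entry's 'format' key selects the matching template in prompt_templates below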
    models = [
        {
            'name': 'MCP-Instruct-v1-Q8',
            'path': 'mcp-instruct-v1.Q8_0.gguf',
            'format': 'mcp_structured',
            'n_ctx': 4096
        },
        {
            'name': 'MCPR-L-3B-Exa-Q8', 
            'path': 'mcprl-3b-exa.Q8_0.gguf',
            'format': 'instruct',
            'n_ctx': 4096
        },
        {
            'name': 'Gemma-3n-E2B-it-Q8',
            'path': 'google_gemma-3n-E2B-it-Q8_0.gguf',
            'format': 'code',
            'n_ctx': 4096
        }
    ]
    
    # Optimized prompt templates
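    # 'mcp_structured' uses ChatML-style <|im_start|> markers, 'instruct' uses Llama/Mistral-style [INST] tags,
    # and 'code' frames the request as source-code comments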
    prompt_templates = {
        'mcp_structured': '''<|im_start|>system
You are an MCP assistant for an RTS game. Respond with ONLY JSON format:
{"tool": "tool_name", "args": {}}

Available tools:
- get_game_state(): Get current game state
- move_units(unit_ids, target_x, target_y): Move units to coordinates
- attack_unit(attacker_ids, target_id): Attack a specific unit
- build_building(building_type, position_x, position_y): Construct a building

Always respond with valid JSON, no additional text.<|im_end|>
<|im_start|>user
{command}<|im_end|>
<|im_start|>assistant
''',
        
        'instruct': '''[INST] You are an MCP assistant. Respond with ONLY JSON format:
{"tool": "tool_name", "args": {}}

Available tools:
- get_game_state(): Get current game state
- move_units(unit_ids, target_x, target_y): Move units to coordinates
- attack_unit(attacker_ids, target_id): Attack a specific unit
- build_building(building_type, position_x, position_y): Construct a building

User command: {command}

Respond with only JSON: [/INST]
''',
        
        'code': '''// MCP assistant for RTS game
// Tools: get_game_state(), move_units(), attack_unit(), build_building()
// Command: {command}
// Output JSON only:
{"tool": "tool_name", "args": {}}'''
    }
    
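    # Single natural-language test command; the expected tool call is get_game_state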
    test_command = "show game state"
    
    for model in models:
        print(f"\n🧪 TEST: {model['name']}")
        print("-" * 50)
        
        if not os.path.exists(model['path']):
            print(f"   ❌ Fichier non trouvé: {model['path']}")
            continue
        
        file_size_mb = os.path.getsize(model['path']) / (1024*1024)
        print(f"   📏 Taille: {file_size_mb:.1f} MB")
        print(f"   🔧 Format: {model['format']}")
        print(f"   🧠 Contexte: {model['n_ctx']} tokens")
        
        try:
            from llama_cpp import Llama
            
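            # Load the GGUF model on CPU only (n_gpu_layers=0), single-threaded, with llama.cpp logging silenced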
            llm = Llama(
                model_path=model['path'],
                n_ctx=model['n_ctx'],
                n_threads=1,
                verbose=False,
                n_gpu_layers=0
            )
            
            # The templates contain literal JSON braces, which break str.format(), so substitute the placeholder directly
            prompt = prompt_templates[model['format']].replace('{command}', test_command)
            
            print(f"   📝 Prompt (début): {prompt[:80]}...")
            
            start_time = time.time()
            
            try:
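                # Low temperature (0.3) to reduce randomness; the stop markers cut generation at common end-of-turn tokens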
                response = llm(
                    prompt,
                    max_tokens=100,
                    temperature=0.3,
                    stop=["</s>", "<|im_end|>", "```", "\n\n"]
                )
                
                response_time = time.time() - start_time
                
                # Handle the response (compatible across llama-cpp-python versions)
                if isinstance(response, dict) and 'choices' in response:
                    response_text = response['choices'][0]['text'].strip()
                elif hasattr(response, '__iter__') and not isinstance(response, str):
                    # Handle streamed responses chunk by chunk
                    response_text = ""
                    for chunk in response:
                        if isinstance(chunk, dict) and 'choices' in chunk:
                            response_text += chunk['choices'][0]['text']
                else:
                    response_text = str(response)
                
                print(f"   ⏱️  Temps: {response_time:.2f}s")
                print(f"   📄 Réponse: {response_text[:200]}")
                
                # Validate JSON
                try:
                    parsed = json.loads(response_text)
                    print(f"   ✅ JSON VALIDE: {json.dumps(parsed, indent=2)}")
                    
                    # Évaluer la pertinence
                    if 'tool' in parsed:
                        print(f"   🎯 Outil identifié: {parsed['tool']}")
                    if 'args' in parsed:
                        print(f"   📋 Arguments: {parsed['args']}")
                        
                except json.JSONDecodeError:
                    print(f"   ❌ JSON INVALIDE")
                    # Essayer d'extraire JSON
                    import re
                    json_match = re.search(r'\{[^{}]*\{[^{}]*\}[^{}]*\}|\{[^{}]*\}', response_text)
                    if json_match:
                        try:
                            extracted_json = json.loads(json_match.group())
                            print(f"   🔍 JSON extrait: {json.dumps(extracted_json, indent=2)}")
                        except:
                            print(f"   ❌ Impossible d'extraire JSON valide")
                            
            except Exception as e:
                print(f"   ❌ Erreur génération: {e}")
                
        except Exception as e:
            print(f"   ❌ Erreur chargement: {e}")
    
    print(f"\n{'='*70}")
    print("🎯 COMPARAISON AVEC ANCIENNES VERSIONS")
    print("=" * 70)
    
    print("\n📊 RÉSULTATS ANCIENNES VERSIONS:")
    print("   • MCP-Instruct-v1 (Q4_K_M): Erreur technique (llama_decode=-1)")
    print("   • MCPR L-3B-Exa (Q2_K): Texte corrompu avec caractères spéciaux")
    print("   • Gemma-3n-E2B-it (IQ2_XXS): Réponses vides, pas de sortie")
    
    print("\n📈 ATTENTES POUR Q8_0:")
    print("   • Meilleure qualité de quantisation")
    print("   • Moins de corruption de texte")
    print("   • Réponses plus cohérentes")
    print("   • JSON valide possible")

if __name__ == "__main__":
    test_q8_models()