openfree committed on
Commit
011f941
·
verified ·
1 Parent(s): 55a69cd

Create app-backup.py

Browse files
Files changed (1) hide show
  1. app-backup.py +1718 -0
app-backup.py ADDED
@@ -0,0 +1,1718 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import json
4
+ import random
5
+ from datetime import datetime
6
+ import pandas as pd
7
+ from typing import Dict, List, Tuple, Optional, Generator
8
+ import sqlite3
9
+ from dataclasses import dataclass, asdict
10
+ import hashlib
11
+ import time
12
+ from enum import Enum
13
+ import numpy as np
14
+ import threading
15
+ import queue
16
+ import re
17
+
18
+ # For LLM API integration
19
+ try:
20
+ from openai import OpenAI
21
+ except ImportError:
22
+ print("OpenAI library not installed. Install with: pip install openai")
23
+ OpenAI = None
24
+
25
+ try:
26
+ from gradio_client import Client as GradioClient
27
+ except ImportError:
28
+ print("Gradio client not installed. Install with: pip install gradio_client")
29
+ GradioClient = None
30
+
31
+ # For Hugging Face Dataset integration
32
+ try:
33
+ from huggingface_hub import HfApi, login, create_repo, upload_file, hf_hub_download
34
+ from datasets import Dataset, load_dataset
35
+ import pyarrow.parquet as pq
36
+ import pyarrow as pa
37
+ except ImportError:
38
+ print("Hugging Face libraries not installed. Install with: pip install huggingface_hub datasets pyarrow")
39
+ HfApi = None
40
+ Dataset = None
41
+
42
+ # ==================== Configuration ====================
43
class Category(Enum):
    """Challenge categories for creativity battles.

    The enum *values* are the exact strings persisted in the SQLite
    `battles.category` column and used as keys into PROMPTS.
    """
    STORYTELLING = "storytelling"
    INNOVATION = "innovation"
    BUSINESS = "business"
47
+
48
@dataclass
class Battle:
    """One head-to-head battle between two models on a single prompt.

    Fields mirror the columns of the `battles` SQLite table (see
    ArenaDatabase.save_battle, which stores `category.value` and an
    ISO-formatted `timestamp`).
    """
    id: str                      # unique battle id (primary key of the battles table)
    prompt_id: str               # identifier of the prompt that was used
    prompt_text: str             # full challenge text shown to both models
    model_a: str                 # name of the first contestant model
    model_b: str                 # name of the second contestant model
    response_a: str              # response produced by model_a
    response_b: str              # response produced by model_b
    winner: Optional[str]        # winning model name, or None before a vote is cast
    voter_id: str                # identifier of the voting user
    timestamp: datetime          # when the battle was recorded
    category: Category           # challenge category of this battle
    custom_prompt: bool = False  # True when the prompt was user-supplied, not from PROMPTS
    language: str = "en"         # language code of the prompt/UI ("en" or "ko")
63
+
64
+ # ==================== Language Configurations ====================
65
# Supported UI languages: language code -> display name shown in the selector.
LANGUAGES = {
    "en": "English",
    "ko": "한국어"
}
69
+
70
# All user-facing UI strings, keyed by language code (see LANGUAGES).
# Both language blocks carry the same keys; "categories" and
# "filter_categories" are nested label maps for the category selectors.
UI_TEXT = {
    "en": {
        "title": "🎨 GPT-5 vs jetXA Creativity Battle",
        "subtitle": "Test cutting-edge AI models in creative challenges",
        "battle_tab": "⚔️ Battle Arena",
        "leaderboard_tab": "🏆 Leaderboard",
        "category_label": "Select Category",
        "custom_prompt_label": "✏️ Custom Challenge (Optional)",
        "custom_prompt_placeholder": "Enter your creative challenge for the models...",
        "new_battle_btn": "🎲 Start New Battle",
        "model_a": "### 🅰️ Model A",
        "model_b": "### 🅱️ Model B",
        "vote_a": "🅰️ Model A is more creative",
        "vote_b": "🅱️ Model B is more creative",
        "vote_complete": "### 🎉 Vote Complete!",
        "winner": "Winner",
        "leaderboard_title": "## 🏆 GPT-5 vs jetXA Leaderboard",
        "category_filter": "Category Filter",
        "refresh_btn": "🔄 Refresh",
        "language_label": "Language",
        "contact": "Contact: arxivgpt@gmail.com",
        "challenge_task": "### 📝 Challenge Task",
        "category": "Category",
        "prompt": "Challenge",
        "model_identity": "Model Identity",
        "elo_updated": "Scores have been updated!",
        "generating": "🔄 Generating response...",
        "categories": {
            "random": "🎲 Random",
            "storytelling": "📚 Storytelling",
            "innovation": "💡 Innovation",
            "business": "💼 Business"
        },
        "filter_categories": {
            "overall": "Overall",
            "storytelling": "Storytelling",
            "innovation": "Innovation",
            "business": "Business"
        }
    },
    "ko": {
        "title": "🎨 GPT-5 vs jetXA 창의성 배틀",
        "subtitle": "최첨단 AI 모델들의 창의력 대결",
        "battle_tab": "⚔️ 배틀 아레나",
        "leaderboard_tab": "🏆 리더보드",
        "category_label": "카테고리 선택",
        "custom_prompt_label": "✏️ 커스텀 도전과제 (선택사항)",
        "custom_prompt_placeholder": "모델들에게 도전할 창의적인 과제를 입력하세요...",
        "new_battle_btn": "🎲 새로운 배틀 시작",
        "model_a": "### 🅰️ 모델 A",
        "model_b": "### 🅱️ 모델 B",
        "vote_a": "🅰️ 모델 A가 더 창의적이다",
        "vote_b": "🅱️ 모델 B가 더 창의적이다",
        "vote_complete": "### 🎉 투표 완료!",
        "winner": "승자",
        "leaderboard_title": "## 🏆 GPT-5 vs jetXA 리더보드",
        "category_filter": "카테고리 필터",
        "refresh_btn": "🔄 새로고침",
        "language_label": "언어",
        "contact": "문의: arxivgpt@gmail.com",
        "challenge_task": "### 📝 도전 과제",
        "category": "카테고리",
        "prompt": "도전과제",
        "model_identity": "모델 정체",
        "elo_updated": "점수가 업데이트되었습니다!",
        "generating": "🔄 응답 생성 중...",
        "categories": {
            "random": "🎲 랜덤",
            "storytelling": "📚 스토리텔링",
            "innovation": "💡 혁신/발명",
            "business": "💼 비즈니스"
        },
        "filter_categories": {
            "overall": "전체",
            "storytelling": "스토리텔링",
            "innovation": "혁신/발명",
            "business": "비즈니스"
        }
    }
}
150
+
151
+ # ==================== Simplified Prompt Database ====================
152
# ==================== Simplified Prompt Database ====================
# Built-in challenge prompts: Category -> language code -> list of prompt
# dicts with "text" (the challenge) and "difficulty" metadata.
PROMPTS = {
    Category.STORYTELLING: {
        "en": [
            {"text": "Write a sci-fi movie proposal with a never-before-explored concept", "difficulty": "high"},
            {"text": "Create a story where the protagonists never meet but fall deeply in love", "difficulty": "high"},
            {"text": "Design a thriller where the twist is revealed in the first scene but still surprises at the end", "difficulty": "high"}
        ],
        "ko": [
            {"text": "한 번도 다뤄지지 않은 소재로 SF 영화 기획안을 작성하세요", "difficulty": "high"},
            {"text": "주인공들이 한 번도 만나지 않지만 깊은 사랑에 빠지는 스토리를 창작하세요", "difficulty": "high"},
            {"text": "첫 장면에서 반전을 공개하지만 마지막에 여전히 충격적인 스릴러를 설계하세요", "difficulty": "high"}
        ]
    },
    Category.INNOVATION: {
        "en": [
            {"text": "Present 5 innovative ideas that could revolutionize the bicycle", "difficulty": "high"},
            {"text": "Propose 5 breakthrough innovations that could transform email communication", "difficulty": "high"},
            {"text": "Design 5 inventions that could make elevators obsolete", "difficulty": "high"}
        ],
        "ko": [
            {"text": "자전거를 혁신할 수 있는 획기적인 발명 아이디어를 5개 제시하세요", "difficulty": "high"},
            {"text": "이메일 커뮤니케이션을 완전히 변화시킬 혁신 아이디어를 5개 제시하세요", "difficulty": "high"},
            {"text": "엘리베이터를 대체할 수 있는 5가지 혁신적 발명을 설계하세요", "difficulty": "high"}
        ]
    },
    Category.BUSINESS: {
        "en": [
            {"text": "Design a business model in robotics/drone sector that could become a unicorn startup", "difficulty": "high"},
            {"text": "Create a one-person SaaS business that could scale to $1M ARR", "difficulty": "high"},
            {"text": "Develop a subscription model that people would happily pay $1000/month for", "difficulty": "high"}
        ],
        "ko": [
            {"text": "로봇/드론 분야에서 유니콘 기업이 될 수 있는 비즈니스 모델을 설계하세요", "difficulty": "high"},
            {"text": "연 매출 10억원을 달성할 수 있는 1인 SaaS 창업 아이템을 기획하세요", "difficulty": "high"},
            {"text": "사람들이 기꺼이 월 100만원을 지불할 만한 구독 비즈니스를 개발하세요", "difficulty": "high"}
        ]
    }
}
190
+
191
+ # ==================== Database Management ====================
192
class ArenaDatabase:
    """Persistence layer for battles and model stats.

    Stores data in a local SQLite database and, when HF_TOKEN is set and the
    huggingface_hub/datasets libraries are available, mirrors it to a private
    Hugging Face dataset repo (battles in split "train", stats in split
    "stats"). On startup it tries to restore state from HF first and only
    initializes a fresh local database if no remote data exists.
    """

    def __init__(self, db_path="gpt5_vs_jetxa.db", use_hf=True):
        """Connect to (or create) the database.

        db_path: path of the local SQLite file.
        use_hf:  enable Hugging Face mirroring (only effective when the HF
                 libraries imported successfully and HF_TOKEN is set).
        """
        self.db_path = db_path
        # HfApi is None when the huggingface_hub import failed at module load.
        self.use_hf = use_hf and HfApi is not None
        self.hf_token = os.getenv("HF_TOKEN")
        self.hf_dataset_name = os.getenv("HF_DATASET_NAME", "gpt5_vs_jetxa_arena")
        self.hf_username = None

        if self.use_hf and self.hf_token:
            try:
                login(token=self.hf_token)
                self.api = HfApi()
                user_info = self.api.whoami()
                self.hf_username = user_info["name"]
                self.hf_repo_id = f"{self.hf_username}/{self.hf_dataset_name}"

                # Create or access the dataset repository.
                self._init_hf_dataset()
                print(f"✅ Connected to Hugging Face Dataset: {self.hf_repo_id}")

                # Try to restore from HF FIRST: if remote data exists we must
                # NOT initialize a new (empty) database over it.
                if self._restore_from_hf():
                    print("✅ Successfully restored data from Hugging Face Dataset")
                    return  # exit here if data exists - do not initialize a new database
                else:
                    print("📝 No existing data in HF Dataset, will create new database")

            except Exception as e:
                print(f"❌ Failed to connect to Hugging Face: {e}")
                self.use_hf = False

        # Only initialize a new database if HF restore failed or HF is unavailable.
        print("📝 Initializing new local database")
        self.init_database()

    def _init_hf_dataset(self):
        """Ensure the private HF dataset repository exists (idempotent)."""
        try:
            # exist_ok=True makes this a no-op when the repo already exists.
            create_repo(
                repo_id=self.hf_repo_id,
                repo_type="dataset",
                private=True,
                exist_ok=True
            )
            print(f"✅ HF Dataset repository ready: {self.hf_repo_id}")
        except Exception as e:
            print(f"Dataset repo creation note: {e}")

    def _restore_from_hf(self):
        """Rebuild the local SQLite database from the HF dataset.

        Returns True when battles were restored (stats are restored too, or
        recalculated from the battle history when the "stats" split is
        missing); False when no usable remote data exists.
        """
        try:
            print("🔄 Attempting to restore data from Hugging Face...")

            # Try to load battles data.
            try:
                dataset = load_dataset(self.hf_repo_id, split="train", token=self.hf_token)
            except Exception as e:
                print(f"No existing battles data found: {e}")
                return False

            if not dataset or len(dataset) == 0:
                print("Dataset exists but is empty")
                return False

            print(f"Found {len(dataset)} battles in HF Dataset")

            # Create a fresh local database populated with the HF data.
            conn = sqlite3.connect(self.db_path)
            cursor = conn.cursor()

            # Create tables (same schema as init_database).
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS battles (
                    id TEXT PRIMARY KEY,
                    prompt_id TEXT,
                    prompt_text TEXT,
                    category TEXT,
                    model_a TEXT,
                    model_b TEXT,
                    response_a TEXT,
                    response_b TEXT,
                    winner TEXT,
                    voter_id TEXT,
                    timestamp DATETIME,
                    custom_prompt INTEGER DEFAULT 0,
                    language TEXT DEFAULT 'en'
                )
            ''')

            cursor.execute('''
                CREATE TABLE IF NOT EXISTS model_stats (
                    model_name TEXT PRIMARY KEY,
                    overall_score REAL DEFAULT 5.0,
                    storytelling_score REAL DEFAULT 5.0,
                    innovation_score REAL DEFAULT 5.0,
                    business_score REAL DEFAULT 5.0,
                    total_battles INTEGER DEFAULT 0,
                    wins INTEGER DEFAULT 0,
                    losses INTEGER DEFAULT 0,
                    elo_rating INTEGER DEFAULT 1500
                )
            ''')

            # Restore battles data (replace wipes any stale local rows).
            battles_df = dataset.to_pandas()
            battles_df.to_sql('battles', conn, if_exists='replace', index=False)
            print(f"✅ Restored {len(battles_df)} battles")

            # Try to restore model stats from the dedicated "stats" split.
            stats_restored = False
            try:
                stats_dataset = load_dataset(self.hf_repo_id, split="stats", token=self.hf_token)
                if stats_dataset and len(stats_dataset) > 0:
                    stats_df = stats_dataset.to_pandas()
                    stats_df.to_sql('model_stats', conn, if_exists='replace', index=False)
                    print(f"✅ Restored model stats")
                    stats_restored = True
            except Exception as e:
                print(f"Could not restore stats: {e}")

            # Fall back to deriving stats from the restored battle history.
            if not stats_restored:
                print("📊 Recalculating stats from battle history...")
                self._recalculate_stats_from_battles(cursor)

            conn.commit()
            conn.close()

            return True  # successfully restored

        except Exception as e:
            print(f"Failed to restore from HF: {e}")
            return False

    def _recalculate_stats_from_battles(self, cursor):
        """Recompute win/loss counts, category scores and ELO from battles.

        Uses the caller's open cursor; does not commit. Only the two known
        contestants ("GPT-5", "jetXA") are (re)initialized.
        """
        # Initialize models with default scores and a 1500 ELO baseline.
        for model in ["GPT-5", "jetXA"]:
            cursor.execute('''
                INSERT OR REPLACE INTO model_stats
                (model_name, overall_score, storytelling_score, innovation_score,
                 business_score, total_battles, wins, losses, elo_rating)
                VALUES (?, 5.0, 5.0, 5.0, 5.0, 0, 0, 0, 1500)
            ''', (model,))

        # Get all battles that have a recorded winner.
        cursor.execute('''
            SELECT model_a, model_b, winner, category FROM battles WHERE winner IS NOT NULL
        ''')

        battles = cursor.fetchall()

        # Process each battle.
        # NOTE(review): any winner value that is not model_a is treated as a
        # win for model_b — confirm that winner is always one of the two.
        for model_a, model_b, winner, category in battles:
            # Update win/loss counts.
            if winner == model_a:
                cursor.execute('UPDATE model_stats SET wins = wins + 1, total_battles = total_battles + 1 WHERE model_name = ?', (model_a,))
                cursor.execute('UPDATE model_stats SET losses = losses + 1, total_battles = total_battles + 1 WHERE model_name = ?', (model_b,))

                # Update category scores.
                self._update_category_scores(cursor, model_a, Category(category), True)
                self._update_category_scores(cursor, model_b, Category(category), False)
            else:
                cursor.execute('UPDATE model_stats SET wins = wins + 1, total_battles = total_battles + 1 WHERE model_name = ?', (model_b,))
                cursor.execute('UPDATE model_stats SET losses = losses + 1, total_battles = total_battles + 1 WHERE model_name = ?', (model_a,))

                # Update category scores.
                self._update_category_scores(cursor, model_b, Category(category), True)
                self._update_category_scores(cursor, model_a, Category(category), False)

        # Recalculate ELO ratings by replaying the history.
        self._recalculate_elo_from_battles(cursor)

        print(f"✅ Recalculated stats from {len(battles)} battles")

    def _recalculate_elo_from_battles(self, cursor):
        """Replay all decided battles in timestamp order to rebuild ELO."""
        # Reset every model's ELO to the 1500 baseline first.
        cursor.execute('UPDATE model_stats SET elo_rating = 1500')

        # Get battles in chronological order — ELO is path-dependent.
        cursor.execute('''
            SELECT model_a, model_b, winner FROM battles
            WHERE winner IS NOT NULL
            ORDER BY timestamp
        ''')

        battles = cursor.fetchall()

        for model_a, model_b, winner in battles:
            # Get current ELO ratings.
            cursor.execute('SELECT elo_rating FROM model_stats WHERE model_name = ?', (model_a,))
            elo_a = cursor.fetchone()[0]

            cursor.execute('SELECT elo_rating FROM model_stats WHERE model_name = ?', (model_b,))
            elo_b = cursor.fetchone()[0]

            # Standard ELO update with K-factor 32.
            K = 32
            if winner == model_a:
                expected_a = 1 / (1 + 10**((elo_b - elo_a) / 400))
                new_elo_a = int(elo_a + K * (1 - expected_a))
                new_elo_b = int(elo_b + K * (0 - (1 - expected_a)))
            else:
                expected_b = 1 / (1 + 10**((elo_a - elo_b) / 400))
                new_elo_a = int(elo_a + K * (0 - (1 - expected_b)))
                new_elo_b = int(elo_b + K * (1 - expected_b))

            cursor.execute('UPDATE model_stats SET elo_rating = ? WHERE model_name = ?', (new_elo_a, model_a))
            cursor.execute('UPDATE model_stats SET elo_rating = ? WHERE model_name = ?', (new_elo_b, model_b))

    def _sync_from_hf(self):
        """DEPRECATED - use _restore_from_hf instead (kept as a no-op)."""
        print("⚠️ Warning: _sync_from_hf is deprecated, use _restore_from_hf")
        pass

    def _sync_to_hf(self):
        """Push the current battles and stats tables to the HF dataset.

        Best-effort: failures are logged, never raised, so a HF outage does
        not break local persistence.
        """
        if not self.use_hf:
            return

        try:
            conn = sqlite3.connect(self.db_path)

            # Export battles to the "train" split.
            battles_df = pd.read_sql_query("SELECT * FROM battles", conn)
            if len(battles_df) > 0:
                battles_dataset = Dataset.from_pandas(battles_df)
                battles_dataset.push_to_hub(
                    self.hf_repo_id,
                    split="train",
                    token=self.hf_token,
                    private=True
                )
                print(f"✅ Pushed {len(battles_df)} battles to HF")

            # Export model stats to the "stats" split.
            stats_df = pd.read_sql_query("SELECT * FROM model_stats", conn)
            if len(stats_df) > 0:
                stats_dataset = Dataset.from_pandas(stats_df)
                stats_dataset.push_to_hub(
                    self.hf_repo_id,
                    split="stats",
                    token=self.hf_token,
                    private=True
                )
                print(f"✅ Pushed model stats to HF")

            conn.close()

        except Exception as e:
            print(f"⚠️ Warning: Could not sync to HF: {e}")

    def init_database(self):
        """Initialize the SQLite schema - ONLY called when no existing data."""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        cursor.execute('''
            CREATE TABLE IF NOT EXISTS battles (
                id TEXT PRIMARY KEY,
                prompt_id TEXT,
                prompt_text TEXT,
                category TEXT,
                model_a TEXT,
                model_b TEXT,
                response_a TEXT,
                response_b TEXT,
                winner TEXT,
                voter_id TEXT,
                timestamp DATETIME,
                custom_prompt INTEGER DEFAULT 0,
                language TEXT DEFAULT 'en'
            )
        ''')

        cursor.execute('''
            CREATE TABLE IF NOT EXISTS model_stats (
                model_name TEXT PRIMARY KEY,
                overall_score REAL DEFAULT 5.0,
                storytelling_score REAL DEFAULT 5.0,
                innovation_score REAL DEFAULT 5.0,
                business_score REAL DEFAULT 5.0,
                total_battles INTEGER DEFAULT 0,
                wins INTEGER DEFAULT 0,
                losses INTEGER DEFAULT 0,
                elo_rating INTEGER DEFAULT 1500
            )
        ''')

        conn.commit()
        conn.close()

        self._init_models()

    def _init_models(self):
        """Seed the stats table with the two contestant models (idempotent)."""
        models = ["GPT-5", "jetXA"]

        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        for model in models:
            # INSERT OR IGNORE keeps existing rows (and their scores) intact.
            cursor.execute('''
                INSERT OR IGNORE INTO model_stats (model_name) VALUES (?)
            ''', (model,))

        conn.commit()
        conn.close()

    def save_battle(self, battle: Battle):
        """Persist one battle and, if it has a winner, update all stats.

        Updates win/loss counts, per-category scores and ELO for both
        contestants, then mirrors the database to HF.
        """
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        # Positional insert: order must match the battles table definition.
        cursor.execute('''
            INSERT INTO battles VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        ''', (
            battle.id,
            battle.prompt_id,
            battle.prompt_text,
            battle.category.value,
            battle.model_a,
            battle.model_b,
            battle.response_a,
            battle.response_b,
            battle.winner,
            battle.voter_id,
            battle.timestamp.isoformat(),
            1 if battle.custom_prompt else 0,
            battle.language
        ))

        if battle.winner:
            winner = battle.winner
            loser = battle.model_b if winner == battle.model_a else battle.model_a

            # Update battle counts.
            cursor.execute('''
                UPDATE model_stats
                SET total_battles = total_battles + 1, wins = wins + 1
                WHERE model_name = ?
            ''', (winner,))

            cursor.execute('''
                UPDATE model_stats
                SET total_battles = total_battles + 1, losses = losses + 1
                WHERE model_name = ?
            ''', (loser,))

            # Update category scores.
            self._update_category_scores(cursor, winner, battle.category, True)
            self._update_category_scores(cursor, loser, battle.category, False)

            # Update ELO.
            self._update_elo_ratings(cursor, winner, loser)

        conn.commit()
        conn.close()

        # Sync to Hugging Face after saving.
        self._sync_to_hf()

    def _update_category_scores(self, cursor, model, category, is_winner):
        """Nudge a model's category score (+0.2 win / -0.1 loss, clamped 0..10)
        and refresh overall_score as the mean of the three category scores."""
        column_map = {
            Category.STORYTELLING: "storytelling_score",
            Category.INNOVATION: "innovation_score",
            Category.BUSINESS: "business_score"
        }

        # Column name comes from this fixed map only, so the f-string SQL
        # below is not injectable.
        score_column = column_map.get(category, "overall_score")

        cursor.execute(f'SELECT {score_column} FROM model_stats WHERE model_name = ?', (model,))
        result = cursor.fetchone()

        if result:
            current_score = result[0]
        else:
            current_score = 5.0  # default baseline when the model row is missing

        if is_winner:
            new_score = min(10, current_score + 0.2)
        else:
            new_score = max(0, current_score - 0.1)

        cursor.execute(f'UPDATE model_stats SET {score_column} = ? WHERE model_name = ?',
                       (new_score, model))

        # Update overall score as the average of the category scores.
        cursor.execute('''
            UPDATE model_stats
            SET overall_score = (storytelling_score + innovation_score + business_score) / 3.0
            WHERE model_name = ?
        ''', (model,))

    def _update_elo_ratings(self, cursor, winner, loser):
        """Apply a single standard ELO update (K=32) for one decided battle."""
        K = 32

        cursor.execute('SELECT elo_rating FROM model_stats WHERE model_name = ?', (winner,))
        winner_elo = cursor.fetchone()[0]

        cursor.execute('SELECT elo_rating FROM model_stats WHERE model_name = ?', (loser,))
        loser_elo = cursor.fetchone()[0]

        expected_winner = 1 / (1 + 10**((loser_elo - winner_elo) / 400))
        expected_loser = 1 / (1 + 10**((winner_elo - loser_elo) / 400))

        new_winner_elo = int(winner_elo + K * (1 - expected_winner))
        new_loser_elo = int(loser_elo + K * (0 - expected_loser))

        cursor.execute('UPDATE model_stats SET elo_rating = ? WHERE model_name = ?',
                       (new_winner_elo, winner))
        cursor.execute('UPDATE model_stats SET elo_rating = ? WHERE model_name = ?',
                       (new_loser_elo, loser))

    def get_leaderboard(self, category: Optional[Category] = None) -> pd.DataFrame:
        """Return the leaderboard as a DataFrame, ranked by the chosen
        category's score (overall when category is None), with a 1-based
        'rank' column prepended."""
        conn = sqlite3.connect(self.db_path)

        if category:
            column_map = {
                Category.STORYTELLING: "storytelling_score",
                Category.INNOVATION: "innovation_score",
                Category.BUSINESS: "business_score"
            }
            sort_column = column_map.get(category, "overall_score")
        else:
            sort_column = "overall_score"

        # sort_column comes from the fixed map above, so the f-string is safe.
        query = f'''
            SELECT
                model_name,
                ROUND(overall_score, 1) as overall_score,
                ROUND(storytelling_score, 1) as storytelling_score,
                ROUND(innovation_score, 1) as innovation_score,
                ROUND(business_score, 1) as business_score,
                total_battles,
                wins,
                CASE
                    WHEN total_battles > 0
                    THEN ROUND(100.0 * wins / total_battles, 1)
                    ELSE 0
                END as win_rate,
                elo_rating
            FROM model_stats
            ORDER BY {sort_column} DESC, elo_rating DESC
        '''

        df = pd.read_sql_query(query, conn)
        conn.close()

        df.insert(0, 'rank', range(1, len(df) + 1))
        return df
648
+
649
+ # ==================== Fixed LLM Interface with Proper Streaming ====================
650
+ class LLMInterface:
651
+ """Interface for GPT-5 and jetXA models with fixed streaming"""
652
+
653
+ def __init__(self):
654
+ self.models = ["GPT-5", "jetXA"]
655
+ self.response_cache = {}
656
+ self.cache_enabled = False # Disable caching by default
657
+
658
+ # Initialize OpenAI client for GPT-5
659
+ self.openai_client = None
660
+ openai_key = os.getenv("OPENAI_API_KEY")
661
+ if openai_key and OpenAI:
662
+ try:
663
+ self.openai_client = OpenAI(api_key=openai_key)
664
+ print("✅ GPT-5 client initialized")
665
+ except Exception as e:
666
+ print(f"❌ GPT-5 initialization failed: {e}")
667
+ else:
668
+ print("⚠️ GPT-5: No API key or OpenAI library not installed")
669
+
670
+ # Initialize Gradio client for jetXA
671
+ self.gradio_client = None
672
+ jetxa_space = os.getenv("jetXA_API", "aiqtech/tests")
673
+ hf_token = os.getenv("HF_TOKEN")
674
+
675
+ if GradioClient:
676
+ connection_attempts = [
677
+ lambda: GradioClient(jetxa_space, hf_token=hf_token) if hf_token else GradioClient(jetxa_space),
678
+ lambda: GradioClient(f"https://huggingface.co/spaces/{jetxa_space}"),
679
+ lambda: GradioClient(f"https://{jetxa_space.replace('/', '-')}.hf.space"),
680
+ lambda: GradioClient(src=jetxa_space),
681
+ lambda: GradioClient("aiqtech/tests")
682
+ ]
683
+
684
+ for i, attempt in enumerate(connection_attempts, 1):
685
+ try:
686
+ self.gradio_client = attempt()
687
+ if hasattr(self.gradio_client, 'view_api'):
688
+ api_info = self.gradio_client.view_api()
689
+ print(f"✅ jetXA client initialized successfully using method {i}!")
690
+ break
691
+ except Exception as e:
692
+ if i == len(connection_attempts):
693
+ print(f"⚠️ jetXA: All connection attempts failed. Last error: {e}")
694
+ print("Will use fallback responses for jetXA")
695
+ else:
696
+ continue
697
+ else:
698
+ print("⚠️ jetXA: Gradio client not installed")
699
+
700
+ def clear_cache(self):
701
+ """Clear all cached responses"""
702
+ self.response_cache = {}
703
+ print("✅ Cache cleared")
704
+
705
+ def generate_response_stream(self, model: str, prompt: str, language: str = "en") -> Generator[str, None, None]:
706
+ """Generate streaming response with proper accumulation"""
707
+
708
+ # Add language and creativity instructions
709
+ if language == "ko":
710
+ instruction = "창의적이고 혁신적인 한국어 답변을 작성해주세요. 독창적이고 상세한 아이디어를 제시하세요."
711
+ else:
712
+ instruction = "Provide a highly creative and innovative response. Be original and detailed."
713
+
714
+ full_prompt = f"{instruction}\n\n{prompt}"
715
+
716
+ try:
717
+ if model == "GPT-5":
718
+ # Stream GPT-5 with proper accumulation
719
+ accumulated = ""
720
+ for chunk in self._stream_gpt5(full_prompt):
721
+ accumulated += chunk
722
+ yield accumulated # Always yield the accumulated text
723
+
724
+ elif model == "jetXA":
725
+ # Get full response and simulate streaming
726
+ full_response = self._get_jetxa_response(full_prompt)
727
+
728
+ if full_response:
729
+ # Format jetXA response with proper spacing
730
+ formatted_response = self._format_jetxa_response(full_response)
731
+
732
+ # Simulate streaming word by word for jetXA for smoother effect
733
+ words = formatted_response.split()
734
+ accumulated = ""
735
+
736
+ # Stream words in small batches for natural effect
737
+ batch_size = 2 # Stream 2 words at a time
738
+ for i in range(0, len(words), batch_size):
739
+ batch = words[i:i+batch_size]
740
+ for word in batch:
741
+ if accumulated:
742
+ accumulated += " "
743
+ accumulated += word
744
+ yield accumulated # Yield accumulated text after each batch
745
+ time.sleep(0.03) # Small delay between batches
746
+ else:
747
+ # Use fallback if jetXA fails
748
+ fallback = self._generate_fallback(model, prompt, language)
749
+ # Stream fallback with accumulation
750
+ words = fallback.split()
751
+ accumulated = ""
752
+ for word in words:
753
+ if accumulated:
754
+ accumulated += " "
755
+ accumulated += word
756
+ yield accumulated
757
+ time.sleep(0.02)
758
+ else:
759
+ # Unknown model - use fallback
760
+ fallback = self._generate_fallback(model, prompt, language)
761
+ # Stream fallback with accumulation
762
+ words = fallback.split()
763
+ accumulated = ""
764
+ for word in words:
765
+ if accumulated:
766
+ accumulated += " "
767
+ accumulated += word
768
+ yield accumulated
769
+ time.sleep(0.02)
770
+
771
+ except Exception as e:
772
+ print(f"Error streaming {model}: {e}")
773
+ fallback = self._generate_fallback(model, prompt, language)
774
+ yield fallback
775
+
776
def _format_jetxa_response(self, text: str) -> str:
    """Format a jetXA response with extra spacing/line breaks for readability.

    Pipeline:
      1. normalize markdown via ``_clean_markdown_response``
      2. insert blank lines around headers, lists, blockquotes, horizontal
         rules and tables, and between paragraphs that end on a sentence
         terminator
      3. split very long lines at sentence boundaries
      4. collapse runs of blank lines down to at most one empty line

    :param text: raw markdown text produced by the model
    :return: reformatted markdown text (stripped of outer whitespace)
    """
    # Clean up the response first
    text = self._clean_markdown_response(text)

    # Sentence terminators: English punctuation (optionally closed by a quote
    # or parenthesis), an ellipsis, and common Korean polite endings.
    sentence_suffixes = (
        '.', '!', '?', '."', '!"', '?"', '.)', '!)', '?)', '...',
        '다.', '요.', '니다.', '습니다.',
    )

    def ends_sentence(s: str, allow_bold_colon: bool = False) -> bool:
        # One shared predicate instead of the two hand-maintained suffix
        # lists the previous version carried (they had already drifted:
        # ':**' was only honoured for the *current* line check, which this
        # flag preserves).
        if allow_bold_colon and s.endswith(':**'):
            return True
        return s.endswith(sentence_suffixes)

    def is_list_item(s: str) -> bool:
        # Bullet ('-'/'*') or numbered ("1. ") list entry.
        return s.startswith('- ') or s.startswith('* ') or bool(re.match(r'^\d+\. ', s))

    lines = text.split('\n')
    formatted_lines = []

    for i, line in enumerate(lines):
        line = line.strip()

        if not line:
            # Keep empty lines for spacing
            formatted_lines.append('')
            continue

        if line.startswith('#'):
            # Double blank line before headers (except at the very top).
            if i > 0 and formatted_lines and formatted_lines[-1].strip():
                formatted_lines.append('')
                formatted_lines.append('')
            formatted_lines.append(line)
            # Blank line after major (h1/h2) headers.
            if line.startswith('# ') or line.startswith('## '):
                formatted_lines.append('')
        elif is_list_item(line):
            # Blank line before the first item of a list only.
            if i > 0 and formatted_lines and formatted_lines[-1].strip() and not is_list_item(formatted_lines[-1]):
                formatted_lines.append('')
            formatted_lines.append(line)
        elif line.startswith('>'):
            # Double blank line before a blockquote...
            if i > 0 and formatted_lines and formatted_lines[-1].strip():
                formatted_lines.append('')
                formatted_lines.append('')
            formatted_lines.append(line)
            # ...and after it, once the quote run ends.
            if i < len(lines) - 1 and not lines[i + 1].strip().startswith('>'):
                formatted_lines.append('')
                formatted_lines.append('')
        elif line == '---' or line == '***' or line == '___':
            # Blank line around horizontal rules.
            if i > 0 and formatted_lines and formatted_lines[-1].strip():
                formatted_lines.append('')
            formatted_lines.append(line)
            formatted_lines.append('')
        elif '|' in line:
            # Blank line before the first and after the last table row.
            if i > 0 and '|' not in (formatted_lines[-1] if formatted_lines else ''):
                formatted_lines.append('')
            formatted_lines.append(line)
            if i < len(lines) - 1 and '|' not in lines[i + 1]:
                formatted_lines.append('')
        else:
            # Regular paragraph text.
            ends_with_sentence = ends_sentence(line, allow_bold_colon=True)

            # Previous emitted line is plain paragraph text (not a header,
            # list item, or table row)?
            prev_is_plain = (
                i > 0 and formatted_lines and formatted_lines[-1].strip() and
                not formatted_lines[-1].startswith('#') and
                not is_list_item(formatted_lines[-1]) and
                '|' not in formatted_lines[-1]
            )

            if prev_is_plain:
                prev_line = formatted_lines[-1].strip()
                if prev_line and ends_sentence(prev_line):
                    # Double line break for paragraph separation.
                    formatted_lines.append('')
                    formatted_lines.append('')
                elif line and (line[0].isupper() or ord(line[0]) > 0x3000):
                    # Looks like a fresh paragraph: starts with a capital
                    # letter or a CJK/Korean character (code point > U+3000).
                    formatted_lines.append('')

            formatted_lines.append(line)

            # Breathing room after a finished sentence, unless the next line
            # is a header/list item or too short to be a real continuation.
            if ends_with_sentence and i < len(lines) - 1:
                next_line = lines[i + 1].strip()
                if next_line and not next_line.startswith('#') and not next_line.startswith('-'):
                    if len(next_line) > 20:
                        formatted_lines.append('')

    result = '\n'.join(formatted_lines)

    # Break up very long lines (>200 chars) at sentence boundaries.
    improved_lines = []
    for line in result.split('\n'):
        if len(line) > 200 and '.' in line:
            sentences = re.split(r'(?<=[.!?])\s+', line)
            for j, sentence in enumerate(sentences):
                improved_lines.append(sentence)
                # Blank line after each substantial sentence of the split.
                if j < len(sentences) - 1 and len(sentence) > 50:
                    improved_lines.append('')
        else:
            improved_lines.append(line)

    result = '\n'.join(improved_lines)

    # Collapse runs of 3+ newlines down to 2 (at most one empty line).  This
    # single loop subsumes the extra '\n\n\n\n' pre-pass the previous
    # version ran first.
    while '\n\n\n' in result:
        result = result.replace('\n\n\n', '\n\n')

    # Ensure a blank line follows every h1-h3 header.
    result = re.sub(r'(^#{1,3} .+$)\n(?!\n)', r'\1\n\n', result, flags=re.MULTILINE)

    return result.strip()
933
+
934
def _stream_gpt5(self, prompt: str) -> Generator[str, None, None]:
    """Stream a GPT response, yielding incremental text chunks (not accumulated).

    Falls back to a canned response — streamed word-by-word so the UI updates
    the same way as a real stream — when no OpenAI client is configured or
    the API call fails.

    :param prompt: user prompt forwarded verbatim to the chat API
    :yields: text fragments; the caller is responsible for accumulation
    """
    def stream_words(text: str) -> Generator[str, None, None]:
        # Shared fake-streaming helper for both fallback paths (previously
        # duplicated: the except path dumped the whole text in one yield).
        for word in text.split():
            yield word + " "
            time.sleep(0.02)

    if not self.openai_client:
        yield from stream_words(self._generate_fallback("GPT-5", prompt, "en"))
        return

    try:
        stream = self.openai_client.chat.completions.create(
            model="gpt-4",  # Use gpt-4 as fallback if gpt-5 not available
            messages=[{"role": "user", "content": prompt}],
            max_tokens=1500,
            temperature=0.8,
            stream=True
        )

        for chunk in stream:
            # Guard `chunk.choices`: streaming responses may contain chunks
            # with an empty choices list (e.g. trailing usage chunks), which
            # would otherwise raise IndexError here.
            if chunk.choices and chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content  # Yield only the chunk
    except Exception as e:
        print(f"GPT-5 streaming error: {e}")
        yield from stream_words(self._generate_fallback("GPT-5", prompt, "en"))
960
+
961
def _get_jetxa_response(self, prompt: str) -> str:
    """Fetch a complete (non-streaming) reply from the jetXA Gradio endpoint.

    Returns an empty string when no client is configured, when no usable
    text can be extracted from the result, or when the remote call fails.
    """
    if not self.gradio_client:
        return ""

    try:
        result = self.gradio_client.predict(
            message=prompt,
            history=[],
            use_search=False,
            show_agent_thoughts=False,
            search_count=5,
            api_name="/process_query_optimized"
        )

        answer = ""

        if result and isinstance(result, (tuple, list)) and len(result) >= 1:
            chat_log = result[0]

            # Walk the chat history newest-first and keep the first
            # non-empty message body (dict-style or pair-style entries).
            if isinstance(chat_log, list) and len(chat_log) > 0:
                for entry in reversed(chat_log):
                    if isinstance(entry, dict):
                        body = entry.get('content', '')
                        if body:
                            answer = str(body)
                            break
                    elif isinstance(entry, (list, tuple)) and len(entry) >= 2:
                        if entry[1]:
                            answer = str(entry[1])
                            break

            # Fallback: scan the remaining result slots for any plain string.
            if not answer:
                for slot in range(1, min(3, len(result))):
                    candidate = result[slot]
                    if candidate and isinstance(candidate, str) and candidate.strip():
                        answer = candidate
                        break

        if answer:
            # Normalize markdown before handing the text to the UI.
            answer = self._clean_markdown_response(answer)

        return answer

    except Exception as e:
        print(f"jetXA response error: {e}")
        return ""
1008
+
1009
+ def _clean_markdown_response(self, text: str) -> str:
1010
+ """Clean and fix common markdown formatting issues"""
1011
+ # Remove any duplicate markers or broken formatting
1012
+ text = text.replace('| ---', '|---') # Fix table separators
1013
+ text = text.replace('---\n---', '---') # Remove duplicate horizontal rules
1014
+
1015
+ # Ensure proper spacing around headers
1016
+ lines = text.split('\n')
1017
+ cleaned_lines = []
1018
+
1019
+ for i, line in enumerate(lines):
1020
+ # Fix header formatting
1021
+ if line.strip().startswith('#'):
1022
+ # Ensure space after # symbols
1023
+ if '#' in line and not line.startswith('# '):
1024
+ parts = line.split('#', 1)
1025
+ if len(parts) > 1:
1026
+ hash_count = len(line) - len(line.lstrip('#'))
1027
+ line = '#' * hash_count + ' ' + parts[-1].strip()
1028
+
1029
+ # Add blank line before headers (except first line)
1030
+ if i > 0 and cleaned_lines and cleaned_lines[-1].strip():
1031
+ cleaned_lines.append('')
1032
+
1033
+ # Fix table formatting
1034
+ if '|' in line:
1035
+ # Ensure proper table separator
1036
+ if all(c in ['-', '|', ' '] for c in line.strip()):
1037
+ line = line.replace(' ', '').replace('|-', '|---').replace('-|', '---|')
1038
+ if not line.startswith('|'):
1039
+ line = '|' + line
1040
+ if not line.endswith('|'):
1041
+ line = line + '|'
1042
+
1043
+ cleaned_lines.append(line)
1044
+
1045
+ return '\n'.join(cleaned_lines)
1046
+
1047
+ def _generate_fallback(self, model: str, prompt: str, language: str) -> str:
1048
+ """Generate high-quality fallback response with language support and proper markdown"""
1049
+
1050
+ # Determine category from prompt
1051
+ if any(word in prompt.lower() for word in ["story", "movie", "novel", "plot", "스토리", "영화", "소설"]):
1052
+ category = "story"
1053
+ elif any(word in prompt.lower() for word in ["innovate", "invent", "revolution", "혁신", "발명", "개발"]):
1054
+ category = "innovation"
1055
+ else:
1056
+ category = "business"
1057
+
1058
+ # Korean responses with better markdown formatting
1059
+ if language == "ko":
1060
+ responses = {
1061
+ "story": {
1062
+ "GPT-5": """# 양자 거울
1063
+
1064
+ ## 시놉시스
1065
+ 한 형사가 도시의 모든 거울이 실제로 **범죄가 예방된 다른 타임라인**으로 통하는 포털임을 발견한다.""",
1066
+ "jetXA": """# 감정 고고학
1067
+
1068
+ ## 기획 의도
1069
+ 2045년, 고고학자들은 유물을 발굴하지 않는다—그들은 **비극의 장소에 남겨진 압축된 인간 감정**을 발굴한다."""
1070
+ },
1071
+ "innovation": {
1072
+ "GPT-5": """# 🚲 자전거 혁신 5가지
1073
+
1074
+ ## 1. **중력 무시 바퀴** (Gravity Defiance Wheels)
1075
+ - **기술**: 전자기 림이 오르막길에서 무게를 거의 0으로 감소""",
1076
+ "jetXA": """# 📧 이메일 혁명 5가지
1077
+
1078
+ ## 1. **시간 메시징** (Temporal Messaging)
1079
+ ### 핵심 기능
1080
+ - ⏰ 과거/미래로 이메일 전송"""
1081
+ },
1082
+ "business": {
1083
+ "GPT-5": """# 🚁 NeuralNest - 10억달러 드론 심리 플랫폼
1084
+
1085
+ ## 사업 개요
1086
+
1087
+ ### 비전
1088
+ > **"위기 지역에서 실시간 정신 건강 지원을 제공하는 세계 최초 AI 드론 플랫폼"**""",
1089
+ "jetXA": """# 💾 MemoryBank - 월 100만원 구독 서비스
1090
+
1091
+ ## 서비스 개요
1092
+
1093
+ ### 핵심 가치
1094
+ > **"당신의 모든 기억을 영원히 보존하고 다시 경험하세요"**"""
1095
+ }
1096
+ }
1097
+ else:
1098
+ # English responses
1099
+ responses = {
1100
+ "story": {
1101
+ "GPT-5": """# The Quantum Mirror
1102
+
1103
+ ## Synopsis
1104
+ A detective discovers that every mirror in the city is actually a portal to **alternate timelines where crimes were prevented**.""",
1105
+ "jetXA": """# Emotional Archaeology
1106
+
1107
+ ## Concept
1108
+ In 2045, archaeologists don't dig for artifacts—they excavate **compressed human emotions left in places of tragedy**."""
1109
+ },
1110
+ "innovation": {
1111
+ "GPT-5": """# 🚲 5 Bicycle Innovations
1112
+
1113
+ ## 1. **Gravity Defiance Wheels**
1114
+ - **Tech**: Electromagnetic rims reduce weight to near-zero when pedaling uphill""",
1115
+ "jetXA": """# 📧 5 Email Revolutionaries
1116
+
1117
+ ## 1. **Temporal Messaging**
1118
+ ### Core Features
1119
+ - ⏰ Send emails to past/future"""
1120
+ },
1121
+ "business": {
1122
+ "GPT-5": """# 🚁 NeuralNest - $1B Drone Psychology Platform
1123
+
1124
+ ## Business Overview
1125
+
1126
+ ### Vision
1127
+ > **"World's first AI drone platform providing real-time mental health support in crisis zones"**""",
1128
+ "jetXA": """# 💾 MemoryBank - $1000/month Subscription
1129
+
1130
+ ## Service Overview
1131
+
1132
+ ### Core Value
1133
+ > **"Preserve and re-experience all your memories forever"**"""
1134
+ }
1135
+ }
1136
+
1137
+ return responses[category][model]
1138
+
1139
# ==================== Main Arena Class ====================
class CreativityArena:
    """Coordinates battles between the two models and persists the results."""

    def __init__(self):
        self.db = ArenaDatabase()      # persistence layer for battles/stats
        self.llm = LLMInterface()      # model access (streaming + fallbacks)
        self.current_battle = None     # the battle currently awaiting a vote

    def get_random_prompt(self, category: Category, language: str = "en") -> dict:
        """Pick a random prompt for the category, falling back to English."""
        pool = PROMPTS[category].get(language, PROMPTS[category]["en"])
        return random.choice(pool)

    def start_new_battle_stream(self, category: str, custom_prompt: str = None, language: str = "en"):
        """Set up a new battle: choose the prompt and shuffle model positions."""
        # Resolve the category ("random" picks one at random).
        if category == "random":
            chosen_category = random.choice(list(Category))
        else:
            chosen_category = Category(category)

        # A non-blank custom prompt overrides the built-in prompt pool.
        if custom_prompt and custom_prompt.strip():
            prompt_text = custom_prompt.strip()
            is_custom = True
        else:
            prompt_text = self.get_random_prompt(chosen_category, language)["text"]
            is_custom = False

        # Randomly assign the two models to the anonymous A/B positions.
        contenders = ["GPT-5", "jetXA"]
        random.shuffle(contenders)

        battle = Battle(
            id=hashlib.md5(f"{datetime.now().isoformat()}-{random.randint(0,999999)}".encode()).hexdigest(),
            prompt_id=hashlib.md5(prompt_text.encode()).hexdigest(),
            prompt_text=prompt_text,
            model_a=contenders[0],
            model_b=contenders[1],
            response_a="",
            response_b="",
            winner=None,
            voter_id="",
            timestamp=datetime.now(),
            category=chosen_category,
            custom_prompt=is_custom,
            language=language
        )

        self.current_battle = battle

        return {
            "prompt": prompt_text,
            "category": chosen_category.value,
            "models": contenders,
            "battle": battle
        }

    def vote(self, choice: str, voter_id: str = None):
        """Record a vote ('A' or 'B') for the active battle and persist it."""
        battle = self.current_battle
        if not battle:
            return {"error": "No active battle"}

        battle.winner = battle.model_a if choice == "A" else battle.model_b
        battle.voter_id = voter_id or "anonymous"

        self.db.save_battle(battle)

        return {
            "model_a": battle.model_a,
            "model_b": battle.model_b,
            "winner": battle.winner
        }

    def get_leaderboard(self, category: Optional[Category] = None):
        """Fetch the leaderboard, optionally filtered to a single category."""
        return self.db.get_leaderboard(category)
1218
+
1219
# ==================== Gradio Interface ====================
def create_app():
    """Build the Gradio Blocks app: a battle-arena tab and a leaderboard tab.

    Returns the (not yet launched) ``gr.Blocks`` instance.  All event wiring
    — language switching, streaming battle generation, voting, leaderboard
    refresh — is defined inside this function via closures over ``arena``.
    """
    arena = CreativityArena()

    # Updated CSS with pastel colors and proper markdown rendering
    css = """
    .gradio-container {
        background: linear-gradient(135deg, #f5e6ff 0%, #e6f3ff 50%, #ffeef5 100%);
        font-family: 'Inter', sans-serif;
    }
    .main-header {
        background: rgba(255, 255, 255, 0.98);
        border-radius: 20px;
        padding: 2rem;
        text-align: center;
        margin-bottom: 2rem;
        box-shadow: 0 4px 20px rgba(150, 100, 200, 0.15);
        border: 1px solid rgba(200, 180, 220, 0.3);
    }
    .response-container {
        background: rgba(255, 255, 255, 0.95);
        border-radius: 15px;
        padding: 1.5rem;
        min-height: 400px;
        max-height: 800px;
        overflow-y: auto;
        box-shadow: 0 3px 15px rgba(150, 100, 200, 0.1);
        transition: transform 0.3s ease;
        border: 1px solid rgba(200, 180, 220, 0.2);
    }
    .response-container:hover {
        transform: translateY(-3px);
        box-shadow: 0 6px 20px rgba(150, 100, 200, 0.2);
    }

    /* Markdown specific styles */
    .markdown-text {
        line-height: 1.6;
        color: #2d3748;
    }
    .markdown-text h1 {
        font-size: 2.5em !important;
        font-weight: bold;
        color: #6b46c1;
        margin-top: 1em;
        margin-bottom: 0.5em;
        border-bottom: 2px solid #e9d8fd;
        padding-bottom: 0.3em;
    }
    .markdown-text h2 {
        font-size: 2em !important;
        font-weight: bold;
        color: #805ad5;
        margin-top: 0.8em;
        margin-bottom: 0.4em;
    }
    .markdown-text h3 {
        font-size: 1.5em !important;
        font-weight: bold;
        color: #9f7aea;
        margin-top: 0.6em;
        margin-bottom: 0.3em;
    }
    """

    with gr.Blocks(title="GPT-5 vs jetXA Battle", theme=gr.themes.Soft(), css=css) as app:
        # Currently selected UI language ("en" or "ko"), shared across events.
        current_lang = gr.State(value="en")

        # Language change handler
        def update_language(lang_value):
            """Store the newly selected language code in `current_lang`."""
            return lang_value

        def update_ui_text(lang):
            """Return re-localized values for every language-sensitive widget.

            The tuple order must match the `outputs` list of the
            `language_select.change(...).then(...)` wiring below.
            """
            ui = UI_TEXT[lang]
            return (
                f"""
                <div class="main-header">
                    <h1 style="color: #6b46c1; font-size: 2.5rem;">{ui['title']}</h1>
                    <p style="color: #805ad5; font-size: 1.2rem;">{ui['subtitle']}</p>
                </div>
                """,
                ui['leaderboard_title'],
                gr.update(label=ui['category_label']),
                gr.update(label=ui['custom_prompt_label']),
                gr.update(placeholder=ui['custom_prompt_placeholder']),
                gr.update(value=ui['new_battle_btn']),
                ui['model_a'],
                ui['model_b'],
                gr.update(value=ui['vote_a']),
                gr.update(value=ui['vote_b']),
                gr.update(label=ui['category_filter']),
                gr.update(value=ui['refresh_btn']),
                gr.update(choices=[
                    (ui['categories']['random'], "random"),
                    (ui['categories']['storytelling'], "storytelling"),
                    (ui['categories']['innovation'], "innovation"),
                    (ui['categories']['business'], "business")
                ]),
                gr.update(choices=[
                    (ui['filter_categories']['overall'], "overall"),
                    (ui['filter_categories']['storytelling'], "storytelling"),
                    (ui['filter_categories']['innovation'], "innovation"),
                    (ui['filter_categories']['business'], "business")
                ])
            )

        # Header
        with gr.Row():
            with gr.Column(scale=10):
                header_html = gr.HTML(f"""
                <div class="main-header">
                    <h1 style="color: #6b46c1; font-size: 2.5rem;">🎨 GPT-5 vs jetXA Creativity Battle</h1>
                    <p style="color: #805ad5; font-size: 1.2rem;">Test cutting-edge AI models in creative challenges</p>
                </div>
                """)
            with gr.Column(scale=1):
                language_select = gr.Dropdown(
                    choices=[("English", "en"), ("한국어", "ko")],
                    value="en",
                    label="Language",
                    interactive=True,
                    elem_classes="category-select"
                )

        with gr.Tabs(elem_classes="tab-nav") as tabs:
            # Battle Arena Tab
            with gr.TabItem("⚔️ Battle Arena", id="battle_tab") as battle_tab:
                with gr.Row():
                    with gr.Column(scale=1):
                        category_select = gr.Dropdown(
                            choices=[
                                ("🎲 Random", "random"),
                                ("📚 Storytelling", "storytelling"),
                                ("💡 Innovation", "innovation"),
                                ("💼 Business", "business")
                            ],
                            value="random",
                            label="Select Category",
                            interactive=True,
                            elem_classes="category-select"
                        )

                        custom_prompt_accordion = gr.Accordion("✏️ Custom Challenge (Optional)", open=False)
                        with custom_prompt_accordion:
                            custom_prompt_input = gr.Textbox(
                                label="",
                                placeholder="Enter your creative challenge...",
                                lines=3
                            )

                        new_battle_btn = gr.Button(
                            "🎲 Start New Battle",
                            variant="primary",
                            size="lg",
                            elem_classes="vote-button"
                        )

                    with gr.Column(scale=3):
                        prompt_display = gr.Markdown("")

                with gr.Row():
                    with gr.Column():
                        model_a_label = gr.Markdown("### 🅰️ Model A")
                        response_a = gr.Markdown(
                            "",
                            elem_classes=["response-container", "markdown-text"],
                            sanitize_html=False,
                            line_breaks=True,
                            latex_delimiters=[
                                {"left": "$", "right": "$", "display": True},
                                {"left": "$", "right": "$", "display": False}
                            ]
                        )
                        # Hidden until a vote reveals which model was "A".
                        model_a_reveal = gr.Textbox(label="Model Identity", visible=False)

                    with gr.Column():
                        model_b_label = gr.Markdown("### 🅱️ Model B")
                        response_b = gr.Markdown(
                            "",
                            elem_classes=["response-container", "markdown-text"],
                            sanitize_html=False,
                            line_breaks=True,
                            latex_delimiters=[
                                {"left": "$", "right": "$", "display": True},
                                {"left": "$", "right": "$", "display": False}
                            ]
                        )
                        model_b_reveal = gr.Textbox(label="Model Identity", visible=False)

                with gr.Row():
                    vote_a_btn = gr.Button("🅰️ Model A is more creative", size="lg", variant="primary", elem_classes="vote-button")
                    vote_b_btn = gr.Button("🅱️ Model B is more creative", size="lg", variant="primary", elem_classes="vote-button")

                vote_result = gr.Markdown("")
                # Per-session battle info dict (prompt, model order, Battle obj).
                battle_state = gr.State({})

            # Leaderboard Tab
            with gr.TabItem("🏆 Leaderboard", id="leaderboard_tab") as leaderboard_tab:
                leaderboard_title = gr.Markdown("## 🏆 GPT-5 vs jetXA Leaderboard")

                category_filter = gr.Radio(
                    choices=[
                        ("Overall", "overall"),
                        ("Storytelling", "storytelling"),
                        ("Innovation", "innovation"),
                        ("Business", "business")
                    ],
                    value="overall",
                    label="Category Filter",
                    elem_classes="category-select"
                )

                leaderboard_display = gr.Dataframe(
                    headers=["Rank", "Model", "Overall", "Story", "Innovation", "Business", "Battles", "Win%", "ELO"],
                    datatype=["number", "str", "number", "number", "number", "number", "number", "number", "number"]
                )

                refresh_btn = gr.Button("🔄 Refresh", variant="secondary")

        # Footer
        footer_html = gr.HTML("""
        <div class="footer">
            <p>Testing GPT-5 and jetXA in creative challenges | Contact: arxivgpt@gmail.com</p>
        </div>
        """)

        # Event handlers with streaming support
        def start_battle_stream(category, custom_prompt, lang):
            """Generator event handler: run both models concurrently and stream
            their partial responses into the two Markdown panes.

            Each model runs in its own thread, pushing ('update', text) /
            ('done', None) messages onto a queue; this generator polls both
            queues and yields UI updates until both streams finish.
            """
            # Clear cache for fresh responses if needed
            arena.llm.clear_cache()

            battle_info = arena.start_new_battle_stream(category, custom_prompt, lang)

            ui = UI_TEXT[lang]
            category_display = ui["categories"].get(battle_info['category'], battle_info['category'])

            prompt_text = f"""
{ui['challenge_task']}

**{ui['category']}**: {category_display}

**{ui['prompt']}**:
> {battle_info['prompt']}
"""

            # Initialize with loading state
            initial_response = ui['generating']

            # Start streaming in separate threads
            response_a_queue = queue.Queue()
            response_b_queue = queue.Queue()
            response_a_final = ""
            response_b_final = ""
            done_a = False
            done_b = False

            def stream_model_a():
                """Thread target: stream model A's response into its queue."""
                nonlocal response_a_final, done_a
                try:
                    for chunk in arena.llm.generate_response_stream(
                        battle_info['models'][0],
                        battle_info['prompt'],
                        lang
                    ):
                        # chunk is already accumulated text
                        response_a_queue.put(('update', chunk))  # Add type marker
                        response_a_final = chunk
                        battle_info['battle'].response_a = response_a_final
                except Exception as e:
                    print(f"Error in stream_model_a: {e}")
                    response_a_final = arena.llm._generate_fallback(
                        battle_info['models'][0],
                        battle_info['prompt'],
                        lang
                    )
                    response_a_queue.put(('update', response_a_final))
                    battle_info['battle'].response_a = response_a_final
                finally:
                    response_a_queue.put(('done', None))  # Signal completion
                    done_a = True

            def stream_model_b():
                """Thread target: stream model B's response into its queue."""
                nonlocal response_b_final, done_b
                try:
                    for chunk in arena.llm.generate_response_stream(
                        battle_info['models'][1],
                        battle_info['prompt'],
                        lang
                    ):
                        # chunk is already accumulated text
                        response_b_queue.put(('update', chunk))  # Add type marker
                        response_b_final = chunk
                        battle_info['battle'].response_b = response_b_final
                except Exception as e:
                    print(f"Error in stream_model_b: {e}")
                    response_b_final = arena.llm._generate_fallback(
                        battle_info['models'][1],
                        battle_info['prompt'],
                        lang
                    )
                    response_b_queue.put(('update', response_b_final))
                    battle_info['battle'].response_b = response_b_final
                finally:
                    response_b_queue.put(('done', None))  # Signal completion
                    done_b = True

            thread_a = threading.Thread(target=stream_model_a)
            thread_b = threading.Thread(target=stream_model_b)

            thread_a.start()
            thread_b.start()

            # Yield updates for both responses
            response_a_text = initial_response
            response_b_text = initial_response
            last_update_time = time.time()
            stream_a_done = False
            stream_b_done = False

            while not (stream_a_done and stream_b_done):
                updated = False
                current_time = time.time()

                # Process all updates from model A
                try:
                    while True:
                        msg_type, content = response_a_queue.get_nowait()
                        if msg_type == 'done':
                            stream_a_done = True
                        elif msg_type == 'update':
                            response_a_text = content
                except queue.Empty:
                    pass

                # Process all updates from model B
                try:
                    while True:
                        msg_type, content = response_b_queue.get_nowait()
                        if msg_type == 'done':
                            stream_b_done = True
                        elif msg_type == 'update':
                            response_b_text = content
                            updated = True
                except queue.Empty:
                    pass

                # Always yield updates more frequently for better streaming effect
                if updated or (current_time - last_update_time) > 0.05:  # Update every 50ms
                    yield (
                        prompt_text,
                        response_a_text,
                        response_b_text,
                        gr.update(visible=False),
                        gr.update(visible=False),
                        "",
                        battle_info
                    )
                    last_update_time = current_time

                time.sleep(0.02)  # Smaller sleep for more responsive updates

            # Final update with complete responses
            yield (
                prompt_text,
                response_a_final if response_a_final else initial_response,
                response_b_final if response_b_final else initial_response,
                gr.update(visible=False),
                gr.update(visible=False),
                "",
                battle_info
            )

        def process_vote(choice, state, lang):
            """Record a vote for 'A' or 'B', reveal both identities, and
            return a localized result summary."""
            if not state or 'battle' not in state:
                return (
                    gr.update(),
                    gr.update(),
                    "Error: No active battle"
                )

            # Update the current battle from state
            arena.current_battle = state['battle']

            result = arena.vote(choice)
            ui = UI_TEXT[lang]

            winner_emoji = "🏆" if result['winner'] == result['model_a'] else "🥈"
            loser_emoji = "🥈" if winner_emoji == "🏆" else "🏆"

            result_text = f"""
{ui['vote_complete']}

**{ui['winner']}**: {winner_emoji} **{result['winner']}**

**Model A**: {result['model_a']} {winner_emoji if choice == "A" else loser_emoji}
**Model B**: {result['model_b']} {winner_emoji if choice == "B" else loser_emoji}

{ui['elo_updated']}
"""

            return (
                gr.update(value=result['model_a'], visible=True),
                gr.update(value=result['model_b'], visible=True),
                result_text
            )

        def update_leaderboard(category):
            """Fetch the leaderboard DataFrame for the selected category filter."""
            df = arena.get_leaderboard(
                Category(category) if category != "overall" else None
            )
            return df[['rank', 'model_name', 'overall_score', 'storytelling_score',
                       'innovation_score', 'business_score', 'total_battles', 'win_rate', 'elo_rating']]

        # Update UI when language changes
        # NOTE(review): category_select and category_filter each appear twice
        # in this outputs list (label update + choices update); confirm the
        # target Gradio version applies both updates to the same component.
        language_select.change(
            fn=update_language,
            inputs=[language_select],
            outputs=[current_lang]
        ).then(
            fn=update_ui_text,
            inputs=[current_lang],
            outputs=[
                header_html,
                leaderboard_title,
                category_select,
                custom_prompt_accordion,
                custom_prompt_input,
                new_battle_btn,
                model_a_label,
                model_b_label,
                vote_a_btn,
                vote_b_btn,
                category_filter,
                refresh_btn,
                category_select,
                category_filter
            ]
        )

        # Connect events with streaming
        new_battle_btn.click(
            fn=start_battle_stream,
            inputs=[category_select, custom_prompt_input, current_lang],
            outputs=[prompt_display, response_a, response_b, model_a_reveal, model_b_reveal, vote_result, battle_state]
        )

        vote_a_btn.click(
            fn=lambda s, l: process_vote("A", s, l),
            inputs=[battle_state, current_lang],
            outputs=[model_a_reveal, model_b_reveal, vote_result]
        )

        vote_b_btn.click(
            fn=lambda s, l: process_vote("B", s, l),
            inputs=[battle_state, current_lang],
            outputs=[model_a_reveal, model_b_reveal, vote_result]
        )

        category_filter.change(
            fn=update_leaderboard,
            inputs=[category_filter],
            outputs=[leaderboard_display]
        )

        refresh_btn.click(
            fn=update_leaderboard,
            inputs=[category_filter],
            outputs=[leaderboard_display]
        )

        # Initialize on load
        app.load(
            fn=lambda: update_leaderboard("overall"),
            outputs=[leaderboard_display]
        )

    return app
1697
+
1698
# ==================== Main ====================
if __name__ == "__main__":
    # Startup banner with environment-setup hints.
    banner = "=" * 50
    print(banner)
    print("🚀 GPT-5 vs jetXA Creativity Battle Arena")
    print(banner)
    print("\n📋 Environment Setup:")
    print("1. Set OPENAI_API_KEY for GPT-5")
    print("2. jetXA will use 'aiqtech/tests' by default")
    print("3. Set HF_TOKEN for persistent data storage (REQUIRED)")
    print("4. Optional: Set HF_DATASET_NAME (default: gpt5_vs_jetxa_arena)")
    print("\n⚠️ Without HF_TOKEN, data will be lost on server restart!")
    print("\n" + banner + "\n")

    # Warn loudly when persistence is unavailable.
    if not os.getenv("HF_TOKEN"):
        print("⚠️ WARNING: HF_TOKEN not set - data will not persist!")
        print("Set it with: export HF_TOKEN='your_token_here'")
        print("")

    app = create_app()
    app.launch()