Creative-Arena-Leaderboard

Running

App Files Files Community

openfree committed on Aug 22

Commit

08e7201

verified ·

1 Parent(s): 0c006f3

Update app-backup.py

Browse files

Files changed (1) hide show

app-backup.py +192 -67

app-backup.py CHANGED Viewed

@@ -418,31 +418,61 @@ class ArenaDatabase:
             cursor.execute('UPDATE model_stats SET elo_rating = ? WHERE model_name = ?', (new_elo_b, model_b))
     def _sync_to_hf(self):
-        """Sync local database to Hugging Face - battles와 stats를 분리하지 않고 battles만 저장"""
         if not self.use_hf:
             return
         try:
             conn = sqlite3.connect(self.db_path)
-            # Export battles only - stats는 battles에서 재계산 가능
             battles_df = pd.read_sql_query("SELECT * FROM battles", conn)
             if len(battles_df) > 0:
                 battles_dataset = Dataset.from_pandas(battles_df)
-                battles_dataset.push_to_hub(
-                    self.hf_repo_id,
-                    split="train",
-                    token=self.hf_token,
-                    private=True
-                )
-                print(f"✅ Pushed {len(battles_df)} battles to HF")
             conn.close()
         except Exception as e:
-            print(f"⚠️ Warning: Could not sync to HF: {e}")
     def init_database(self):
         """Initialize SQLite database - ONLY called when no existing data"""
@@ -502,11 +532,21 @@ class ArenaDatabase:
         conn.close()
     def save_battle(self, battle: Battle):
-        """Save battle result - 중복 방지 추가"""
         conn = sqlite3.connect(self.db_path)
         cursor = conn.cursor()
         try:
             cursor.execute('''
                 INSERT OR REPLACE INTO battles VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
             ''', (
@@ -529,21 +569,23 @@ class ArenaDatabase:
                 winner = battle.winner
                 loser = battle.model_b if winner == battle.model_a else battle.model_a
-                # 이전 투표가 있었는지 확인
-                cursor.execute('SELECT winner FROM battles WHERE id = ?', (battle.id,))
-                previous = cursor.fetchone()
-                if not previous or previous[0] != battle.winner:
-                    # 새 투표이거나 변경된 경우만 업데이트
                     cursor.execute('''
                         UPDATE model_stats
-                        SET total_battles = total_battles + 1, wins = wins + 1
                         WHERE model_name = ?
                     ''', (winner,))
                     cursor.execute('''
                         UPDATE model_stats
-                        SET total_battles = total_battles + 1, losses = losses + 1
                         WHERE model_name = ?
                     ''', (loser,))
@@ -551,20 +593,22 @@ class ArenaDatabase:
                     self._update_category_scores(cursor, winner, battle.category, True)
                     self._update_category_scores(cursor, loser, battle.category, False)
-                    # Update ELO
                     self._update_elo_ratings(cursor, winner, loser)
             conn.commit()
         except Exception as e:
-            print(f"Error saving battle: {e}")
             conn.rollback()
         finally:
             conn.close()
         # Sync to Hugging Face after saving
         self._sync_to_hf()
     def _update_category_scores(self, cursor, model, category, is_winner):
         """Update category-specific scores"""
@@ -658,6 +702,41 @@ class ArenaDatabase:
         df.insert(0, 'rank', range(1, len(df) + 1))
         return df
 # ==================== Fixed LLM Interface with 4 Models ====================
 class LLMInterface:
@@ -851,7 +930,7 @@ class LLMInterface:
                 top_k=40,
             )
-        # 전체 응답을 수집
             full_response = ""
             for chunk in self.gemini_client.models.generate_content_stream(
@@ -874,8 +953,6 @@ class LLMInterface:
             fallback = self._generate_fallback("Gemini-2.5-Pro", prompt, "en")
             yield fallback
     def _stream_claude(self, prompt: str) -> Generator[str, None, None]:
         """Stream Claude Opus 4.1 response"""
         if not self.claude_client:
@@ -1283,15 +1360,28 @@ class CreativityArena:
         }
     def vote(self, choice: str, voter_id: str = None):
-        """Process vote"""
         if not self.current_battle:
             return {"error": "No active battle"}
         self.current_battle.winner = self.current_battle.model_a if choice == "A" else self.current_battle.model_b
-        self.current_battle.voter_id = voter_id or "anonymous"
         self.db.save_battle(self.current_battle)
         return {
             "model_a": self.current_battle.model_a,
             "model_b": self.current_battle.model_b,
@@ -1302,7 +1392,17 @@ class CreativityArena:
         """Get leaderboard from database"""
         return self.db.get_leaderboard(category)
-# ==================== Gradio Interface ====================
 # ==================== Gradio Interface ====================
 def create_app():
     arena = CreativityArena()
@@ -1679,16 +1779,29 @@ def create_app():
         def process_vote(choice, state, lang):
             if not state or 'battle' not in state:
                 return (
                     gr.update(),
                     gr.update(),
                     "Error: No active battle"
                 )
-            # Update the current battle from state
-            arena.current_battle = state['battle']
             result = arena.vote(choice)
             ui = UI_TEXT[lang]
             winner_emoji = "🏆" if result['winner'] == result['model_a'] else "🥈"
@@ -1705,6 +1818,9 @@ def create_app():
 {ui['elo_updated']}
 """
             return (
                 gr.update(value=result['model_a'], visible=True),
                 gr.update(value=result['model_b'], visible=True),
@@ -1785,36 +1901,45 @@ def create_app():
 # ==================== Main ====================
 if __name__ == "__main__":
-   print("="*50)
-   print("🚀 AI Models Creativity Battle Arena")
-   print("="*50)
-   print("\n📋 Environment Setup:")
-   print("1. Set OPENAI_API_KEY for GPT-5")
-   print("2. Set GEMINI_API_KEY for Gemini 2.5 Pro")
-   print("3. Set ANTHROPIC_API_KEY for Claude Opus 4.1")
-   print("4. jetXA will use 'aiqtech/tests' by default")
-   print("5. Set HF_TOKEN for persistent data storage (REQUIRED)")
-   print("6. Optional: Set HF_DATASET_NAME (default: ai_models_arena)")
-   print("\n⚠️  Without HF_TOKEN, data will be lost on server restart!")
-   print("\n" + "="*50 + "\n")
-   # Check for required API keys
-   if not os.getenv("HF_TOKEN"):
-       print("⚠️  WARNING: HF_TOKEN not set - data will not persist!")
-       print("Set it with: export HF_TOKEN='your_token_here'")
-       print("")
-   if not os.getenv("OPENAI_API_KEY"):
-       print("⚠️  GPT-5: No API key found - will use fallback responses")
-   if not os.getenv("GEMINI_API_KEY"):
-       print("⚠️  Gemini: No API key found - will use fallback responses")
-   if not os.getenv("ANTHROPIC_API_KEY"):
-       print("⚠️  Claude: No API key found - will use fallback responses")
-   print("\n🎯 Starting arena with 4 models: GPT-5, jetXA, Gemini 2.5 Pro, Claude Opus 4.1")
-   print("="*50 + "\n")
-   app = create_app()
-   app.launch()

             cursor.execute('UPDATE model_stats SET elo_rating = ? WHERE model_name = ?', (new_elo_b, model_b))
     def _sync_to_hf(self):
         """Push the local ``battles`` and ``model_stats`` tables to the Hugging Face Hub.

         Does nothing (beyond printing a notice) when ``self.use_hf`` is falsy.
         Otherwise every row of ``battles`` is uploaded to ``self.hf_repo_id``
         as split ``"train"`` (up to three attempts, two seconds apart) and
         every row of ``model_stats`` as split ``"stats"`` (single attempt).
         Empty tables are skipped.

         The method is best-effort: every failure is printed and swallowed so
         a Hub outage never breaks the caller. Returns ``None``.
         """
         if not self.use_hf:
             print("HF sync disabled")
             return
         try:
             conn = sqlite3.connect(self.db_path)
             try:
                 # Export battles
                 battles_df = pd.read_sql_query("SELECT * FROM battles", conn)
                 if len(battles_df) > 0:
                     print(f"📤 Syncing {len(battles_df)} battles to HF...")
                     battles_dataset = Dataset.from_pandas(battles_df)
                     # Retry the upload a few times before giving up
                     max_retries = 3
                     for attempt in range(max_retries):
                         try:
                             battles_dataset.push_to_hub(
                                 self.hf_repo_id,
                                 split="train",
                                 token=self.hf_token,
                                 private=True
                             )
                             print(f"✅ Successfully pushed {len(battles_df)} battles to HF")
                             break
                         except Exception as push_error:
                             if attempt < max_retries - 1:
                                 print(f"⚠️ Push attempt {attempt + 1} failed, retrying...")
                                 time.sleep(2)  # Wait before retry
                             else:
                                 print(f"❌ Failed to push to HF after {max_retries} attempts: {push_error}")
                 # Also sync model stats for backup
                 stats_df = pd.read_sql_query("SELECT * FROM model_stats", conn)
                 if len(stats_df) > 0:
                     try:
                         stats_dataset = Dataset.from_pandas(stats_df)
                         stats_dataset.push_to_hub(
                             self.hf_repo_id,
                             split="stats",
                             token=self.hf_token,
                             private=True
                         )
                         print("✅ Model stats synced to HF")
                     except Exception as e:
                         print(f"⚠️ Could not sync stats: {e}")
             finally:
                 # Release the SQLite handle on every path; previously an
                 # exception while reading or converting leaked the connection.
                 conn.close()
         except Exception as e:
             print(f"❌ Critical error in HF sync: {e}")
     def init_database(self):
         """Initialize SQLite database - ONLY called when no existing data"""
         conn.close()
     def save_battle(self, battle: Battle):
+        """Save battle result with proper duplicate prevention and sync"""
         conn = sqlite3.connect(self.db_path)
         cursor = conn.cursor()
         try:
+            # First check if this battle already exists
+            cursor.execute('SELECT id, winner FROM battles WHERE id = ?', (battle.id,))
+            existing = cursor.fetchone()
+            if existing and existing[1]:
+                print(f"⚠️ Battle {battle.id} already has a winner: {existing[1]}")
+                conn.close()
+                return  # Don't update if already voted
+            # Insert or update the battle
             cursor.execute('''
                 INSERT OR REPLACE INTO battles VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
             ''', (
                 winner = battle.winner
                 loser = battle.model_b if winner == battle.model_a else battle.model_a
+                # Only update stats if this is a new vote
+                if not existing or not existing[1]:
+                    print(f"📊 Updating stats: {winner} wins, {loser} loses")
+                    # Update winner stats
                     cursor.execute('''
                         UPDATE model_stats
+                        SET total_battles = total_battles + 1,
+                            wins = wins + 1
                         WHERE model_name = ?
                     ''', (winner,))
+                    # Update loser stats
                     cursor.execute('''
                         UPDATE model_stats
+                        SET total_battles = total_battles + 1,
+                            losses = losses + 1
                         WHERE model_name = ?
                     ''', (loser,))
                     self._update_category_scores(cursor, winner, battle.category, True)
                     self._update_category_scores(cursor, loser, battle.category, False)
+                    # Update ELO ratings
                     self._update_elo_ratings(cursor, winner, loser)
+                    print(f"✅ Stats updated for battle {battle.id}")
             conn.commit()
+            print(f"💾 Battle {battle.id} saved to local database")
         except Exception as e:
+            print(f"❌ Error saving battle: {e}")
             conn.rollback()
         finally:
             conn.close()
         # Sync to Hugging Face after saving
         self._sync_to_hf()
     def _update_category_scores(self, cursor, model, category, is_winner):
         """Update category-specific scores"""
         df.insert(0, 'rank', range(1, len(df) + 1))
         return df
+    def debug_database_state(self):
+        """Debug method to check current database state"""
+        conn = sqlite3.connect(self.db_path)
+        cursor = conn.cursor()
+        # Check battles count
+        cursor.execute("SELECT COUNT(*) FROM battles")
+        total_battles = cursor.fetchone()[0]
+        cursor.execute("SELECT COUNT(*) FROM battles WHERE winner IS NOT NULL")
+        voted_battles = cursor.fetchone()[0]
+        # Check model stats
+        cursor.execute("SELECT * FROM model_stats ORDER BY elo_rating DESC")
+        stats = cursor.fetchall()
+        conn.close()
+        print("\n" + "="*50)
+        print("📊 DATABASE STATE DEBUG")
+        print("="*50)
+        print(f"Total battles: {total_battles}")
+        print(f"Voted battles: {voted_battles}")
+        print("\nModel Stats:")
+        print("-"*50)
+        for stat in stats:
+            print(f"{stat[0]:20} | Battles: {stat[5]:3} | Wins: {stat[6]:3} | ELO: {stat[8]:4}")
+        print("="*50 + "\n")
+        return {
+            "total_battles": total_battles,
+            "voted_battles": voted_battles,
+            "model_stats": stats
+        }
 # ==================== Fixed LLM Interface with 4 Models ====================
 class LLMInterface:
                 top_k=40,
             )
+            # Collect the full response
             full_response = ""
             for chunk in self.gemini_client.models.generate_content_stream(
             fallback = self._generate_fallback("Gemini-2.5-Pro", prompt, "en")
             yield fallback
     def _stream_claude(self, prompt: str) -> Generator[str, None, None]:
         """Stream Claude Opus 4.1 response"""
         if not self.claude_client:
         }
     def vote(self, choice: str, voter_id: str = None):
+        """Process vote with better error handling"""
         if not self.current_battle:
+            print("❌ No active battle to vote on")
             return {"error": "No active battle"}
+        # Ensure we have the complete battle data
+        if not self.current_battle.response_a or not self.current_battle.response_b:
+            print("⚠️ Battle responses not complete")
+            return {"error": "Battle responses not complete"}
+        # Set the winner
         self.current_battle.winner = self.current_battle.model_a if choice == "A" else self.current_battle.model_b
+        self.current_battle.voter_id = voter_id or f"anonymous_{datetime.now().timestamp()}"
+        print(f"🗳️ Vote recorded: {choice} -> {self.current_battle.winner}")
+        # Save to database
         self.db.save_battle(self.current_battle)
+        # Force immediate sync to HF
+        self.db._sync_to_hf()
         return {
             "model_a": self.current_battle.model_a,
             "model_b": self.current_battle.model_b,
         """Get leaderboard from database"""
         return self.db.get_leaderboard(category)
+# ==================== Periodic Sync Function ====================
+def periodic_sync(arena):
+    """Periodically sync to HF every 30 seconds"""
+    while True:
+        time.sleep(30)
+        try:
+            arena.db._sync_to_hf()
+            print(f"⏰ Periodic sync completed at {datetime.now()}")
+        except Exception as e:
+            print(f"⏰ Periodic sync failed: {e}")
 # ==================== Gradio Interface ====================
 def create_app():
     arena = CreativityArena()
         def process_vote(choice, state, lang):
             if not state or 'battle' not in state:
+                print("❌ No battle in state")
                 return (
                     gr.update(),
                     gr.update(),
                     "Error: No active battle"
                 )
+            # Ensure the battle object is properly set
+            battle_obj = state['battle']
+            arena.current_battle = battle_obj
+            print(f"🎯 Processing vote: Choice={choice}, Battle ID={battle_obj.id}")
+            # Process the vote
             result = arena.vote(choice)
+            if "error" in result:
+                return (
+                    gr.update(),
+                    gr.update(),
+                    f"Error: {result['error']}"
+                )
             ui = UI_TEXT[lang]
             winner_emoji = "🏆" if result['winner'] == result['model_a'] else "🥈"
 {ui['elo_updated']}
 """
+            # Debug: Check database state after vote
+            arena.db.debug_database_state()
             return (
                 gr.update(value=result['model_a'], visible=True),
                 gr.update(value=result['model_b'], visible=True),
 # ==================== Main ====================
 if __name__ == "__main__":
     # Startup banner and environment checklist
     rule = "=" * 50
     for line in (
         rule,
         "🚀 AI Models Creativity Battle Arena",
         rule,
         "\n📋 Environment Setup:",
         "1. Set OPENAI_API_KEY for GPT-5",
         "2. Set GEMINI_API_KEY for Gemini 2.5 Pro",
         "3. Set ANTHROPIC_API_KEY for Claude Opus 4.1",
         "4. jetXA will use 'aiqtech/tests' by default",
         "5. Set HF_TOKEN for persistent data storage (REQUIRED)",
         "6. Optional: Set HF_DATASET_NAME (default: ai_models_arena)",
         "\n⚠️  Without HF_TOKEN, data will be lost on server restart!",
         "\n" + rule + "\n",
     ):
         print(line)
     # Warn about every credential that is missing from the environment
     if not os.getenv("HF_TOKEN"):
         print("⚠️  WARNING: HF_TOKEN not set - data will not persist!")
         print("Set it with: export HF_TOKEN='your_token_here'")
         print("")
     fallback_warnings = (
         ("OPENAI_API_KEY", "⚠️  GPT-5: No API key found - will use fallback responses"),
         ("GEMINI_API_KEY", "⚠️  Gemini: No API key found - will use fallback responses"),
         ("ANTHROPIC_API_KEY", "⚠️  Claude: No API key found - will use fallback responses"),
     )
     for env_name, warning in fallback_warnings:
         if not os.getenv(env_name):
             print(warning)
     print("\n🎯 Starting arena with 4 models: GPT-5, jetXA, Gemini 2.5 Pro, Claude Opus 4.1")
     print(rule + "\n")
     # Build the Gradio app
     app = create_app()
     # Start the background sync thread (daemon, so it never blocks shutdown).
     # NOTE(review): this constructs a second CreativityArena, separate from
     # the one create_app() builds for the UI - confirm both point at the
     # same database before relying on this thread.
     arena = CreativityArena()
     sync_thread = threading.Thread(target=periodic_sync, args=(arena,), daemon=True)
     sync_thread.start()
     print("✅ Background sync thread started (every 30 seconds)")
     # Serve the UI
     app.launch()