Spaces:

opencompass
/

Compass_Academic_Leaderboard

Running

App Files Files Community

TracyMc committed on Dec 26, 2024

Commit

4224b43

1 Parent(s): e2eea98

update

Browse files

Files changed (1) hide show

app.py +56 -14

app.py CHANGED Viewed

@@ -3,8 +3,10 @@ import json
 import pandas as pd
 from collections import defaultdict
 import copy as cp
-from urllib.request import urlopen
 import re
 # Constants
 CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
@@ -22,11 +24,37 @@ GITHUB_REPO = 'https://github.com/open-compass/opencompass'
 GITHUB_RAW = 'https://raw.githubusercontent.com/open-compass/opencompass'
 GITHUB_BLOB = 'https://github.com/open-compass/opencompass/blob'
-# URL for the JSON data
-DATA_URL = "http://opencompass.oss-cn-shanghai.aliyuncs.com/assets/research-rank/research-data.24-12.20241205.json"
 # Markdown content
-MAIN_LEADERBOARD_TITLE = "# CompassAcademic Leaderboard"
 MAIN_LEADERBOARD_DESCRIPTION = """## Main Evaluation Results
 The CompassAcademic currently focuses on the comprehensive reasoning abilities of LLMs.
 - The datasets selected so far include General Knowledge Reasoning (MMLU-Pro/GPQA-Diamond), Logical Reasoning (BBH), Mathematical Reasoning (MATH-500, AIME), Code Completion (LiveCodeBench, HumanEval), and Instruction Following (IFEval).
@@ -34,7 +62,6 @@ The CompassAcademic currently focuses on the comprehensive reasoning abilities o
 - Prompts and reproduction scripts can be found in [**OpenCompass**: A Toolkit for Evaluation of LLMs](https://github.com/open-compass/opencompass)🏆.
 """
 def fix_image_urls(content):
     """Fix image URLs in markdown content."""
     # Handle the specific logo.svg path
@@ -57,8 +84,8 @@ MODEL_SIZE = ['<10B', '10B-70B', '>70B', 'Unknown']
 MODEL_TYPE = ['API', 'OpenSource']
-def load_data():
-    response = urlopen(DATA_URL)
     data = json.loads(response.read().decode('utf-8'))
     return data
@@ -141,7 +168,6 @@ def filter_table(df, size_ranges, model_types):
                 type_mask |= filtered_df['OpenSource'] == 'Yes'
         filtered_df = filtered_df[type_mask]
-    # 直接返回过滤后的 DataFrame
     return filtered_df
@@ -172,11 +198,13 @@ def calculate_column_widths(df):
 def create_interface():
-    data = load_data()
     df = build_main_table(data)
     with gr.Blocks() as demo:
-        gr.Markdown(MAIN_LEADERBOARD_TITLE)
         with gr.Tabs() as tabs:
             with gr.TabItem("🏅 Main Leaderboard", elem_id='main'):
@@ -206,6 +234,22 @@ def create_interface():
                         column_widths=calculate_column_widths(df),
                     )
                 def update_table(size_ranges, model_types):
                     filtered_df = filter_table(df, size_ranges, model_types)
                     return filtered_df.sort_values(
@@ -224,10 +268,8 @@ def create_interface():
                     outputs=table,
                 )
-            # with gr.TabItem("🔍 About", elem_id='about'):
-            #     readme_content = urlopen(OPENCOMPASS_README).read().decode()
-            #     fixed_content = fix_image_urls(readme_content)
-            #     gr.Markdown(fixed_content)
         with gr.Row():
             with gr.Accordion("Citation", open=False):

 import pandas as pd
 from collections import defaultdict
 import copy as cp
+from urllib.request import urlopen, URLError
 import re
+from datetime import datetime
+import time
 # Constants
 CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
 GITHUB_RAW = 'https://raw.githubusercontent.com/open-compass/opencompass'
 GITHUB_BLOB = 'https://github.com/open-compass/opencompass/blob'
+# Base URL for the JSON data
+DATA_URL_BASE = "http://opencompass.oss-cn-shanghai.aliyuncs.com/assets/research-rank/research-data.REALTIME."
+def find_latest_data_url():
+    """Find the most recent available data URL by probing dated URLs.
+
+    Starting from today and stepping back one day at a time for up to 365
+    days, builds ``{DATA_URL_BASE}{YYYYMMDD}.json`` and tries to open it.
+
+    Returns:
+        A ``(url, date_str)`` tuple for the first URL that opens, where
+        ``date_str`` is formatted as ``YYYYMMDD``; ``(None, None)`` when
+        none of the probed URLs could be opened.
+    """
+    # Local import: the file imports only `datetime` from the datetime module.
+    from datetime import timedelta
+
+    today = datetime.now()
+    # Try last 365 days
+    for i in range(365):
+        # Use timedelta arithmetic so the look-back crosses month and year
+        # boundaries; subtracting from the day-of-month via replace() raises
+        # ValueError as soon as the result is below 1.
+        date_str = (today - timedelta(days=i)).strftime("%Y%m%d")
+        url = f"{DATA_URL_BASE}{date_str}.json"
+        try:
+            # Only reachability matters here; close the response immediately.
+            with urlopen(url):
+                pass
+        except URLError:
+            continue
+        return url, date_str
+    # If no valid URL found, return None
+    return None, None
+def get_latest_data():
+    """Return the latest data URL together with its update date.
+
+    Returns:
+        A ``(data_url, update_time)`` tuple, where ``update_time`` is the
+        ``YYYYMMDD`` stamp from find_latest_data_url() reformatted as
+        ``YYYY-MM-DD``.
+
+    Raises:
+        Exception: If find_latest_data_url() did not return a URL.
+    """
+    url, stamp = find_latest_data_url()
+    if url:
+        parsed = datetime.strptime(stamp, "%Y%m%d")
+        return url, parsed.strftime("%Y-%m-%d")
+    raise Exception("Could not find valid data URL")
 # Markdown content
+def get_leaderboard_title(update_time):
+    """Build the markdown H1 title showing when the data was last updated."""
+    heading = "# CompassAcademic Leaderboard"
+    return heading + " (Last Updated: " + f"{update_time}" + ")"
 MAIN_LEADERBOARD_DESCRIPTION = """## Main Evaluation Results
 The CompassAcademic currently focuses on the comprehensive reasoning abilities of LLMs.
 - The datasets selected so far include General Knowledge Reasoning (MMLU-Pro/GPQA-Diamond), Logical Reasoning (BBH), Mathematical Reasoning (MATH-500, AIME), Code Completion (LiveCodeBench, HumanEval), and Instruction Following (IFEval).
 - Prompts and reproduction scripts can be found in [**OpenCompass**: A Toolkit for Evaluation of LLMs](https://github.com/open-compass/opencompass)🏆.
 """
 def fix_image_urls(content):
     """Fix image URLs in markdown content."""
     # Handle the specific logo.svg path
 MODEL_TYPE = ['API', 'OpenSource']
+def load_data(data_url):
+    """Download and parse the JSON document at ``data_url``.
+
+    Args:
+        data_url: URL to open with ``urlopen``.
+
+    Returns:
+        The object produced by ``json.loads`` on the UTF-8 decoded body.
+    """
+    # The context manager closes the response even if reading or parsing fails.
+    with urlopen(data_url) as response:
+        data = json.loads(response.read().decode('utf-8'))
+    return data
                 type_mask |= filtered_df['OpenSource'] == 'Yes'
         filtered_df = filtered_df[type_mask]
     return filtered_df
 def create_interface():
+    data_url, update_time = get_latest_data()
+    data = load_data(data_url)
     df = build_main_table(data)
+    title = gr.Markdown(get_leaderboard_title(update_time))
     with gr.Blocks() as demo:
+        title_comp = gr.Markdown(get_leaderboard_title(update_time))
         with gr.Tabs() as tabs:
             with gr.TabItem("🏅 Main Leaderboard", elem_id='main'):
                         column_widths=calculate_column_widths(df),
                     )
+                def update_data():
+                    """Periodically check for new data and update the interface"""
+                    while True:
+                        time.sleep(300)  # Check every 5 minutes
+                        try:
+                            new_data_url, new_update_time = get_latest_data()
+                            if new_data_url != data_url:
+                                new_data = load_data(new_data_url)
+                                new_df = build_main_table(new_data)
+                                filtered_df = filter_table(new_df, size_filter.value, type_filter.value)
+                                title_comp.value = get_leaderboard_title(new_update_time)
+                                table.value = filtered_df.sort_values("Average Score", ascending=False)
+                        except Exception as e:
+                            print(f"Error updating data: {e}")
+                            continue
                 def update_table(size_ranges, model_types):
                     filtered_df = filter_table(df, size_ranges, model_types)
                     return filtered_df.sort_values(
                     outputs=table,
                 )
+                # Set up periodic data update
+                demo.load(update_data)
         with gr.Row():
             with gr.Accordion("Citation", open=False):