update the code based on data format change

- .gitattributes +1 -0
- main.py +8 -76
- src/app.py +51 -12
- src/components/filters.py +192 -36
- src/components/visualizations.py +236 -53
- src/services/firebase.py +85 -42
 
    	
.gitattributes
CHANGED

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+src/static/images/Bench.gif filter=lfs diff=lfs merge=lfs -text
    	
main.py
CHANGED

@@ -1,79 +1,11 @@
-from src.components.filters import render_table_filters, render_plot_filters
-from src.components.visualizations import (
-    render_performance_plots,
-    render_leaderboard_table,
-)
-from src.services.firebase import fetch_leaderboard_data
-
-# Configure the page
-st.set_page_config(
-    page_title="AI-Phone Leaderboard",
-    page_icon="src/static/images/favicon.png",
-    layout="wide",
-    initial_sidebar_state="expanded",
-)
-
-# Apply custom CSS
-st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
-
-async def main():
-    # Render header
-    render_header()
-
-    # Fetch initial data
-    full_df = await fetch_leaderboard_data()
-    if full_df.empty:
-        st.info("No benchmark data available yet!")
-        return
-
-    # Get unique values for filters
-    models = sorted(full_df["Model"].unique())
-    benchmarks = sorted(full_df["Benchmark"].unique())
-    platforms = sorted(full_df["Platform"].unique())
-    devices = sorted(full_df["Normalized Device ID"].unique())
-
-    # Render table filters and get selections
-    (
-        selected_model_table,
-        selected_benchmark_table,
-        selected_platform_table,
-        selected_device_table,
-    ) = render_table_filters(models, benchmarks, platforms, devices)
-
-        table_df = table_df[table_df["Model"] == selected_model_table]
-    if selected_benchmark_table != "All":
-        table_df = table_df[table_df["Benchmark"] == selected_benchmark_table]
-    if selected_platform_table != "All":
-        table_df = table_df[table_df["Platform"] == selected_platform_table]
-    if selected_device_table != "All":
-        table_df = table_df[table_df["Normalized Device ID"] == selected_device_table]
-
-    # Render leaderboard table
-    render_leaderboard_table(table_df)
-
-    # Performance plots section
-    st.subheader("Performance Comparison")
-
-    # Render plot filters and get selections
-    selected_model_plot, selected_benchmark_plot = render_plot_filters(
-        models, benchmarks
-    )
-
-    # Filter data for plots
-    plot_df = full_df[
-        (full_df["Model"] == selected_model_plot)
-        & (full_df["Benchmark"] == selected_benchmark_plot)
-    ]
-
-    # Render performance plots
-    render_performance_plots(plot_df, selected_model_plot)
+"""
+Main module for the frontend application.
+This file serves as a module init file.
+"""
+
+import asyncio
+import streamlit as st
+from src.app import main

 if __name__ == "__main__":
-    asyncio.run(main())
+    asyncio.run(main())
    	
src/app.py
CHANGED

@@ -1,15 +1,54 @@
 import asyncio
+import streamlit as st
 import pandas as pd
+from typing import Optional, List, Set
+
+from .components.filters import render_table_filters, render_plot_filters
+from .components.visualizations import (
+    render_leaderboard_table,
+    render_performance_plots,
+)
+from .services.firebase import fetch_leaderboard_data
+
+
+def get_unique_values(df: pd.DataFrame) -> tuple[List[str], List[str], List[str]]:
+    """Get unique values for filters"""
+    models = sorted(df["Model ID"].unique().tolist())
+    platforms = sorted(df["Platform"].unique().tolist())
+    devices = sorted(df["Device"].unique().tolist())
+    return models, platforms, devices
+
+
+async def main():
+    """Main application entry point"""
+    st.set_page_config(
+        page_title="AI Phone Benchmark Leaderboard",
+        page_icon="📱",
+        layout="wide",
+    )
+
+    # Fetch initial data
+    df = await fetch_leaderboard_data()
+
+    if df.empty:
+        st.error("No data available. Please check your connection and try again.")
+        return
+
+    # Get unique values for filters
+    models, platforms, devices = get_unique_values(df)
+
+    # Render table filters in sidebar
+    table_filters = render_table_filters(models, platforms, devices)
+
+    # Render the main leaderboard table
+    st.title("📱 AI Phone Benchmark Leaderboard")
+    render_leaderboard_table(df, table_filters)
+
+    # Render plot section
+    st.title("📊 Performance Comparison")
+    plot_filters = render_plot_filters(models, platforms, devices)
+    render_performance_plots(df, plot_filters)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
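To make the new data format concrete: the column names referenced by get_unique_values() and by the filter and visualization components below imply a flat, per-result record layout roughly like this sketch. It is inferred from the diff, not part of the commit; the real schema comes from fetch_leaderboard_data() in src/services/firebase.py, and every value shown here is a placeholder.

import pandas as pd

# Hypothetical single benchmark record, assuming the columns referenced in this
# commit (get_unique_values, the filters, filter_dataframe). Values are placeholders.
df = pd.DataFrame([{
    "Model": "Llama 3.2 1B",                          # assumed display name
    "Model ID": "llama-3.2-1b-q4_0",                  # assumed identifier string
    "Model Size": 1.24,
    "Device": "Pixel 8 Pro",                          # assumed device label
    "Platform": "Android",
    "Benchmark": "llama-bench (pp: 512, tg: 128)",    # assumed label format
    "Prompt Processing": 42.0,
    "Token Generation": 11.5,
    "CPU Cores": 8,
    "Total Memory (GB)": 12.0,
    "Memory Usage (%)": 63.0,
    "initSettings": {"n_threads": 4, "flash_attn": False,
                     "cache_type_k": "f16", "cache_type_v": "f16"},
}])

# The helpers in src/app.py derive the filter options from exactly these columns.
models, platforms, devices = (
    sorted(df["Model ID"].unique().tolist()),
    sorted(df["Platform"].unique().tolist()),
    sorted(df["Device"].unique().tolist()),
)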
    	
src/components/filters.py
CHANGED

@@ -1,50 +1,206 @@
 import streamlit as st
-from typing import List, Tuple
+from typing import List, Tuple, Dict, Set
-            "Benchmark", ["All"] + list(benchmarks), key="table_benchmark"
-            "Device", ["All"] + list(devices), key="table_device"
+
+
+def render_grouping_options() -> List[str]:
+    """Render grouping options selector"""
+    available_groups = [
+        "Model ID",
+        "Device",
+        "Platform",
+        "n_threads",
+        "flash_attn",
+        "cache_type_k",
+        "cache_type_v",
+        "PP Value",
+        "TG Value",
+    ]
+
+    default_groups = ["Model ID", "Device", "Platform"]
+
+    selected_groups = st.multiselect(
+        "Group Results By",
+        options=available_groups,
+        default=default_groups,
+        help="Select columns to group the results by",
+    )
+
+    return selected_groups
+
+
+def render_column_visibility() -> Set[str]:
+    """Render column visibility selector"""
+    column_categories = {
+        "Device Info": [
+            "Device",
+            "Platform",
+            "CPU Cores",
+            "Total Memory (GB)",
+            "Memory Usage (%)",
+        ],
+        "Benchmark Info": [
+            "PP Value",
+            "TG Value",
+            "Prompt Processing",
+            "Token Generation",
+        ],
+        "Model Info": [
+            "Model",
+            "Model Size",
+            "Model ID",
+        ],
+        "Advanced": [
+            "n_threads",
+            "flash_attn",
+            "cache_type_k",
+            "cache_type_v",
+        ],
+    }
+
+    # Default visible columns
+    default_columns = {
+        "Device",
+        "Platform",
+        "Model",
+        "Model Size",
+        "Prompt Processing",
+        "Token Generation",
+    }
+
+    with st.expander("Column Visibility", expanded=False):
+        selected_columns = set()
+        for category, columns in column_categories.items():
+            st.subheader(category)
+            for col in columns:
+                if st.checkbox(col, value=col in default_columns):
+                    selected_columns.add(col)
+
+    return selected_columns
+
+
+def render_benchmark_filters() -> Dict:
+    """Render advanced benchmark configuration filters"""
+    with st.expander("Benchmark Configuration", expanded=False):
+        use_custom_config = st.checkbox("Use Custom PP/TG Values", value=False)
+
+        if use_custom_config:
+            col1, col2 = st.columns(2)
+            with col1:
+                pp_min = st.number_input("Min PP", value=0, step=32)
+                pp_max = st.number_input("Max PP", value=1024, step=32)
+            with col2:
+                tg_min = st.number_input("Min TG", value=0, step=32)
+                tg_max = st.number_input("Max TG", value=512, step=32)
+        else:
+            pp_min = pp_max = tg_min = tg_max = None
+
+        return {
+            "use_custom_config": use_custom_config,
+            "pp_range": (pp_min, pp_max),
+            "tg_range": (tg_min, tg_max),
+        }
+
+
+def render_advanced_filters() -> Dict:
+    """Render advanced settings filters"""
+    with st.expander("Advanced Settings", expanded=False):
+        col1, col2 = st.columns(2)
+
+        with col1:
+            n_threads = st.multiselect(
+                "Number of Threads", options=[1, 2, 4, 8, 16], default=None
             )
+            flash_attn = st.multiselect(
+                "Flash Attention", options=[True, False], default=None
             )
+
+        with col2:
+            cache_type = st.multiselect(
+                "Cache Type", options=["f16", "f32"], default=None
             )
+            memory_usage = st.slider(
+                "Max Memory Usage (%)", min_value=0, max_value=100, value=100
             )
+
+        return {
+            "n_threads": n_threads,
+            "flash_attn": flash_attn,
+            "cache_type": cache_type,
+            "max_memory_usage": memory_usage,
+        }
+

 def render_plot_filters(
-    models: List[str],
-) -> Tuple[str, str]:
+    models: List[str], platforms: List[str], devices: List[str]
+) -> Dict:
     """Render and handle plot filters"""
     plot_filters = st.container()
     with plot_filters:
-        p1, p2 = st.columns(
+        p1, p2, p3 = st.columns(3)
         with p1:
-            selected_model = st.selectbox(
-                "Model for Comparison", models, key="plot_model"
-            )
+            selected_model = st.selectbox("Model for Plot", models, key="plot_model")
         with p2:
+            selected_platform = st.selectbox(
+                "Platform for Plot", ["All"] + list(platforms), key="plot_platform"
             )
+        with p3:
+            selected_device = st.selectbox(
+                "Device for Plot", ["All"] + list(devices), key="plot_device"
+            )
+
+    # Use the same benchmark and advanced filters as the table
+    benchmark_config = render_benchmark_filters()
+    advanced_settings = render_advanced_filters()
+
+    return {
+        "basic_filters": {
+            "model": selected_model,
+            "platform": selected_platform,
+            "device": selected_device,
+        },
+        "benchmark_config": benchmark_config,
+        "advanced_settings": advanced_settings,
+    }
+
+
+def render_table_filters(
+    models: List[str], platforms: List[str], devices: List[str]
+) -> Dict:
+    """Render and handle all table filters"""
+    st.sidebar.title("Filters")
+
+    # Basic filters
+    selected_model = st.sidebar.selectbox(
+        "Model", ["All"] + list(models), key="table_model"
+    )
+    selected_platform = st.sidebar.selectbox(
+        "Platform", ["All"] + list(platforms), key="table_platform"
+    )
+    selected_device = st.sidebar.selectbox(
+        "Device", ["All"] + list(devices), key="table_device"
+    )
+
+    # Grouping options
+    st.sidebar.title("Display Options")
+    grouping = render_grouping_options()
+
+    # Column visibility
+    visible_columns = render_column_visibility()
+
+    # Benchmark configuration
+    benchmark_config = render_benchmark_filters()
+
+    # Advanced settings
+    advanced_settings = render_advanced_filters()
+
+    return {
+        "basic_filters": {
+            "model": selected_model,
+            "platform": selected_platform,
+            "device": selected_device,
+        },
+        "grouping": grouping,
+        "visible_columns": visible_columns,
+        "benchmark_config": benchmark_config,
+        "advanced_settings": advanced_settings,
+    }
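For orientation (a sketch, not part of the diff): where the old filters returned a flat tuple of selectbox values, render_table_filters() now returns a nested dict, and downstream code such as filter_dataframe() in src/components/visualizations.py (below) reads it by key. A minimal illustration of that payload and of how a caller might apply just the basic selections, assuming a leaderboard DataFrame df with the columns sketched earlier:

# Illustrative payload mirroring the return value of render_table_filters();
# the keys come from the diff above, the values here are placeholders.
table_filters = {
    "basic_filters": {"model": "All", "platform": "Android", "device": "All"},
    "grouping": ["Model ID", "Device", "Platform"],
    "visible_columns": {"Device", "Platform", "Model", "Model Size",
                        "Prompt Processing", "Token Generation"},
    "benchmark_config": {"use_custom_config": False,
                         "pp_range": (None, None), "tg_range": (None, None)},
    "advanced_settings": {"n_threads": [], "flash_attn": [],
                          "cache_type": [], "max_memory_usage": 100},
}

# Applying just the basic selections to a DataFrame df (sketch only; the full
# logic for all filter groups lives in filter_dataframe below).
basic = table_filters["basic_filters"]
if basic["model"] != "All":
    df = df[df["Model ID"] == basic["model"]]
if basic["platform"] != "All":
    df = df[df["Platform"] == basic["platform"]]
if basic["device"] != "All":
    df = df[df["Device"] == basic["device"]]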
    	
        src/components/visualizations.py
    CHANGED
    
    | 
         @@ -1,7 +1,8 @@ 
     | 
|
| 1 | 
         
             
            import streamlit as st
         
     | 
| 2 | 
         
             
            import plotly.express as px
         
     | 
| 3 | 
         
             
            import pandas as pd
         
     | 
| 4 | 
         
            -
            from typing import Optional
         
     | 
| 
         | 
|
| 5 | 
         | 
| 6 | 
         
             
            def create_performance_plot(df: pd.DataFrame, metric: str, title: str):
         
     | 
| 7 | 
         
             
                """Create a performance comparison plot"""
         
     | 
| 
         @@ -27,93 +28,275 @@ def create_performance_plot(df: pd.DataFrame, metric: str, title: str): 
     | 
|
| 27 | 
         
             
                )
         
     | 
| 28 | 
         
             
                return fig
         
     | 
| 29 | 
         | 
| 30 | 
         
            -
             
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 31 | 
         
             
                """Render performance comparison plots"""
         
     | 
| 32 | 
         
            -
                if  
     | 
| 33 | 
         
            -
                    st.warning(
         
     | 
| 34 | 
         
            -
                        "No data available for the selected model and benchmark combination."
         
     | 
| 35 | 
         
            -
                    )
         
     | 
| 36 | 
         
             
                    return
         
     | 
| 37 | 
         | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 38 | 
         
             
                col1, col2 = st.columns(2)
         
     | 
| 39 | 
         
             
                with col1:
         
     | 
| 40 | 
         
             
                    fig1 = create_performance_plot(
         
     | 
| 41 | 
         
            -
                         
     | 
| 42 | 
         
             
                        "Prompt Processing",
         
     | 
| 43 | 
         
            -
                        f"Prompt Processing Time  
     | 
| 44 | 
         
             
                    )
         
     | 
| 45 | 
         
             
                    if fig1:
         
     | 
| 46 | 
         
             
                        st.plotly_chart(fig1, use_container_width=True)
         
     | 
| 47 | 
         | 
| 48 | 
         
             
                with col2:
         
     | 
| 49 | 
         
             
                    fig2 = create_performance_plot(
         
     | 
| 50 | 
         
            -
                         
     | 
| 51 | 
         
             
                        "Token Generation",
         
     | 
| 52 | 
         
            -
                        f"Token Generation Time  
     | 
| 53 | 
         
             
                    )
         
     | 
| 54 | 
         
             
                    if fig2:
         
     | 
| 55 | 
         
             
                        st.plotly_chart(fig2, use_container_width=True)
         
     | 
| 56 | 
         | 
| 57 | 
         
            -
             
     | 
| 
         | 
|
| 58 | 
         
             
                """Render the leaderboard table with grouped and formatted data"""
         
     | 
| 59 | 
         
            -
                 
     | 
| 60 | 
         
            -
             
     | 
| 61 | 
         
            -
                     
     | 
| 62 | 
         
            -
             
     | 
| 63 | 
         
            -
             
     | 
| 64 | 
         
            -
             
     | 
| 65 | 
         
            -
             
     | 
| 66 | 
         
            -
             
     | 
| 67 | 
         
            -
             
     | 
| 68 | 
         
            -
             
     | 
| 69 | 
         
            -
             
     | 
| 70 | 
         
            -
             
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 71 | 
         
             
                )
         
     | 
| 72 | 
         | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 73 | 
         
             
                # Flatten column names
         
     | 
| 74 | 
         
             
                grouped_df.columns = [
         
     | 
| 75 | 
         
             
                    col[0] if col[1] == "" else f"{col[0]} ({col[1]})" for col in grouped_df.columns
         
     | 
| 76 | 
         
             
                ]
         
     | 
| 77 | 
         | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 78 | 
         
             
                # Round numeric columns
         
     | 
| 79 | 
         
             
                numeric_cols = [
         
     | 
| 80 | 
         
            -
                     
     | 
| 81 | 
         
            -
                     
     | 
| 82 | 
         
            -
                     
     | 
| 83 | 
         
            -
                    "Token Generation (std)",
         
     | 
| 84 | 
         
             
                ]
         
     | 
| 85 | 
         
             
                grouped_df[numeric_cols] = grouped_df[numeric_cols].round(2)
         
     | 
| 86 | 
         | 
| 87 | 
         
             
                # Rename columns for display
         
     | 
| 88 | 
         
            -
                 
     | 
| 89 | 
         
            -
                     
     | 
| 90 | 
         
            -
             
     | 
| 91 | 
         
            -
             
     | 
| 92 | 
         
            -
             
     | 
| 93 | 
         
            -
             
     | 
| 94 | 
         
            -
             
     | 
| 95 | 
         
            -
                     
     | 
| 96 | 
         
            -
             
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 97 | 
         | 
| 98 | 
         
            -
                #  
     | 
| 99 | 
         
            -
                 
     | 
| 100 | 
         
            -
             
     | 
| 101 | 
         
            -
                     
     | 
| 102 | 
         
            -
                     
     | 
| 103 | 
         
            -
             
     | 
| 104 | 
         
            -
             
     | 
| 105 | 
         
            -
             
     | 
| 106 | 
         
            -
             
     | 
| 107 | 
         
            -
             
     | 
| 108 | 
         
            -
             
     | 
| 109 | 
         
            -
             
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 110 | 
         | 
| 111 | 
         
             
                # Display the filtered and grouped table
         
     | 
| 112 | 
         
             
                st.dataframe(
         
     | 
| 113 | 
         
            -
                    grouped_df[display_cols] 
     | 
| 114 | 
         
            -
                        ["Model Size", "Benchmark", "TG Avg (s)"],
         
     | 
| 115 | 
         
            -
                        ascending=[False, True, True],
         
     | 
| 116 | 
         
            -
                    ),
         
     | 
| 117 | 
         
             
                    use_container_width=True,
         
     | 
| 118 | 
         
             
                    height=400,
         
     | 
| 119 | 
         
            -
                ) 
     | 
| 
         | 
|
| 1 | 
         
             
            import streamlit as st
         
     | 
| 2 | 
         
             
            import plotly.express as px
         
     | 
| 3 | 
         
             
            import pandas as pd
         
     | 
| 4 | 
         
            +
            from typing import Optional, Dict, List, Set
         
     | 
| 5 | 
         
            +
             
     | 
| 6 | 
         | 
| 7 | 
         
             
            def create_performance_plot(df: pd.DataFrame, metric: str, title: str):
         
     | 
| 8 | 
         
             
                """Create a performance comparison plot"""
         
     | 
| 
         | 
|
| 28 | 
         
             
                )
         
     | 
| 29 | 
         
             
                return fig
         
     | 
| 30 | 
         | 
| 31 | 
         
            +
             
     | 
| 32 | 
         
            +
            def filter_dataframe(df: pd.DataFrame, filters: Dict) -> pd.DataFrame:
         
     | 
| 33 | 
         
            +
                """Apply all filters to the dataframe"""
         
     | 
| 34 | 
         
            +
                if df.empty:
         
     | 
| 35 | 
         
            +
                    return df
         
     | 
| 36 | 
         
            +
             
     | 
+    # Basic filters
+    basic_filters = filters["basic_filters"]
+    if basic_filters["model"] != "All":
+        df = df[df["Model ID"] == basic_filters["model"]]
+    if basic_filters["platform"] != "All":
+        df = df[df["Platform"] == basic_filters["platform"]]
+    if basic_filters["device"] != "All":
+        df = df[df["Device"] == basic_filters["device"]]
+
+    # Benchmark configuration filters
+    benchmark_config = filters["benchmark_config"]
+    if benchmark_config["use_custom_config"]:
+        pp_min, pp_max = benchmark_config["pp_range"]
+        tg_min, tg_max = benchmark_config["tg_range"]
+
+        # Extract PP/TG values if not already present
+        if "PP Value" not in df.columns:
+            df["PP Value"] = df["Benchmark"].apply(
+                lambda x: int(x.split("pp: ")[1].split(",")[0])
+            )
+        if "TG Value" not in df.columns:
+            df["TG Value"] = df["Benchmark"].apply(
+                lambda x: int(x.split("tg: ")[1].split(")")[0])
+            )
+
+        df = df[
+            (df["PP Value"] >= pp_min)
+            & (df["PP Value"] <= pp_max)
+            & (df["TG Value"] >= tg_min)
+            & (df["TG Value"] <= tg_max)
+        ]
+
+    # Advanced settings filters
+    advanced = filters["advanced_settings"]
+    if advanced["n_threads"]:
+        df["n_threads"] = df["initSettings"].apply(lambda x: x.get("n_threads"))
+        df = df[df["n_threads"].isin(advanced["n_threads"])]
+
+    if advanced["flash_attn"]:
+        df["flash_attn"] = df["initSettings"].apply(lambda x: x.get("flash_attn"))
+        df = df[df["flash_attn"].isin(advanced["flash_attn"])]
+
+    if advanced["cache_type"]:
+        df["cache_type_k"] = df["initSettings"].apply(lambda x: x.get("cache_type_k"))
+        df["cache_type_v"] = df["initSettings"].apply(lambda x: x.get("cache_type_v"))
+        df = df[
+            (df["cache_type_k"].isin(advanced["cache_type"]))
+            & (df["cache_type_v"].isin(advanced["cache_type"]))
+        ]
+
+    if advanced["max_memory_usage"] < 100:
+        df = df[df["Memory Usage (%)"] <= advanced["max_memory_usage"]]
+
+    return df
+
+
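Note: the `filters` dict consumed by `filter_dataframe` above is assembled by the filter components in src/components/filters.py, which are not part of this hunk. The sketch below is only a representative shape inferred from the keys accessed in this diff; the concrete values are illustrative, not defaults.

# Representative `filters` payload (shape inferred from the keys accessed above;
# values are illustrative only):
filters = {
    "basic_filters": {"model": "All", "platform": "All", "device": "All"},
    "benchmark_config": {
        "use_custom_config": True,
        "pp_range": (64, 1024),  # prompt-processing size bounds
        "tg_range": (32, 512),   # token-generation size bounds
    },
    "advanced_settings": {
        "n_threads": [4, 6],      # empty list -> no filtering on this field
        "flash_attn": [True],
        "cache_type": ["f16"],
        "max_memory_usage": 100,  # percent; 100 leaves the memory filter off
    },
    "grouping": ["Model ID", "Device", "Platform"],
    "visible_columns": ["Device", "Platform", "Model ID", "Model Size"],
}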
+def render_performance_plots(df: pd.DataFrame, filters: Dict):
     """Render performance comparison plots"""
+    if df.empty:
+        st.warning("No data available for plotting.")
         return

+    # Apply filters
+    filtered_df = filter_dataframe(df, filters)
+    if filtered_df.empty:
+        st.warning("No data matches the selected filters for plotting.")
+        return
+
+    # Extract PP/TG values if not already present
+    if "PP Value" not in filtered_df.columns:
+        filtered_df["PP Value"] = filtered_df["Benchmark"].apply(
+            lambda x: int(x.split("pp: ")[1].split(",")[0])
+        )
+    if "TG Value" not in filtered_df.columns:
+        filtered_df["TG Value"] = filtered_df["Benchmark"].apply(
+            lambda x: int(x.split("tg: ")[1].split(")")[0])
+        )
+
+    # Extract initSettings if not already present
+    if "n_threads" not in filtered_df.columns:
+        filtered_df["n_threads"] = filtered_df["initSettings"].apply(
+            lambda x: x.get("n_threads")
+        )
+        filtered_df["flash_attn"] = filtered_df["initSettings"].apply(
+            lambda x: x.get("flash_attn")
+        )
+        filtered_df["cache_type_k"] = filtered_df["initSettings"].apply(
+            lambda x: x.get("cache_type_k")
+        )
+        filtered_df["cache_type_v"] = filtered_df["initSettings"].apply(
+            lambda x: x.get("cache_type_v")
+        )
+
+    # Group by device and platform for plotting
+    plot_group = (
+        filtered_df.groupby(["Device", "Platform"])
+        .agg(
+            {
+                "Prompt Processing": "mean",
+                "Token Generation": "mean",
+                "Memory Usage (%)": "mean",
+                "Memory Usage (GB)": "mean",
+                "CPU Cores": "first",
+                "Model Size": "first",
+                "PP Value": "first",
+                "TG Value": "first",
+            }
+        )
+        .reset_index()
+    )
+
     col1, col2 = st.columns(2)
     with col1:
         fig1 = create_performance_plot(
+            plot_group,
             "Prompt Processing",
+            f"Prompt Processing Time (PP: {plot_group['PP Value'].iloc[0]})",
         )
         if fig1:
             st.plotly_chart(fig1, use_container_width=True)

     with col2:
         fig2 = create_performance_plot(
+            plot_group,
             "Token Generation",
+            f"Token Generation Time (TG: {plot_group['TG Value'].iloc[0]})",
         )
         if fig2:
             st.plotly_chart(fig2, use_container_width=True)

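`create_performance_plot` is defined earlier in src/components/visualizations.py and is unchanged here, so it does not appear in this hunk. As a rough sketch only (assuming a Plotly bar chart keyed by device and colored by platform, which is not confirmed by this diff), such a helper might look like:

import pandas as pd
import plotly.express as px

def create_performance_plot(df: pd.DataFrame, metric: str, title: str):
    # Sketch, not the actual implementation: one bar per device for the
    # aggregated metric, colored by platform; returns None when there is
    # nothing to plot so callers can skip st.plotly_chart.
    if df.empty:
        return None
    fig = px.bar(df, x="Device", y=metric, color="Platform", title=title)
    fig.update_layout(yaxis_title=f"{metric} (ms)")
    return fig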
+
+def render_leaderboard_table(df: pd.DataFrame, filters: Dict):
     """Render the leaderboard table with grouped and formatted data"""
+    if df.empty:
+        st.warning("No data available for the selected filters.")
+        return
+
+    # Apply filters
+    filtered_df = filter_dataframe(df, filters)
+    if filtered_df.empty:
+        st.warning("No data matches the selected filters.")
+        return
+
+    # Extract settings from benchmark results
+    filtered_df["PP Value"] = filtered_df["Benchmark"].apply(
+        lambda x: int(x.split("pp: ")[1].split(",")[0])
+    )
+    filtered_df["TG Value"] = filtered_df["Benchmark"].apply(
+        lambda x: int(x.split("tg: ")[1].split(")")[0])
     )

+    # Extract initSettings
+    filtered_df["n_threads"] = filtered_df["initSettings"].apply(
+        lambda x: x.get("n_threads")
+    )
+    filtered_df["flash_attn"] = filtered_df["initSettings"].apply(
+        lambda x: x.get("flash_attn")
+    )
+    filtered_df["cache_type_k"] = filtered_df["initSettings"].apply(
+        lambda x: x.get("cache_type_k")
+    )
+    filtered_df["cache_type_v"] = filtered_df["initSettings"].apply(
+        lambda x: x.get("cache_type_v")
+    )
+
+    # Group by selected columns
+    grouping_cols = filters["grouping"]
+    if not grouping_cols:
+        grouping_cols = ["Model ID", "Device", "Platform"]  # Default grouping
+
+    agg_dict = {
+        "Prompt Processing": ["mean", "count", "std"],
+        "Token Generation": ["mean", "std"],
+        "Memory Usage (%)": "mean",
+        "Memory Usage (GB)": "mean",
+        "Total Memory (GB)": "first",
+        "CPU Cores": "first",
+        "Model Size": "first",
+        "PP Value": "first",
+        "TG Value": "first",
+        "n_threads": "first",
+        "flash_attn": "first",
+        "cache_type_k": "first",
+        "cache_type_v": "first",
+    }
+
+    grouped_df = filtered_df.groupby(grouping_cols).agg(agg_dict).reset_index()
+
     # Flatten column names
     grouped_df.columns = [
         col[0] if col[1] == "" else f"{col[0]} ({col[1]})" for col in grouped_df.columns
     ]

+    # Sort by Model Size, PP Value, and TG time
+    grouped_df = grouped_df.sort_values(
+        by=["Model Size (first)", "PP Value (first)", "Token Generation (mean)"],
+        ascending=[False, True, True],
+    )
+
     # Round numeric columns
     numeric_cols = [
+        col
+        for col in grouped_df.columns
+        if any(x in col for x in ["mean", "std", "Memory", "Size"])
     ]
     grouped_df[numeric_cols] = grouped_df[numeric_cols].round(2)

     # Rename columns for display
+    column_mapping = {
+        "Prompt Processing (mean)": "PP Avg (ms)",
+        "Prompt Processing (std)": "PP Std",
+        "Prompt Processing (count)": "Runs",
+        "Token Generation (mean)": "TG Avg (ms)",
+        "Token Generation (std)": "TG Std",
+        "Memory Usage (%) (mean)": "Memory Usage (%)",
+        "Memory Usage (GB) (mean)": "Memory Usage (GB)",
+        "PP Value (first)": "PP Value",
+        "TG Value (first)": "TG Value",
+    }
+    grouped_df = grouped_df.rename(columns=column_mapping)

+    # Filter visible columns
+    visible_cols = filters["visible_columns"]
+    if visible_cols:
+        # Map the user-friendly names to actual column names
+        column_name_mapping = {
+            "Device": "Device",
+            "Platform": "Platform",
+            "CPU Cores": "CPU Cores (first)",
+            "Total Memory (GB)": "Total Memory (GB) (first)",
+            "Memory Usage (%)": "Memory Usage (%)",
+            "PP Value": "PP Value",
+            "TG Value": "TG Value",
+            "Prompt Processing": "PP Avg (ms)",
+            "Token Generation": "TG Avg (ms)",
+            "Model": "Model ID",
+            "Model Size": "Model Size (first)",
+            "Model ID": "Model ID",
+            "n_threads": "n_threads (first)",
+            "flash_attn": "flash_attn (first)",
+            "cache_type_k": "cache_type_k (first)",
+            "cache_type_v": "cache_type_v (first)",
+        }
+        display_cols = [
+            column_name_mapping[col]
+            for col in visible_cols
+            if col in column_name_mapping
+        ]
+    else:
+        # Default columns if none selected
+        display_cols = [
+            "Device",
+            "Platform",
+            "Model ID",
+            "Model Size (first)",
+            "PP Avg (ms)",
+            "TG Avg (ms)",
+            "Memory Usage (%)",
+        ]

     # Display the filtered and grouped table
     st.dataframe(
+        grouped_df[display_cols],
         use_container_width=True,
         height=400,
+    )
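The string-splitting lambdas above rely on the `Benchmark` column format produced by `format_leaderboard_data` in src/services/firebase.py (see the diff below), i.e. "<label> (pp: <n>, tg: <n>)". A quick sanity check of that parsing (the label value is illustrative):

benchmark = "default (pp: 512, tg: 128)"  # format built in format_leaderboard_data
pp_value = int(benchmark.split("pp: ")[1].split(",")[0])  # 512
tg_value = int(benchmark.split("tg: ")[1].split(")")[0])  # 128
assert (pp_value, tg_value) == (512, 128)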
    	
src/services/firebase.py
CHANGED

@@ -5,6 +5,7 @@ import pandas as pd
 import streamlit as st
 import json

+
 def initialize_firebase():
     """Initialize Firebase with credentials"""
     try:
@@ -16,17 +17,20 @@ def initialize_firebase():
         firebase_admin.initialize_app(cred)
     return firestore.client()

+
 db = initialize_firebase()

+
 def normalize_device_id(device_info: dict) -> str:
     """Normalize device identifier for aggregation"""
     emulator = "/Emulator" if device_info["isEmulator"] else ""
     if device_info["systemName"].lower() == "ios":
         return f"iOS/{device_info['model']}{emulator}"
+
     memory_tier = f"{device_info['totalMemory'] // (1024**3)}GB"
     return f"{device_info['brand']}/{device_info['model']}/{memory_tier}{emulator}"

+
 def format_params_in_b(params: int) -> float:
     """Format number of parameters in billions"""
     b_value = params / 1e9
@@ -37,78 +41,117 @@ def format_params_in_b(params: int) -> float:
     else:
         return round(b_value, 3)

+
 def format_leaderboard_data(submissions: List[dict]) -> pd.DataFrame:
     """Format submissions for leaderboard display"""
     formatted_data = []
+
     for sub in submissions:
         try:
-            benchmark_result = sub.get(
-            device_info = sub.get(
+            benchmark_result = sub.get("benchmarkResult", {})
+            device_info = sub.get("deviceInfo", {})
+
+            # Skip if missing required data
             if not benchmark_result or not device_info:
                 continue
+
+            # Skip if missing initSettings
+            if "initSettings" not in benchmark_result:
+                continue
+
+            # Skip emulators
+            if device_info.get("isEmulator", False):
+                continue
+
+            formatted_data.append(
+                {
+                    "Device": device_info.get("model", "Unknown"),
+                    "Platform": device_info.get("systemName", "Unknown"),
+                    "Benchmark": f"{benchmark_result.get('config', {}).get('label', 'Unknown')} (pp: {benchmark_result.get('config', {}).get('pp', 'N/A')}, tg: {benchmark_result.get('config', {}).get('tg', 'N/A')})",
+                    "Model": benchmark_result.get("modelName", "Unknown"),
+                    "Model Size": format_params_in_b(
+                        benchmark_result.get("modelNParams", 0)
+                    ),
+                    "Prompt Processing": round(benchmark_result.get("ppAvg", 0), 2),
+                    "Token Generation": round(benchmark_result.get("tgAvg", 0), 2),
+                    "Memory Usage (%)": benchmark_result.get("peakMemoryUsage", {}).get(
+                        "percentage"
+                    ),
+                    "Memory Usage (GB)": (
+                        round(
+                            benchmark_result.get("peakMemoryUsage", {}).get("used", 0)
+                            / (1024**3),
+                            2,
+                        )
+                        if benchmark_result.get("peakMemoryUsage", {}).get("used")
+                        else None
+                    ),
+                    "Total Memory (GB)": round(
+                        device_info.get("totalMemory", 0) / (1024**3), 2
+                    ),
+                    "CPU Cores": device_info.get("cpuDetails", {}).get(
+                        "cores", "Unknown"
+                    ),
+                    "Normalized Device ID": normalize_device_id(device_info),
+                    "Timestamp": benchmark_result.get("timestamp", "Unknown"),
+                    "Model ID": benchmark_result.get("modelId", "Unknown"),
+                    "OID": benchmark_result.get("oid"),
+                    "initSettings": benchmark_result.get("initSettings"),
+                }
+            )
         except Exception as e:
             st.warning(f"Error processing submission: {str(e)}")
             continue
+
     return pd.DataFrame(formatted_data)

+
 async def fetch_leaderboard_data(
-    model_name: Optional[str] = None,
-    benchmark_label: Optional[str] = None
+    model_name: Optional[str] = None, benchmark_label: Optional[str] = None
 ) -> pd.DataFrame:
     """Fetch and process leaderboard data from Firestore"""
     try:
         # Navigate to the correct collection path: benchmarks/v1/submissions
-        submissions_ref =
+        submissions_ref = (
+            db.collection("benchmarks").document("v1").collection("submissions")
+        )
+
         # Get all documents
         docs = submissions_ref.stream()
         all_docs = list(docs)
+
         if len(all_docs) == 0:
             return pd.DataFrame()
+
         # Process documents and filter in memory
         submissions = []
+
         for doc in all_docs:
             data = doc.to_dict()
-            if not data or
+
+            if not data or "benchmarkResult" not in data:
                 continue
-            benchmark_result = data[
+
+            benchmark_result = data["benchmarkResult"]
+
             # Apply filters
-            if
+            if (
+                model_name
+                and model_name != "All"
+                and benchmark_result.get("modelName") != model_name
+            ):
                 continue
-            if
+            if (
+                benchmark_label
+                and benchmark_label != "All"
+                and benchmark_result.get("config", {}).get("label") != benchmark_label
+            ):
                 continue
+
             submissions.append(data)
+
         return format_leaderboard_data(submissions)
+
     except Exception as e:
         st.error(f"Error fetching data from Firestore: {str(e)}")
-        return pd.DataFrame()
+        return pd.DataFrame()
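For reference, the submission document shape that the updated `format_leaderboard_data` expects can be read off the `.get()` accessors above. The sketch below is inferred from those accessors only; every field value is illustrative, not taken from real data.

# Illustrative Firestore submission document (keys inferred from the code above):
submission = {
    "deviceInfo": {
        "model": "Pixel 8",            # illustrative
        "systemName": "Android",
        "brand": "google",
        "totalMemory": 8 * 1024**3,    # bytes
        "cpuDetails": {"cores": 9},
        "isEmulator": False,
    },
    "benchmarkResult": {
        "config": {"label": "default", "pp": 512, "tg": 128},
        "modelName": "example-model Q4_K_M",   # illustrative
        "modelId": "example-model",
        "modelNParams": 1_240_000_000,
        "ppAvg": 123.4,
        "tgAvg": 45.6,
        "peakMemoryUsage": {"percentage": 62.5, "used": int(2.1 * 1024**3)},
        "timestamp": "2024-01-01T00:00:00Z",
        "oid": "abc123",
        "initSettings": {
            "n_threads": 4,
            "flash_attn": False,
            "cache_type_k": "f16",
            "cache_type_v": "f16",
        },
    },
}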