Spaces:
Running
Running
| import gradio as gr | |
| import pandas as pd | |
| from huggingface_hub import list_models | |
| import plotly.express as px | |
# Label displayed on the citation textbox inside the "Citation" accordion.
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
# BibTeX entry offered for copying. Kept as a raw string so that the
# backslash in \url{...} stays a literal backslash.
CITATION_BUTTON_TEXT = r"""@misc{energystarai-leaderboard,
author = {Sasha Luccioni and Boris Gamazaychikov and Emma Strubell and Sara Hooker and Yacine Jernite and Carole-Jean Wu and Margaret Mitchell},
title = {Energy Star AI Leaderboard v.0},
year = {2024},
publisher = {Hugging Face},
howpublished = "\url{https://huggingface.co/spaces/EnergyStarAI/2024_Leaderboard}",
}
"""
# Per-task CSV file names. Each name is looked up under both 'data/energy/'
# and 'data/params/'; the order here is the order in which the "all tasks"
# helpers read and concatenate the per-task results.
tasks = [
    'asr.csv',
    'object_detection.csv',
    'text_classification.csv',
    'image_captioning.csv',
    'question_answering.csv',
    'text_generation.csv',
    'image_classification.csv',
    'sentence_similarity.csv',
    'image_generation.csv',
    'summarization.csv',
]
def get_plots(task):
    """Build the energy scatter plot for a single task.

    Reads 'data/energy/<task>' and 'data/params/<task>', joins them on the
    model id, and plots one point per joined row with the model on the x axis
    and GPU energy on the y axis. Points are coloured by a three-level star
    rating obtained by cutting the task's energy range into three equal-width
    bins (lowest-energy bin gets three stars).

    Args:
        task: CSV file name shared by both data folders,
            e.g. 'text_generation.csv'.

    Returns:
        The Plotly figure produced by ``px.scatter``.
    """
    # TO DO : hover text with energy efficiency number, parameters
    energy_col = 'Total GPU Energy (Wh)'
    star_labels = ["⭐⭐⭐", "⭐⭐", "⭐"]
    star_colors = {"⭐": 'red', "⭐⭐": "orange", "⭐⭐⭐": "green"}

    energy = pd.read_csv('data/energy/' + task)
    # The params file names its model-id column "Link"; align it with the
    # energy file's "model" column so the two can be joined.
    params = pd.read_csv('data/params/' + task).rename(columns={"Link": "model"})
    merged = energy.merge(params, on='model')

    # x1000 unit conversion; presumably total_gpu_energy is stored in kWh
    # (TODO confirm against the data files).
    merged[energy_col] = merged['total_gpu_energy'] * 1000
    merged = merged.sort_values(by=[energy_col])
    merged['parameters'] = merged['parameters'].apply(format_params)
    merged['energy_star'] = pd.cut(merged[energy_col], bins=3, labels=star_labels)

    fig = px.scatter(
        merged,
        x="model",
        y=energy_col,
        color='energy_star',
        color_discrete_map=star_colors,
        custom_data=['parameters'],
        height=500,
        width=800,
    )
    hover_lines = [
        "Total Energy: %{y}",
        "Parameters: %{customdata[0]}",
    ]
    fig.update_traces(hovertemplate="<br>".join(hover_lines))
    return fig
def get_all_plots():
    """Build one scatter plot covering every task listed in ``tasks``.

    For each task the energy and parameter CSVs are joined on the model id,
    the energy is converted to the 'Total GPU Energy (Wh)' column, and a star
    rating is assigned *within that task* by cutting the task's energy range
    into three equal-width bins (lowest-energy bin gets three stars). The
    per-task frames are then stacked, sorted by energy and plotted.

    Returns:
        The Plotly figure produced by ``px.scatter``.
    """
    energy_col = 'Total GPU Energy (Wh)'
    star_labels = ["⭐⭐⭐", "⭐⭐", "⭐"]

    # Collect the per-task frames and concatenate once. Growing a frame with
    # pd.concat inside the loop re-copies all accumulated rows on every
    # iteration, and seeding it with an empty DataFrame relies on
    # concat-with-empty behaviour that newer pandas versions deprecate.
    per_task_frames = []
    for task in tasks:
        energy_df = pd.read_csv('data/energy/' + task)
        # The params file names its model-id column "Link".
        params_df = pd.read_csv('data/params/' + task).rename(columns={"Link": "model"})
        task_df = pd.merge(energy_df, params_df, on='model')
        # .copy() so the column assignments below write to an independent
        # frame rather than to a selection of the merged one.
        task_df = task_df[['model', 'parameters', 'total_gpu_energy']].copy()
        # x1000 unit conversion; presumably the source column is in kWh
        # (TODO confirm against the data files).
        task_df[energy_col] = task_df['total_gpu_energy'] * 1000
        task_df['energy_star'] = pd.cut(task_df[energy_col], 3, labels=star_labels)
        per_task_frames.append(task_df)

    all_df = pd.concat(per_task_frames)
    all_df = all_df.sort_values(by=[energy_col])
    all_df['parameters'] = all_df['parameters'].apply(format_params)

    fig = px.scatter(
        all_df,
        x="model",
        y=energy_col,
        custom_data=['parameters'],
        height=500,
        width=800,
        color='energy_star',
        color_discrete_map={"⭐": 'red', "⭐⭐": "orange", "⭐⭐⭐": "green"},
    )
    fig.update_traces(
        hovertemplate="<br>".join([
            "Total Energy: %{y}",
            "Parameters: %{customdata[0]}",
        ])
    )
    return fig
def make_link(mname):
    """Return a Markdown link to a model's Hugging Face page.

    The link text is the part of the id after the namespace
    ("org/name" -> "name"); the target is always the full id.

    Args:
        mname: Model id, e.g. "openai/whisper-tiny". Converted with ``str``.

    Returns:
        A string of the form "[name](https://huggingface.co/<id>)".
    """
    model_id = str(mname)
    parts = model_id.split('/')
    # Ids without a namespace (e.g. "gpt2") contain no slash; use the whole
    # id as link text instead of failing with IndexError.
    display_name = parts[1] if len(parts) > 1 else parts[0]
    return "[" + display_name + "](https://huggingface.co/" + model_id + ")"
def get_model_names(task):
    """Build the leaderboard table for a single task.

    Reads 'data/params/<task>' and 'data/energy/<task>', joins them on the
    model id, keeps one row per model, and derives the display columns: a
    Markdown link to the model, a star rating, the GPU energy rounded to two
    decimals, and a short parameter count.

    Args:
        task: CSV file name shared by both data folders,
            e.g. 'text_generation.csv'.

    Returns:
        A DataFrame with the columns 'Model', 'Rating',
        'Total GPU Energy (Wh)' and 'Parameters', sorted by ascending energy.
    """
    energy_col = 'Total GPU Energy (Wh)'

    params = pd.read_csv('data/params/' + task)
    energy = pd.read_csv('data/energy/' + task)
    # The params file names its model-id column "Link".
    params = params.rename(columns={"Link": "model"})

    table = params.merge(energy, on='model')
    table = table.drop_duplicates(subset=['model'])

    table['Parameters'] = table['parameters'].apply(format_params)
    table['Model'] = table['model'].apply(make_link)
    # x1000 unit conversion (presumably kWh -> Wh; TODO confirm), rounded
    # for display.
    table[energy_col] = (table['total_gpu_energy'] * 1000).round(2)
    # Three equal-width bins over the rounded energies; the lowest bin gets
    # the most stars.
    table['Rating'] = pd.cut(table[energy_col], bins=3, labels=["⭐⭐⭐", "⭐⭐", "⭐"])

    table = table.sort_values(energy_col)
    return table[['Model', 'Rating', energy_col, 'Parameters']]
def get_all_model_names():
    """Build the leaderboard table covering every task listed in ``tasks``.

    For each task the parameter and energy CSVs are joined on the model id,
    the energy is converted and rounded to two decimals, and a star rating is
    assigned *within that task* (three equal-width energy bins, lowest bin
    gets three stars). The per-task frames are stacked and de-duplicated by
    model, so a model that appears in several tasks keeps only the row from
    the task that comes first in ``tasks``.

    Returns:
        A DataFrame with the columns 'Model', 'Rating',
        'Total GPU Energy (Wh)' and 'Parameters', sorted by ascending energy.
    """
    #TODO: add link to results in model card of each model
    energy_col = 'Total GPU Energy (Wh)'

    # Collect the per-task frames and concatenate once. Growing a frame with
    # pd.concat inside the loop re-copies all accumulated rows on every
    # iteration, and seeding it with an empty DataFrame relies on
    # concat-with-empty behaviour that newer pandas versions deprecate.
    per_task_frames = []
    for task in tasks:
        params_df = pd.read_csv('data/params/' + task)
        energy_df = pd.read_csv('data/energy/' + task)
        # The params file names its model-id column "Link".
        params_df = params_df.rename(columns={"Link": "model"})
        task_df = pd.merge(params_df, energy_df, on='model')
        # .copy() so the column assignments below write to an independent
        # frame rather than to a selection of the merged one.
        task_df = task_df[['model', 'parameters', 'total_gpu_energy']].copy()
        # x1000 unit conversion (presumably kWh -> Wh; TODO confirm),
        # rounded for display.
        task_df[energy_col] = (task_df['total_gpu_energy'] * 1000).round(2)
        task_df['Rating'] = pd.cut(task_df[energy_col], 3, labels=["⭐⭐⭐", "⭐⭐", "⭐"])
        per_task_frames.append(task_df)

    all_df = pd.concat(per_task_frames)
    # Keeps the first occurrence, i.e. the row from the earliest task.
    all_df = all_df.drop_duplicates(subset=['model'])
    all_df['Parameters'] = all_df['parameters'].apply(format_params)
    all_df['Model'] = all_df['model'].apply(make_link)
    all_df = all_df.sort_values(energy_col)
    model_names = all_df[['Model', 'Rating', energy_col, 'Parameters']]
    return model_names
def format_params(num):
    """Abbreviate a raw parameter count for display.

    Counts of one billion or more are shown in billions: exact multiples as
    a whole number ("7B"), everything else rounded to one decimal ("1.5B").
    Smaller counts are shown in whole millions, truncated ("110M").

    Args:
        num: Number of model parameters.

    Returns:
        The abbreviated count as a string with a "B" or "M" suffix.
    """
    billion = 1000000000
    million = 1000000
    # >= rather than >: a model with exactly one billion parameters must be
    # rendered as "1B", not fall through to the millions branch as "1000M".
    if num >= billion:
        if not num % billion:
            return f'{num // billion}B'
        return f'{round(num / billion, 1)}B'
    return f'{num // million}M'
# (tab label, per-task CSV file name) in display order. Labels are
# user-facing text and are kept verbatim, including the trailing space in
# the speech-recognition label.
_TASK_TABS = [
    ("Text Generation 💬", 'text_generation.csv'),
    ("Image Generation 📷", 'image_generation.csv'),
    ("Text Classification 🎭", 'text_classification.csv'),
    ("Image Classification 🖼️", 'image_classification.csv'),
    ("Image Captioning 📝", 'image_captioning.csv'),
    ("Summarization 📃", 'summarization.csv'),
    ("Automatic Speech Recognition 💬 ", 'asr.csv'),
    ("Object Detection 🚘", 'object_detection.csv'),
    ("Sentence Similarity 📚", 'sentence_similarity.csv'),
    ("Extractive QA ❔", 'question_answering.csv'),
]

# Relative (plot column, table column) widths for tabs that do not use equal
# columns. Gradio expects integer scales, so the 1.3 : 1 ratio is expressed
# as 13 : 10.
_TAB_COLUMN_SCALES = {'text_generation.csv': (13, 10)}

demo = gr.Blocks()
with demo:
    gr.Markdown(
        """# Energy Star Leaderboard - v.0 (2024) 🌎 💻 🌟
### Welcome to the leaderboard for the [AI Energy Star Project!](https://huggingface.co/EnergyStarAI)
Click through the tasks below to see how different models measure up in terms of energy efficiency"""
    )
    gr.Markdown(
        """Test your own models via the [submission portal (TODO)]."""
    )
    with gr.Tabs():
        # One tab per task: scatter plot on the left, ranking table on the
        # right. Plots and tables are computed here, while the UI is built.
        for tab_label, task_csv in _TASK_TABS:
            plot_scale, table_scale = _TAB_COLUMN_SCALES.get(task_csv, (1, 1))
            with gr.TabItem(tab_label):
                with gr.Row():
                    with gr.Column(scale=plot_scale):
                        plot = gr.Plot(get_plots(task_csv))
                    with gr.Column(scale=table_scale):
                        table = gr.Dataframe(get_model_names(task_csv), datatype="markdown")
        with gr.TabItem("All Tasks 💡"):
            with gr.Row():
                with gr.Column():
                    # The functions themselves are passed (not their
                    # results), so Gradio invokes them to produce the
                    # initial value instead of using a value computed here.
                    plot = gr.Plot(get_all_plots)
                with gr.Column():
                    table = gr.Dataframe(get_all_model_names, datatype="markdown")
    with gr.Accordion("Methodology", open = False):
        gr.Markdown(
            """For each of the ten tasks above, we created a custom dataset with 1,000 entries (see all of the datasets on our [org Hub page](https://huggingface.co/EnergyStarAI)).
We then tested each of the models from the leaderboard on the appropriate task on Nvidia H100 GPUs, measuring the energy consumed using [Code Carbon](https://mlco2.github.io/codecarbon/), an open-source Python package for tracking the environmental impacts of code.
We developed and used a [Docker container](https://github.com/huggingface/EnergyStarAI/) to maximize the reproducibility of results, and to enable members of the community to benchmark internal models.
Reach out to us if you want to collaborate!
""")
    with gr.Accordion("📙 Citation", open=False):
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            lines=20,
            elem_id="citation-button",
            show_copy_button=True,
        )
    gr.Markdown(
        """Last updated: September 20th, 2024 by [Sasha Luccioni](https://huggingface.co/sasha)""")
demo.launch()