Spaces:
Runtime error
Runtime error
| """ | |
| Dashboard to visualize the progress of the SomosNLP project. | |
| by Argilla. | |
| This dashboard shows the progress of the SomosNLP project, including the number of annotated and pending records, the top annotators, and the remaining records to be annotated. | |
| The data is fetched from the source datasets and updated every 5 minutes. | |
| Due to Gradio's limitations on what can be passed as input to its graph methods, the data is fetched outside of the graph methods and stored in global variables. Therefore, | |
| a function for each graph-dataset pair is needed. Moreover, to avoid circular imports, all the functions must be | |
| in the same Python file. This is not ideal and could be improved once it is known how to pass input parameters to graph functions in Gradio. | |
| """ | |
| import datetime | |
| import os | |
| from typing import Dict, List, Tuple | |
| from uuid import UUID | |
| import altair as alt | |
| from apscheduler.schedulers.background import BackgroundScheduler | |
| import argilla as rg | |
| from argilla.feedback import FeedbackDataset | |
| from argilla.client.feedback.dataset.remote.dataset import RemoteFeedbackDataset | |
| import gradio as gr | |
| import pandas as pd | |
def get_source_datasets() -> Tuple[
    FeedbackDataset | RemoteFeedbackDataset,
    FeedbackDataset | RemoteFeedbackDataset,
    FeedbackDataset | RemoteFeedbackDataset,
]:
    """
    Load the three source datasets displayed by the dashboard.

    The dataset names are read from the ``SOURCE_DATASET_1``, ``SOURCE_DATASET_2``
    and ``SOURCE_DATASET_3`` environment variables, and the workspace from
    ``SOURCE_WORKSPACE``.

    Returns:
        A tuple with the three datasets, in the order of their environment variables.
    """
    name_variables = ("SOURCE_DATASET_1", "SOURCE_DATASET_2", "SOURCE_DATASET_3")
    # Unpacking into three names keeps the "exactly three datasets" contract explicit
    first, second, third = (
        rg.FeedbackDataset.from_argilla(
            os.getenv(name_variable), workspace=os.getenv("SOURCE_WORKSPACE")
        )
        for name_variable in name_variables
    )
    return (first, second, third)
def get_user_annotations_dictionary(
    datasets: List[FeedbackDataset | RemoteFeedbackDataset],
) -> Dict[str, int]:
    """
    Count the responses given by each user across all the datasets.

    Every response attached to a record is counted; no filtering on the response
    status is done here. The responses of all datasets are accumulated in the
    same dictionary.

    Args:
        datasets: The datasets whose records and responses are inspected.

    Returns:
        A dictionary with the username as the key and the number of responses as the value.
    """
    # First pass: count the responses per user id (as a string)
    responses_per_id: Dict[str, int] = {}
    for dataset in datasets:
        for record in dataset:
            for response in record.responses:
                user_id = str(response.user_id)
                responses_per_id[user_id] = responses_per_id.get(user_id, 0) + 1

    # Second pass: resolve each id to its username. The result is built in a
    # separate dictionary and accumulated, so two ids resolving to the same
    # username add up instead of overwriting each other.
    responses_per_username: Dict[str, int] = {}
    for user_id, count in responses_per_id.items():
        username = rg.User.from_id(UUID(user_id)).username
        responses_per_username[username] = (
            responses_per_username.get(username, 0) + count
        )
    return responses_per_username
def donut_chart_1() -> alt.Chart:
    """
    Build the donut chart of annotated versus pending records for the first dataset.

    Reads the global ``dataset1``.

    Returns:
        An altair chart combining the arcs with a text label for each count.
    """
    n_annotated = len(dataset1.filter_by(response_status=["submitted"]))
    n_pending = len(dataset1) - n_annotated

    # One row per category: the count and its label
    frame = pd.DataFrame(
        {
            "values": [n_annotated, n_pending],
            "category": ["Annotated", "Pending"],
        }
    )

    radius_scale = alt.Scale(type="sqrt", zero=True, rangeMin=20)
    encoded = alt.Chart(frame).encode(
        theta=alt.Theta("values:Q", stack=True),
        radius=alt.Radius("values", scale=radius_scale),
        color=alt.Color("category:N", legend=alt.Legend(title="Category")),
    )

    arcs = encoded.mark_arc(innerRadius=20, stroke="#fff")
    labels = encoded.mark_text(radiusOffset=10).encode(text="values:Q")
    return arcs + labels
def donut_chart_2() -> alt.Chart:
    """
    Build the donut chart of annotated versus pending records for the second dataset.

    Reads the global ``dataset2``.

    Returns:
        An altair chart combining the arcs with a text label for each count.
    """
    n_annotated = len(dataset2.filter_by(response_status=["submitted"]))
    n_pending = len(dataset2) - n_annotated

    # One row per category: the count and its label
    frame = pd.DataFrame(
        {
            "values": [n_annotated, n_pending],
            "category": ["Annotated", "Pending"],
        }
    )

    radius_scale = alt.Scale(type="sqrt", zero=True, rangeMin=20)
    encoded = alt.Chart(frame).encode(
        theta=alt.Theta("values:Q", stack=True),
        radius=alt.Radius("values", scale=radius_scale),
        color=alt.Color("category:N", legend=alt.Legend(title="Category")),
    )

    arcs = encoded.mark_arc(innerRadius=20, stroke="#fff")
    labels = encoded.mark_text(radiusOffset=10).encode(text="values:Q")
    return arcs + labels
def donut_chart_3() -> alt.Chart:
    """
    Build the donut chart of annotated versus pending records for the third dataset.

    Reads the global ``dataset3``.

    Returns:
        An altair chart combining the arcs with a text label for each count.
    """
    n_annotated = len(dataset3.filter_by(response_status=["submitted"]))
    n_pending = len(dataset3) - n_annotated

    # One row per category: the count and its label
    frame = pd.DataFrame(
        {
            "values": [n_annotated, n_pending],
            "category": ["Annotated", "Pending"],
        }
    )

    radius_scale = alt.Scale(type="sqrt", zero=True, rangeMin=20)
    encoded = alt.Chart(frame).encode(
        theta=alt.Theta("values:Q", stack=True),
        radius=alt.Radius("values", scale=radius_scale),
        color=alt.Color("category:N", legend=alt.Legend(title="Category")),
    )

    arcs = encoded.mark_arc(innerRadius=20, stroke="#fff")
    labels = encoded.mark_text(radiusOffset=10).encode(text="values:Q")
    return arcs + labels
def kpi_chart_submitted_1() -> alt.Chart:
    """
    Build the KPI chart with the number of annotated records of the first dataset.

    Reads the global ``dataset1``.

    Returns:
        An altair chart showing the count as a large text mark.
    """
    submitted_count = len(dataset1.filter_by(response_status=["submitted"]))

    # Single-row frame holding the figure to display
    frame = pd.DataFrame({"Category": ["Total completed"], "Value": [submitted_count]})

    big_number = alt.Chart(frame).mark_text(
        fontSize=100, align="center", baseline="middle", color="steelblue"
    )
    return big_number.encode(text="Value:N").properties(
        title="Completados", width=250, height=200
    )
def kpi_chart_submitted_2() -> alt.Chart:
    """
    Build the KPI chart with the number of annotated records of the second dataset.

    Reads the global ``dataset2``.

    Returns:
        An altair chart showing the count as a large text mark.
    """
    submitted_count = len(dataset2.filter_by(response_status=["submitted"]))

    # Single-row frame holding the figure to display
    frame = pd.DataFrame({"Category": ["Total completed"], "Value": [submitted_count]})

    big_number = alt.Chart(frame).mark_text(
        fontSize=100, align="center", baseline="middle", color="steelblue"
    )
    return big_number.encode(text="Value:N").properties(
        title="Completados", width=250, height=200
    )
def kpi_chart_submitted_3() -> alt.Chart:
    """
    Build the KPI chart with the number of annotated records of the third dataset.

    Reads the global ``dataset3``.

    Returns:
        An altair chart showing the count as a large text mark.
    """
    submitted_count = len(dataset3.filter_by(response_status=["submitted"]))

    # Single-row frame holding the figure to display
    frame = pd.DataFrame({"Category": ["Total completed"], "Value": [submitted_count]})

    big_number = alt.Chart(frame).mark_text(
        fontSize=100, align="center", baseline="middle", color="steelblue"
    )
    return big_number.encode(text="Value:N").properties(
        title="Completados", width=250, height=200
    )
def kpi_chart_remaining_1() -> alt.Chart:
    """
    Build the KPI chart with the number of records still to annotate in the first dataset.

    Reads the global ``dataset1``. The remaining amount is the dataset size
    minus the number of records with a submitted response.

    Returns:
        An altair chart showing the count as a large text mark.
    """
    submitted_count = len(dataset1.filter_by(response_status=["submitted"]))
    remaining_count = len(dataset1) - submitted_count

    # Single-row frame holding the figure to display
    frame = pd.DataFrame({"Category": ["Total remaining"], "Value": [remaining_count]})

    big_number = alt.Chart(frame).mark_text(
        fontSize=100, align="center", baseline="middle", color="steelblue"
    )
    return big_number.encode(text="Value:N").properties(
        title="Restantes", width=250, height=200
    )
def kpi_chart_remaining_2() -> alt.Chart:
    """
    Build the KPI chart with the number of records still to annotate in the second dataset.

    Reads the global ``dataset2``. The remaining amount is the dataset size
    minus the number of records with a submitted response.

    Returns:
        An altair chart showing the count as a large text mark.
    """
    submitted_count = len(dataset2.filter_by(response_status=["submitted"]))
    remaining_count = len(dataset2) - submitted_count

    # Single-row frame holding the figure to display
    frame = pd.DataFrame({"Category": ["Total remaining"], "Value": [remaining_count]})

    big_number = alt.Chart(frame).mark_text(
        fontSize=100, align="center", baseline="middle", color="steelblue"
    )
    return big_number.encode(text="Value:N").properties(
        title="Restantes", width=250, height=200
    )
def kpi_chart_remaining_3() -> alt.Chart:
    """
    Build the KPI chart with the number of records still to annotate in the third dataset.

    Reads the global ``dataset3``. The remaining amount is the dataset size
    minus the number of records with a submitted response.

    Returns:
        An altair chart showing the count as a large text mark.
    """
    submitted_count = len(dataset3.filter_by(response_status=["submitted"]))
    remaining_count = len(dataset3) - submitted_count

    # Single-row frame holding the figure to display
    frame = pd.DataFrame({"Category": ["Total remaining"], "Value": [remaining_count]})

    big_number = alt.Chart(frame).mark_text(
        fontSize=100, align="center", baseline="middle", color="steelblue"
    )
    return big_number.encode(text="Value:N").properties(
        title="Restantes", width=250, height=200
    )
def render_hub_user_link(hub_id: str) -> str:
    """
    Render a username as an HTML link to its Hugging Face Hub profile.

    Args:
        hub_id: The user's id in the Hugging Face Hub.

    Returns:
        An ``<a>`` element, opening in a new tab, whose text is ``hub_id`` and
        whose target is ``https://huggingface.co/<hub_id>``.
    """
    profile_url = f"https://huggingface.co/{hub_id}"
    anchor_style = (
        "color: var(--link-text-color); "
        "text-decoration: underline;"
        "text-decoration-style: dotted;"
    )
    return (
        f'<a target="_blank" href="{profile_url}" style="{anchor_style}">'
        f"{hub_id}</a>"
    )
def kpi_chart_annotators() -> alt.Chart:
    """
    Build the KPI chart with the total number of annotators.

    The number is the amount of entries in the global ``user_ids_annotations``.

    Returns:
        An altair chart showing the count as a large text mark.
    """
    annotator_count = len(user_ids_annotations)

    # Single-row frame holding the figure to display
    frame = pd.DataFrame(
        {"Category": ["Total Contributors"], "Value": [annotator_count]}
    )

    big_number = alt.Chart(frame).mark_text(
        fontSize=100, align="center", baseline="middle", color="steelblue"
    )
    return big_number.encode(text="Value:N").properties(
        title="Contribuidores Totales", width=250, height=200
    )
def obtain_top_users(user_ids_annotations: Dict[str, int]) -> pd.DataFrame:
    """
    Build the ranking of the 50 users with the most annotations.

    The usernames are rendered as links to the user's profile in the Hugging Face Hub
    and the column names are in Spanish.

    Args:
        user_ids_annotations: A dictionary with the username as the key and the number of annotations as the value.

    Returns:
        A pandas dataframe with at most 50 rows, sorted by number of annotations in descending order.
    """
    ranking = pd.DataFrame(
        user_ids_annotations.items(), columns=["Name", "Submitted Responses"]
    )
    return (
        ranking.assign(Name=ranking["Name"].apply(render_hub_user_link))
        .sort_values(by="Submitted Responses", ascending=False)
        # Column names shown to the user are in Spanish
        .rename(
            columns={
                "Name": "Nombre",
                "Submitted Responses": "Respuestas Enviadas",
            }
        )
        .head(50)
    )
def get_top() -> pd.DataFrame:
    """
    Build the ranking of top users from the global ``user_ids_annotations``.

    Argument-free wrapper around ``obtain_top_users``.

    Returns:
        The pandas dataframe produced by ``obtain_top_users``.
    """
    leaderboard = obtain_top_users(user_ids_annotations)
    return leaderboard
def fetch_data() -> None:
    """
    Refresh the global data read by the chart functions.

    Loads the three source datasets, recomputes the annotations-per-user
    dictionary and stores the results in the globals ``dataset1``, ``dataset2``,
    ``dataset3`` and ``user_ids_annotations``. A timestamp is printed before and
    after the refresh.

    The globals are rebound only once all the new data has been computed, so an
    error in the middle of a refresh leaves the previous values in place and the
    charts never see new datasets together with old annotation counts.
    """
    # The data lives in globals because the Gradio chart functions take no arguments
    global dataset1, dataset2, dataset3, user_ids_annotations

    print(f"Starting to fetch data: {datetime.datetime.now()}")

    fresh_datasets = get_source_datasets()
    fresh_annotations = get_user_annotations_dictionary(list(fresh_datasets))

    # Publish everything together, after the slow part has succeeded
    dataset1, dataset2, dataset3 = fresh_datasets
    user_ids_annotations = fresh_annotations

    print(f"Data fetched: {datetime.datetime.now()}")
def main() -> None:
    """
    Connect to Argilla, schedule the periodic data refresh and launch the Gradio dashboard.

    The connection settings are read from the ``ARGILLA_API_URL``,
    ``ARGILLA_API_KEY`` and ``HF_TOKEN`` environment variables. The section
    titles use ``SOURCE_DATASET_1``, ``SOURCE_DATASET_2`` and ``SOURCE_DATASET_3``.
    """
    # Seconds between two background refreshes of the data
    update_interval = 300
    # Seconds between two re-runs of each chart function by Gradio
    update_interval_charts = 30

    # Connect to the space with rg.init()
    rg.init(
        api_url=os.getenv("ARGILLA_API_URL"),
        api_key=os.getenv("ARGILLA_API_KEY"),
        extra_headers={"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"},
    )

    # Initial, blocking fetch so the globals read by the chart functions exist
    fetch_data()

    # Periodic refresh; max_instances=1 keeps two refreshes from overlapping
    scheduler = BackgroundScheduler()
    scheduler.add_job(
        func=fetch_data, trigger="interval", seconds=update_interval, max_instances=1
    )
    scheduler.start()

    # To avoid the orange border for the Gradio elements that are in constant loading
    css = """
    .generating {
        border: none;
    }
    """

    # One entry per dataset section: the environment variable holding the
    # dataset name and the two KPI chart functions shown side by side.
    dataset_sections = (
        ("SOURCE_DATASET_1", kpi_chart_submitted_1, kpi_chart_remaining_1),
        ("SOURCE_DATASET_2", kpi_chart_submitted_2, kpi_chart_remaining_2),
        ("SOURCE_DATASET_3", kpi_chart_submitted_3, kpi_chart_remaining_3),
    )

    # NOTE(review): the indentation of this function was lost, so the nesting of
    # the `with` blocks below is reconstructed - confirm the intended layout.
    with gr.Blocks(css=css, title="LLM Benchmark en Español Dashboard") as demo:
        # JS code to force light theme
        demo.load(
            None,
            None,
            js="""
            () => {
                const params = new URLSearchParams(window.location.search);
                if (!params.has('__theme')) {
                    params.set('__theme', 'light');
                    window.location.search = params.toString();
                }
            }""",
        )

        gr.Markdown("\n# 🗣️ SomosNLP LLM Benchmark en Español Dashboard\n")

        for dataset_variable, submitted_chart, remaining_chart in dataset_sections:
            # NOTE(review): the leading "π" looks like a mis-encoded emoji whose
            # original glyph cannot be recovered from here - confirm and replace.
            gr.Markdown(
                f"\n## π Progreso del dataset {os.getenv(dataset_variable)}\n"
            )
            with gr.Row():
                for chart_function in (submitted_chart, remaining_chart):
                    plot = gr.Plot(label="Plot")
                    demo.load(
                        chart_function,
                        inputs=[],
                        outputs=[plot],
                        every=update_interval_charts,
                    )
            # The donut charts (donut_chart_1, donut_chart_2, donut_chart_3) are
            # currently not displayed.

        # NOTE(review): "πΎ" also looks like a mis-encoded emoji - confirm and replace.
        gr.Markdown(
            "\n## πΎ Hall de la Fama\n"
            "Aquí puedes ver el número de contribuidores y los contribuidores con más contribuciones:\n"
        )

        # NOTE(review): the contributors table is assumed to share the row with
        # the KPI chart - confirm.
        with gr.Row():
            plot2 = gr.Plot(label="Plot")
            demo.load(
                kpi_chart_annotators,
                inputs=[],
                outputs=[plot2],
                every=update_interval_charts,
            )

            top_df_plot = gr.Dataframe(
                headers=["Name", "Submitted Responses"],
                datatype=[
                    "markdown",
                    "number",
                ],
                row_count=50,
                col_count=(2, "fixed"),
                interactive=False,
            )
            demo.load(get_top, None, [top_df_plot], every=update_interval_charts)

    # Launch the Gradio interface
    demo.launch(share=True, debug=True)
# Start the dashboard only when this file is executed as a script
if __name__ == "__main__":
    main()