Spaces:
Build error
Build error
File size: 3,452 Bytes
5bf8054 c310cb4 df2d876 c310cb4 a25ae1b c310cb4 a25ae1b df2d876 a25ae1b c310cb4 f75874f c310cb4 a25ae1b c310cb4 a25ae1b 98eaa14 a25ae1b 98eaa14 84ca7cb 6e89986 98eaa14 ed2e1d6 df2d876 6e89986 c310cb4 5bf8054 a25ae1b c0052fc a25ae1b f75874f df2d876 f75874f c310cb4 a25ae1b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
import gradio as gr
# Display string for the date of the last update ("22th" was an ordinal typo).
LAST_UPDATED = "Nov 22nd 2024"
####################################
# Static leaderboard data
####################################
# One record per model: its display name plus the four metric scores
# (STOI, PESQ, WER, UTMOS) shown in the leaderboard table.
leaderboard_data = [
    {
        "name": "StyleTTS 2",
        "STOI": 0.998,
        "PESQ": 3.921,
        "WER": 0.162,
        "UTMOS": 3.47,
    },
    {
        "name": "Matxa-TTS",
        "STOI": 0.996,
        "PESQ": 3.539,
        "WER": 0.179,
        "UTMOS": 3.50,
    },
    {
        "name": "Matxa-TTS-multiaccent",
        "STOI": 0.996,
        "PESQ": 3.415,
        "WER": 0.242,
        "UTMOS": 2.98,
    },
    {
        "name": "StableTTS",
        "STOI": 0.997,
        "PESQ": 3.643,
        "WER": 0.164,
        "UTMOS": 2.62,
    },
]
# Markdown text for the "Metrics" tab (rendered with gr.Markdown).
# NOTE(review): the intro sentence says "speech recognition metrics and
# datasets", but the page header describes a Catalan TTS leaderboard and no
# datasets are listed in this text -- confirm the intended wording.
# NOTE(review): UTMOS is, as far as I know, the name of the UTokyo-SaruLab MOS
# prediction system rather than "Utterance Mean Opinion Score" -- confirm.
METRICS_TAB_TEXT = """
## Metrics
Here you will find details about the speech recognition metrics and datasets reported in our leaderboard.
### UTMOS
The **UTMOS** (Utterance Mean Opinion Score) metric evaluates the **quality** of speech synthesis models. A higher UTMOS score indicates better audio quality.
### WER
The **Word Error Rate (WER)** measures the **accuracy** of automatic speech recognition systems. It calculates the percentage of words in the system's output that differ from the reference transcript. Lower WER values indicate higher accuracy.
### STOI
The **Short-Time Objective Intelligibility (STOI)** is a metric used to evaluate the **intelligibility** of synthesized speech. Higher STOI values indicate clearer, more intelligible speech.
### PESQ
The **Perceptual Evaluation of Speech Quality (PESQ)** is a metric used to measure the **quality** of speech signals, considering human perception. Higher PESQ values indicate better speech quality.
"""
####################################
# Functions (static version)
####################################
def get_leaderboard(models=None):
    """Return the leaderboard rows ordered by descending UTMOS.

    Args:
        models: Optional iterable of model records, each a dict with the keys
            ``name``, ``UTMOS``, ``WER``, ``STOI`` and ``PESQ``. When omitted,
            the module-level ``leaderboard_data`` is used.

    Returns:
        A list of ``[rank, name, UTMOS, WER, STOI, PESQ]`` rows, where
        ``rank`` is the 1-based position after sorting by UTMOS (highest
        first). The column order matches the table headers used by the UI.
    """
    if models is None:
        models = leaderboard_data
    # sorted() is stable, so records with equal UTMOS keep their input order.
    ranked = sorted(models, key=lambda model: model['UTMOS'], reverse=True)
    # Build the rows directly instead of writing a 'rank' key back into the
    # shared records, so calling this function never mutates the source data.
    return [
        [rank, model['name'], model['UTMOS'], model['WER'], model['STOI'], model['PESQ']]
        for rank, model in enumerate(ranked, start=1)
    ]
####################################
# Gradio interface
####################################
# Base theme with a custom font stack: two Google fonts first, then generic
# system fallbacks.
theme = gr.themes.Base(
    font=[gr.themes.GoogleFont('Libre Franklin'), gr.themes.GoogleFont('Public Sans'), 'system-ui', 'sans-serif'],
)

# A single Blocks app; the earlier duplicated `with gr.Blocks(...)` header only
# built a throwaway app that was immediately rebound, so it has been removed.
with gr.Blocks(theme=theme) as demo:
    gr.Markdown("# 🏆 Leaderboard\nVote to help the community determine the best Catalan TTS models.\n")
    # gr.Tabs is the layout container for gr.Tab children. gr.TabbedInterface
    # combines existing Interface objects and requires an interface list, so
    # it cannot be opened empty as a context manager.
    with gr.Tabs():
        with gr.Tab("Leaderboard"):
            # Populate the table with the static leaderboard rows at build time.
            leaderboard_table = gr.DataFrame(
                headers=["Rank", "Model", "UTMOS", "WER", "STOI", "PESQ"],
                datatype=["str", "str", "str", "str", "str", "str"],
                value=get_leaderboard(),
            )
        with gr.Tab("Metrics"):
            gr.Markdown(METRICS_TAB_TEXT)

# Launch the application
demo.queue(api_open=False, default_concurrency_limit=40).launch(show_api=False)
|