File size: 3,452 Bytes
5bf8054
c310cb4
df2d876
c310cb4
 
a25ae1b
c310cb4
a25ae1b
df2d876
 
 
 
a25ae1b
c310cb4
f75874f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c310cb4
a25ae1b
c310cb4
 
a25ae1b
 
98eaa14
a25ae1b
98eaa14
84ca7cb
6e89986
98eaa14
ed2e1d6
 
 
df2d876
 
6e89986
 
 
c310cb4
 
 
5bf8054
 
a25ae1b
c0052fc
a25ae1b
f75874f
df2d876
f75874f
 
 
 
 
 
 
 
 
 
 
 
 
 
c310cb4
a25ae1b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import gradio as gr

# Human-readable date stamp for the leaderboard figures (presumably the last
# data refresh; it is not referenced in the visible part of this file).
LAST_UPDATED = "Nov 22nd 2024"

####################################
# Static leaderboard data
####################################
# One record per model: its display name plus the STOI, PESQ, WER and UTMOS
# scores shown in the leaderboard table. Key order is kept as
# name, STOI, PESQ, WER, UTMOS.
leaderboard_data = [
    dict(name='StyleTTS 2', STOI=0.998, PESQ=3.921, WER=0.162, UTMOS=3.47),
    dict(name='Matxa-TTS', STOI=0.996, PESQ=3.539, WER=0.179, UTMOS=3.50),
    dict(name='Matxa-TTS-multiaccent', STOI=0.996, PESQ=3.415, WER=0.242, UTMOS=2.98),
    dict(name='StableTTS', STOI=0.997, PESQ=3.643, WER=0.164, UTMOS=2.62),
]


# Markdown text for the "Metrics" tab: one short section per metric column
# (UTMOS, WER, STOI, PESQ) explaining what it measures and whether higher or
# lower is better.
METRICS_TAB_TEXT = """
## Metrics
Here you will find details about the speech recognition metrics and datasets reported in our leaderboard.
### UTMOS
The **UTMOS** (Utterance Mean Opinion Score) metric evaluates the **quality** of speech synthesis models. A higher UTMOS score indicates better audio quality.

### WER
The **Word Error Rate (WER)** measures the **accuracy** of automatic speech recognition systems. It calculates the percentage of words in the system's output that differ from the reference transcript. Lower WER values indicate higher accuracy.

### STOI
The **Short-Time Objective Intelligibility (STOI)** is a metric used to evaluate the **intelligibility** of synthesized speech. Higher STOI values indicate clearer, more intelligible speech.

### PESQ
The **Perceptual Evaluation of Speech Quality (PESQ)** is a metric used to measure the **quality** of speech signals, considering human perception. Higher PESQ values indicate better speech quality.
"""

####################################
# Functions (static version)
####################################

def get_leaderboard(data=None):
    """Return the leaderboard rows ranked by UTMOS, best score first.

    Args:
        data: Optional iterable of model records, each a mapping with the
            keys ``name``, ``UTMOS``, ``WER``, ``STOI`` and ``PESQ``.
            Defaults to the module-level ``leaderboard_data``.

    Returns:
        A list of rows ``[rank, name, UTMOS, WER, STOI, PESQ]`` where
        ``rank`` starts at 1. Records with equal UTMOS keep their input
        order.
    """
    models = leaderboard_data if data is None else data

    # UTMOS is the only ranking criterion; higher is better.
    ranked = sorted(models, key=lambda model: model['UTMOS'], reverse=True)

    # Compute the rank while building the rows instead of writing a 'rank'
    # key into the shared records, so the source data is never mutated.
    return [
        [rank, model['name'], model['UTMOS'], model['WER'], model['STOI'], model['PESQ']]
        for rank, model in enumerate(ranked, start=1)
    ]

####################################
# Gradio interface
####################################

# Base Gradio theme with a custom font stack: two Google fonts first, then
# generic system fallbacks.
theme = gr.themes.Base(
    font=[
        gr.themes.GoogleFont('Libre Franklin'),
        gr.themes.GoogleFont('Public Sans'),
        'system-ui',
        'sans-serif',
    ],
)

# Build the UI once: a title followed by two tabs (the leaderboard table and
# the metric descriptions). A single Blocks context is enough; an earlier
# duplicate only created an app that was immediately replaced.
with gr.Blocks(theme=theme) as demo:
    gr.Markdown("# 🏆 Leaderboard\nVote to help the community determine the best Catalan TTS models.\n")

    # gr.Tabs is the container for gr.Tab children. gr.TabbedInterface wraps
    # already-built interfaces and cannot be called without an interface list.
    with gr.Tabs() as tabs:
        with gr.Tab("Leaderboard"):
            # Fill the table with the static leaderboard rows at build time.
            leaderboard_table = gr.DataFrame(
                headers=["Rank", "Model", "UTMOS", "WER", "STOI", "PESQ"],
                datatype=["str", "str", "str", "str", "str", "str"],
                value=get_leaderboard(),
            )

        with gr.Tab("Metrics"):
            gr.Markdown(METRICS_TAB_TEXT)

# Launch the application: enable the request queue (API closed, up to 40
# concurrent workers per event by default), then start the server with the
# API docs hidden.
demo.queue(api_open=False, default_concurrency_limit=40)
demo.launch(show_api=False)