File size: 5,080 Bytes
8eda1d3
 
b1903b9
7f75ea6
8eda1d3
7f75ea6
8eda1d3
 
 
 
 
 
 
 
7f75ea6
8eda1d3
 
 
 
 
 
 
 
 
 
 
60a5b5b
 
8eda1d3
60a5b5b
 
 
 
 
 
 
8eda1d3
 
60a5b5b
8eda1d3
 
 
 
 
 
 
 
 
 
 
60a5b5b
 
8eda1d3
60a5b5b
8eda1d3
 
 
 
 
 
 
 
 
 
60a5b5b
 
 
 
 
8eda1d3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import gradio as gr
import os
from audio_tokenizer import process_dataset
import spaces

@spaces.GPU(duration=120)
def process_dataset_ui(
    original_dataset,
    output_dataset,
    model_type,
    text_field,
    hf_token
):
    """
    Process dataset with Gradio UI using ZeroGPU.

    Validates all inputs first, then exports the HuggingFace token and
    delegates to ``process_dataset``.

    Args:
        original_dataset: HuggingFace dataset path to process
        output_dataset: Output dataset path on HuggingFace Hub
        model_type: Model type - either "qwen3" or "lfm2"
        text_field: Name of text field in dataset
        hf_token: HuggingFace token for authentication

    Returns:
        Status message (success or a formatted error string with traceback)
    """
    import traceback

    try:
        print("=== Starting Dataset Processing ===")
        print(f"Original Dataset: {original_dataset}")
        print(f"Output Dataset: {output_dataset}")
        print(f"Model Type: {model_type}")
        print(f"Text Field: {text_field}")
        print(f"Token provided: {bool(hf_token)}")

        # Validate inputs BEFORE touching the environment. Previously the
        # token was written to os.environ first, which raised TypeError for
        # a None token and exported empty/invalid tokens before rejection.
        if not original_dataset or not output_dataset:
            return "❌ Error: Please provide both original and output dataset names"

        if not hf_token:
            return "❌ Error: Please provide a HuggingFace token"

        if model_type not in ["qwen3", "lfm2"]:
            return "❌ Error: Model type must be either 'qwen3' or 'lfm2'"

        print("βœ“ Input validation passed")

        # Set HuggingFace token only after validation succeeded
        os.environ["HF_TOKEN"] = hf_token
        print("βœ“ Token set in environment")

        # Process dataset (downloads, tokenizes audio+text, uploads result)
        print("Starting dataset processing...")
        process_dataset(
            original_dataset=original_dataset,
            output_dataset=output_dataset,
            model_type=model_type,
            text_field=text_field
        )

        return f"βœ… Dataset processed successfully and uploaded to: {output_dataset}"

    except Exception as e:
        # Surface the full traceback in the UI so remote (ZeroGPU) failures
        # are debuggable without server log access.
        error_msg = f"❌ Error: {str(e)}\n\n"
        error_msg += f"Error Type: {type(e).__name__}\n\n"
        error_msg += f"Traceback:\n{traceback.format_exc()}"
        print(error_msg)
        return error_msg

# Create Gradio interface
# Layout: instructions header, then a two-column row (inputs | status output),
# then example values. Widget creation order determines on-screen order.
with gr.Blocks(title="VyvoTTS Dataset Tokenizer") as demo:
    gr.Markdown("""
    # πŸŽ™οΈ VyvoTTS Dataset Tokenizer

    Process audio datasets for VyvoTTS training by tokenizing both audio and text.

    ## Instructions:
    1. Enter your HuggingFace token (required for downloading and uploading datasets)
    2. Provide the original dataset path from HuggingFace Hub
    3. Specify the output dataset path where processed data will be uploaded
    4. Select the model type (Qwen3 or LFM2)
    5. Specify the text field name in your dataset
    6. Click "Process Dataset" to start

    **Note:** This process requires a GPU and may take several minutes depending on dataset size.
    """)

    with gr.Row():
        # Left column: all user inputs
        with gr.Column():
            # type="password" masks the token in the UI
            hf_token = gr.Textbox(
                label="HuggingFace Token",
                placeholder="hf_...",
                type="password",
                info="Your HuggingFace token for authentication"
            )

            # Pre-filled with a known-working public example dataset
            original_dataset = gr.Textbox(
                label="Original Dataset",
                placeholder="MrDragonFox/Elise",
                value="MrDragonFox/Elise",
                info="HuggingFace dataset path to process"
            )

            output_dataset = gr.Textbox(
                label="Output Dataset",
                placeholder="username/dataset-name",
                info="Output dataset path on HuggingFace Hub"
            )

            # Choices must match the validation in process_dataset_ui
            model_type = gr.Radio(
                choices=["qwen3", "lfm2"],
                value="qwen3",
                label="Model Type",
                info="Select the model type for tokenization"
            )

            text_field = gr.Textbox(
                label="Text Field Name",
                placeholder="text",
                value="text",
                info="Name of the text field in your dataset (e.g., 'text', 'text_scribe')"
            )

            process_btn = gr.Button("Process Dataset", variant="primary")

        # Right column: processing status / error output
        with gr.Column():
            output = gr.Textbox(
                label="Status",
                placeholder="Status will appear here...",
                lines=10
            )

    # Wire the button to the ZeroGPU-decorated processing function.
    # Input order must match process_dataset_ui's parameter order.
    process_btn.click(
        fn=process_dataset_ui,
        inputs=[original_dataset, output_dataset, model_type, text_field, hf_token],
        outputs=output
    )

    gr.Markdown("""
    ## πŸ“ Example Values:

    ### For Qwen3:
    - **Original Dataset:** `MrDragonFox/Elise`
    - **Output Dataset:** `username/elise-qwen3-processed`
    - **Model Type:** `qwen3`
    - **Text Field:** `text`

    ### For LFM2:
    - **Original Dataset:** `MrDragonFox/Elise`
    - **Output Dataset:** `username/elise-lfm2-processed`
    - **Model Type:** `lfm2`
    - **Text Field:** `text`

    ## ⚠️ Requirements:
    - GPU with CUDA support
    - HuggingFace account with write access
    - Valid HuggingFace token
    """)

# Launch only when run as a script (not when imported by a Space runner)
if __name__ == "__main__":
    demo.launch()