Spaces:

GemteksAI
/

KeyWave

Sleeping

File size: 5,044 Bytes

import gradio as gr
import soundfile as sf
import datetime
import numpy as np
import hashlib


# Longest clip accepted by save_audio; also interpolated into the help texts.
MAX_LENGTH = 1.5  # in seconds
# Preset choices for the label dropdown. "Custom" is a sentinel: when it is
# selected, the free-text label is used instead of the dropdown value.
# NOTE: the name is a misspelling of "LABELS"; it is kept as-is because other
# code in this file refers to it.
LABLES = ["Hi, doctor", "doctor home", "Ask doctor", "Hey, daughter", "Hi, gemtek", "Hi, AI", "Custom"]

# Initialize a dictionary to store file paths by speaker_id
# (speaker_id -> list of saved file names). It lives in process memory only.
uploaded_files = {}

# Usage instructions in English, rendered as Markdown by create_interface.
# MAX_LENGTH is interpolated once, when this module is loaded.
help_en = f"""
### Usage Instructions:
1. **Select a Label**: Choose a label from the dropdown menu, or select "Custom" to enter your own label.
2. **Enter User Name**: Input your user name, which will be used to generate a unique ID for your recordings.
3. **Record or Upload Audio**: Click the "Record Audio" button to start recording or upload an existing audio file. The maximum length for each recording or uploaded file is {MAX_LENGTH} seconds.
4. **Submit Recording**: After recording or uploading, click the "Submit" button to save the audio.
5. **Download Recordings**: Use the provided download links to retrieve all recorded files.
"""
# The same usage instructions in Traditional Chinese.
help_zh_tw = f"""
### 使用流程:
1. **選擇標記**: 從下拉選單中選擇一個標記，或選擇“Custom”以輸入自己的標記。
2. **輸入用戶名稱**: 輸入您的用戶名稱，用於生成錄音唯一ID。
3. **錄製或上傳音頻**: 點擊“Record Audio”按鈕開始錄音，或上傳現有的音頻文件。每次錄音或上傳文件的最長時間為{MAX_LENGTH}秒。
4. **提交錄音**: 錄製或上傳完成後，點擊“Submit”按鈕保存音頻。
5. **下載錄音**: 使用提供的下載連結下載所有已錄製或上傳的文件。
"""

# Function to save audio with label and speaker name
def save_audio(audio, dropdown_label, custom_label, speaker_name):
    """Validate a submitted clip and store it as a WAV file for the speaker.

    The file is written to the current working directory and its name is
    remembered in ``uploaded_files`` under the speaker's ID.

    Args:
        audio: Value of the audio component: a ``(sample_rate, samples)``
            pair, or ``None`` when nothing was recorded or uploaded.
        dropdown_label: Label picked in the dropdown.
        custom_label: Free-text label; used only when ``dropdown_label`` is
            ``"Custom"``.
        speaker_name: User name. The first 8 hex digits of its SHA-256 digest
            form the speaker ID, so the same name always maps to the same ID.

    Returns:
        A 3-tuple: the list of file names saved so far for this speaker, an
        info message stating the clip length, and a ``gr.update`` that clears
        the audio input.

    Raises:
        gr.Error: If the label, user name or audio is missing, or the clip is
            longer than ``MAX_LENGTH`` seconds.
    """
    # Determine the final label; the custom text only counts for "Custom".
    label = custom_label if dropdown_label == "Custom" else dropdown_label
    # Treat a missing or whitespace-only label as empty.
    label = (label or "").strip()

    if not label:
        raise gr.Error("Label cannot be empty 💥!", duration=5)
    if not speaker_name:
        raise gr.Error("User name cannot be empty 💥!", duration=5)
    # Submitting without a recording/upload hands us None; report that cleanly
    # instead of failing on the unpacking below.
    if audio is None:
        raise gr.Error("Please record or upload audio first 💥!", duration=5)

    # Generate speaker_id using a hash function to ensure consistency
    speaker_id = hashlib.sha256(speaker_name.encode()).hexdigest()[:8]

    # Get sample rate and audio data from Gradio
    sample_rate, samples = audio
    audio_data = np.array(samples)

    # Calculate the audio length in seconds (first axis = number of samples)
    audio_length = len(audio_data) / sample_rate

    # Check if the audio length exceeds MAX_LENGTH second
    if audio_length > MAX_LENGTH:
        raise gr.Error(f"Recording is longer than {MAX_LENGTH} second 💥!", duration=5)

    # The label becomes part of a file name and a custom label is free text,
    # so neutralise path separators, characters that are invalid in file
    # names, and control characters. The preset labels pass through unchanged.
    unsafe_chars = set('/\\:*?"<>|')
    safe_label = "".join(
        "_" if ch in unsafe_chars or ord(ch) < 32 else ch for ch in label
    )

    # Generate file name (timestamp has one-second resolution)
    timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    filename = f"{timestamp}_{speaker_id}_{safe_label}.wav"

    # Save the audio file in wav format
    sf.write(filename, audio_data, sample_rate)

    # Add the new file path to the list of uploaded files by speaker_id.
    # A resubmission within the same second reuses the file name, so avoid
    # listing the same file twice.
    speaker_files = uploaded_files.setdefault(speaker_id, [])
    if filename not in speaker_files:
        speaker_files.append(filename)

    # Create an info message
    info_message = f"Recorded audio length: {audio_length:.2f} seconds."

    # Reset the audio input to None
    return speaker_files, info_message, gr.update(value=None)

# Interface design using gr.Blocks
def create_interface():
    """Assemble the recording UI and return the ``gr.Blocks`` app, unlaunched.

    Layout, top to bottom: label dropdown, custom-label textbox (hidden until
    "Custom" is chosen), user-name textbox, audio input, submit button,
    download list, info area, then the English and Chinese usage notes.
    """
    with gr.Blocks() as app:
        label_choice = gr.Dropdown(choices=LABLES, label="Select Label")
        # Starts hidden; only shown while "Custom" is the selected label.
        custom_box = gr.Textbox(label="Enter Custom Label", visible=False)

        def toggle_custom_label(selected_label):
            # Visibility of the custom textbox mirrors the "Custom" selection.
            return gr.update(visible=(selected_label == "Custom"))

        label_choice.change(
            fn=toggle_custom_label,
            inputs=label_choice,
            outputs=custom_box,
        )

        name_box = gr.Textbox(label="Enter User Name")
        recorder = gr.Audio(
            sources=["microphone", "upload"],
            type="numpy",
            label="Record or Upload Audio",
        )

        submit_button = gr.Button("Submit")

        # Download list showing the files saved for the current speaker.
        downloads = gr.Files(label="Download your recordings")

        # Feedback area (e.g. the length of the clip just saved).
        info_area = gr.Markdown()

        # On submit: save the clip, refresh the download list, show the
        # feedback message and clear the audio input.
        submit_button.click(
            fn=save_audio,
            inputs=[recorder, label_choice, custom_box, name_box],
            outputs=[downloads, info_area, recorder],
        )

        def get_user_files(speaker_name):
            # Same ID derivation as save_audio: first 8 hex digits of SHA-256.
            digest = hashlib.sha256(speaker_name.encode()).hexdigest()
            return uploaded_files.get(digest[:8], [])

        # Pressing Enter in the user-name box lists that speaker's files.
        name_box.submit(fn=get_user_files, inputs=name_box, outputs=downloads)

        # Usage instructions, English first, then Chinese.
        gr.Markdown(help_en)
        gr.Markdown(help_zh_tw)

    return app

# Launch the interface
if __name__ == "__main__":
    # Build the app and start serving it in a single step.
    create_interface().launch()