Spaces: Running on T4
| import gradio as gr | |
| from gradio_webrtc import WebRTC, ReplyOnPause, AdditionalOutputs | |
| import transformers | |
| import numpy as np | |
| from twilio.rest import Client | |
| import os | |
# Build the Ultravox pipeline once at import time; the stream handler reuses
# this single module-level instance for every request.
pipe = transformers.pipeline(
    model='fixie-ai/ultravox-v0_4_1-llama-3_1-8b',
    trust_remote_code=True,
)

# Twilio credentials are optional and read from the environment.
account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
auth_token = os.environ.get("TWILIO_AUTH_TOKEN")

# Default: no explicit WebRTC configuration. It is only replaced when BOTH
# credentials are present, in which case ICE servers are requested from Twilio
# and the transport policy is set to "relay".
rtc_configuration = None
if account_sid and auth_token:
    client = Client(account_sid, auth_token)
    token = client.tokens.create()
    rtc_configuration = {
        "iceServers": token.ice_servers,
        "iceTransportPolicy": "relay",
    }
def transcribe(
    audio: tuple[int, np.ndarray],
    conversation: list[dict],
    gradio_convo: list[dict] | None = None,
):
    """Run one spoken turn through the Ultravox pipeline and publish the result.

    The stream event in this file passes three inputs (audio, the model
    conversation state, and the transcript chatbot value) and its
    additional-outputs callback expects two values back, so this handler
    accepts and yields both conversations.

    Args:
        audio: ``(sampling_rate, samples)`` pair for the captured utterance.
        conversation: Chat turns handed to the model as ``turns``. The new
            user and assistant turns are appended to this list in place.
        gradio_convo: Messages currently shown in the transcript chatbot.
            Optional so existing two-argument callers keep working; a fresh
            list is used when it is omitted.

    Yields:
        One ``AdditionalOutputs(conversation, gradio_convo)`` carrying the
        updated model conversation and the updated transcript messages.
    """
    output = pipe(
        {"audio": audio[1], "turns": conversation, "sampling_rate": audio[0]},
        max_new_tokens=512,
    )
    # NOTE(review): assumes the pipeline result is a mapping with
    # "transcription" and "reply" keys -- confirm against the model's pipeline.
    new_turns = [
        {"role": "user", "content": output["transcription"]},
        {"role": "assistant", "content": output["reply"]},
    ]

    if gradio_convo is None:
        gradio_convo = []

    conversation.extend(new_turns)
    # Copy each turn so the two lists never share the same dict objects.
    gradio_convo.extend(dict(turn) for turn in new_turns)

    yield AdditionalOutputs(conversation, gradio_convo)
with gr.Blocks() as demo:
    # Static banner: title plus short usage instructions.
    header_html = """
    <h1 style='text-align: center'>
    Talk to Ultravox Llama 3.1 8b (Powered by WebRTC ⚡️)
    </h1>
    <p style='text-align: center'>
    Once you grant access to your microphone, you can talk naturally to Ultravox.
    When you stop talking, the audio will be sent for processing.
    </p>
    <p style='text-align: center'>
    Each conversation is limited to 90 seconds. Once the time limit is up you can rejoin the conversation.
    </p>
    """
    gr.HTML(header_html)

    # Conversation state handed to the model; seeded with the system prompt.
    system_turn = {
        "role": "system",
        "content": "You are a friendly and helpful character. You love to answer questions for people.",
    }
    transformers_convo = gr.State(value=[system_turn])

    # Two-column layout: microphone stream on the left, transcript on the right.
    with gr.Row():
        with gr.Column():
            audio = WebRTC(
                rtc_configuration=rtc_configuration,
                label="Stream",
                mode="send",
                modality="audio",
            )
        with gr.Column():
            transcript = gr.Chatbot(label="transcript", type="messages")

    # Stream microphone audio to `transcribe` via ReplyOnPause; each stream is
    # capped at 90 seconds.
    # NOTE(review): three inputs are supplied here and the callback below takes
    # two values -- verify the handler's signature and its AdditionalOutputs
    # match this wiring.
    stream_inputs = [audio, transformers_convo, transcript]
    audio.stream(
        ReplyOnPause(transcribe),
        inputs=stream_inputs,
        outputs=[audio],
        time_limit=90,
    )

    # Forward the handler's additional outputs unchanged into the conversation
    # state and the visible transcript.
    audio.on_additional_outputs(
        lambda s, a: (s, a),
        outputs=[transformers_convo, transcript],
        queue=False,
        show_progress="hidden",
    )

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()