Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
| import gradio as gr | |
| from gradio_webrtc import ( | |
| WebRTC, | |
| ReplyOnStopWords, | |
| AdditionalOutputs, | |
| audio_to_bytes, | |
| get_twilio_turn_credentials, | |
| ) | |
| import numpy as np | |
| import base64 | |
| import re | |
| from groq import Groq | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
def _read_html_asset(path):
    """Return the full text of the HTML file at *path*.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the content has been read, and it is decoded explicitly as UTF-8 instead
    of relying on the platform's default encoding.
    """
    with open(path, encoding="utf-8") as asset:
        return asset.read()


# HTML snippets loaded once at import time:
#   spinner_html            - sent to the UI by generate() while a request is in flight
#   sandbox_html            - initial content of the "Sandbox" tab
#   something_happened_html - fallback page used by generate() when no HTML is available
spinner_html = _read_html_asset("spinner.html")
sandbox_html = _read_html_asset("sandbox.html")
something_happened_html = _read_html_asset("something_happened.html")

# Connection configuration handed to the WebRTC component below.
# NOTE(review): presumably TURN server credentials fetched from Twilio, going by
# the helper's name -- confirm against the gradio_webrtc documentation.
rtc_configuration = get_twilio_turn_credentials()
| import logging | |
# Keep the root logger at WARNING to suppress debug messages from other libraries.
logging.basicConfig(level=logging.WARNING)

# Shared record layout: timestamp - logger name - level - message.
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")

# NOTE: despite its name, `console_handler` is a FileHandler; it writes
# DEBUG-and-above records to gradio_webrtc.log rather than to the console.
console_handler = logging.FileHandler("gradio_webrtc.log")
console_handler.setFormatter(formatter)
console_handler.setLevel(logging.DEBUG)

# Turn on DEBUG logging for the "gradio_webrtc" logger only and attach the
# file handler to it.
logger = logging.getLogger("gradio_webrtc")
logger.addHandler(console_handler)
logger.setLevel(logging.DEBUG)
# Single Groq client shared by every request; generate() uses it for both
# audio transcription and chat completion.
# NOTE(review): no credentials are passed explicitly -- presumably the client
# reads its API key from the environment populated by load_dotenv(); confirm.
groq_client = Groq()
# System message that seeds every conversation history (see the gr.State
# created in the UI definition).
system_prompt = (
    "You are an AI coding assistant. "
    "Your task is to write single-file HTML applications based on a user's request. "
    "Only return the necessary code. "
    "Include all necessary imports and styles. "
    "You may also be asked to edit your original response."
)

# Per-turn user message template. `{user_message}` receives the transcribed
# request and `{code}` the HTML currently held by the code viewer.
user_prompt = (
    "Please write a single-file HTML application to fulfill the following request.\n"
    "The message:{user_message}\n"
    "Current code you have written:{code}"
)
def extract_html_content(text):
    """Return the first complete HTML document embedded in *text*.

    The document is everything from a ``<!DOCTYPE html>`` declaration up to
    and including the first ``</html>`` that follows it. Surrounding text
    (for example Markdown code fences or commentary from the model) is
    dropped.

    Matching is case-insensitive because HTML accepts the doctype and tag
    names in any case, e.g. ``<!doctype html>``.

    Args:
        text: The string to search.

    Returns:
        The matched document as a string, or ``None`` when *text* contains no
        doctype-to-``</html>`` span.
    """
    # DOTALL lets "." cross newlines; the non-greedy ".*?" stops at the first
    # closing </html> tag.
    match = re.search(
        r"<!DOCTYPE html>.*?</html>", text, re.DOTALL | re.IGNORECASE
    )
    return match.group(0) if match else None
def display_in_sandbox(code):
    """Wrap an HTML document in an ``<iframe>`` for the Sandbox tab.

    The document is base64-encoded into a ``data:`` URI, so it needs no
    escaping to sit inside the iframe's ``src`` attribute.

    Args:
        code: HTML source to display. ``None`` (an empty code viewer) is
            treated as an empty document instead of raising
            ``AttributeError``.

    Returns:
        The ``<iframe>`` markup as a string.
    """
    html_source = code if code is not None else ""
    encoded_html = base64.b64encode(html_source.encode("utf-8")).decode("utf-8")
    data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}"
    return f'<iframe src="{data_uri}" width="100%" height="600px"></iframe>'
def generate(user_message: tuple[int, np.ndarray], history: list[dict], code: str):
    """Turn a spoken request into an updated single-file HTML application.

    Generator used as the WebRTC stream handler. It yields twice:

    1. immediately, ``AdditionalOutputs(history, spinner_html)`` so the UI can
       show a spinner while the request is processed;
    2. at the end, ``AdditionalOutputs(history, html_code)`` with the updated
       conversation and the HTML to display.

    Args:
        user_message: ``(sr, audio)`` pair; both are forwarded to
            ``audio_to_bytes`` after ``audio`` has been squeezed.
        history: Chat messages so far. It is mutated in place: the formatted
            user request and the assistant reply are appended.
        code: HTML currently held by the code viewer; it is embedded in the
            prompt so the model can edit its earlier answer.

    Yields:
        ``AdditionalOutputs`` instances as described above.
    """
    # First update: show the spinner while transcription and generation run.
    yield AdditionalOutputs(history, spinner_html)

    sr, audio = user_message
    audio = audio.squeeze()

    # Speech-to-text on the recorded request.
    text = groq_client.audio.transcriptions.create(
        file=("audio-file.mp3", audio_to_bytes((sr, audio))),
        model="whisper-large-v3-turbo",
        response_format="verbose_json",
    ).text

    user_msg_formatted = user_prompt.format(user_message=text, code=code)
    history.append({"role": "user", "content": user_msg_formatted})

    print("generating response")
    response = groq_client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=history,
        temperature=1,
        max_tokens=2048,
        top_p=1,
        stream=False,
    )
    print("finished generating response")

    output = response.choices[0].message.content

    # Best effort: a failure while extracting must not break the stream.
    try:
        html_code = extract_html_content(output)
    except Exception as e:
        html_code = None
        print(e)

    # extract_html_content signals "no HTML document found" by returning None
    # rather than raising, so the fallback page has to be applied here too;
    # otherwise None would be sent to the code viewer.
    if html_code is None:
        html_code = something_happened_html

    history.append({"role": "assistant", "content": output})
    yield AdditionalOutputs(history, html_code)
# UI definition: a narrow column with instructions and the audio stream, and a
# wide column with three tabs (rendered sandbox, generated code, chat log).
with gr.Blocks(css=".code-component {max-height: 500px !important}") as demo:
    # Per-session conversation, seeded with the system prompt.
    history = gr.State([{"role": "system", "content": system_prompt}])
    with gr.Row():
        with gr.Column(scale=1):
            # The phrase quoted in the instructions must be one the stream
            # handler listens for (see `stop_words` below).
            gr.HTML(
                """
                <h1 style='text-align: center'>
                Hello Llama! 🦙
                </h1>
                <p style='text-align: center'>
                Create and edit single-file HTML applications with just your voice! After recording, say "Hello Llama" and wait for confirmation, before asking your question.
                </p>
                <p style='text-align: center'>
                Each conversation is limited to 90 seconds. Once the time limit is up you can rejoin the conversation.
                </p>
                """
            )
            webrtc = WebRTC(
                rtc_configuration=rtc_configuration, mode="send", modality="audio"
            )
        with gr.Column(scale=10):
            with gr.Tabs():
                with gr.Tab("Sandbox"):
                    sandbox = gr.HTML(value=sandbox_html)
                with gr.Tab("Code"):
                    code = gr.Code(
                        language="html",
                        max_lines=50,
                        interactive=False,
                        elem_classes="code-component",
                    )
                with gr.Tab("Chat"):
                    cb = gr.Chatbot(type="messages")

    # Audio from the browser is handled by `generate`, wrapped in
    # ReplyOnStopWords. NOTE(review): presumably the wrapper only invokes
    # `generate` after one of the stop words has been heard -- confirm against
    # the gradio_webrtc documentation.
    webrtc.stream(
        ReplyOnStopWords(
            generate,
            input_sample_rate=16000,
            stop_words=["hello llama", "hello lama", "hello lamma", "hello llamma"],
        ),
        inputs=[webrtc, history, code],
        outputs=[webrtc],
        time_limit=90,  # matches the 90-second limit stated in the instructions
        concurrency_limit=10,
    )

    # Each AdditionalOutputs(history, code) yielded by `generate` updates the
    # history state, the code viewer, and the chat log (which shows the history).
    webrtc.on_additional_outputs(
        lambda history, code: (history, code, history), outputs=[history, code, cb]
    )

    # Re-render the sandbox whenever the code viewer's content changes.
    code.change(display_in_sandbox, code, sandbox, queue=False)
# Start the Gradio app only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    demo.launch()