import re

import gradio as gr
import librosa
from transformers import AutoProcessor
from vllm import LLM, SamplingParams

def load_model_processor(model_path):
    processor = AutoProcessor.from_pretrained(model_path)
    llm = LLM(
        model=model_path,
        trust_remote_code=True,
        gpu_memory_utilization=0.8,
        enforce_eager=True,
        device="cuda",
        limit_mm_per_prompt={"audio": 5},  # accept up to 5 audio clips per prompt
    )
    return llm, processor
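
# A minimal sanity check for load_model_processor (a sketch, kept commented out so the
# Space loads the model exactly once; the text-only prompt format matches the one built
# in response_to_audio below):
#
#   llm, processor = load_model_processor("SeaLLMs/SeaLLMs-Audio-7B")
#   prompt = processor.apply_chat_template(
#       [{"role": "user", "content": [{"type": "text", "text": "Hello!"}]}],
#       add_generation_prompt=True, tokenize=False)
#   out = llm.generate([{"prompt": prompt}], SamplingParams(temperature=0, max_tokens=32))
#   print(out[0].outputs[0].text)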

model_path1 = "SeaLLMs/SeaLLMs-Audio-7B"
model1, processor1 = load_model_processor(model_path1)

def response_to_audio(audio_url, text, model=None, processor=None, temperature=0,
                      repetition_penalty=1.1, top_p=0.9, max_new_tokens=2048):
    # Build a single-turn conversation from whichever of audio/text is present.
    if text is None:
        conversation = [
            {"role": "user", "content": [
                {"type": "audio", "audio_url": audio_url},
            ]},
        ]
    elif audio_url is None:
        conversation = [
            {"role": "user", "content": [
                {"type": "text", "text": text},
            ]},
        ]
    else:
        conversation = [
            {"role": "user", "content": [
                {"type": "audio", "audio_url": audio_url},
                {"type": "text", "text": text},
            ]},
        ]
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
    # Load every referenced clip, resampled to the rate the processor expects (16 kHz for this model).
    audios = []
    for message in conversation:
        if isinstance(message["content"], list):
            for ele in message["content"]:
                if ele["type"] == "audio" and ele["audio_url"] is not None:
                    audios.append(librosa.load(
                        ele["audio_url"],
                        sr=processor.feature_extractor.sampling_rate)[0])
    sampling_params = SamplingParams(
        temperature=temperature, max_tokens=max_new_tokens,
        repetition_penalty=repetition_penalty, top_p=top_p, top_k=20,
        stop_token_ids=[],
    )
    inputs = {
        "prompt": prompt,
        "multi_modal_data": {
            "audio": [(audio, processor.feature_extractor.sampling_rate) for audio in audios]
        },
    }
    output = model.generate([inputs], sampling_params=sampling_params)[0]
    response = output.outputs[0].text
    return response
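
# Illustrative call (a sketch; "sample.wav" is a hypothetical local file path. The Gradio
# Audio component below passes a filepath, despite the parameter being named audio_url):
#
#   reply = response_to_audio("sample.wav", "What is said in this clip?",
#                             model=model1, processor=processor1)
#   print(reply)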

def clear_inputs():
    return None, "", ""

def contains_chinese(text):
    # Regular expression for Chinese characters (CJK Unified Ideographs)
    chinese_char_pattern = re.compile(r"[\u4e00-\u9fff]")
    return bool(chinese_char_pattern.search(text))
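
# Illustrative behavior:
#   contains_chinese("你好")  -> True
#   contains_chinese("Hello") -> False
# The range \u4e00-\u9fff covers the main CJK Unified Ideographs block only, so characters
# from the extension blocks (e.g. \u3400-\u4dbf) are not detected.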

def compare_responses(audio_url, text):
    # Gradio passes "" (not None) for an empty textbox; normalize so the audio-only
    # branch in response_to_audio is reachable, and reject fully empty submissions.
    text = text if text else None
    if audio_url is None and text is None:
        return "Please provide an audio clip and/or some text."
    if text is not None and contains_chinese(text):
        return "Caution! This demo does not support Chinese!"
    response1 = response_to_audio(audio_url, text, model1, processor1)
    if contains_chinese(response1):
        return "ERROR! Try another example!"
    return response1

with gr.Blocks() as demo:
| # gr.Markdown(f"Evaluate {model_path1}") | |
| gr.HTML("""<p align="center"><img src="https://DAMO-NLP-SG.github.io/SeaLLMs-Audio/static/images/seallm-audio-logo.png" style="height: 80px"/><p>""") | |
| gr.HTML("""<h1 align="center" id="space-title">SeaLLMs-Audio-Demo</h1>""") | |
    gr.HTML(
        """<div style="text-align: center; font-size: 16px;">
        This WebUI is based on <a href="https://huggingface.co/SeaLLMs/SeaLLMs-Audio-7B">SeaLLMs-Audio-7B</a>, developed by Alibaba DAMO Academy.<br>
        You can interact with the chatbot in <b>English, Indonesian, Thai, or Vietnamese</b>.<br>
        For the input, you can provide <b>audio and/or text</b>.
        </div>"""
    )
    gr.HTML(
        """<div style="text-align: center; font-size: 16px;">
        <a href="https://DAMO-NLP-SG.github.io/SeaLLMs-Audio/">[Website]</a>
        <a href="https://huggingface.co/SeaLLMs/SeaLLMs-Audio-7B">[Model🤗]</a>
        <a href="https://github.com/DAMO-NLP-SG/SeaLLMs-Audio">[GitHub]</a>
        </div>"""
    )

    with gr.Row():
        with gr.Column():
            mic_input = gr.Audio(sources=["upload", "microphone"], label="Record Audio", type="filepath", elem_id="mic_input")
        with gr.Column():
            additional_input = gr.Textbox(label="Text Input")

    # Buttons to submit the inputs or clear them
    with gr.Row():
        btn_submit = gr.Button("Submit")
        btn_clear = gr.Button("Clear")

    with gr.Row():
        output_text1 = gr.Textbox(label=model_path1.split("/")[-1], interactive=False, elem_id="output_text1")

    btn_submit.click(
        fn=compare_responses,
        inputs=[mic_input, additional_input],
        outputs=[output_text1],
    )
    btn_clear.click(
        fn=clear_inputs,
        inputs=None,
        outputs=[mic_input, additional_input, output_text1],
        queue=False,
    )

demo.queue(default_concurrency_limit=40).launch(share=True)
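# Note on the launch line above: queue() must be chained before launch(), and
# default_concurrency_limit=40 lets up to 40 requests for an event run concurrently
# (Gradio 4.x queue semantics). share=True is typically unnecessary on Hugging Face
# Spaces, which serve the app directly.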