import os

import gradio as gr
from openai import OpenAI

title = "ERNIE-4.5-21B-A3B-Thinking Demo"

description = """
- Official Website: <https://yiyan.baidu.com/> (UI in Chinese)
- API services: [Qianfan Large Model Platform](https://cloud.baidu.com/product-s/qianfan_home) (cloud platform providing LLM services, UI in Chinese)
- Open weights on Hugging Face: [ERNIE-4.5-21B-A3B-Thinking](https://huggingface.co/baidu/ERNIE-4.5-21B-A3B-Thinking)
"""
qianfan_api_key = os.getenv("QIANFAN_TOKEN")
qianfan_model = "ernie-4.5-21b-a3b-thinking"
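# Qianfan exposes an OpenAI-compatible endpoint, so the standard OpenAI client
# works here with only a custom base_url.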
client = OpenAI(base_url="https://qianfan.baidubce.com/v2", api_key=qianfan_api_key)


def respond(
    message,
    history: list[dict],
    system_message,
    max_tokens,
):
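    """Stream the model's reasoning trace and final answer to the chat UI."""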
    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})
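
    # Request a streamed completion; the thinking model returns reasoning tokens
    # in a separate `reasoning_content` field before the answer tokens.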
    response = client.chat.completions.create(
        model=qianfan_model,
        messages=messages,
        max_completion_tokens=max_tokens,
        stream=True,
    )
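
    # Accumulate the reasoning trace and the answer separately so the partial
    # transcript can be re-yielded (and re-rendered) on every chunk.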
    reasoning_content = "**Thinking**:\n"
    content = "\n\n**Answer**: \n"
    for chunk in response:
        delta = chunk.choices[0].delta
        # Some chunks carry only reasoning tokens and others only answer tokens;
        # check both fields independently so neither stream is dropped.
        reasoning_token = getattr(delta, "reasoning_content", None)
        if reasoning_token:
            reasoning_content += reasoning_token
            yield reasoning_content
        answer_token = getattr(delta, "content", None)
        if answer_token:
            content += answer_token
            yield reasoning_content + content
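

# Wire the streaming handler into a chat UI; the extra inputs map onto
# respond()'s system_message and max_tokens parameters.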
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="", label="System message"),
        gr.Slider(minimum=1024, maximum=65536, value=32768, step=1024, label="Max new tokens"),
    ],
    title=title,
    description=description,
    type="messages",
    concurrency_limit=50,
)

if __name__ == "__main__":
    demo.launch()