import os

import gradio as gr
from openai import OpenAI

title = "ERNIE-4.5-21B-A3B-Thinking Demo"

description = """
- Official Website: <https://yiyan.baidu.com/> (UI in Chinese)
- API services: [Qianfan Large Model Platform](https://cloud.baidu.com/product-s/qianfan_home) (cloud platform providing LLM services, UI in Chinese)
- Open weights on Hugging Face: [ERNIE-4.5-21B-A3B-Thinking](https://huggingface.co/baidu/ERNIE-4.5-21B-A3B-Thinking)
"""
qianfan_api_key = os.getenv("QIANFAN_TOKEN")
qianfan_model = "ernie-4.5-21b-a3b-thinking"
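# Qianfan exposes an OpenAI-compatible endpoint, so the standard OpenAI client
# works here with only a custom base_url.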
client = OpenAI(base_url="https://qianfan.baidubce.com/v2", api_key=qianfan_api_key)


def respond(
    message,
    history: list[dict],
    system_message,
    max_tokens,
):
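    """Stream the model's reasoning trace and final answer to the chat UI."""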
    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})
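
    # Request a streamed completion; the thinking model returns reasoning tokens
    # in a separate `reasoning_content` field before the answer tokens.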
    response = client.chat.completions.create(
        model=qianfan_model,
        messages=messages,
        max_completion_tokens=max_tokens,
        stream=True,
    )
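
    # Accumulate the reasoning trace and the answer separately so the partial
    # transcript can be re-yielded (and re-rendered) on every chunk.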
    reasoning_content = "**Thinking**:\n"
    content = "\n\n**Answer**: \n"
    for chunk in response:
        delta = chunk.choices[0].delta
        # Some chunks carry only reasoning tokens and others only answer tokens;
        # check both fields independently so neither stream is dropped.
        reasoning_token = getattr(delta, "reasoning_content", None)
        if reasoning_token:
            reasoning_content += reasoning_token
            yield reasoning_content
        answer_token = getattr(delta, "content", None)
        if answer_token:
            content += answer_token
            yield reasoning_content + content
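

# Wire the streaming handler into a chat UI; the extra inputs map onto
# respond()'s system_message and max_tokens parameters.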
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="", label="System message"),
        gr.Slider(minimum=1024, maximum=65536, value=32768, step=1024, label="Max new tokens"),
    ],
    title=title,
    description=description,
    type="messages",
    concurrency_limit=50,
)

if __name__ == "__main__":
    demo.launch()