Spaces:

Qwen
/

Qwen2.5-Math-Demo

Running

App Files Files Community

Qwen2.5-Math-Demo / app.py

Muratbeser

Adding multiple images

74aa7d2 verified about 1 year ago

raw

history blame

4.45 kB

	import gradio as gr
	import os
	import tempfile
	from pathlib import Path
	import secrets
	import dashscope
	from dashscope import MultiModalConversation, Generation
	from PIL import Image

	# DashScope credentials are read from the environment; the value is None
	# when the YOUR_API_TOKEN variable is not set.
	YOUR_API_TOKEN = os.environ.get('YOUR_API_TOKEN')
	dashscope.api_key = YOUR_API_TOKEN

	# Module-level state shared by every call in this process:
	#   math_messages      - message history sent to the math model
	#   image_descriptions - descriptions collected from processed images
	math_messages = list()
	image_descriptions = list()

	def process_image(image, shouldConvert=False):
	    """Ask the vision model to describe the math content of an image.

	    The image is written to a uniquely named temporary JPEG (under
	    ``GRADIO_TEMP_DIR`` or ``<system temp>/gradio``), referenced by a
	    ``file://`` URL in the request, and removed again once the call is done,
	    even when saving or the request itself fails.

	    Args:
	        image: A PIL image, a filesystem path (``str`` / ``os.PathLike``), or
	            an object without ``save`` that exposes its path as ``.name``
	            (presumably what the file-upload component hands over; confirm
	            against the installed Gradio version).
	        shouldConvert: When true, always composite the image onto a white
	            background using its alpha channel (used for sketchpad output).
	            Images whose mode JPEG cannot store are flattened the same way
	            regardless of this flag.

	    Returns:
	        ``response.output.choices[0]["message"]["content"]`` exactly as
	        returned by ``MultiModalConversation.call``.

	    Raises:
	        RuntimeError: If the model call returns no output.
	    """
	    uploaded_file_dir = os.environ.get("GRADIO_TEMP_DIR") or str(
	        Path(tempfile.gettempdir()) / "gradio"
	    )
	    os.makedirs(uploaded_file_dir, exist_ok=True)

	    name = f"tmp{secrets.token_hex(20)}.jpg"
	    filename = os.path.join(uploaded_file_dir, name)

	    # Uploaded files arrive as paths (or path-carrying wrappers), not as PIL
	    # images, so load them before any image operation is attempted.
	    source_path = None
	    if isinstance(image, (str, os.PathLike)):
	        source_path = image
	    elif not hasattr(image, "save") and hasattr(image, "name"):
	        source_path = image.name
	    if source_path is not None:
	        with Image.open(source_path) as opened:
	            # copy() forces the pixel data to load so the file can be closed.
	            image = opened.copy()

	    # JPEG cannot store alpha or palette data; flatten onto white instead of
	    # letting save() raise. Going through RGBA also makes the mask valid for
	    # images that have no alpha channel of their own.
	    if shouldConvert or image.mode not in ("RGB", "L"):
	        rgba = image.convert("RGBA")
	        new_img = Image.new('RGB', size=rgba.size, color=(255, 255, 255))
	        new_img.paste(rgba, (0, 0), mask=rgba)
	        image = new_img

	    messages = [{
	        'role': 'system',
	        'content': [{'text': 'You are a helpful assistant.'}]
	    }, {
	        'role': 'user',
	        'content': [
	            {'image': f'file://{filename}'},
	            {'text': 'Please describe the math-related content in this image, ensuring that any LaTeX formulas are correctly transcribed. Non-mathematical details do not need to be described.'}
	        ]
	    }]

	    try:
	        image.save(filename)
	        response = MultiModalConversation.call(model='qwen-vl-max-0809', messages=messages)
	    finally:
	        # Always clean up; the file may not exist if save() itself failed.
	        Path(filename).unlink(missing_ok=True)

	    if response.output is None:
	        # Surface a failed request clearly instead of an AttributeError on None.
	        raise RuntimeError(
	            "Image description request failed: "
	            f"{getattr(response, 'code', None)} {getattr(response, 'message', None)}"
	        )

	    return response.output.choices[0]["message"]["content"]

	def _description_text(description):
	    """Flatten one image description into plain text.

	    Strings pass through unchanged; dicts contribute their ``'text'`` value;
	    lists/tuples are flattened recursively; anything else is ``str()``-ed.
	    """
	    if isinstance(description, str):
	        return description
	    if isinstance(description, dict):
	        return str(description.get('text', ''))
	    if isinstance(description, (list, tuple)):
	        return "".join(_description_text(part) for part in description)
	    return str(description)


	def get_math_response(image_descriptions, user_question):
	    """Stream the math model's answer to a question.

	    Appends the user turn to the module-level ``math_messages`` history
	    (seeding it with a system prompt on first use), streams the reply, and
	    finally records the complete reply as the assistant turn.

	    Args:
	        image_descriptions: Descriptions to prepend to the prompt. Items that
	            are not plain strings are flattened to text first, because
	            ``process_image`` returns the raw message content, which may be
	            a list of parts rather than a string (assumption; confirm
	            against the DashScope response format).
	        user_question: The question text typed by the user.

	    Yields:
	        The answer text received so far, with every backslash doubled for
	        display. Each yielded value replaces the previous one; it is not a
	        delta.
	    """
	    global math_messages
	    if not math_messages:
	        math_messages.append({'role': 'system', 'content': 'You are a helpful math assistant.'})

	    if image_descriptions:
	        # str.join raises TypeError on non-string items, so normalise first.
	        content = "Image descriptions:\n" + "\n".join(
	            _description_text(item) for item in image_descriptions
	        )
	    else:
	        content = ""
	    content += f"\n\nUser question: {user_question}"

	    math_messages.append({'role': 'user', 'content': content})
	    response = Generation.call(
	        model="qwen2.5-math-72b-instruct",
	        messages=math_messages,
	        result_format='message',
	        stream=True
	    )
	    answer = ""
	    for resp in response:
	        if resp.output is None:
	            continue
	        # Each streamed chunk carries the full answer so far, hence "=".
	        answer = resp.output.choices[0].message.content
	        yield answer.replace("\\", "\\\\")

	    # Store the unescaped final answer so later turns see the raw text.
	    math_messages.append({'role': 'assistant', 'content': answer})

	def math_chat_bot(images, sketchpad, question, chat_history):
	    """Handle one submit: describe new images, then stream the math answer.

	    Every uploaded image and the sketch (if any) is described via
	    ``process_image`` and appended to the module-level
	    ``image_descriptions`` list, which is then passed in full to
	    ``get_math_response``.

	    Args:
	        images: Uploaded files from the file input, or ``None`` when nothing
	            was uploaded.
	        sketchpad: Sketchpad value; its ``"composite"`` entry is processed
	            when present and truthy.
	        question: The user's question text.
	        chat_history: Existing chat history as a list of
	            ``(question, answer)`` pairs; ``None`` is treated as empty.

	    Yields:
	        ``chat_history`` extended with ``(question, answer_so_far)``.
	    """
	    global image_descriptions

	    # Process new images. The file input yields None when nothing is
	    # uploaded, which must not crash the loop.
	    for image in images or []:
	        if image:
	            description = process_image(image)
	            image_descriptions.append(description)

	    # Process sketchpad if present
	    if sketchpad and sketchpad["composite"]:
	        sketch_description = process_image(sketchpad["composite"], True)
	        image_descriptions.append(sketch_description)

	    history = chat_history or []

	    # Generate response. get_math_response yields the whole answer so far on
	    # every iteration, so each chunk replaces the previous text; appending
	    # chunks would repeat the answer prefix over and over.
	    response = ""
	    for chunk in get_math_response(image_descriptions, question):
	        response = chunk
	        yield history + [(question, response)]

	# Render display-mode KaTeX inline inside the chat panel (elem_id "qwen-md").
	css = """
	#qwen-md .katex-display { display: inline; }
	#qwen-md .katex-display>.katex { display: inline; }
	#qwen-md .katex-display>.katex>.katex-html { display: inline; }
	"""

	# Create Gradio interface
	with gr.Blocks(css=css) as demo:
	    # Header banner. The paragraph and font tags are closed explicitly so
	    # the markup is well-formed.
	    gr.HTML(
	        '<p align="center"><img src="https://modelscope.oss-cn-beijing.aliyuncs.com/resource/qwen.png" style="height: 60px"/></p>'
	        '<center><font size=8>📖 Qwen2.5-Math Demo</font></center>'
	        '<center><font size=3>This WebUI is based on Qwen2-VL for OCR and Qwen2.5-Math for mathematical reasoning. You can input either images or texts of mathematical or arithmetic problems.</font></center>'
	    )

	    with gr.Row():
	        # Left column: inputs and action buttons.
	        with gr.Column():
	            # Only images can be processed, so restrict the picker to them.
	            input_images = gr.File(
	                file_count="multiple",
	                file_types=["image"],
	                label="Upload Images",
	            )
	            input_sketchpad = gr.Sketchpad(type="pil", label="Sketch", layers=False)
	            input_text = gr.Textbox(label="Input your question")
	            with gr.Row():
	                clear_btn = gr.ClearButton([input_images, input_sketchpad, input_text])
	                submit_btn = gr.Button("Submit", variant="primary")

	        # Right column: streamed chat output.
	        with gr.Column():
	            chat_output = gr.Chatbot(label="Chat History", elem_id="qwen-md")

	    # The current chat history is passed in so the handler can extend it.
	    submit_btn.click(
	        fn=math_chat_bot,
	        inputs=[input_images, input_sketchpad, input_text, chat_output],
	        outputs=chat_output
	    )

	demo.launch()