import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from threading import Thread
import spaces

# Load model and tokenizer
model_id = "openfree/Darwin-Qwen3-4B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)
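
# Rough memory check (sketch): ~4B parameters x 2 bytes each (fp16) is
# about 8 GB of weights, so a single 16 GB GPU is comfortable;
# device_map="auto" shards or offloads across available devices if one
# device alone is too small.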

# On ZeroGPU Spaces the generation call must run inside a function
# decorated with @spaces.GPU; otherwise the `spaces` import above is
# unused and no GPU is allocated. The decorator is a no-op outside
# ZeroGPU environments. (Assumes this Space targets ZeroGPU hardware.)
@spaces.GPU
def generate_response(
    message,
    history,
    temperature=0.7,
    max_new_tokens=512,
    top_p=0.9,
    repetition_penalty=1.1,
):
    # Format conversation history as a list of chat messages
    conversation = []
    for user, assistant in history:
        conversation.extend([
            {"role": "user", "content": user},
            {"role": "assistant", "content": assistant}
        ])
    conversation.append({"role": "user", "content": message})
    # Apply chat template if available
    if hasattr(tokenizer, "apply_chat_template"):
        text = tokenizer.apply_chat_template(
            conversation,
            tokenize=False,
            add_generation_prompt=True
        )
    else:
        # Fallback formatting: render each turn with its own content
        text = "\n".join(
            f"User: {turn['content']}" if turn["role"] == "user"
            else f"Assistant: {turn['content']}"
            for turn in conversation
        )
        text += "\nAssistant: "
    # Tokenize input
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=2048)
    inputs = inputs.to(model.device)

    # Set up streaming; timeout=10.0 makes iteration over the streamer
    # raise if no new token arrives within 10 seconds, so a stalled
    # generation cannot hang the UI indefinitely
    streamer = TextIteratorStreamer(
        tokenizer,
        timeout=10.0,
        skip_prompt=True,
        skip_special_tokens=True
    )
    # Generation parameters (inputs contributes input_ids and attention_mask)
    gen_kwargs = dict(
        inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )
    # Start generation in a separate thread so tokens can be consumed
    # from the streamer as they are produced
    thread = Thread(target=model.generate, kwargs=gen_kwargs)
    thread.start()

    # Stream output
    response = ""
    for new_text in streamer:
        response += new_text
        yield response
    thread.join()
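
# Usage sketch (illustrative, not executed by the app): generate_response
# is a generator, so it can be consumed directly for a quick smoke test:
#
#     for partial in generate_response("Hello!", history=[]):
#         print(partial)
#
# Each yielded value is the full response accumulated so far, matching
# what the Gradio handlers below expect.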

# Create Gradio interface
with gr.Blocks(title="Darwin-Qwen3-4B Chat") as demo:
    gr.Markdown(
        """
        # 🌱 Darwin-Qwen3-4B Interactive Chat

        Test the evolutionary merged model, which combines instruction-following and reasoning strengths.

        **Model**: [openfree/Darwin-Qwen3-4B](https://huggingface.co/openfree/Darwin-Qwen3-4B)

        This model was created with the Darwin A2AP Enhanced v3.2 evolutionary algorithm, merging:
        - Parent 1: Qwen/Qwen3-4B-Instruct-2507
        - Parent 2: Qwen/Qwen3-4B-Thinking-2507
        """
    )
    chatbot = gr.Chatbot(
        label="Chat History",
        bubble_full_width=False,  # deprecated in newer Gradio releases
        height=400
    )
    with gr.Row():
        msg = gr.Textbox(
            label="Your Message",
            placeholder="Type your message here and press Enter...",
            lines=2,
            scale=4
        )
        submit_btn = gr.Button("Send", scale=1, variant="primary")

    with gr.Accordion("Advanced Settings", open=False):
        temperature = gr.Slider(
            minimum=0.1,
            maximum=1.5,
            value=0.7,
            step=0.1,
            label="Temperature (higher = more creative)"
        )
        max_new_tokens = gr.Slider(
            minimum=64,
            maximum=2048,
            value=512,
            step=64,
            label="Max New Tokens"
        )
        top_p = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.9,
            step=0.05,
            label="Top-p (nucleus sampling)"
        )
        repetition_penalty = gr.Slider(
            minimum=1.0,
            maximum=1.5,
            value=1.1,
            step=0.05,
            label="Repetition Penalty"
        )

    with gr.Row():
        clear_btn = gr.Button("Clear Chat", variant="secondary")

    gr.Examples(
        examples=[
            "Explain quantum computing in simple terms.",
            "Write a Python function to find prime numbers.",
            "What are the key differences between machine learning and deep learning?",
            "Suggest a healthy meal plan for a week.",
            "How does photosynthesis work?",
        ],
        inputs=msg,
        label="Example Prompts"
    )
    # Event handlers
    def user_submit(message, history):
        return "", history + [[message, None]]

    def bot_respond(history, temperature, max_new_tokens, top_p, repetition_penalty):
        message = history[-1][0]
        history[-1][1] = ""
        for response in generate_response(
            message,
            history[:-1],
            temperature,
            max_new_tokens,
            top_p,
            repetition_penalty
        ):
            history[-1][1] = response
            yield history
    msg.submit(
        user_submit,
        [msg, chatbot],
        [msg, chatbot]
    ).then(
        bot_respond,
        [chatbot, temperature, max_new_tokens, top_p, repetition_penalty],
        chatbot
    )

    submit_btn.click(
        user_submit,
        [msg, chatbot],
        [msg, chatbot]
    ).then(
        bot_respond,
        [chatbot, temperature, max_new_tokens, top_p, repetition_penalty],
        chatbot
    )

    clear_btn.click(lambda: None, None, chatbot, queue=False)
    gr.Markdown(
        """
        ---
        ### About the Darwin Project

        The Darwin Project demonstrates a new paradigm in AI model creation through evolutionary algorithms.
        This model showcases the fusion of different model capabilities at roughly 1/10,000 the cost of traditional training.

        **Key Features:**
        - Automated model merging without manual hyperparameter tuning
        - Multi-objective optimization (accuracy, robustness, generalization)
        - 5,000+ generation evolution process

        [GitHub](https://github.com/yourusername/darwin-project) | [Paper](https://arxiv.org/abs/xxxx.xxxxx) (Coming Soon)
        """
    )

if __name__ == "__main__":
    demo.queue().launch(share=True)
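
# Note: share=True only has an effect when running locally; on Hugging
# Face Spaces, Gradio ignores the flag because the Space already serves
# a public URL.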