import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from threading import Thread
import spaces

# Load model and tokenizer
model_id = "openfree/Darwin-Qwen3-4B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)


@spaces.GPU
def generate_response(
    message,
    history,
    temperature=0.7,
    max_new_tokens=512,
    top_p=0.9,
    repetition_penalty=1.1,
):
    # Rebuild the conversation as a list of role/content dicts
    conversation = []
    for user, assistant in history:
        conversation.extend([
            {"role": "user", "content": user},
            {"role": "assistant", "content": assistant}
        ])
    conversation.append({"role": "user", "content": message})

    # Apply the model's chat template if available
    if hasattr(tokenizer, "apply_chat_template"):
        text = tokenizer.apply_chat_template(
            conversation,
            tokenize=False,
            add_generation_prompt=True
        )
    else:
        # Fallback formatting: render each turn with its own content
        text = "\n".join([
            f"User: {turn['content']}" if turn["role"] == "user"
            else f"Assistant: {turn['content']}"
            for turn in conversation
        ])
        text += "\nAssistant: "

    # Tokenize the prompt and move it to the model's device
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=2048)
    inputs = inputs.to(model.device)

    # Set up token-by-token streaming
    streamer = TextIteratorStreamer(
        tokenizer,
        timeout=10.0,
        skip_prompt=True,
        skip_special_tokens=True
    )

    # Generation parameters: dict(inputs, ...) merges input_ids and
    # attention_mask from the BatchEncoding with the sampling kwargs
    gen_kwargs = dict(
        inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )

    # Run generation in a separate thread so tokens can be streamed as they arrive
    thread = Thread(target=model.generate, kwargs=gen_kwargs)
    thread.start()

    # Yield the accumulated response as each new chunk arrives
    response = ""
    for new_text in streamer:
        response += new_text
        yield response
    thread.join()


# Create Gradio interface
with gr.Blocks(title="Darwin-Qwen3-4B Chat") as demo:
    gr.Markdown(
        """
        # 🌱 Darwin-Qwen3-4B Interactive Chat

        Test the evolutionarily merged model that combines the strengths of
        instruction-following and reasoning capabilities.
        **Model**: [openfree/Darwin-Qwen3-4B](https://huggingface.co/openfree/Darwin-Qwen3-4B)

        This model was created with the Darwin A2AP Enhanced v3.2 evolutionary algorithm, merging:
        - Parent 1: Qwen/Qwen3-4B-Instruct-2507
        - Parent 2: Qwen/Qwen3-4B-Thinking-2507
        """
    )

    chatbot = gr.Chatbot(
        label="Chat History",
        bubble_full_width=False,
        height=400
    )

    with gr.Row():
        msg = gr.Textbox(
            label="Your Message",
            placeholder="Type your message here and press Enter...",
            lines=2,
            scale=4
        )
        submit_btn = gr.Button("Send", scale=1, variant="primary")

    with gr.Accordion("Advanced Settings", open=False):
        temperature = gr.Slider(
            minimum=0.1, maximum=1.5, value=0.7, step=0.1,
            label="Temperature (higher = more creative)"
        )
        max_new_tokens = gr.Slider(
            minimum=64, maximum=2048, value=512, step=64,
            label="Max New Tokens"
        )
        top_p = gr.Slider(
            minimum=0.1, maximum=1.0, value=0.9, step=0.05,
            label="Top-p (nucleus sampling)"
        )
        repetition_penalty = gr.Slider(
            minimum=1.0, maximum=1.5, value=1.1, step=0.05,
            label="Repetition Penalty"
        )

    with gr.Row():
        clear_btn = gr.Button("Clear Chat", variant="secondary")

    gr.Examples(
        examples=[
            "Explain quantum computing in simple terms.",
            "Write a Python function to find prime numbers.",
            "What are the key differences between machine learning and deep learning?",
            "Suggest a healthy meal plan for a week.",
            "How does photosynthesis work?",
        ],
        inputs=msg,
        label="Example Prompts"
    )

    # Event handlers
    def user_submit(message, history):
        # Clear the textbox and append the new user turn (bot reply pending)
        return "", history + [[message, None]]

    def bot_respond(history, temperature, max_new_tokens, top_p, repetition_penalty):
        # Stream the model's reply into the last history entry
        message = history[-1][0]
        history[-1][1] = ""
        for response in generate_response(
            message, history[:-1], temperature, max_new_tokens, top_p, repetition_penalty
        ):
            history[-1][1] = response
            yield history

    msg.submit(
        user_submit, [msg, chatbot], [msg, chatbot]
    ).then(
        bot_respond,
        [chatbot, temperature, max_new_tokens, top_p, repetition_penalty],
        chatbot
    )

    submit_btn.click(
        user_submit, [msg, chatbot], [msg, chatbot]
    ).then(
        bot_respond,
        [chatbot, temperature, max_new_tokens, top_p, repetition_penalty],
        chatbot
    )

    clear_btn.click(lambda: None, None, chatbot, queue=False)

    gr.Markdown(
        """
        ---
        ### About Darwin Project

        The Darwin Project demonstrates a new paradigm in AI model creation through
        evolutionary algorithms. This model showcases the fusion of different model
        capabilities at 1/10,000 the cost of traditional training.

        **Key Features:**
        - Automated model merging without manual hyperparameter tuning
        - Multi-objective optimization (accuracy, robustness, generalization)
        - 5,000+ generation evolution process

        [GitHub](https://github.com/yourusername/darwin-project) | [Paper](https://arxiv.org/abs/xxxx.xxxxx) (Coming Soon)
        """
    )

if __name__ == "__main__":
    demo.queue().launch(share=True)
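
# A minimal sketch for smoke-testing the merged model directly, without the
# Gradio UI. It reuses only the `model`, `tokenizer`, and chat-template calls
# already used above; the function name `smoke_test` and its defaults are
# illustrative, not part of the app. Kept as comments so the Spaces entry
# point above is unchanged.
#
# def smoke_test(prompt="Explain quantum computing in simple terms."):
#     conversation = [{"role": "user", "content": prompt}]
#     text = tokenizer.apply_chat_template(
#         conversation, tokenize=False, add_generation_prompt=True
#     )
#     inputs = tokenizer(text, return_tensors="pt").to(model.device)
#     output_ids = model.generate(**inputs, max_new_tokens=128, do_sample=False)
#     # Decode only the newly generated tokens, not the echoed prompt
#     new_tokens = output_ids[0][inputs["input_ids"].shape[-1]:]
#     return tokenizer.decode(new_tokens, skip_special_tokens=True)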