import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from threading import Thread
import spaces

# Load model and tokenizer
model_id = "openfree/Darwin-Qwen3-4B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)
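
# Rough memory check (sketch): ~4B parameters x 2 bytes each (fp16) is
# about 8 GB of weights, so a single 16 GB GPU is comfortable;
# device_map="auto" shards or offloads across available devices if one
# device alone is too small.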

# On ZeroGPU Spaces the generation call must run inside a function
# decorated with @spaces.GPU; otherwise the `spaces` import above is
# unused and no GPU is allocated. The decorator is a no-op outside
# ZeroGPU environments. (Assumes this Space targets ZeroGPU hardware.)
@spaces.GPU
def generate_response(
    message,
    history,
    temperature=0.7,
    max_new_tokens=512,
    top_p=0.9,
    repetition_penalty=1.1,
):
    # Format conversation history as a list of chat messages
    conversation = []
    for user, assistant in history:
        conversation.extend([
            {"role": "user", "content": user},
            {"role": "assistant", "content": assistant}
        ])
    conversation.append({"role": "user", "content": message})
    # Apply chat template if available
    if hasattr(tokenizer, "apply_chat_template"):
        text = tokenizer.apply_chat_template(
            conversation,
            tokenize=False,
            add_generation_prompt=True
        )
    else:
        # Fallback formatting: render each turn with its own content
        text = "\n".join(
            f"User: {turn['content']}" if turn["role"] == "user"
            else f"Assistant: {turn['content']}"
            for turn in conversation
        )
        text += "\nAssistant: "
    # Tokenize input
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=2048)
    inputs = inputs.to(model.device)

    # Set up streaming; timeout=10.0 makes iteration over the streamer
    # raise if no new token arrives within 10 seconds, so a stalled
    # generation cannot hang the UI indefinitely
    streamer = TextIteratorStreamer(
        tokenizer,
        timeout=10.0,
        skip_prompt=True,
        skip_special_tokens=True
    )
    # Generation parameters (inputs contributes input_ids and attention_mask)
    gen_kwargs = dict(
        inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )
    # Start generation in a separate thread so tokens can be consumed
    # from the streamer as they are produced
    thread = Thread(target=model.generate, kwargs=gen_kwargs)
    thread.start()

    # Stream output
    response = ""
    for new_text in streamer:
        response += new_text
        yield response
    thread.join()
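
# Usage sketch (illustrative, not executed by the app): generate_response
# is a generator, so it can be consumed directly for a quick smoke test:
#
#     for partial in generate_response("Hello!", history=[]):
#         print(partial)
#
# Each yielded value is the full response accumulated so far, matching
# what the Gradio handlers below expect.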

# Create Gradio interface
with gr.Blocks(title="Darwin-Qwen3-4B Chat") as demo:
    gr.Markdown(
        """
        # 🌱 Darwin-Qwen3-4B Interactive Chat

        Test the evolutionary merged model, which combines instruction-following and reasoning strengths.

        **Model**: [openfree/Darwin-Qwen3-4B](https://huggingface.co/openfree/Darwin-Qwen3-4B)

        This model was created with the Darwin A2AP Enhanced v3.2 evolutionary algorithm, merging:
        - Parent 1: Qwen/Qwen3-4B-Instruct-2507
        - Parent 2: Qwen/Qwen3-4B-Thinking-2507
        """
    )
    chatbot = gr.Chatbot(
        label="Chat History",
        bubble_full_width=False,  # deprecated in newer Gradio releases
        height=400
    )
    with gr.Row():
        msg = gr.Textbox(
            label="Your Message",
            placeholder="Type your message here and press Enter...",
            lines=2,
            scale=4
        )
        submit_btn = gr.Button("Send", scale=1, variant="primary")

    with gr.Accordion("Advanced Settings", open=False):
        temperature = gr.Slider(
            minimum=0.1,
            maximum=1.5,
            value=0.7,
            step=0.1,
            label="Temperature (higher = more creative)"
        )
        max_new_tokens = gr.Slider(
            minimum=64,
            maximum=2048,
            value=512,
            step=64,
            label="Max New Tokens"
        )
        top_p = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.9,
            step=0.05,
            label="Top-p (nucleus sampling)"
        )
        repetition_penalty = gr.Slider(
            minimum=1.0,
            maximum=1.5,
            value=1.1,
            step=0.05,
            label="Repetition Penalty"
        )

    with gr.Row():
        clear_btn = gr.Button("Clear Chat", variant="secondary")

    gr.Examples(
        examples=[
            "Explain quantum computing in simple terms.",
            "Write a Python function to find prime numbers.",
            "What are the key differences between machine learning and deep learning?",
            "Suggest a healthy meal plan for a week.",
            "How does photosynthesis work?",
        ],
        inputs=msg,
        label="Example Prompts"
    )
    # Event handlers
    def user_submit(message, history):
        return "", history + [[message, None]]

    def bot_respond(history, temperature, max_new_tokens, top_p, repetition_penalty):
        message = history[-1][0]
        history[-1][1] = ""
        for response in generate_response(
            message,
            history[:-1],
            temperature,
            max_new_tokens,
            top_p,
            repetition_penalty
        ):
            history[-1][1] = response
            yield history
    msg.submit(
        user_submit,
        [msg, chatbot],
        [msg, chatbot]
    ).then(
        bot_respond,
        [chatbot, temperature, max_new_tokens, top_p, repetition_penalty],
        chatbot
    )

    submit_btn.click(
        user_submit,
        [msg, chatbot],
        [msg, chatbot]
    ).then(
        bot_respond,
        [chatbot, temperature, max_new_tokens, top_p, repetition_penalty],
        chatbot
    )

    clear_btn.click(lambda: None, None, chatbot, queue=False)
    gr.Markdown(
        """
        ---
        ### About the Darwin Project

        The Darwin Project demonstrates a new paradigm in AI model creation through evolutionary algorithms.
        This model showcases the fusion of different model capabilities at roughly 1/10,000 the cost of traditional training.

        **Key Features:**
        - Automated model merging without manual hyperparameter tuning
        - Multi-objective optimization (accuracy, robustness, generalization)
        - 5,000+ generation evolution process

        [GitHub](https://github.com/yourusername/darwin-project) | [Paper](https://arxiv.org/abs/xxxx.xxxxx) (Coming Soon)
        """
    )

if __name__ == "__main__":
    demo.queue().launch(share=True)
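
# Note: share=True only has an effect when running locally; on Hugging
# Face Spaces, Gradio ignores the flag because the Space already serves
# a public URL.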