import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from threading import Thread
import spaces

# Load model and tokenizer
model_id = "openfree/Darwin-Qwen3-4B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)


@spaces.GPU
def generate_response(
    message,
    history,
    temperature=0.7,
    max_new_tokens=512,
    top_p=0.9,
    repetition_penalty=1.1,
):
    # Rebuild the conversation as a list of role/content dicts
    conversation = []
    for user, assistant in history:
        conversation.extend([
            {"role": "user", "content": user},
            {"role": "assistant", "content": assistant}
        ])
    conversation.append({"role": "user", "content": message})

    # Apply the model's chat template if available
    if hasattr(tokenizer, "apply_chat_template"):
        text = tokenizer.apply_chat_template(
            conversation,
            tokenize=False,
            add_generation_prompt=True
        )
    else:
        # Fallback formatting: render each turn with its own content
        text = "\n".join([
            f"User: {turn['content']}" if turn["role"] == "user"
            else f"Assistant: {turn['content']}"
            for turn in conversation
        ])
        text += "\nAssistant: "

    # Tokenize the prompt and move it to the model's device
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=2048)
    inputs = inputs.to(model.device)

    # Set up token-by-token streaming
    streamer = TextIteratorStreamer(
        tokenizer,
        timeout=10.0,
        skip_prompt=True,
        skip_special_tokens=True
    )

    # Generation parameters: dict(inputs, ...) merges input_ids and
    # attention_mask from the BatchEncoding with the sampling kwargs
    gen_kwargs = dict(
        inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )

    # Run generation in a separate thread so tokens can be streamed as they arrive
    thread = Thread(target=model.generate, kwargs=gen_kwargs)
    thread.start()

    # Yield the accumulated response as each new chunk arrives
    response = ""
    for new_text in streamer:
        response += new_text
        yield response
    thread.join()


# Create Gradio interface
with gr.Blocks(title="Darwin-Qwen3-4B Chat") as demo:
    gr.Markdown(
        """
        # 🌱 Darwin-Qwen3-4B Interactive Chat

        Test the evolutionarily merged model that combines the strengths of
        instruction-following and reasoning capabilities.
        **Model**: [openfree/Darwin-Qwen3-4B](https://huggingface.co/openfree/Darwin-Qwen3-4B)

        This model was created with the Darwin A2AP Enhanced v3.2 evolutionary algorithm, merging:
        - Parent 1: Qwen/Qwen3-4B-Instruct-2507
        - Parent 2: Qwen/Qwen3-4B-Thinking-2507
        """
    )

    chatbot = gr.Chatbot(
        label="Chat History",
        bubble_full_width=False,
        height=400
    )

    with gr.Row():
        msg = gr.Textbox(
            label="Your Message",
            placeholder="Type your message here and press Enter...",
            lines=2,
            scale=4
        )
        submit_btn = gr.Button("Send", scale=1, variant="primary")

    with gr.Accordion("Advanced Settings", open=False):
        temperature = gr.Slider(
            minimum=0.1, maximum=1.5, value=0.7, step=0.1,
            label="Temperature (higher = more creative)"
        )
        max_new_tokens = gr.Slider(
            minimum=64, maximum=2048, value=512, step=64,
            label="Max New Tokens"
        )
        top_p = gr.Slider(
            minimum=0.1, maximum=1.0, value=0.9, step=0.05,
            label="Top-p (nucleus sampling)"
        )
        repetition_penalty = gr.Slider(
            minimum=1.0, maximum=1.5, value=1.1, step=0.05,
            label="Repetition Penalty"
        )

    with gr.Row():
        clear_btn = gr.Button("Clear Chat", variant="secondary")

    gr.Examples(
        examples=[
            "Explain quantum computing in simple terms.",
            "Write a Python function to find prime numbers.",
            "What are the key differences between machine learning and deep learning?",
            "Suggest a healthy meal plan for a week.",
            "How does photosynthesis work?",
        ],
        inputs=msg,
        label="Example Prompts"
    )

    # Event handlers
    def user_submit(message, history):
        # Clear the textbox and append the new user turn (bot reply pending)
        return "", history + [[message, None]]

    def bot_respond(history, temperature, max_new_tokens, top_p, repetition_penalty):
        # Stream the model's reply into the last history entry
        message = history[-1][0]
        history[-1][1] = ""
        for response in generate_response(
            message, history[:-1], temperature, max_new_tokens, top_p, repetition_penalty
        ):
            history[-1][1] = response
            yield history

    msg.submit(
        user_submit, [msg, chatbot], [msg, chatbot]
    ).then(
        bot_respond,
        [chatbot, temperature, max_new_tokens, top_p, repetition_penalty],
        chatbot
    )

    submit_btn.click(
        user_submit, [msg, chatbot], [msg, chatbot]
    ).then(
        bot_respond,
        [chatbot, temperature, max_new_tokens, top_p, repetition_penalty],
        chatbot
    )

    clear_btn.click(lambda: None, None, chatbot, queue=False)

    gr.Markdown(
        """
        ---
        ### About Darwin Project

        The Darwin Project demonstrates a new paradigm in AI model creation through
        evolutionary algorithms. This model showcases the fusion of different model
        capabilities at 1/10,000 the cost of traditional training.

        **Key Features:**
        - Automated model merging without manual hyperparameter tuning
        - Multi-objective optimization (accuracy, robustness, generalization)
        - 5,000+ generation evolution process

        [GitHub](https://github.com/yourusername/darwin-project) | [Paper](https://arxiv.org/abs/xxxx.xxxxx) (Coming Soon)
        """
    )

if __name__ == "__main__":
    demo.queue().launch(share=True)
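
# A minimal sketch for smoke-testing the merged model directly, without the
# Gradio UI. It reuses only the `model`, `tokenizer`, and chat-template calls
# already used above; the function name `smoke_test` and its defaults are
# illustrative, not part of the app. Kept as comments so the Spaces entry
# point above is unchanged.
#
# def smoke_test(prompt="Explain quantum computing in simple terms."):
#     conversation = [{"role": "user", "content": prompt}]
#     text = tokenizer.apply_chat_template(
#         conversation, tokenize=False, add_generation_prompt=True
#     )
#     inputs = tokenizer(text, return_tensors="pt").to(model.device)
#     output_ids = model.generate(**inputs, max_new_tokens=128, do_sample=False)
#     # Decode only the newly generated tokens, not the echoed prompt
#     new_tokens = output_ids[0][inputs["input_ids"].shape[-1]:]
#     return tokenizer.decode(new_tokens, skip_special_tokens=True)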