import gradio as gr
from transformers import pipeline

# Load the model locally
MODEL = "Intelligent-Internet/II-Medical-8B"

print("🔄 Loading medical model locally...")
medical_tutor = pipeline(
    "text-generation",
    model=MODEL,
    device=-1,  # Use CPU
    torch_dtype="auto"
)
print("✅ Model loaded!")

def chat(message, history):
    # history is ignored: each question is answered independently
    # Simple medical tutoring prompt
    prompt = f"""You are a medical tutor. Provide educational information about: {message}

Remember: This is for learning purposes only, not medical advice.

Answer:"""

    response = medical_tutor(
        prompt,
        max_new_tokens=150,
        temperature=0.7,
        do_sample=True,
        pad_token_id=medical_tutor.tokenizer.eos_token_id
    )[0]['generated_text']

    # The pipeline returns prompt + completion; keep only the answer part
    answer = response.split("Answer:")[-1].strip()
    return answer

gr.ChatInterface(
    chat,
    title="🩺 Medical Tutor",
).launch(server_port=7860)
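
# --- Optional: prompt via the model's chat template ---
# A minimal sketch of an alternative chat() body, assuming the checkpoint
# ships a chat template (instruct-tuned models usually do). If it does not,
# apply_chat_template raises an error and the raw prompt above is the
# fallback. return_full_text=False asks the pipeline to return only the
# newly generated text, so no "Answer:" splitting is needed.
#
# def chat(message, history):
#     messages = [
#         {"role": "system", "content": "You are a medical tutor. Provide "
#          "educational information for learning purposes only, not medical advice."},
#         {"role": "user", "content": message},
#     ]
#     # Render the conversation with the tokenizer's built-in template
#     prompt = medical_tutor.tokenizer.apply_chat_template(
#         messages, tokenize=False, add_generation_prompt=True
#     )
#     response = medical_tutor(
#         prompt,
#         max_new_tokens=150,
#         temperature=0.7,
#         do_sample=True,
#         return_full_text=False,  # drop the echoed prompt from the output
#     )[0]["generated_text"]
#     return response.strip()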