# =============================
# app.py for Cass Beta 2 Chat
# =============================
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# =============================
# MODEL SETTINGS
# =============================
BASE_MODEL_ID = "ibm-granite/granite-4.0-micro-base"
PEFT_MODEL_ID = "DSDUDEd/Cass-Beta2.0"

print("🚀 Loading base model and PEFT adapter...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)

# Load base model
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_ID)

# Load PEFT model (adapter weights applied on top of the base model)
model = PeftModel.from_pretrained(base_model, PEFT_MODEL_ID)

# Move to device
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
model.eval()
print(f"✅ Model loaded on {device.upper()}")

# =============================
# CHAT HISTORY
# =============================
# List of (role, message) tuples rendered into the "Chat History" box.
# NOTE(review): this is module-level state, so every browser session served
# by this process appends to the same transcript. Moving it into per-session
# state would change chat()'s signature, so it is left as-is here.
history = []


# =============================
# GENERATION FUNCTION
# =============================
def _format_history() -> str:
    """Render ``history`` as ``"Role: message"`` entries, each followed by a blank line."""
    return "".join(f"{role}: {message}\n\n" for role, message in history)


def _generate_reply(prompt: str) -> str:
    """Run the model on *prompt* and return only the newly generated text."""
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=150,
        pad_token_id=tokenizer.eos_token_id,
    )
    # For a causal LM the returned sequence starts with the prompt tokens;
    # drop them so the reply does not echo the user's own message.
    prompt_length = inputs["input_ids"].shape[-1]
    new_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)


def chat(user_input: str) -> str:
    """Generate an AI response for *user_input* and return the full transcript.

    The user's message and the model's reply are appended to the module-level
    ``history`` list. Only the current message is sent to the model; earlier
    turns are kept for display only.

    Args:
        user_input: Text typed by the user.

    Returns:
        The whole conversation so far, formatted as ``"Role: message"``
        entries separated by blank lines. If *user_input* is empty or only
        whitespace, the transcript is returned unchanged and the model is
        not called.
    """
    # Nothing to generate from: avoid feeding an empty prompt to the model
    # and avoid recording an empty turn.
    if not user_input or not user_input.strip():
        return _format_history()

    history.append(("User", user_input))
    ai_response = _generate_reply(user_input)
    history.append(("Cass", ai_response))

    return _format_history()


# =============================
# GRADIO INTERFACE
# =============================
with gr.Blocks() as demo:
    gr.Markdown("## Chat with Cass Beta 2 🤖")

    chatbox = gr.Textbox(
        label="Your message",
        placeholder="Type your message here...",
        lines=2,
    )
    send_button = gr.Button("Send")
    output = gr.Textbox(label="Chat History", interactive=False, lines=20)

    # Both the button and the textbox's submit event trigger the same handler.
    send_button.click(chat, inputs=chatbox, outputs=output)
    chatbox.submit(chat, inputs=chatbox, outputs=output)

# =============================
# RUN APP
# =============================
if __name__ == "__main__":
    demo.launch()