# app.py - Simplified for Hugging Face Spaces
# ---------------------------------------------------------------
# This version uses the high-level `pipeline` from transformers
# for a much simpler and cleaner implementation.
# ---------------------------------------------------------------
import os
import torch
import gradio as gr
from transformers import pipeline
# ── Configuration ───────────────────────────────────────────────────────────
# Set the model repository ID
MODEL_ID = "Reubencf/gemma3-goan-finetuned"
HF_TOKEN = os.getenv("HF_TOKEN") # Optional: for private models
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
TITLE = "🌴 Gemma Goan Q&A Bot"
DESCRIPTION = (
    "This is a simple Gradio chat interface for the Gemma model fine-tuned on a Goan Q&A dataset.\n"
    "Ask about Goa, Konkani culture, or general topics!"
)
# ── Load Model Pipeline ─────────────────────────────────────────────────────
# We load the model and tokenizer into a pipeline object.
# This is done only once when the app starts.
# `device_map="auto"` ensures the model is placed on a GPU if available.
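# Note: passing `device_map="auto"` requires the `accelerate` package to be
# installed in the Space (typically listed in requirements.txt).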
print(f"[Init] Loading model pipeline: {MODEL_ID} on {DEVICE}...")
try:
    pipe = pipeline(
        "text-generation",
        model=MODEL_ID,
        torch_dtype=torch.bfloat16,  # Use bfloat16 for better performance
        device_map="auto",
        token=HF_TOKEN,
    )
    MODEL_LOADED = True
    print("[Init] Model pipeline loaded successfully.")
except Exception as e:
    MODEL_LOADED = False
    DESCRIPTION = f"❌ Model failed to load: {e}"
    print(f"[Fatal] Could not load model: {e}")
# ── Generation Function ─────────────────────────────────────────────────────
def generate_response(message, history):
    """
    This function is called for each user message.
    It takes the user's message and the conversation history,
    formats them for the model, and returns the model's response.
    """
    if not MODEL_LOADED:
        return "⚠️ Model is not available. Please check the Space logs for errors."
    # Format the conversation history into the format expected by the model.
    # The model expects a list of dictionaries with "role" and "content" keys.
    conversation = []
    for user_msg, assistant_msg in history:
        conversation.append({"role": "user", "content": user_msg})
        if assistant_msg:
            conversation.append({"role": "assistant", "content": assistant_msg})
    # Add the current user's message
    conversation.append({"role": "user", "content": message})
    # Use the pipeline's tokenizer to apply the chat template.
    # This correctly formats the input for the conversational model.
    prompt = pipe.tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True
    )
    # Generate the response using the pipeline
    outputs = pipe(
        prompt,
        max_new_tokens=512,  # cap on response length; 512 is an illustrative value, adjust as needed
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95
    )
    # The pipeline output includes the entire conversation history (prompt).
    # We need to extract only the newly generated text from the assistant.
    response = outputs[0]["generated_text"]
    # Slice the response to get only the new part
    new_response = response[len(prompt):].strip()
    return new_response
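# Illustrative example of how gr.ChatInterface calls generate_response; the history
# arrives as (user, assistant) tuples and the values below are made up:
#   generate_response("Where can I try it?",
#                     [("What is bebinca?", "A layered Goan coconut-and-egg dessert.")])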
# ── UI ──────────────────────────────────────────────────────────────────────
# Define some example questions to display in the UI
examples = [
    "What is bebinca?",
    "Tell me about the history of Feni.",
    "Suggest a good, quiet beach in South Goa.",
    "Describe Goan fish curry.",
]
# Create the Gradio ChatInterface
demo = gr.ChatInterface(
    fn=generate_response,
    title=TITLE,
    description=DESCRIPTION,
    examples=examples,
    theme="soft",
)
# ── Launch ──────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    print("🚀 Starting Gradio app...")
    demo.launch()
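# Running `python app.py` locally serves the UI on http://localhost:7860 by default;
# on a Space, the same launch() call is picked up automatically.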