# app.py - Simplified for Hugging Face Spaces
# ---------------------------------------------------------------
# This version uses the high-level `pipeline` from transformers
# for a much simpler and cleaner implementation.
# ---------------------------------------------------------------
import os
import torch
import gradio as gr
from transformers import pipeline
# ── Configuration ──────────────────────────────────────────────────────────────
# Set the model repository ID
MODEL_ID = "Reubencf/gemma3-goan-finetuned"
HF_TOKEN = os.getenv("HF_TOKEN")  # Optional: for private models
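# HF_TOKEN is read from the environment rather than hard-coded; on Spaces it
# can be stored as a repository secret in the Space settings.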
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

TITLE = "🌴 Gemma Goan Q&A Bot"
DESCRIPTION = (
    "This is a simple Gradio chat interface for the Gemma model fine-tuned on a Goan Q&A dataset.\n"
    "Ask about Goa, Konkani culture, or general topics!"
)
# ── Load Model Pipeline ─────────────────────────────────────────────────────
# We load the model and tokenizer into a pipeline object.
# This is done only once, when the app starts.
# `device_map="auto"` ensures the model is placed on a GPU if available.
print(f"[Init] Loading model pipeline: {MODEL_ID} on {DEVICE}...")
try:
    pipe = pipeline(
        "text-generation",
        model=MODEL_ID,
        # bfloat16 is fast on recent GPUs, but CPU support can be missing or
        # slow, so fall back to float32 when no GPU is available.
        torch_dtype=torch.bfloat16 if DEVICE == "cuda" else torch.float32,
        device_map="auto",  # requires the `accelerate` package
        token=HF_TOKEN,
    )
    MODEL_LOADED = True
    print("[Init] Model pipeline loaded successfully.")
except Exception as e:
    MODEL_LOADED = False
    DESCRIPTION = f"❌ Model failed to load: {e}"
    print(f"[Fatal] Could not load model: {e}")
# ── Generation Function ──────────────────────────────────────────────────────
def generate_response(message, history):
    """
    This function is called for each user message.
    It takes the user's message and the conversation history,
    formats them for the model, and returns the model's response.
    """
    if not MODEL_LOADED:
        return "⚠️ Model is not available. Please check the Space logs for errors."

    # Format the conversation history into the format expected by the model:
    # a list of dictionaries with "role" and "content" keys.
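    # Illustrative example of the target structure (values are made up):
    # [
    #     {"role": "user", "content": "What is bebinca?"},
    #     {"role": "assistant", "content": "Bebinca is a layered Goan dessert."},
    # ]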
    conversation = []
    # Gradio may pass history as (user, assistant) tuples or, with
    # type="messages", as {"role", "content"} dicts; handle both.
    for item in history:
        if isinstance(item, dict):
            conversation.append({"role": item["role"], "content": item["content"]})
        else:
            user_msg, assistant_msg = item
            conversation.append({"role": "user", "content": user_msg})
            if assistant_msg:
                conversation.append({"role": "assistant", "content": assistant_msg})

    # Add the current user's message
    conversation.append({"role": "user", "content": message})
    # Use the pipeline's tokenizer to apply the chat template.
    # This correctly formats the input for the conversational model.
    prompt = pipe.tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )
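    # For Gemma-style chat templates, the rendered prompt looks approximately
    # like this (the exact tokens depend on the model's template):
    #   <start_of_turn>user
    #   What is bebinca?<end_of_turn>
    #   <start_of_turn>model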
    # Generate the response using the pipeline
    outputs = pipe(
        prompt,
        max_new_tokens=256,  # explicit cap; the default limit can truncate replies
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
    )
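    # Alternatively, passing return_full_text=False to the pipeline call would
    # return only the newly generated text, making the slicing below unnecessary.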
    # The pipeline output includes the entire conversation history (prompt),
    # so extract only the newly generated text from the assistant.
    response = outputs[0]["generated_text"]

    # Slice the response to get only the new part
    new_response = response[len(prompt):].strip()
    return new_response
# ── UI ────────────────────────────────────────────────────────────────────────
# Define some example questions to display in the UI
examples = [
    "What is bebinca?",
    "Tell me about the history of Feni.",
    "Suggest a good, quiet beach in South Goa.",
    "Describe Goan fish curry.",
]

# Create the Gradio ChatInterface
demo = gr.ChatInterface(
    fn=generate_response,
    title=TITLE,
    description=DESCRIPTION,
    examples=examples,
    theme="soft",
)
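# Note: recent Gradio versions also accept type="messages" on ChatInterface,
# which passes history as {"role", "content"} dicts; generate_response above
# handles both that format and the older (user, assistant) tuple format.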
# ── Launch ────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    print("🚀 Starting Gradio app...")
    demo.launch()
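    # If running outside Spaces, launch options can be set explicitly, e.g.:
    # demo.launch(server_name="0.0.0.0", server_port=7860)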