| """ | |
| ========================================================= | |
| app.py — Green Greta (Gradio + TF/Keras 3 + LangChain 0.3) | |
| - Chat tab: Blocks + Chatbot(height=...) ✅ | |
| - LLM: meta-llama/Meta-Llama-3.1-8B-Instruct ✅ | |
| - RAG: e5-base-v2 + (BM25+Vector) con fallback + Multi-Query + reranker ✅ | |
| - Responde en el idioma elegido (sin pasar claves extra) ✅ | |
| ========================================================= | |
| """ | |
import os
import shutil
# --- Env / telemetry (set before the imports that read them) ---
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
os.environ.setdefault("HF_HUB_DISABLE_TELEMETRY", "1")
os.environ.setdefault("GRADIO_ANALYTICS_ENABLED", "False")
os.environ.setdefault("ANONYMIZED_TELEMETRY", "false")
os.environ.setdefault("CHROMA_TELEMETRY_ENABLED", "FALSE")
os.environ.setdefault("USER_AGENT", "green-greta/1.0 (+contact-or-repo)")
# Optional: more deterministic TF results on CPU
# os.environ.setdefault("TF_ENABLE_ONEDNN_OPTS", "0")
import gradio as gr
import tensorflow as tf
from tensorflow import keras
from PIL import Image

try:
    from fake_useragent import UserAgent
    user_agent = UserAgent().random
except Exception:
    user_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
header_template = {"User-Agent": user_agent}
# --- LangChain / RAG ---
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_community.document_loaders import WebBaseLoader
# Vector store (the community import still works on LangChain 0.3, but is deprecated)
try:
    from langchain_chroma import Chroma
except ImportError:
    from langchain_community.vectorstores import Chroma
# Embeddings
try:
    from langchain_huggingface import HuggingFaceEmbeddings
except ImportError:
    from langchain_community.embeddings import HuggingFaceEmbeddings
# Retrieval utilities
from langchain.retrievers import ContextualCompressionRetriever, EnsembleRetriever
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.retrievers import BM25Retriever
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
# HF Hub
from huggingface_hub import snapshot_download
# LLM via HF Inference
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
# Theming + URL list
import theme
from url_list import URLS

theme = theme.Theme()
# =========================================================
# 1) IMAGE CLASSIFICATION — Keras 3-safe SavedModel loading
# =========================================================
MODEL_REPO = "rocioadlc/efficientnetB0_trash"
MODEL_SERVING_SIGNATURE = "serving_default"

model_dir = snapshot_download(MODEL_REPO)
image_model = keras.layers.TFSMLayer(model_dir, call_endpoint=MODEL_SERVING_SIGNATURE)
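# Note: Keras 3 can no longer load a TF SavedModel via keras.models.load_model,
# so TFSMLayer above wraps the exported "serving_default" signature as an
# inference-only layer; calling it returns the signature's outputs, typically
# a dict keyed by output tensor name (unpacked in predict_image below).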
class_labels = ["cardboard", "glass", "metal", "paper", "plastic", "trash"]

def predict_image(input_image: Image.Image):
    img = input_image.convert("RGB").resize((224, 224))
    x = keras.utils.img_to_array(img)  # Keras 3 path (tf.keras.preprocessing is legacy)
    x = keras.applications.efficientnet.preprocess_input(x)
    x = tf.expand_dims(x, 0)
    outputs = image_model(x)
    preds = outputs[next(iter(outputs))] if isinstance(outputs, dict) and outputs else outputs
    arr = preds.numpy() if hasattr(preds, "numpy") else preds
    probs = arr[0].tolist()
    return {label: float(probs[i]) for i, label in enumerate(class_labels)}
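# Quick local sanity check (assumes a local "sample.jpg"; uncomment to try):
# print(predict_image(Image.open("sample.jpg")))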
image_gradio_app = gr.Interface(
    fn=predict_image,
    inputs=gr.Image(label="Image", sources=["upload", "webcam"], type="pil"),
    outputs=[gr.Label(label="Result")],
    title="<span style='color: rgb(243, 239, 224);'>Green Greta</span>",
    theme=theme,
)
# ============================================
# 2) KNOWLEDGE LOADING (RAG: loader + splitter)
# ============================================
def load_url(url: str):
    loader = WebBaseLoader(web_paths=[url], header_template=header_template)
    return loader.load()

def safe_load_all_urls(urls):
    all_docs = []
    for link in urls:
        try:
            docs = load_url(link)
            all_docs.extend(docs)
        except Exception as e:
            print(f"Skipping URL due to error: {link}\nError: {e}\n")
    return all_docs

all_loaded_docs = safe_load_all_urls(URLS)

base_splitter = RecursiveCharacterTextSplitter(
    chunk_size=900,
    chunk_overlap=100,
    length_function=len,
)
docs = base_splitter.split_documents(all_loaded_docs)
# Embeddings
embeddings = HuggingFaceEmbeddings(model_name="intfloat/e5-base-v2")
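# Note: e5 models were trained with "query: " / "passage: " prefixes; the plain
# HuggingFaceEmbeddings wrapper does not add them, which trades a little
# retrieval quality for simplicity here.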
# Vector store
persist_directory = "docs/chroma/"
shutil.rmtree(persist_directory, ignore_errors=True)
vectordb = Chroma.from_documents(
    documents=docs,
    embedding=embeddings,
    persist_directory=persist_directory,
)
# Vector retriever
vec_retriever = vectordb.as_retriever(search_kwargs={"k": 8}, search_type="mmr")
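# MMR (maximal marginal relevance) search trades some raw similarity for
# diversity, which helps when several chunks from the same page repeat each other.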
# BM25 + ensemble, with a fallback if rank-bm25 is missing
use_bm25 = True
try:
    bm25 = BM25Retriever.from_documents(docs)  # requires the rank-bm25 package
    bm25.k = 8
except Exception as e:
    print(f"[RAG] BM25 unavailable ({e}). Falling back to vector-only retriever.")
    use_bm25 = False
    bm25 = None

if use_bm25:
    base_retriever = EnsembleRetriever(retrievers=[bm25, vec_retriever], weights=[0.4, 0.6])
else:
    base_retriever = vec_retriever
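# EnsembleRetriever fuses the two ranked lists with weighted Reciprocal Rank
# Fusion; the 0.4/0.6 split slightly favors dense retrieval over exact-term
# BM25 matches.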
# ======================================
# 3) PROMPT (no extra variables: only {context} and {question})
#    The model is instructed to obey a directive prefixed to the question itself.
# ======================================
SYSTEM_TEMPLATE = (
    "You are Greta, a recycling & sustainability assistant. "
    "Follow any explicit language directive at the start of the question, e.g., "
    "'Answer ONLY in Spanish.' If there is no directive, detect the user's language and answer accordingly. "
    "Be direct and practical. If the snippets are insufficient, say so and suggest actionable next steps.\n\n"
    "{context}\n\n"
    "Question: {question}"
)
qa_prompt = ChatPromptTemplate.from_template(SYSTEM_TEMPLATE)
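# The combine-docs prompt of ConversationalRetrievalChain only receives
# {context} and {question}; any extra input key would raise a missing-variable
# error, hence the language directive rides inside the question string itself.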
# ===========================================
# 4) LLM — Hugging Face Inference (Llama 3.1 8B)
# ===========================================
endpoint = HuggingFaceEndpoint(
    repo_id="meta-llama/Meta-Llama-3.1-8B-Instruct",
    task="text-generation",
    max_new_tokens=900,
    temperature=0.2,
    top_k=40,
    repetition_penalty=1.05,
    return_full_text=False,
    huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
    timeout=120,
    model_kwargs={},
)
llm = ChatHuggingFace(llm=endpoint)
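# ChatHuggingFace wraps the raw endpoint so chat messages are rendered with the
# model's own chat template (Llama 3.1 expects its special header tokens)
# instead of being concatenated as plain text.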
# ===========================================
# 5) Chain (memory + Multi-Query + reranker + compression)
# ===========================================
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
)

# Multi-Query (LLM-generated paraphrases of the query)
mqr = MultiQueryRetriever.from_llm(retriever=base_retriever, llm=llm, include_original=True)

# Reranker (cross-encoder, base size)
cross_encoder = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-base")
reranker = CrossEncoderReranker(model=cross_encoder, top_n=4)
compression_retriever = ContextualCompressionRetriever(
    base_retriever=mqr,
    base_compressor=reranker,
)
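# Retrieval flow per question: MultiQueryRetriever asks the LLM for paraphrases
# and unions the documents each variant retrieves; the cross-encoder then scores
# every (question, chunk) pair and only the top 4 survive into {context}.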
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=compression_retriever,
    memory=memory,
    verbose=True,
    combine_docs_chain_kwargs={"prompt": qa_prompt},
    get_chat_history=lambda h: h,
    rephrase_question=False,
    return_source_documents=False,
)
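# rephrase_question=False keeps the original question (with its language
# directive) for the answer prompt; only retrieval sees the condensed,
# history-aware rewrite.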
# ===== Helper: build the language directive prefixed to the question =====
def _lang_directive(lang: str) -> str:
    if not lang or lang.strip().lower() == "auto":
        return "Detect the user's language and answer in that language."
    return f"Answer ONLY in {lang}."

def chat_interface(question: str, history, target_language: str = "Auto"):
    """Return a clean answer in the requested language WITHOUT passing extra keys to the chain."""
    try:
        directive = _lang_directive(target_language)
        combined_q = f"{directive}\n\n{question}"
        result = qa_chain.invoke({"question": combined_q})
        answer = result.get("answer", "")
        if not answer:
            return "Sorry, I couldn't generate a useful answer from the available information."
        return answer
    except Exception as e:
        return (
            "Sorry, I ran into a problem processing your question. "
            "Please try again in a moment or rephrase your query.\n\n"
            f"Technical detail: {e}"
        )
# ============================
# 6) Banner / Welcome content
# ============================
banner_tab_content = """
<div style="background-color: #d3e3c3; text-align: center; padding: 20px; display: flex; flex-direction: column; align-items: center;">
    <img src="https://huggingface.co/spaces/ALVHB95/TFM_DataScience_APP/resolve/main/front_4.jpg" alt="Banner Image" style="width: 50%; max-width: 500px; margin: 0 auto;">
    <h1 style="font-size: 24px; color: #4e6339; margin-top: 20px;">¡Bienvenido a nuestro clasificador de imágenes y chatbot para un reciclaje más inteligente!♻️</h1>
    <p style="font-size: 16px; color: #4e6339; text-align: justify;">¿Alguna vez te has preguntado si puedes reciclar un objeto en particular? ¿O te has sentido abrumado por la cantidad de residuos que generas y no sabes cómo manejarlos de manera más sostenible? ¡Estás en el lugar correcto!</p>
    <p style="font-size: 16px; color: #4e6339; text-align: justify;">Nuestra plataforma combina la potencia de la inteligencia artificial con la comodidad de un chatbot para brindarte respuestas rápidas y precisas sobre qué objetos son reciclables y cómo hacerlo de la manera más eficiente.</p>
    <p style="font-size: 16px; text-align:center;"><strong><span style="color: #4e6339;">¿Cómo usarlo?</span></strong></p>
    <ul style="list-style-type: disc; text-align: justify; margin-top: 20px; padding-left: 20px;">
        <li style="font-size: 16px; color: #4e6339;"><strong><span style="color: #4e6339;">Green Greta Image Classification:</span></strong> Ve a la pestaña Greta Image Classification y simplemente carga una foto del objeto que quieras reciclar, y nuestro modelo identificará de qué se trata🕵️‍♂️ para que puedas desecharlo adecuadamente.</li>
        <li style="font-size: 16px; color: #4e6339;"><strong><span style="color: #4e6339;">Green Greta Chat:</span></strong> ¿Tienes preguntas sobre reciclaje, materiales específicos o prácticas sostenibles? ¡Pregunta a nuestro chatbot en la pestaña Green Greta Chat!📝 Está aquí para responder todas tus preguntas y ayudarte a tomar decisiones más informadas sobre tu reciclaje.</li>
    </ul>
    <h1 style="font-size: 24px; color: #4e6339; margin-top: 20px;">Welcome to our image classifier and chatbot for smarter recycling!♻️</h1>
    <p style="font-size: 16px; color: #4e6339; text-align: justify;">Have you ever wondered if you can recycle a particular object? Or felt overwhelmed by the amount of waste you generate and don't know how to handle it more sustainably? You're in the right place!</p>
    <p style="font-size: 16px; color: #4e6339; text-align: justify;">Our platform combines the power of artificial intelligence with the convenience of a chatbot to provide you with quick and accurate answers about which objects are recyclable and how to do it most efficiently.</p>
    <p style="font-size: 16px; text-align:center;"><strong><span style="color: #4e6339;">How to use it?</span></strong></p>
    <ul style="list-style-type: disc; text-align: justify; margin-top: 20px; padding-left: 20px;">
        <li style="font-size: 16px; color: #4e6339;"><strong><span style="color: #4e6339;">Green Greta Image Classification:</span></strong> Go to the Greta Image Classification tab and simply upload a photo of the object you want to recycle, and our model will identify what it is🕵️‍♂️ so you can dispose of it properly.</li>
        <li style="font-size: 16px; color: #4e6339;"><strong><span style="color: #4e6339;">Green Greta Chat:</span></strong> Have questions about recycling, specific materials, or sustainable practices? Ask our chatbot in the Green Greta Chat tab!📝 It's here to answer all your questions and help you make more informed decisions about your recycling.</li>
    </ul>
</div>
"""
banner_tab = gr.Markdown(banner_tab_content)
# ============================
# 7) Chat tab (Blocks + Chatbot with height + language selector)
# ============================
SUPPORTED_LANGS = ["Auto", "English", "German", "French", "Italian", "Portuguese", "Hindi", "Spanish", "Thai"]

# CSS: enlarge the chat area and the overall width
custom_css = """
.gradio-container { max-width: 1200px !important; }
#greta-chat { height: 700px !important; }
#greta-chat .gr-chatbot { height: 700px !important; min-height: 700px !important; }
#greta-chat .overflow-y-auto { height: 660px !important; max-height: 660px !important; }
"""
def _user_submit(user_msg, history):
    """Append the user's turn; the bot replies afterwards."""
    if not user_msg:
        return "", history
    history = history + [[user_msg, None]]
    return "", history

def _bot_respond(history, target_language):
    """Generate the bot's reply in the requested language."""
    if not history:  # e.g., Retry pressed on an empty chat
        return history
    user_msg = history[-1][0]
    answer = chat_interface(user_msg, history[:-1], target_language=target_language or "Auto")
    history[-1][1] = answer
    return history
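# The two-step wiring below follows Gradio's recommended chat pattern:
# _user_submit echoes the user's turn immediately (queue=False keeps it snappy),
# then a chained .then() lets _bot_respond fill in the answer without blocking
# the textbox.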
with gr.Blocks(theme=theme, css=custom_css) as chatbot_gradio_app:
    gr.Markdown("<h1 style='text-align:center;color:#f3efe0;'>Green Greta</h1>")
    with gr.Row():
        lang_sel = gr.Dropdown(SUPPORTED_LANGS, value="Auto", label="Answer language")
    chat = gr.Chatbot(label="Chatbot", height=700, elem_id="greta-chat", show_copy_button=True)
    with gr.Row():
        msg = gr.Textbox(placeholder="Type a message…", scale=9)
        send = gr.Button("Submit", scale=1)
    with gr.Row():
        retry = gr.Button("↻ Retry")
        undo = gr.Button("↩︎ Undo")
        clear = gr.Button("🗑 Clear")

    # Submit via button or Enter (the selected language is passed to the responder)
    send.click(_user_submit, [msg, chat], [msg, chat], queue=False).then(
        _bot_respond, [chat, lang_sel], [chat]
    )
    msg.submit(_user_submit, [msg, chat], [msg, chat], queue=False).then(
        _bot_respond, [chat, lang_sel], [chat]
    )

    # Utility buttons
    clear.click(lambda: [], None, chat, queue=False)
    undo.click(lambda h: h[:-1] if h else h, chat, chat, queue=False)
    retry.click(
        lambda h: (h[:-1] + [[h[-1][0], None]]) if h else h,  # re-ask the last question
        chat, chat, queue=False
    ).then(_bot_respond, [chat, lang_sel], [chat])
# ============================
# 8) Tabs + launch
# ============================
app = gr.TabbedInterface(
    [banner_tab, image_gradio_app, chatbot_gradio_app],
    tab_names=["Welcome to Green Greta", "Green Greta Image Classification", "Green Greta Chat"],
    theme=theme,
)

app.queue()
app.launch()