| """ | |
| ========================================================= | |
| app.py — Green Greta (Gradio + TF/Keras 3 + LangChain 0.3) | |
| - Chat tab: Blocks + Chatbot(height=...) ✅ | |
| - LLM: meta-llama/Meta-Llama-3.1-8B-Instruct ✅ | |
| - RAG: e5-base-v2 + (BM25+Vector) con fallback + Multi-Query + reranker ✅ | |
| - Responde en el idioma elegido (sin pasar claves extra) ✅ | |
| ========================================================= | |
| """ | |
import os
import shutil
# --- Env / telemetry (set before the imports that read them) ---
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
os.environ.setdefault("HF_HUB_DISABLE_TELEMETRY", "1")
os.environ.setdefault("GRADIO_ANALYTICS_ENABLED", "False")
os.environ.setdefault("ANONYMIZED_TELEMETRY", "false")
os.environ.setdefault("CHROMA_TELEMETRY_ENABLED", "FALSE")
os.environ.setdefault("USER_AGENT", "green-greta/1.0 (+contact-or-repo)")
# Optional: more deterministic TF results on CPU
# os.environ.setdefault("TF_ENABLE_ONEDNN_OPTS", "0")
import gradio as gr
import tensorflow as tf
from tensorflow import keras
from PIL import Image

try:
    from fake_useragent import UserAgent
    user_agent = UserAgent().random
except Exception:
    user_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
header_template = {"User-Agent": user_agent}
# --- LangChain / RAG ---
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_community.document_loaders import WebBaseLoader
# Vector store (the community import still works on LangChain 0.3, but is deprecated)
try:
    from langchain_chroma import Chroma
except ImportError:
    from langchain_community.vectorstores import Chroma
# Embeddings
try:
    from langchain_huggingface import HuggingFaceEmbeddings
except ImportError:
    from langchain_community.embeddings import HuggingFaceEmbeddings
# Retrieval utilities
from langchain.retrievers import ContextualCompressionRetriever, EnsembleRetriever
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.retrievers import BM25Retriever
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
# HF Hub
from huggingface_hub import snapshot_download
# LLM via HF Inference
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
# Theming + URL list
import theme
from url_list import URLS

theme = theme.Theme()
# =========================================================
# 1) IMAGE CLASSIFICATION — Keras 3-safe SavedModel loading
# =========================================================
MODEL_REPO = "rocioadlc/efficientnetB0_trash"
MODEL_SERVING_SIGNATURE = "serving_default"

model_dir = snapshot_download(MODEL_REPO)
image_model = keras.layers.TFSMLayer(model_dir, call_endpoint=MODEL_SERVING_SIGNATURE)
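# Note: Keras 3 can no longer load a TF SavedModel via keras.models.load_model,
# so TFSMLayer above wraps the exported "serving_default" signature as an
# inference-only layer; calling it returns the signature's outputs, typically
# a dict keyed by output tensor name (unpacked in predict_image below).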
class_labels = ["cardboard", "glass", "metal", "paper", "plastic", "trash"]

def predict_image(input_image: Image.Image):
    img = input_image.convert("RGB").resize((224, 224))
    x = keras.utils.img_to_array(img)  # Keras 3 path (tf.keras.preprocessing is legacy)
    x = keras.applications.efficientnet.preprocess_input(x)
    x = tf.expand_dims(x, 0)
    outputs = image_model(x)
    preds = outputs[next(iter(outputs))] if isinstance(outputs, dict) and outputs else outputs
    arr = preds.numpy() if hasattr(preds, "numpy") else preds
    probs = arr[0].tolist()
    return {label: float(probs[i]) for i, label in enumerate(class_labels)}
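# Quick local sanity check (assumes a local "sample.jpg"; uncomment to try):
# print(predict_image(Image.open("sample.jpg")))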
image_gradio_app = gr.Interface(
    fn=predict_image,
    inputs=gr.Image(label="Image", sources=["upload", "webcam"], type="pil"),
    outputs=[gr.Label(label="Result")],
    title="<span style='color: rgb(243, 239, 224);'>Green Greta</span>",
    theme=theme,
)
# ============================================
# 2) KNOWLEDGE LOADING (RAG: loader + splitter)
# ============================================
def load_url(url: str):
    loader = WebBaseLoader(web_paths=[url], header_template=header_template)
    return loader.load()

def safe_load_all_urls(urls):
    all_docs = []
    for link in urls:
        try:
            docs = load_url(link)
            all_docs.extend(docs)
        except Exception as e:
            print(f"Skipping URL due to error: {link}\nError: {e}\n")
    return all_docs

all_loaded_docs = safe_load_all_urls(URLS)

base_splitter = RecursiveCharacterTextSplitter(
    chunk_size=900,
    chunk_overlap=100,
    length_function=len,
)
docs = base_splitter.split_documents(all_loaded_docs)
# Embeddings
embeddings = HuggingFaceEmbeddings(model_name="intfloat/e5-base-v2")
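# Note: e5 models were trained with "query: " / "passage: " prefixes; the plain
# HuggingFaceEmbeddings wrapper does not add them, which trades a little
# retrieval quality for simplicity here.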
# Vector store
persist_directory = "docs/chroma/"
shutil.rmtree(persist_directory, ignore_errors=True)
vectordb = Chroma.from_documents(
    documents=docs,
    embedding=embeddings,
    persist_directory=persist_directory,
)
# Vector retriever
vec_retriever = vectordb.as_retriever(search_kwargs={"k": 8}, search_type="mmr")
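# MMR (maximal marginal relevance) search trades some raw similarity for
# diversity, which helps when several chunks from the same page repeat each other.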
# BM25 + ensemble, with a fallback if rank-bm25 is missing
use_bm25 = True
try:
    bm25 = BM25Retriever.from_documents(docs)  # requires the rank-bm25 package
    bm25.k = 8
except Exception as e:
    print(f"[RAG] BM25 unavailable ({e}). Falling back to vector-only retriever.")
    use_bm25 = False
    bm25 = None

if use_bm25:
    base_retriever = EnsembleRetriever(retrievers=[bm25, vec_retriever], weights=[0.4, 0.6])
else:
    base_retriever = vec_retriever
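# EnsembleRetriever fuses the two ranked lists with weighted Reciprocal Rank
# Fusion; the 0.4/0.6 split slightly favors dense retrieval over exact-term
# BM25 matches.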
# ======================================
# 3) PROMPT (no extra variables: only {context} and {question})
#    The model is instructed to obey a directive prefixed to the question itself.
# ======================================
SYSTEM_TEMPLATE = (
    "You are Greta, a recycling & sustainability assistant. "
    "Follow any explicit language directive at the start of the question, e.g., "
    "'Answer ONLY in Spanish.' If there is no directive, detect the user's language and answer accordingly. "
    "Be direct and practical. If the snippets are insufficient, say so and suggest actionable next steps.\n\n"
    "{context}\n\n"
    "Question: {question}"
)
qa_prompt = ChatPromptTemplate.from_template(SYSTEM_TEMPLATE)
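# The combine-docs prompt of ConversationalRetrievalChain only receives
# {context} and {question}; any extra input key would raise a missing-variable
# error, hence the language directive rides inside the question string itself.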
# ===========================================
# 4) LLM — Hugging Face Inference (Llama 3.1 8B)
# ===========================================
endpoint = HuggingFaceEndpoint(
    repo_id="meta-llama/Meta-Llama-3.1-8B-Instruct",
    task="text-generation",
    max_new_tokens=900,
    temperature=0.2,
    top_k=40,
    repetition_penalty=1.05,
    return_full_text=False,
    huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
    timeout=120,
    model_kwargs={},
)
llm = ChatHuggingFace(llm=endpoint)
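# ChatHuggingFace wraps the raw endpoint so chat messages are rendered with the
# model's own chat template (Llama 3.1 expects its special header tokens)
# instead of being concatenated as plain text.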
# ===========================================
# 5) Chain (memory + Multi-Query + reranker + compression)
# ===========================================
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
)

# Multi-Query (LLM-generated paraphrases of the query)
mqr = MultiQueryRetriever.from_llm(retriever=base_retriever, llm=llm, include_original=True)

# Reranker (cross-encoder, base size)
cross_encoder = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-base")
reranker = CrossEncoderReranker(model=cross_encoder, top_n=4)
compression_retriever = ContextualCompressionRetriever(
    base_retriever=mqr,
    base_compressor=reranker,
)
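# Retrieval flow per question: MultiQueryRetriever asks the LLM for paraphrases
# and unions the documents each variant retrieves; the cross-encoder then scores
# every (question, chunk) pair and only the top 4 survive into {context}.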
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=compression_retriever,
    memory=memory,
    verbose=True,
    combine_docs_chain_kwargs={"prompt": qa_prompt},
    get_chat_history=lambda h: h,
    rephrase_question=False,
    return_source_documents=False,
)
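# rephrase_question=False keeps the original question (with its language
# directive) for the answer prompt; only retrieval sees the condensed,
# history-aware rewrite.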
# ===== Helper: build the language directive prefixed to the question =====
def _lang_directive(lang: str) -> str:
    if not lang or lang.strip().lower() == "auto":
        return "Detect the user's language and answer in that language."
    return f"Answer ONLY in {lang}."

def chat_interface(question: str, history, target_language: str = "Auto"):
    """Return a clean answer in the requested language WITHOUT passing extra keys to the chain."""
    try:
        directive = _lang_directive(target_language)
        combined_q = f"{directive}\n\n{question}"
        result = qa_chain.invoke({"question": combined_q})
        answer = result.get("answer", "")
        if not answer:
            return "Sorry, I couldn't generate a useful answer from the available information."
        return answer
    except Exception as e:
        return (
            "Sorry, I ran into a problem processing your question. "
            "Please try again in a moment or rephrase your query.\n\n"
            f"Technical detail: {e}"
        )
# ============================
# 6) Banner / Welcome content
# ============================
banner_tab_content = """
<div style="background-color: #d3e3c3; text-align: center; padding: 20px; display: flex; flex-direction: column; align-items: center;">
    <img src="https://huggingface.co/spaces/ALVHB95/TFM_DataScience_APP/resolve/main/front_4.jpg" alt="Banner Image" style="width: 50%; max-width: 500px; margin: 0 auto;">
    <h1 style="font-size: 24px; color: #4e6339; margin-top: 20px;">¡Bienvenido a nuestro clasificador de imágenes y chatbot para un reciclaje más inteligente!♻️</h1>
    <p style="font-size: 16px; color: #4e6339; text-align: justify;">¿Alguna vez te has preguntado si puedes reciclar un objeto en particular? ¿O te has sentido abrumado por la cantidad de residuos que generas y no sabes cómo manejarlos de manera más sostenible? ¡Estás en el lugar correcto!</p>
    <p style="font-size: 16px; color: #4e6339; text-align: justify;">Nuestra plataforma combina la potencia de la inteligencia artificial con la comodidad de un chatbot para brindarte respuestas rápidas y precisas sobre qué objetos son reciclables y cómo hacerlo de la manera más eficiente.</p>
    <p style="font-size: 16px; text-align:center;"><strong><span style="color: #4e6339;">¿Cómo usarlo?</span></strong></p>
    <ul style="list-style-type: disc; text-align: justify; margin-top: 20px; padding-left: 20px;">
        <li style="font-size: 16px; color: #4e6339;"><strong><span style="color: #4e6339;">Green Greta Image Classification:</span></strong> Ve a la pestaña Greta Image Classification y simplemente carga una foto del objeto que quieras reciclar, y nuestro modelo identificará de qué se trata🕵️‍♂️ para que puedas desecharlo adecuadamente.</li>
        <li style="font-size: 16px; color: #4e6339;"><strong><span style="color: #4e6339;">Green Greta Chat:</span></strong> ¿Tienes preguntas sobre reciclaje, materiales específicos o prácticas sostenibles? ¡Pregunta a nuestro chatbot en la pestaña Green Greta Chat!📝 Está aquí para responder todas tus preguntas y ayudarte a tomar decisiones más informadas sobre tu reciclaje.</li>
    </ul>
    <h1 style="font-size: 24px; color: #4e6339; margin-top: 20px;">Welcome to our image classifier and chatbot for smarter recycling!♻️</h1>
    <p style="font-size: 16px; color: #4e6339; text-align: justify;">Have you ever wondered if you can recycle a particular object? Or felt overwhelmed by the amount of waste you generate and don't know how to handle it more sustainably? You're in the right place!</p>
    <p style="font-size: 16px; color: #4e6339; text-align: justify;">Our platform combines the power of artificial intelligence with the convenience of a chatbot to provide you with quick and accurate answers about which objects are recyclable and how to do it most efficiently.</p>
    <p style="font-size: 16px; text-align:center;"><strong><span style="color: #4e6339;">How to use it?</span></strong></p>
    <ul style="list-style-type: disc; text-align: justify; margin-top: 20px; padding-left: 20px;">
        <li style="font-size: 16px; color: #4e6339;"><strong><span style="color: #4e6339;">Green Greta Image Classification:</span></strong> Go to the Greta Image Classification tab and simply upload a photo of the object you want to recycle, and our model will identify what it is🕵️‍♂️ so you can dispose of it properly.</li>
        <li style="font-size: 16px; color: #4e6339;"><strong><span style="color: #4e6339;">Green Greta Chat:</span></strong> Have questions about recycling, specific materials, or sustainable practices? Ask our chatbot in the Green Greta Chat tab!📝 It's here to answer all your questions and help you make more informed decisions about your recycling.</li>
    </ul>
</div>
"""
banner_tab = gr.Markdown(banner_tab_content)
# ============================
# 7) Chat tab (Blocks + Chatbot with height + language selector)
# ============================
SUPPORTED_LANGS = ["Auto", "English", "German", "French", "Italian", "Portuguese", "Hindi", "Spanish", "Thai"]

# CSS: enlarge the chat area and the overall width
custom_css = """
.gradio-container { max-width: 1200px !important; }
#greta-chat { height: 700px !important; }
#greta-chat .gr-chatbot { height: 700px !important; min-height: 700px !important; }
#greta-chat .overflow-y-auto { height: 660px !important; max-height: 660px !important; }
"""
def _user_submit(user_msg, history):
    """Append the user's turn; the bot replies afterwards."""
    if not user_msg:
        return "", history
    history = history + [[user_msg, None]]
    return "", history

def _bot_respond(history, target_language):
    """Generate the bot's reply in the requested language."""
    if not history:  # e.g., Retry pressed on an empty chat
        return history
    user_msg = history[-1][0]
    answer = chat_interface(user_msg, history[:-1], target_language=target_language or "Auto")
    history[-1][1] = answer
    return history
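# The two-step wiring below follows Gradio's recommended chat pattern:
# _user_submit echoes the user's turn immediately (queue=False keeps it snappy),
# then a chained .then() lets _bot_respond fill in the answer without blocking
# the textbox.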
with gr.Blocks(theme=theme, css=custom_css) as chatbot_gradio_app:
    gr.Markdown("<h1 style='text-align:center;color:#f3efe0;'>Green Greta</h1>")
    with gr.Row():
        lang_sel = gr.Dropdown(SUPPORTED_LANGS, value="Auto", label="Answer language")
    chat = gr.Chatbot(label="Chatbot", height=700, elem_id="greta-chat", show_copy_button=True)
    with gr.Row():
        msg = gr.Textbox(placeholder="Type a message…", scale=9)
        send = gr.Button("Submit", scale=1)
    with gr.Row():
        retry = gr.Button("↻ Retry")
        undo = gr.Button("↩︎ Undo")
        clear = gr.Button("🗑 Clear")

    # Submit via button or Enter (the selected language is passed to the responder)
    send.click(_user_submit, [msg, chat], [msg, chat], queue=False).then(
        _bot_respond, [chat, lang_sel], [chat]
    )
    msg.submit(_user_submit, [msg, chat], [msg, chat], queue=False).then(
        _bot_respond, [chat, lang_sel], [chat]
    )

    # Utility buttons
    clear.click(lambda: [], None, chat, queue=False)
    undo.click(lambda h: h[:-1] if h else h, chat, chat, queue=False)
    retry.click(
        lambda h: (h[:-1] + [[h[-1][0], None]]) if h else h,  # re-ask the last question
        chat, chat, queue=False
    ).then(_bot_respond, [chat, lang_sel], [chat])
# ============================
# 8) Tabs + launch
# ============================
app = gr.TabbedInterface(
    [banner_tab, image_gradio_app, chatbot_gradio_app],
    tab_names=["Welcome to Green Greta", "Green Greta Image Classification", "Green Greta Chat"],
    theme=theme,
)

app.queue()
app.launch()