Spaces:

Keyurjotaniya007
/

my-image-chatbot

Running

App Files Files Community

Keyurjotaniya007 committed on Aug 3

Commit

1a954a4

verified ·

1 Parent(s): 5e336bf

Upload 2 files

Browse files

Files changed (2) hide show

app.py +126 -60
chatbot.py +27 -0

app.py CHANGED Viewed

@@ -1,64 +1,130 @@
-import gradio as gr
-from huggingface_hub import InferenceClient
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-        response += token
-        yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
     ],
 )
-if __name__ == "__main__":
-    demo.launch()

+import html
+import time
+
+import streamlit as st
+from PIL import Image
+
+from chatbot import chat_with_image
+USER_AVATAR = "👤"
+BOT_AVATAR = '<img src="https://img.icons8.com/emoji/48/robot-emoji.png" width="20"/>'
+st.set_page_config(page_title="Gemini 2.5 Image Chatbot", layout="wide")
+st.sidebar.header("🌐 Language Settings")
+selected_language = st.sidebar.selectbox(
+    "Select response language",
+    [
+        "Afrikaans", "Albanian", "Amharic", "Arabic", "Armenian", "Assamese", "Azerbaijani",
+        "Basque", "Belarusian", "Bengali", "Bosnian", "Bulgarian", "Burmese",
+        "Catalan", "Cebuano", "Chinese", "Corsican", "Croatian", "Czech",
+        "Danish", "Dutch", "English", "Esperanto", "Estonian",
+        "Finnish", "French", "Frisian",
+        "Galician", "Georgian", "German", "Greek", "Gujarati",
+        "Haitian Creole", "Hausa", "Hawaiian", "Hebrew", "Hindi", "Hmong", "Hungarian",
+        "Icelandic", "Igbo", "Indonesian", "Irish", "Italian",
+        "Japanese", "Javanese", "Kannada", "Kazakh", "Khmer", "Kinyarwanda", "Korean", "Kurdish",
+        "Kyrgyz", "Lao", "Latin", "Latvian", "Lithuanian", "Luxembourgish",
+        "Macedonian", "Malagasy", "Malay", "Malayalam", "Maltese", "Maori", "Marathi", "Mongolian",
+        "Nepali", "Norwegian", "Nyanja", "Odia", "Pashto", "Persian", "Polish", "Portuguese",
+        "Punjabi", "Quechua", "Romanian", "Russian", "Samoan", "Scots Gaelic", "Serbian", "Sesotho",
+        "Shona", "Sindhi", "Sinhala", "Slovak", "Slovenian", "Somali", "Spanish", "Sundanese",
+        "Swahili", "Swedish", "Tagalog", "Tajik", "Tamil", "Tatar", "Telugu", "Thai",
+        "Tigrinya", "Turkish", "Turkmen", "Ukrainian", "Urdu", "Uyghur", "Uzbek", "Vietnamese",
+        "Welsh", "Xhosa", "Yiddish", "Yoruba", "Zulu"
     ],
+    index=21
 )
+st.markdown("<h1 style='text-align: center;'>Welcome, Gemini 2.5 Flash Image Chatbot 2.0</h1>", unsafe_allow_html=True)
+if "history" not in st.session_state:
+    st.session_state.history = []
+if "images" not in st.session_state:
+    st.session_state.images = []
+if "current_image_index" not in st.session_state:
+    st.session_state.current_image_index = None
+if "last_animated_index" not in st.session_state:
+    st.session_state.last_animated_index = None
+left_col, right_col = st.columns([1, 5])
+with left_col:
+    st.header("📁 Upload Image")
+    uploaded_file = st.file_uploader(
+        "Drop an image here",
+        type=["jpg", "jpeg", "png"],
+        accept_multiple_files=False,
+        label_visibility="collapsed"
+    )
+    if uploaded_file:
+        image = Image.open(uploaded_file).convert("RGB")
+        st.session_state.images = [image]
+        st.session_state.current_image_index = 0
+        st.image(image, caption="Uploaded Image", use_container_width=True)
+    else:
+        st.session_state.images = []
+        st.session_state.current_image_index = None
+with right_col:
+    if st.session_state.images and st.session_state.current_image_index is not None:
+        current_image = st.session_state.images[st.session_state.current_image_index]
+        st.markdown("### Chat With Image:")
+        st.markdown(f"#### Language : `{selected_language}`")
+        history = st.session_state.history
+        latest_index = None
+        for idx in reversed(range(len(history))):
+            msg = history[idx]
+            if (
+                msg["sender"] == "Gemini" and
+                msg["image_index"] == st.session_state.current_image_index and
+                idx != st.session_state.last_animated_index
+            ):
+                latest_index = idx
+                break
+        for i, msg in enumerate(history):
+            if msg["image_index"] != st.session_state.current_image_index:
+                continue
+            if msg["sender"] == "You":
+                st.markdown(f"""
+                <div style="background-color: #f1f1f1; padding: 10px; border-radius: 8px; margin-bottom: 10px;">
+                <strong>{USER_AVATAR} :</strong> {msg['message']}
+                </div>
+                """, unsafe_allow_html=True)
+            elif msg["sender"] == "Gemini":
+                if i == latest_index:
+                    st.session_state.last_animated_index = i
+                    placeholder = st.empty()
+                    full_response = msg["message"]
+                    for j in range(1, len(full_response) + 1):
+                        placeholder.markdown(f"{BOT_AVATAR} : {full_response[:j]}", unsafe_allow_html=True)
+                        time.sleep(0.002)
+                else:
+                    st.markdown(f"{BOT_AVATAR} : {msg['message']}", unsafe_allow_html=True)
+        user_prompt = st.chat_input("Ask About Image")
+        if user_prompt:
+            full_prompt = f"{user_prompt}\n\nPlease respond in {selected_language}."
+            try:
+                gemini_response = chat_with_image(full_prompt, current_image)
+            except Exception as e:
+                gemini_response = f"Gemini failed to respond: {e}"
+            st.session_state.history.append({
+                "sender": "You",
+                "message": user_prompt,
+                "image_index": st.session_state.current_image_index
+            })
+            st.session_state.history.append({
+                "sender": "Gemini",
+                "message": gemini_response,
+                "image_index": st.session_state.current_image_index
+            })
+            st.rerun()
+    else:
+        st.info("Please upload an image to start chatting.")

chatbot.py ADDED Viewed

	@@ -0,0 +1,27 @@

+import base64
+import io
+from PIL import Image
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_core.messages import HumanMessage
+# Module-level chat model client used by chat_with_image(); it targets the
+# "gemini-2.5-flash" model with a sampling temperature of 0.1.
+# NOTE(review): no API key is passed here, so credentials presumably come from
+# the environment (e.g. GOOGLE_API_KEY) — confirm for the deployment.
+llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0.1)
+def image_to_base64(image: Image.Image) -> str:
+    buffered = io.BytesIO()
+    image.save(buffered, format="JPEG")
+    return base64.b64encode(buffered.getvalue()).decode("utf-8")
+def chat_with_image(user_prompt: str, image: Image.Image) -> str:
+    img_base64 = image_to_base64(image)
+    message = [
+        HumanMessage(content=[
+            {"type": "text", "text": user_prompt},
+            {
+                "type": "media",
+                "mime_type": "image/jpeg",
+                "data": img_base64
+            }
+        ])
+    ]
+    response = llm.invoke(message)
+    return response.content