Spaces:

akhaliq
/

MiniCPM-V-4_5

Running on Zero

App Files Files Community

akhaliq HF Staff committed on Aug 28

Commit

1bf7e3b

verified ·

1 Parent(s): 4ba551f

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +166 -0

app.py ADDED Viewed

	@@ -0,0 +1,166 @@

+import gradio as gr
+import torch
+from PIL import Image
+from transformers import AutoModel, AutoTokenizer
+import spaces
+# Initialize model and tokenizer
+torch.manual_seed(100)
+model = AutoModel.from_pretrained(
+    'openbmb/MiniCPM-V-4_5',
+    trust_remote_code=True,
+    attn_implementation='sdpa',
+    torch_dtype=torch.bfloat16
+)
+model = model.eval().cuda()
+tokenizer = AutoTokenizer.from_pretrained(
+    'openbmb/MiniCPM-V-4_5',
+    trust_remote_code=True
+)
+@spaces.GPU
+def respond(message, history, enable_thinking):
+    """
+    Process user message and generate response
+    """
+    # Build conversation history in the format expected by the model
+    msgs = []
+    # Add previous conversation history
+    for h in history:
+        user_msg = h[0]
+        assistant_msg = h[1]
+        # Parse user message for images and text
+        user_content = []
+        if isinstance(user_msg, tuple):
+            # If user message contains an image
+            img_path, text = user_msg
+            img = Image.open(img_path).convert('RGB')
+            user_content = [img, text] if text else [img]
+        else:
+            # Text only message
+            user_content = [user_msg]
+        msgs.append({"role": "user", "content": user_content})
+        if assistant_msg:
+            msgs.append({"role": "assistant", "content": [assistant_msg]})
+    # Add current message
+    current_content = []
+    if isinstance(message, dict):
+        # Handle multimodal input
+        if message.get("files"):
+            for file_path in message["files"]:
+                img = Image.open(file_path).convert('RGB')
+                current_content.append(img)
+        if message.get("text"):
+            current_content.append(message["text"])
+    else:
+        # Handle text-only input
+        current_content = [message]
+    msgs.append({"role": "user", "content": current_content})
+    # Generate response
+    try:
+        answer = model.chat(
+            msgs=msgs,
+            tokenizer=tokenizer,
+            enable_thinking=enable_thinking
+        )
+        return answer
+    except Exception as e:
+        return f"Error: {str(e)}"
+# Create Gradio interface
+with gr.Blocks(title="MiniCPM-V Chatbot") as demo:
+    gr.Markdown(
+        """
+        # 🤖 MiniCPM-V Multimodal Chatbot
+        Upload images and ask questions about them, or have a text conversation.
+        The model supports multi-turn conversations with context memory.
+        """
+    )
+    with gr.Row():
+        with gr.Column(scale=4):
+            chatbot = gr.Chatbot(
+                height=500,
+                show_label=False,
+                container=True,
+                type="tuples"
+            )
+            with gr.Row():
+                msg = gr.MultimodalTextbox(
+                    interactive=True,
+                    file_types=["image"],
+                    placeholder="Type a message or upload an image...",
+                    show_label=False,
+                    container=False
+                )
+            with gr.Row():
+                clear = gr.Button("🗑️ Clear", size="sm")
+                submit = gr.Button("📤 Send", variant="primary", size="sm")
+        with gr.Column(scale=1):
+            gr.Markdown("### Settings")
+            enable_thinking = gr.Checkbox(
+                label="Enable Thinking Mode",
+                value=False,
+                info="Enable the model's thinking process"
+            )
+            gr.Markdown(
+                """
+                ### Examples
+                - Upload an image and ask "What is in this picture?"
+                - Ask "What are the main objects visible?"
+                - Follow up with "What should I pay attention to here?"
+                """
+            )
+    # Handle message submission
+    def user_submit(message, history, enable_thinking):
+        # Format the user message for display
+        if isinstance(message, dict) and message.get("files"):
+            # If there are files, create tuple format for chatbot display
+            user_msg = (message["files"][0], message.get("text", ""))
+        else:
+            user_msg = message.get("text", "") if isinstance(message, dict) else message
+        # Add user message to history
+        history = history + [(user_msg, None)]
+        # Generate response
+        response = respond(message, history[:-1], enable_thinking)
+        # Update history with response
+        history[-1] = (history[-1][0], response)
+        return "", history
+    # Event handlers
+    msg.submit(
+        user_submit,
+        inputs=[msg, chatbot, enable_thinking],
+        outputs=[msg, chatbot]
+    )
+    submit.click(
+        user_submit,
+        inputs=[msg, chatbot, enable_thinking],
+        outputs=[msg, chatbot]
+    )
+    clear.click(
+        lambda: (None, []),
+        outputs=[msg, chatbot]
+    )
+if __name__ == "__main__":
+    demo.launch(share=True)