Spaces:

amberborici
/

Qwen-Qwen2-VL-7B-Instruct

Running

amberborici committed on Aug 8

Commit

cbe84b9

1 Parent(s): f712aee

sss

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,18 +1,15 @@
 import gradio as gr
-# Load the Qwen2-VL model using gr.load()
-qwen_model = gr.load("models/Qwen/Qwen2-VL-7B-Instruct")
 def process_images(images, prompt):
     """
-    Process multiple images with the Qwen2-VL model
-    Args:
-        images: List of uploaded images
-        prompt: User-provided prompt
-    Returns:
-        List of generated descriptions
     """
     if not images:
         return "Please upload at least one image."
@@ -24,17 +21,31 @@ def process_images(images, prompt):
             continue
         try:
-            # For vision models, we pass the image and text directly
-            # The model expects the image and prompt as separate arguments
-            response = qwen_model(
-                prompt,  # Text prompt
-                image    # Image file
-            )
-            # The response should be the generated text
-            description = response
-            results.append(f"Image {i+1}: {description}")
         except Exception as e:
             results.append(f"Image {i+1}: ❌ Error - {str(e)}")

 import gradio as gr
+from huggingface_hub import InferenceClient
+import base64
+import io
+from PIL import Image
+# Initialize the inference client
+client = InferenceClient()
 def process_images(images, prompt):
     """
+    Process multiple images using Hugging Face Hub Inference Client
     """
     if not images:
         return "Please upload at least one image."
             continue
         try:
+            # Convert numpy array to PIL Image
+            pil_image = Image.fromarray(image)
+            # Encode image to base64
+            buffered = io.BytesIO()
+            pil_image.save(buffered, format="JPEG")
+            img_str = base64.b64encode(buffered.getvalue()).decode()
+            base64_image = f"data:image/jpeg;base64,{img_str}"
+            # Use the inference client to generate response
+            response = client.post(
+                "Qwen/Qwen2-VL-7B-Instruct",
+                inputs={
+                    "text": prompt,
+                    "image": base64_image
+                }
+            )
+            # Extract the response
+            if response and len(response) > 0:
+                description = response[0]
+                results.append(f"Image {i+1}: {description}")
+            else:
+                results.append(f"Image {i+1}: ❌ No response from model")
         except Exception as e:
             results.append(f"Image {i+1}: ❌ Error - {str(e)}")