Update app.py

app.py CHANGED

@@ -1,16 +1,355 @@
-import numpy as np
 import gradio as gr
 
-def sepia(input_img):
-    sepia_filter = np.array([
-        [0.393, 0.769, 0.189],
-        [0.349, 0.686, 0.168],
-        [0.272, 0.534, 0.131]
-    ])
-    sepia_img = input_img.dot(sepia_filter.T)
-    sepia_img /= sepia_img.max()
-    return sepia_img
-
-demo = gr.Interface(sepia, gr.Image(), "image")
 if __name__ == "__main__":
     demo.launch()
+import os
+import openai
 import gradio as gr
+import numpy as np
+import torch
+from PIL import Image
+import matplotlib.pyplot as plt
+import importlib.util
+from transformers import pipeline
+import requests
+
+# Set your OpenAI API key (ensure the environment variable is set or replace with your key)
+openai.api_key = os.getenv("OPENAI_API_KEY", "your-openai-api-key-here")
+
+def install_sam2_if_needed():
+    """
+    Check if SAM2 is installed, and install it if needed.
+    """
+    if importlib.util.find_spec("sam2") is not None:
+        print("SAM2 is already installed.")
+        return
+
+    try:
+        import pip
+        print("Installing SAM2 from GitHub...")
+        pip.main(['install', 'git+https://github.com/facebookresearch/sam2.git'])
+        print("SAM2 installed successfully.")
+    except Exception as e:
+        print(f"Error installing SAM2: {e}")
+        print("You may need to manually install SAM2: !pip install git+https://github.com/facebookresearch/sam2.git")
+        raise
+
+def detect_objects_owlv2(text_query, image, threshold=0.1):
+    """
+    Detect objects in an image using OWLv2 model.
+
+    Args:
+        text_query (str): Text description of objects to detect
+        image (PIL.Image or numpy.ndarray): Input image
+        threshold (float): Detection threshold
+
+    Returns:
+        list: List of detections with bbox, label, and score
+    """
+    # Initialize the OWL-ViT model
+    detector = pipeline(model="google/owlv2-base-patch16-ensemble", task="zero-shot-object-detection")
+
+    # Convert numpy array to PIL Image if needed
+    if isinstance(image, np.ndarray):
+        image = Image.fromarray(image)
+
+    # Run detection
+    predictions = detector(image, candidate_labels=[text_query])
+
+    # Filter by threshold and format results
+    detections = []
+    for pred in predictions:
+        if pred['score'] >= threshold:
+            bbox = pred['box']
+            # Normalize bbox coordinates (OWL-ViT returns absolute coordinates)
+            width, height = image.size
+            normalized_bbox = [
+                bbox['xmin'] / width,
+                bbox['ymin'] / height,
+                bbox['xmax'] / width,
+                bbox['ymax'] / height
+            ]
+
+            detection = {
+                'label': pred['label'],
+                'bbox': normalized_bbox,
+                'score': pred['score']
+            }
+            detections.append(detection)
+
+    return detections
+
+def generate_masks_from_detections(detections, image, model_name="facebook/sam2-hiera-large"):
+    """
+    Generate segmentation masks for objects detected by OWLv2 using SAM2 from Hugging Face.
+
+    Args:
+        detections (list): List of detections [{'label': str, 'bbox': [x1, y1, x2, y2], 'score': float}, ...]
+        image (PIL.Image.Image or str): The image or path to the image to analyze
+        model_name (str): Hugging Face model name for SAM2.
+
+    Returns:
+        list: List of detections with added 'mask' arrays.
+    """
+    install_sam2_if_needed()
+    from sam2.sam2_image_predictor import SAM2ImagePredictor
+
+    # Load image
+    if isinstance(image, str):
+        image = Image.open(image)
+    elif isinstance(image, np.ndarray):
+        image = Image.fromarray(image)
+
+    image_np = np.array(image.convert("RGB"))
+    H, W = image_np.shape[:2]
+
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    print(f"Using device: {device}")
+    print(f"Loading SAM2 model from Hugging Face: {model_name}")
+    predictor = SAM2ImagePredictor.from_pretrained(model_name)
+    predictor.model.to(device)
+
+    # Convert normalized bboxes to pixels
+    input_boxes = []
+    for det in detections:
+        x1, y1, x2, y2 = det['bbox']
+        input_boxes.append([int(x1 * W), int(y1 * H), int(x2 * W), int(y2 * H)])
+    input_boxes = np.array(input_boxes)
+
+    print(f"Processing image and predicting masks for {len(input_boxes)} boxes...")
+    with torch.inference_mode():
+        predictor.set_image(image_np)
+        if device == "cuda":
+            with torch.autocast("cuda", dtype=torch.bfloat16):
+                masks, scores, _ = predictor.predict(
+                    point_coords=None, point_labels=None,
+                    box=input_boxes, multimask_output=False
+                )
+        else:
+            masks, scores, _ = predictor.predict(
+                point_coords=None, point_labels=None,
+                box=input_boxes, multimask_output=False
+            )
+
+    # Attach masks to detections, handling both (1,H,W) and (H,W) outputs
+    results = []
+    for i, det in enumerate(detections):
+        raw = masks[i]
+        if raw.ndim == 3:
+            mask = raw[0]
+        else:
+            mask = raw
+        mask = mask.astype(np.uint8)
+
+        new_det = det.copy()
+        new_det['mask'] = mask
+        results.append(new_det)
+
+    print(f"Successfully generated {len(results)} masks.")
+    return results
+
+def overlay_detections_on_image(image, detections_with_masks, show_masks=True, show_boxes=True, show_labels=True):
+    """
+    Overlay detections (boxes and/or masks) on the image and return as numpy array.
+
+    Args:
+        image: Input image (PIL.Image or numpy array)
+        detections_with_masks: List of detections with masks
+        show_masks: Whether to show segmentation masks
+        show_boxes: Whether to show bounding boxes
+        show_labels: Whether to show labels
+
+    Returns:
+        numpy.ndarray: Image with overlaid detections
+    """
+    # Convert to PIL Image if needed
+    if isinstance(image, np.ndarray):
+        image = Image.fromarray(image)
+
+    image_np = np.array(image.convert("RGB"))
+    height, width = image_np.shape[:2]
+
+    # Create figure without displaying
+    fig, ax = plt.subplots(1, 1, figsize=(12, 8))
+    ax.imshow(image_np)
+
+    # Define colors for different instances
+    colors = plt.cm.tab10(np.linspace(0, 1, 10))
+
+    # Plot each detection
+    for i, detection in enumerate(detections_with_masks):
+        bbox = detection['bbox']
+        label = detection['label']
+        score = detection['score']
+
+        # Convert normalized bbox to pixel coordinates
+        x1, y1, x2, y2 = bbox
+        x1_px, y1_px = int(x1 * width), int(y1 * height)
+        x2_px, y2_px = int(x2 * width), int(y2 * height)
+
+        # Color for this instance
+        color = colors[i % len(colors)]
+
+        # Display mask if available and requested
+        if show_masks and 'mask' in detection:
+            mask = detection['mask']
+            mask_color = np.zeros((height, width, 4), dtype=np.float32)
+            mask_color[mask > 0] = [color[0], color[1], color[2], 0.5]
+            ax.imshow(mask_color)
+
+        # Draw bounding box if requested
+        if show_boxes:
+            rect = plt.Rectangle((x1_px, y1_px), x2_px - x1_px, y2_px - y1_px,
+                                 fill=False, edgecolor=color, linewidth=2)
+            ax.add_patch(rect)
+
+        # Add label and score if requested
+        if show_labels:
+            ax.text(x1_px, y1_px - 5, f"{label}: {score:.2f}",
+                    color='white', bbox=dict(facecolor=color, alpha=0.8), fontsize=10)
+
+    ax.axis('off')
+
+    # Convert plot to numpy array
+    fig.canvas.draw()
+    result_array = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
+    result_array = result_array.reshape(fig.canvas.get_width_height()[::-1] + (3,))
+
+    plt.close(fig)  # Important: close the figure to free memory
+
+    return result_array
+
+def get_single_prompt(user_input):
+    """
+    Uses OpenAI to rephrase the user's chatter into a single, concise prompt for object detection.
+    The generated prompt will not include any question marks.
+    """
+    if not user_input.strip():
+        user_input = "Detect objects in the image"
+
+    prompt_instruction = (
+        f"Based on the following user input, generate a single, concise prompt for object detection. "
+        f"Do not include any question marks in the output. "
+        f"User input: \"{user_input}\""
+    )
+
+    response = openai.chat.completions.create(
+        model="gpt-4o",  # adjust model name if needed
+        messages=[{"role": "user", "content": prompt_instruction}],
+        temperature=0.3,
+        max_tokens=50,
+    )
+
+    generated_prompt = response.choices[0].message.content.strip()
+    # Ensure no question marks remain
+    generated_prompt = generated_prompt.replace("?", "")
+    return generated_prompt
+
+def is_count_query(user_input):
+    """
+    Check if the user's input indicates a counting request.
+    Looks for common keywords such as "count", "how many", "number of", etc.
+    """
+    keywords = ["count", "how many", "number of", "total", "get me a count"]
+    for kw in keywords:
+        if kw.lower() in user_input.lower():
+            return True
+    return False
+
+def process_question_and_detect(user_input, image, threshold, use_sam):
+    """
+    1. Uses OpenAI to generate a single, concise prompt (without question marks) from the user's input.
+    2. Feeds that prompt to the custom detection function.
+    3. Optionally generates segmentation masks using SAM2.
+    4. Overlays the detection results on the image.
+    5. If the user's input implies a counting request, it also returns the count of detected objects.
+    """
+    if image is None:
+        return None, "Please upload an image."
+
+    try:
+        # Generate the concise prompt from the user's input
+        generated_prompt = get_single_prompt(user_input)
+
+        # Run object detection using the generated prompt
+        detections = detect_objects_owlv2(generated_prompt, image, threshold=threshold)
+
+        # Generate masks if SAM is enabled
+        if use_sam and len(detections) > 0:
+            try:
+                detections_with_masks = generate_masks_from_detections(detections, image)
+            except Exception as e:
+                print(f"SAM2 failed, using detections without masks: {e}")
+                detections_with_masks = detections
+        else:
+            detections_with_masks = detections
+
+        # Overlay results on the image
+        viz = overlay_detections_on_image(image, detections_with_masks,
+                                          show_masks=use_sam,
+                                          show_boxes=True,
+                                          show_labels=True)
+
+        # If the user's input implies a counting request, include the count
+        count_text = ""
+        if is_count_query(user_input):
+            count = len(detections)
+            count_text = f"Detected {count} objects."
+
+        output_text = f"Generated prompt: {generated_prompt}\n{count_text}"
+        if len(detections) == 0:
+            output_text += f"\nNo objects detected with threshold {threshold}. Try lowering the threshold."
+
+        print(output_text)
+        return viz, output_text
+
+    except Exception as e:
+        error_msg = f"Error during detection: {str(e)}"
+        print(error_msg)
+        return image, error_msg
+
+# Gradio interface
+with gr.Blocks() as demo:
+    gr.Markdown("# Custom Object Detection and Counting App")
+    gr.Markdown(
+        """
+        Enter your input (for example:
+        - "What is the number of fruit in my image?"
+        - "How many bicycles can you see?"
+        - "Get me a count of my bottles")
+        and upload an image.
+        The app uses OpenAI to generate a single, concise prompt for object detection (without question marks),
+        then runs the detection using OWL-ViT. Optionally, SAM2 can generate precise segmentation masks.
+        """
+    )
+
+    with gr.Row():
+        with gr.Column():
+            user_input = gr.Textbox(label="Enter your input", placeholder="Type your input here...")
+            image_input = gr.Image(label="Upload Image", type="numpy")
+
+            with gr.Row():
+                threshold_slider = gr.Slider(
+                    minimum=0.01,
+                    maximum=1.0,
+                    value=0.1,
+                    step=0.01,
+                    label="Detection Threshold",
+                    info="Lower values detect more objects but may include false positives"
+                )
+                use_sam_checkbox = gr.Checkbox(
+                    label="Use SAM2 for Segmentation",
+                    value=False,
+                    info="Enable to generate precise segmentation masks (requires additional computation)"
+                )
+
+            submit_btn = gr.Button("Detect and Count")
+
+        with gr.Column():
+            output_image = gr.Image(label="Detection Result")
+            output_text = gr.Textbox(label="Output Details", lines=3)
+
+    submit_btn.click(
+        fn=process_question_and_detect,
+        inputs=[user_input, image_input, threshold_slider, use_sam_checkbox],
+        outputs=[output_image, output_text]
+    )
 
 if __name__ == "__main__":
     demo.launch()
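
For a quick check of the new helpers without launching the Gradio UI (and without an OpenAI key, since only process_question_and_detect calls the API), a minimal sketch follows. It assumes app.py is importable from the working directory, that the dependencies imported above are installed, and that "example.jpg" is just a placeholder for any local test image; the SAM2 step is optional and downloads the checkpoint on first use.

    from PIL import Image
    from app import detect_objects_owlv2, generate_masks_from_detections, overlay_detections_on_image

    image = Image.open("example.jpg")  # placeholder path, substitute any local test image

    # Zero-shot detection with a plain-text query; lower the threshold if nothing is found
    detections = detect_objects_owlv2("a bottle", image, threshold=0.1)
    for det in detections:
        print(det["label"], round(det["score"], 3), det["bbox"])

    # Optional: add SAM2 masks and save the rendered overlay
    if detections:
        detections = generate_masks_from_detections(detections, image)
        overlay = overlay_detections_on_image(image, detections, show_masks=True)
        Image.fromarray(overlay).save("overlay.png")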
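
One compatibility note on overlay_detections_on_image: FigureCanvas.tostring_rgb() was deprecated in Matplotlib 3.8 and removed in later releases, so on a recent Matplotlib the overlay step can raise AttributeError. A possible drop-in substitute for those two lines, assuming the default Agg canvas, is to read the RGBA buffer and discard the alpha channel:

    # Hypothetical substitute for the tostring_rgb() lines, assuming the Agg canvas
    fig.canvas.draw()
    rgba = np.asarray(fig.canvas.buffer_rgba())  # (H, W, 4) uint8 view of the rendered figure
    result_array = rgba[..., :3].copy()          # keep RGB, drop alpha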