Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
import spaces
|
| 2 |
import gradio as gr
|
| 3 |
import cv2
|
| 4 |
from PIL import Image, ImageDraw, ImageFont
|
|
@@ -9,6 +8,7 @@ import os
|
|
| 9 |
import matplotlib.pyplot as plt
|
| 10 |
from io import BytesIO
|
| 11 |
import tempfile
|
|
|
|
| 12 |
|
| 13 |
# Check if CUDA is available, otherwise use CPU
|
| 14 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
|
@@ -33,18 +33,19 @@ def process_video(video_path, target, progress=gr.Progress()):
|
|
| 33 |
frame_duration = 1 / output_fps
|
| 34 |
video_duration = frame_count / original_fps
|
| 35 |
|
| 36 |
-
processed_frames = []
|
| 37 |
frame_scores = []
|
|
|
|
|
|
|
| 38 |
|
| 39 |
-
for time in progress.tqdm(np.arange(0, video_duration, frame_duration)):
|
| 40 |
frame_number = int(time * original_fps)
|
| 41 |
cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
|
| 42 |
ret, img = cap.read()
|
| 43 |
if not ret:
|
| 44 |
break
|
| 45 |
|
| 46 |
-
# Resize the frame
|
| 47 |
-
|
| 48 |
pil_img = Image.fromarray(cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB))
|
| 49 |
|
| 50 |
# Process single image
|
|
@@ -58,7 +59,7 @@ def process_video(video_path, target, progress=gr.Progress()):
|
|
| 58 |
max_score = 0
|
| 59 |
|
| 60 |
try:
|
| 61 |
-
font = ImageFont.truetype("arial.ttf", 20)
|
| 62 |
except IOError:
|
| 63 |
font = ImageFont.load_default()
|
| 64 |
|
|
@@ -77,15 +78,22 @@ def process_video(video_path, target, progress=gr.Progress()):
|
|
| 77 |
|
| 78 |
max_score = max(max_score, confidence)
|
| 79 |
|
| 80 |
-
|
|
|
|
|
|
|
|
|
|
| 81 |
frame_scores.append(max_score)
|
| 82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
cap.release()
|
| 84 |
-
return
|
| 85 |
-
|
| 86 |
def create_heatmap(frame_scores, current_frame):
|
| 87 |
plt.figure(figsize=(12, 3))
|
| 88 |
-
plt.imshow([frame_scores], cmap='hot_r', aspect='auto')
|
| 89 |
cbar = plt.colorbar(label='Confidence')
|
| 90 |
cbar.ax.yaxis.set_ticks_position('left')
|
| 91 |
cbar.ax.yaxis.set_label_position('left')
|
|
@@ -93,13 +101,11 @@ def create_heatmap(frame_scores, current_frame):
|
|
| 93 |
plt.xlabel('Frame')
|
| 94 |
plt.yticks([])
|
| 95 |
|
| 96 |
-
# Add more frame numbers on x-axis
|
| 97 |
num_frames = len(frame_scores)
|
| 98 |
-
step = max(1, num_frames // 10)
|
| 99 |
frame_numbers = range(0, num_frames, step)
|
| 100 |
plt.xticks(frame_numbers, [str(i) for i in frame_numbers])
|
| 101 |
|
| 102 |
-
# Add vertical line for current frame
|
| 103 |
plt.axvline(x=current_frame, color='blue', linestyle='--', linewidth=2)
|
| 104 |
|
| 105 |
plt.tight_layout()
|
|
@@ -121,6 +127,13 @@ def load_sample_frame(video_path):
|
|
| 121 |
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
| 122 |
return frame_rgb
|
| 123 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
def gradio_app():
|
| 125 |
with gr.Blocks() as app:
|
| 126 |
gr.Markdown("# Video Object Detection with Owlv2")
|
|
@@ -135,28 +148,23 @@ def gradio_app():
|
|
| 135 |
use_sample_button = gr.Button("Use Sample Video")
|
| 136 |
progress_bar = gr.Progress()
|
| 137 |
|
| 138 |
-
|
| 139 |
frame_scores = gr.State([])
|
| 140 |
|
| 141 |
def process_and_update(video, target):
|
| 142 |
-
|
| 143 |
-
if
|
| 144 |
-
heatmap_path = create_heatmap(scores, 0)
|
| 145 |
-
|
|
|
|
| 146 |
return None, None, None, None, error, gr.Slider(maximum=100, value=0)
|
| 147 |
|
| 148 |
-
def update_frame_and_heatmap(frame_index, frames, scores):
|
| 149 |
-
if frames and 0 <= frame_index < len(frames):
|
| 150 |
-
heatmap_path = create_heatmap(scores, frame_index)
|
| 151 |
-
return frames[frame_index], heatmap_path
|
| 152 |
-
return None, None
|
| 153 |
-
|
| 154 |
video_input.upload(process_and_update,
|
| 155 |
inputs=[video_input, target_input],
|
| 156 |
-
outputs=[
|
| 157 |
|
| 158 |
frame_slider.change(update_frame_and_heatmap,
|
| 159 |
-
inputs=[frame_slider,
|
| 160 |
outputs=[output_image, heatmap_output])
|
| 161 |
|
| 162 |
def use_sample_video():
|
|
@@ -165,7 +173,7 @@ def gradio_app():
|
|
| 165 |
|
| 166 |
use_sample_button.click(use_sample_video,
|
| 167 |
inputs=None,
|
| 168 |
-
outputs=[
|
| 169 |
|
| 170 |
# Layout
|
| 171 |
with gr.Row():
|
|
@@ -179,4 +187,15 @@ def gradio_app():
|
|
| 179 |
|
| 180 |
if __name__ == "__main__":
|
| 181 |
app = gradio_app()
|
| 182 |
-
app.launch(share=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import cv2
|
| 3 |
from PIL import Image, ImageDraw, ImageFont
|
|
|
|
| 8 |
import matplotlib.pyplot as plt
|
| 9 |
from io import BytesIO
|
| 10 |
import tempfile
|
| 11 |
+
import shutil
|
| 12 |
|
| 13 |
# Check if CUDA is available, otherwise use CPU
|
| 14 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
|
|
|
| 33 |
frame_duration = 1 / output_fps
|
| 34 |
video_duration = frame_count / original_fps
|
| 35 |
|
|
|
|
| 36 |
frame_scores = []
|
| 37 |
+
temp_dir = tempfile.mkdtemp()
|
| 38 |
+
frame_paths = []
|
| 39 |
|
| 40 |
+
for i, time in enumerate(progress.tqdm(np.arange(0, video_duration, frame_duration))):
|
| 41 |
frame_number = int(time * original_fps)
|
| 42 |
cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
|
| 43 |
ret, img = cap.read()
|
| 44 |
if not ret:
|
| 45 |
break
|
| 46 |
|
| 47 |
+
# Resize the frame
|
| 48 |
+
img_resized = cv2.resize(img, (640, 360))
|
| 49 |
pil_img = Image.fromarray(cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB))
|
| 50 |
|
| 51 |
# Process single image
|
|
|
|
| 59 |
max_score = 0
|
| 60 |
|
| 61 |
try:
|
| 62 |
+
font = ImageFont.truetype("arial.ttf", 20)
|
| 63 |
except IOError:
|
| 64 |
font = ImageFont.load_default()
|
| 65 |
|
|
|
|
| 78 |
|
| 79 |
max_score = max(max_score, confidence)
|
| 80 |
|
| 81 |
+
# Save frame to disk
|
| 82 |
+
frame_path = os.path.join(temp_dir, f"frame_{i:04d}.png")
|
| 83 |
+
pil_img.save(frame_path)
|
| 84 |
+
frame_paths.append(frame_path)
|
| 85 |
frame_scores.append(max_score)
|
| 86 |
|
| 87 |
+
# Clear GPU cache every 10 frames
|
| 88 |
+
if i % 10 == 0:
|
| 89 |
+
torch.cuda.empty_cache()
|
| 90 |
+
|
| 91 |
cap.release()
|
| 92 |
+
return frame_paths, frame_scores, None
|
| 93 |
+
|
| 94 |
def create_heatmap(frame_scores, current_frame):
|
| 95 |
plt.figure(figsize=(12, 3))
|
| 96 |
+
plt.imshow([frame_scores], cmap='hot_r', aspect='auto')
|
| 97 |
cbar = plt.colorbar(label='Confidence')
|
| 98 |
cbar.ax.yaxis.set_ticks_position('left')
|
| 99 |
cbar.ax.yaxis.set_label_position('left')
|
|
|
|
| 101 |
plt.xlabel('Frame')
|
| 102 |
plt.yticks([])
|
| 103 |
|
|
|
|
| 104 |
num_frames = len(frame_scores)
|
| 105 |
+
step = max(1, num_frames // 10)
|
| 106 |
frame_numbers = range(0, num_frames, step)
|
| 107 |
plt.xticks(frame_numbers, [str(i) for i in frame_numbers])
|
| 108 |
|
|
|
|
| 109 |
plt.axvline(x=current_frame, color='blue', linestyle='--', linewidth=2)
|
| 110 |
|
| 111 |
plt.tight_layout()
|
|
|
|
| 127 |
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
| 128 |
return frame_rgb
|
| 129 |
|
| 130 |
+
def update_frame_and_heatmap(frame_index, frame_paths, scores):
    """Load the saved frame at *frame_index* and regenerate the heatmap.

    Parameters:
        frame_index: Integer index selected on the frame slider.
        frame_paths: List of image file paths written by process_video.
        scores: Per-frame confidence scores (parallel to frame_paths).

    Returns:
        (frame ndarray, heatmap image path), or (None, None) when the
        index is out of range or no frames are available.
    """
    if frame_paths and 0 <= frame_index < len(frame_paths):
        # Use a context manager so the image file handle is closed promptly;
        # the original left the handle to the GC, leaking descriptors when
        # the slider is scrubbed many times in one session.
        with Image.open(frame_paths[frame_index]) as frame:
            frame_array = np.array(frame)
        heatmap_path = create_heatmap(scores, frame_index)
        return frame_array, heatmap_path
    return None, None
|
| 136 |
+
|
| 137 |
def gradio_app():
|
| 138 |
with gr.Blocks() as app:
|
| 139 |
gr.Markdown("# Video Object Detection with Owlv2")
|
|
|
|
| 148 |
use_sample_button = gr.Button("Use Sample Video")
|
| 149 |
progress_bar = gr.Progress()
|
| 150 |
|
| 151 |
+
frame_paths = gr.State([])
|
| 152 |
frame_scores = gr.State([])
|
| 153 |
|
| 154 |
def process_and_update(video, target):
    """Run detection on an uploaded video and refresh all UI outputs.

    Returns a 6-tuple matching the Gradio `outputs=` wiring:
    (frame_paths state, frame_scores state, first frame image,
     heatmap image path, error message, slider update).
    """
    paths, scores, error = process_video(video, target, progress_bar)
    if paths is not None:
        heatmap_path = create_heatmap(scores, 0)
        # Close the image file handle promptly instead of leaking it to GC.
        with Image.open(paths[0]) as first_frame:
            first_frame_array = np.array(first_frame)
        return paths, scores, first_frame_array, heatmap_path, error, gr.Slider(maximum=len(paths) - 1, value=0)
    # Processing failed: clear outputs and reset the slider to a safe default.
    return None, None, None, None, error, gr.Slider(maximum=100, value=0)
|
| 161 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
video_input.upload(process_and_update,
|
| 163 |
inputs=[video_input, target_input],
|
| 164 |
+
outputs=[frame_paths, frame_scores, output_image, heatmap_output, error_output, frame_slider])
|
| 165 |
|
| 166 |
frame_slider.change(update_frame_and_heatmap,
|
| 167 |
+
inputs=[frame_slider, frame_paths, frame_scores],
|
| 168 |
outputs=[output_image, heatmap_output])
|
| 169 |
|
| 170 |
def use_sample_video():
|
|
|
|
| 173 |
|
| 174 |
use_sample_button.click(use_sample_video,
|
| 175 |
inputs=None,
|
| 176 |
+
outputs=[frame_paths, frame_scores, output_image, heatmap_output, error_output, frame_slider])
|
| 177 |
|
| 178 |
# Layout
|
| 179 |
with gr.Row():
|
|
|
|
| 187 |
|
| 188 |
if __name__ == "__main__":
|
| 189 |
app = gradio_app()
|
| 190 |
+
app.launch(share=True)
|
| 191 |
+
|
| 192 |
+
# Cleanup temporary files
|
| 193 |
+
def cleanup():
    """Remove the per-run frame images and their temporary directory."""
    # NOTE(review): `frame_paths` here is the gr.State created inside
    # gradio_app(), which is NOT visible at module scope — as written this
    # raises NameError if called. Confirm where this helper is meant to live.
    for path in frame_paths.value:
        if os.path.exists(path):
            os.remove(path)
    # NOTE(review): `temp_dir` is local to process_video() (tempfile.mkdtemp()
    # is called there), so it is also undefined at module scope. The rmtree
    # below would additionally make the per-file os.remove loop redundant,
    # since the frames are saved inside temp_dir.
    if os.path.exists(temp_dir):
        shutil.rmtree(temp_dir)
|
| 199 |
+
|
| 200 |
+
# Make sure to call cleanup when the app is closed
|
| 201 |
+
# This might require additional setup depending on how you're running the app
|