Spaces:

AC2513
/

gemma-demo

Running on Zero

App Files Files Community

AC2513 committed on Jun 14

Commit

76e1435

1 Parent(s): eab0adb

added tests for processing video

Browse files

Files changed (2) hide show

src/app.py +28 -7
tests/test_video.py +82 -7

src/app.py CHANGED Viewed

@@ -1,8 +1,13 @@
 import torch
 from huggingface_hub import login
 from collections.abc import Iterator
-from transformers import Gemma3ForConditionalGeneration, TextIteratorStreamer, Gemma3Processor
 import spaces
 from threading import Thread
 import gradio as gr
 import os
@@ -26,21 +31,21 @@ model = Gemma3ForConditionalGeneration.from_pretrained(
     attn_implementation="eager",
 )
 def get_frames(video_path: str, max_images: int) -> list[tuple[Image.Image, float]]:
     frames: list[tuple[Image.Image, float]] = []
     capture = cv2.VideoCapture(video_path)
     if not capture.isOpened():
         raise ValueError(f"Could not open video file: {video_path}")
     fps = capture.get(cv2.CAP_PROP_FPS)
     total_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
     frame_interval = max(total_frames // max_images, 1)
-    for i in range(0, min(total_frames, max_images * frame_interval), frame_interval):
-        if len(frames) >= max_images:
-            break
         capture.set(cv2.CAP_PROP_POS_FRAMES, i)
         success, image = capture.read()
         if success:
@@ -49,5 +54,21 @@ def get_frames(video_path: str, max_images: int) -> list[tuple[Image.Image, floa
             timestamp = round(i / fps, 2)
             frames.append((pil_image, timestamp))
     capture.release()
-    return frames

 import torch
 from huggingface_hub import login
 from collections.abc import Iterator
+from transformers import (
+    Gemma3ForConditionalGeneration,
+    TextIteratorStreamer,
+    Gemma3Processor,
+)
 import spaces
+import tempfile
 from threading import Thread
 import gradio as gr
 import os
     attn_implementation="eager",
 )
 def get_frames(video_path: str, max_images: int) -> list[tuple[Image.Image, float]]:
     frames: list[tuple[Image.Image, float]] = []
     capture = cv2.VideoCapture(video_path)
     if not capture.isOpened():
         raise ValueError(f"Could not open video file: {video_path}")
     fps = capture.get(cv2.CAP_PROP_FPS)
     total_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
     frame_interval = max(total_frames // max_images, 1)
+    max_position = min(total_frames, max_images * frame_interval)
+    i = 0
+    while i < max_position and len(frames) < max_images:
         capture.set(cv2.CAP_PROP_POS_FRAMES, i)
         success, image = capture.read()
         if success:
             timestamp = round(i / fps, 2)
             frames.append((pil_image, timestamp))
+        i += frame_interval
     capture.release()
+    return frames
def process_video(video_path: str, max_images: int) -> list[dict]:
    """Sample frames from a video and convert them into content items.

    Every frame returned by ``get_frames`` is written to its own temporary
    PNG file and contributes two consecutive entries to the result: a text
    item labelled ``"Frame <timestamp>:"`` followed by an image item whose
    ``"url"`` is the path of that PNG.

    Args:
        video_path: Path of the video file to sample.
        max_images: Frame limit forwarded to ``get_frames``.

    Returns:
        A flat list of dicts, two per extracted frame, in frame order.

    Raises:
        ValueError: Propagated from ``get_frames`` when the video cannot be
            opened.

    Note:
        The PNG files are created with ``delete=False`` and are not removed
        here, so they outlive this call; whoever consumes the returned paths
        is responsible for deleting them.
    """
    result_content: list[dict] = []
    # TODO: Change max_images to slider
    frames = get_frames(video_path, max_images)
    for image, timestamp in frames:
        # Write through the handle that is already open instead of reopening
        # the file by name: a NamedTemporaryFile cannot be opened a second
        # time on Windows while the first handle is still open.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
            image.save(temp_file, format="PNG")
        # The handle is closed (and flushed) here; the path stays valid
        # because delete=False.
        result_content.append({"type": "text", "text": f"Frame {timestamp}:"})
        result_content.append({"type": "image", "url": temp_file.name})
    logger.debug(f"Processed {len(frames)} frames from video {video_path} with frames {result_content}")
    return result_content

tests/test_video.py CHANGED Viewed

@@ -3,25 +3,100 @@ import os
 import cv2
 from PIL import Image
 from pathlib import Path
-from src.app import get_frames
 # Get the project root directory
 ROOT_DIR = Path(__file__).parent.parent
 def test_correct_frame_return():
     """Test that get_frames returns a list of (Image, float) tuples."""
     # Path to a test video file
     video_path = os.path.join(ROOT_DIR, "assets", "test_video.mp4")
     # Ensure the test video exists
-    assert os.path.exists(video_path), f"Test video not found at {video_path}"
     # Test with a small number of frames
     max_images = 3
     frames = get_frames(video_path, max_images)
-    # Check return type
     assert isinstance(frames, list)
     assert all(isinstance(item, tuple) and len(item) == 2 for item in frames)
-    assert all(isinstance(img, Image.Image) and isinstance(ts, float) for img, ts in frames)

 import cv2
 from PIL import Image
 from pathlib import Path
+import tempfile
+from src.app import get_frames, process_video
 # Get the project root directory
 ROOT_DIR = Path(__file__).parent.parent
 def test_correct_frame_return():
     """Test that get_frames returns a list of (Image, float) tuples."""
     # Path to a test video file
     video_path = os.path.join(ROOT_DIR, "assets", "test_video.mp4")
     # Ensure the test video exists
+    assert os.path.exists(video_path)
     # Test with a small number of frames
     max_images = 3
     frames = get_frames(video_path, max_images)
     assert isinstance(frames, list)
     assert all(isinstance(item, tuple) and len(item) == 2 for item in frames)
+    assert all(
+        isinstance(img, Image.Image) and isinstance(ts, float) for img, ts in frames
+    )
def test_process_video_structure():
    """Test that process_video returns alternating text and image items."""
    video_path = os.path.join(ROOT_DIR, "assets", "test_video.mp4")
    max_images = 2
    result = process_video(video_path, max_images)
    try:
        # Should have 2 items (text + image) per frame
        assert len(result) == max_images * 2
        for text_item, image_item in zip(result[::2], result[1::2]):
            assert text_item["type"] == "text"
            assert text_item["text"].startswith("Frame ")

            assert image_item["type"] == "image"
            assert "url" in image_item
            assert os.path.exists(image_item["url"])
            # verify() raises on a corrupt file; letting that propagate keeps
            # the full traceback, and the context manager closes the handle.
            with Image.open(image_item["url"]) as img:
                img.verify()
    finally:
        # process_video leaves its PNG files on disk, so remove them here to
        # keep repeated test runs from piling up temp files.
        for item in result:
            path = item.get("url")
            if path and os.path.exists(path):
                os.remove(path)
def test_process_video_timestamps():
    """Test that frame labels carry parseable, non-decreasing timestamps."""
    video_path = os.path.join(ROOT_DIR, "assets", "test_video.mp4")
    max_images = 3
    result = process_video(video_path, max_images)
    try:
        # Text items sit at the even positions; pull the number out of the
        # "Frame X.XX:" label.
        timestamps = [
            float(item["text"].split()[1].rstrip(":"))
            for item in result[::2]
            if item["type"] == "text"
        ]
        assert len(timestamps) == max_images
        # Equal to its sorted copy means every timestamp is <= the next one.
        assert timestamps == sorted(timestamps)
    finally:
        # process_video leaves its PNG files on disk, so remove them here to
        # keep repeated test runs from piling up temp files.
        for item in result:
            path = item.get("url")
            if path and os.path.exists(path):
                os.remove(path)
def test_process_video_temp_files():
    """Test that process_video writes each frame to an existing .png temp file.

    process_video does not delete the files it creates, so this test removes
    the file itself once the checks are done.
    """
    video_path = os.path.join(ROOT_DIR, "assets", "test_video.mp4")
    max_images = 1
    result = process_video(video_path, max_images)
    # With a single frame the image item is the second entry (after its label).
    image_path = result[1]["url"]
    try:
        assert os.path.exists(image_path)
        assert image_path.endswith(".png")
    finally:
        if os.path.exists(image_path):
            os.remove(image_path)
def test_process_video_invalid_path():
    """Test that a path which cannot be opened makes process_video raise ValueError."""
    missing_video = "nonexistent_video.mp4"
    frame_limit = 3
    with pytest.raises(ValueError):
        process_video(missing_video, frame_limit)