Spaces:
Running
on
Zero
Running
on
Zero
Added frame capture for video inputs
Browse files
- .vscode/settings.json +7 -0
- src/app.py +28 -0
- src/requirements.txt +2 -1
.vscode/settings.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"python.testing.pytestArgs": [
|
| 3 |
+
"src"
|
| 4 |
+
],
|
| 5 |
+
"python.testing.unittestEnabled": false,
|
| 6 |
+
"python.testing.pytestEnabled": true
|
| 7 |
+
}
|
src/app.py
CHANGED
|
@@ -11,6 +11,8 @@ import cv2
|
|
| 11 |
from loguru import logger
|
| 12 |
from PIL import Image
|
| 13 |
|
|
|
|
|
|
|
| 14 |
dotenv_path = find_dotenv()
|
| 15 |
|
| 16 |
load_dotenv(dotenv_path)
|
|
@@ -25,3 +27,29 @@ model = Gemma3ForConditionalGeneration.from_pretrained(
|
|
| 25 |
device_map="auto",
|
| 26 |
attn_implementation="eager",
|
| 27 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
from loguru import logger
|
| 12 |
from PIL import Image
|
| 13 |
|
| 14 |
+
MAX_NUM_IMAGES = 6
|
| 15 |
+
|
| 16 |
dotenv_path = find_dotenv()
|
| 17 |
|
| 18 |
load_dotenv(dotenv_path)
|
|
|
|
| 27 |
device_map="auto",
|
| 28 |
attn_implementation="eager",
|
| 29 |
)
|
| 30 |
+
|
| 31 |
+
def get_frames(video_path: str) -> list[tuple[Image.Image, float]]:
    """Sample up to ``MAX_NUM_IMAGES`` evenly spaced frames from a video.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.

    Returns:
        A list of ``(frame, timestamp)`` pairs in playback order. ``frame``
        is an RGB ``PIL.Image`` and ``timestamp`` is the frame position in
        seconds, rounded to two decimals. Frames that fail to decode are
        skipped, so the list can be shorter than ``MAX_NUM_IMAGES``; it is
        empty when the reported frame count is not positive.

    Raises:
        ValueError: If the video file cannot be opened.
    """
    capture = cv2.VideoCapture(video_path)
    try:
        if not capture.isOpened():
            raise ValueError(f"Could not open video file: {video_path}")

        fps = capture.get(cv2.CAP_PROP_FPS)
        total_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))

        # Step between sampled frames; never below 1 so short clips
        # (fewer frames than MAX_NUM_IMAGES) are sampled frame by frame.
        frame_interval = max(total_frames // MAX_NUM_IMAGES, 1)
        # The stop bound caps the range at MAX_NUM_IMAGES indices, so no
        # extra length check is needed inside the loop.
        stop = min(total_frames, MAX_NUM_IMAGES * frame_interval)
        # Some containers report an FPS of 0 (or NaN); dividing by it would
        # crash or yield a meaningless timestamp.
        fps_is_usable = fps > 0

        frames: list[tuple[Image.Image, float]] = []
        for frame_index in range(0, stop, frame_interval):
            capture.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
            if fps_is_usable:
                timestamp = round(frame_index / fps, 2)
            else:
                # Best-effort fallback: ask the backend for the current
                # position in milliseconds instead of deriving it from FPS.
                position_ms = capture.get(cv2.CAP_PROP_POS_MSEC)
                timestamp = round(position_ms / 1000, 2)

            success, image = capture.read()
            if not success:
                continue

            # OpenCV decodes to BGR; PIL expects RGB channel order.
            rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            frames.append((Image.fromarray(rgb_image), timestamp))

        return frames
    finally:
        # Always free the capture handle, even if decoding raised.
        capture.release()
|
src/requirements.txt
CHANGED
|
@@ -2,4 +2,5 @@ torch
|
|
| 2 |
spaces
|
| 3 |
gradio
|
| 4 |
transformers==4.50.2
|
| 5 |
-
accelerate
|
|
|
|
|
|
| 2 |
spaces
|
| 3 |
gradio
|
| 4 |
transformers==4.50.2
|
| 5 |
+
accelerate
|
| 6 |
+
pytest
|