Spaces:
Running
Running
FIX: video
Browse files
- __pycache__/detection.cpython-310.pyc +0 -0
- __pycache__/model.cpython-310.pyc +0 -0
- detection.py +11 -4
__pycache__/detection.cpython-310.pyc
ADDED
|
Binary file (2.23 kB). View file
|
|
|
__pycache__/model.cpython-310.pyc
ADDED
|
Binary file (795 Bytes). View file
|
|
|
detection.py
CHANGED
|
@@ -70,10 +70,17 @@ def detect_video(frames, processor, clip_model, detection_model):
|
|
| 70 |
|
| 71 |
pred_score = float(detection_model(last_hidden_states)[0][0].cpu().detach().numpy())
|
| 72 |
assert 0 <= pred_score <= 1
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
|
| 78 |
blended_image = vis_attn(image, cls_attention_map)
|
| 79 |
|
|
|
|
| 70 |
|
| 71 |
pred_score = float(detection_model(last_hidden_states)[0][0].cpu().detach().numpy())
|
| 72 |
assert 0 <= pred_score <= 1
|
| 73 |
+
|
| 74 |
+
for layer_idx in range(len(outputs.attentions)):
|
| 75 |
+
attn_map = outputs.attentions[layer_idx]
|
| 76 |
+
if layer_idx == 0:
|
| 77 |
+
last_layer_attn = attn_map
|
| 78 |
+
else:
|
| 79 |
+
if layer_idx < 6:
|
| 80 |
+
last_layer_attn += attn_map
|
| 81 |
+
|
| 82 |
+
head_mean_attn = last_layer_attn.mean(dim=1)[0]
|
| 83 |
+
cls_attention_map = head_mean_attn[0, 1:]
|
| 84 |
|
| 85 |
blended_image = vis_attn(image, cls_attention_map)
|
| 86 |
|