Spaces:
Sleeping
Sleeping
Sergey Kolbin
commited on
Commit
·
9b4c792
1
Parent(s):
0c726f6
init
Browse files- app.py +114 -0
- requirements.txt +7 -0
app.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from transformers import pipeline
|
| 3 |
+
from PIL import Image, ImageDraw, ImageFont
|
| 4 |
+
from collections import defaultdict
|
| 5 |
+
|
| 6 |
+
# 1) Zero-shot object detector, built once at import time (works on CPU Spaces).
# For higher accuracy (at the cost of speed) the model can be swapped for
# "google/owlv2-base-patch16-ensemble".
detector = pipeline(
    "zero-shot-object-detection",
    model="google/owlvit-base-patch32",  # fast & lightweight
)
|
| 12 |
+
|
| 13 |
+
# Candidate labels passed to the detector. They are kept explicit so the model
# can choose the right class; synonyms such as "Bengal tiger" or
# "African lion" could be added here.
LABELS = ["tiger", "lion"]

# Drawing colour for each (lower-cased) label.
COLOR_BY_LABEL = {"tiger": "red", "lion": "blue"}
|
| 21 |
+
|
| 22 |
+
def iou(box_a, box_b):
    """Return the intersection-over-union of two axis-aligned boxes.

    Each box is a mapping with "xmin", "ymin", "xmax" and "ymax" entries.
    A tiny epsilon is added to the denominator so that a zero union does
    not raise ZeroDivisionError.
    """
    # Corners of the overlapping rectangle, if there is one.
    x_lo = max(box_a["xmin"], box_b["xmin"])
    y_lo = max(box_a["ymin"], box_b["ymin"])
    x_hi = min(box_a["xmax"], box_b["xmax"])
    y_hi = min(box_a["ymax"], box_b["ymax"])

    # Clamp each side to zero so disjoint boxes yield no overlap.
    overlap = max(0.0, x_hi - x_lo) * max(0.0, y_hi - y_lo)

    def _area(box):
        return (box["xmax"] - box["xmin"]) * (box["ymax"] - box["ymin"])

    union = _area(box_a) + _area(box_b) - overlap + 1e-9
    return overlap / union
|
| 34 |
+
|
| 35 |
+
def nms_single_class(dets, iou_thresh=0.5):
    """Greedy non-maximum suppression over detections of one class.

    ``dets`` is a list of dicts with keys {"box": {...}, "score": float,
    "label": str}. Detections are visited from highest to lowest score; a
    detection is kept only if its IoU with every already-kept detection is
    below ``iou_thresh``. The input list is not modified.

    Returns the kept detections, ordered by descending score.
    """
    ranked = sorted(dets, key=lambda det: det["score"], reverse=True)
    survivors = []
    for candidate in ranked:
        clear_of_all = all(
            iou(winner["box"], candidate["box"]) < iou_thresh
            for winner in survivors
        )
        if clear_of_all:
            survivors.append(candidate)
    return survivors
|
| 44 |
+
|
| 45 |
+
def class_aware_nms(dets, iou_thresh=0.5):
    """Apply non-maximum suppression independently for each label.

    Detections are grouped by their lower-cased "label" and each group is
    passed to ``nms_single_class`` separately, so lions don't suppress
    tigers (and vice versa).

    Args:
        dets: iterable of detection dicts with at least "label", "score"
            and "box" entries.
        iou_thresh: IoU threshold forwarded to ``nms_single_class``.

    Returns:
        A list with the surviving detections of every label, grouped by
        label in order of each label's first appearance in ``dets``.
    """
    by_label = defaultdict(list)
    for det in dets:
        by_label[det["label"].lower()].append(det)

    merged = []
    # Only the grouped detections are needed here, not the label keys.
    for per_class in by_label.values():
        merged.extend(nms_single_class(per_class, iou_thresh=iou_thresh))
    return merged
|
| 54 |
+
|
| 55 |
+
def annotate(img, dets):
    """Draw a coloured box and a "label score" caption for each detection.

    Args:
        img: PIL image to draw on; it is modified in place.
        dets: iterable of dicts with "box" (xmin/ymin/xmax/ymax), "score"
            and "label" entries.

    Returns:
        The same ``img`` object that was passed in.
    """
    draw = ImageDraw.Draw(img)
    try:
        font = ImageFont.truetype("DejaVuSans.ttf", 14)
    except (OSError, ImportError):
        # Font file (or FreeType support) unavailable: with font=None
        # Pillow falls back to its default font.
        font = None

    label_h = 18  # height of the caption strip, in pixels
    pad = 3
    for det in dets:
        box = det["box"]
        # Unknown labels are drawn in red.
        color = COLOR_BY_LABEL.get(det["label"].lower(), "red")
        draw.rectangle(
            [(box["xmin"], box["ymin"]), (box["xmax"], box["ymax"])],
            outline=color,
            width=3,
        )

        txt = f"{det['label']} {det['score']:.2f}"
        # Estimate text width
        try:
            txt_w = draw.textlength(txt, font=font)
        except AttributeError:
            # Presumably a Pillow build without ImageDraw.textlength;
            # fall back to a rough per-character estimate.
            txt_w = 8 * len(txt)

        # The caption normally sits just above the box. When the box is
        # closer than label_h to the top edge, clamp the strip to y=0 so
        # the caption stays visible instead of being drawn off-canvas.
        label_top = max(0, box["ymin"] - label_h)
        draw.rectangle(
            [
                (box["xmin"], label_top),
                (box["xmin"] + txt_w + 2 * pad, label_top + label_h),
            ],
            fill=color,
        )
        draw.text((box["xmin"] + pad, label_top + 2), txt, fill="white", font=font)
    return img
|
| 78 |
+
|
| 79 |
+
def count_big_cats(img, score_threshold, iou_threshold):
    """Detect tigers and lions in ``img`` and return counts plus an annotated copy.

    Args:
        img: input PIL image, or None when no image was supplied.
        score_threshold: minimum detection score to keep a prediction.
        iou_threshold: IoU threshold used for per-class NMS.

    Returns:
        A tuple ``(tiger_count, lion_count, total_count, annotated_image)``.
        When ``img`` is None the counts are all 0 and the image is None.
    """
    # The UI callback can fire before anything is uploaded, in which case
    # the image input is None; report zero cats instead of crashing inside
    # the detector.
    if img is None:
        return 0, 0, 0, None

    # 2) Run zero-shot detection with both labels
    preds = detector(img, candidate_labels=LABELS)

    # 3) Keep only our labels and apply score filter
    preds = [
        p for p in preds
        if p["label"].lower() in LABELS and p["score"] >= score_threshold
    ]

    # 4) Class-aware NMS
    preds = class_aware_nms(preds, iou_thresh=iou_threshold)

    # 5) Prepare counts
    tiger_count = sum(1 for p in preds if p["label"].lower() == "tiger")
    lion_count = sum(1 for p in preds if p["label"].lower() == "lion")
    total_count = tiger_count + lion_count

    # 6) Draw boxes on a copy so the caller's image is left untouched
    img_annotated = annotate(img.copy(), preds)
    return tiger_count, lion_count, total_count, img_annotated
|
| 97 |
+
|
| 98 |
+
# Gradio UI: the first column holds the inputs and the trigger button, the
# second column shows the three counts and the annotated image.
with gr.Blocks(title="Big Cat Counter") as demo:
    gr.Markdown(
        "# 🐯🦁 Big Cat Counter\n"
        "Upload an image and I’ll count how many **tigers** and **lions** I see."
    )
    with gr.Row():
        with gr.Column():
            inp = gr.Image(type="pil", label="Input image")
            score_th = gr.Slider(
                0.05, 0.95, value=0.20, step=0.05, label="Score threshold"
            )
            iou_th = gr.Slider(
                0.1, 0.9, value=0.50, step=0.05, label="IOU (NMS) threshold"
            )
            btn = gr.Button("Count Big Cats")
        with gr.Column():
            out_tiger = gr.Number(label="Tiger count", precision=0)
            out_lion = gr.Number(label="Lion count", precision=0)
            out_total = gr.Number(label="Total big cats", precision=0)
            out_img = gr.Image(label="Annotated output")

    # Wire the button to the counting function; output order matches the
    # tuple returned by count_big_cats.
    btn.click(
        fn=count_big_cats,
        inputs=[inp, score_th, iou_th],
        outputs=[out_tiger, out_lion, out_total, out_img],
    )

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
transformers>=4.41.0
|
| 2 |
+
huggingface_hub>=0.23.0
|
| 3 |
+
torch
|
| 4 |
+
gradio>=4.0.0
|
| 5 |
+
pillow
|
| 6 |
+
safetensors
|
| 7 |
+
accelerate
|