Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -300,17 +300,72 @@ def _predict_single_dog(image):
|
|
| 300 |
# dogs.append((cropped_image, confidence, xyxy))
|
| 301 |
# return dogs
|
| 302 |
|
| 303 |
-
async def detect_multiple_dogs(image, conf_threshold=0.2, iou_threshold=0.5):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 304 |
results = model_yolo(image, conf=conf_threshold, iou=iou_threshold)[0]
|
| 305 |
dogs = []
|
|
|
|
|
|
|
|
|
|
| 306 |
for box in results.boxes:
|
| 307 |
if box.cls == 16: # COCO 資料集中狗的類別是 16
|
| 308 |
xyxy = box.xyxy[0].tolist()
|
| 309 |
confidence = box.conf.item()
|
| 310 |
-
|
| 311 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 312 |
return dogs
|
| 313 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 314 |
|
| 315 |
# async def predict(image):
|
| 316 |
# if image is None:
|
|
@@ -432,59 +487,63 @@ async def predict(image):
|
|
| 432 |
if isinstance(image, np.ndarray):
|
| 433 |
image = Image.fromarray(image)
|
| 434 |
|
| 435 |
-
dogs = await detect_multiple_dogs(image, conf_threshold=0.05)
|
| 436 |
|
| 437 |
-
if len(dogs)
|
|
|
|
| 438 |
return await process_single_dog(image)
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
explanations = []
|
| 443 |
-
buttons = []
|
| 444 |
-
annotated_image = image.copy()
|
| 445 |
-
draw = ImageDraw.Draw(annotated_image)
|
| 446 |
-
font = ImageFont.load_default()
|
| 447 |
-
|
| 448 |
-
for i, (cropped_image, _, box) in enumerate(dogs):
|
| 449 |
-
top1_prob, topk_breeds, topk_probs_percent = await predict_single_dog(cropped_image)
|
| 450 |
-
color = color_list[i % len(color_list)]
|
| 451 |
-
draw.rectangle(box, outline=color, width=3)
|
| 452 |
-
draw.text((box[0], box[1]), f"Dog {i+1}", fill=color, font=font)
|
| 453 |
-
|
| 454 |
-
breed = topk_breeds[0]
|
| 455 |
-
if top1_prob >= 0.5:
|
| 456 |
-
description = get_dog_description(breed)
|
| 457 |
-
formatted_description = format_description(description, breed)
|
| 458 |
-
explanations.append(f"Dog {i+1}: {formatted_description}")
|
| 459 |
-
elif top1_prob >= 0.2:
|
| 460 |
-
dog_explanation = f"Dog {i+1}: Top 3 possible breeds:\n"
|
| 461 |
-
dog_explanation += "\n".join([f"{j+1}. **{breed}** ({prob} confidence)" for j, (breed, prob) in enumerate(zip(topk_breeds[:3], topk_probs_percent[:3]))])
|
| 462 |
-
explanations.append(dog_explanation)
|
| 463 |
-
buttons.extend([gr.update(visible=True, value=f"Dog {i+1}: More about {breed}") for breed in topk_breeds[:3]])
|
| 464 |
-
else:
|
| 465 |
-
explanations.append(f"Dog {i+1}: The image is unclear or the breed is not in the dataset.")
|
| 466 |
-
|
| 467 |
-
final_explanation = "\n\n".join(explanations)
|
| 468 |
-
if buttons:
|
| 469 |
-
final_explanation += "\n\nClick on a button to view more information about the breed."
|
| 470 |
-
initial_state = {
|
| 471 |
-
"explanation": final_explanation,
|
| 472 |
-
"buttons": buttons,
|
| 473 |
-
"show_back": True
|
| 474 |
-
}
|
| 475 |
-
return (final_explanation, annotated_image,
|
| 476 |
-
buttons[0] if len(buttons) > 0 else gr.update(visible=False),
|
| 477 |
-
buttons[1] if len(buttons) > 1 else gr.update(visible=False),
|
| 478 |
-
buttons[2] if len(buttons) > 2 else gr.update(visible=False),
|
| 479 |
-
gr.update(visible=True),
|
| 480 |
-
initial_state)
|
| 481 |
else:
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 488 |
|
| 489 |
except Exception as e:
|
| 490 |
error_msg = f"An error occurred: {str(e)}"
|
|
|
|
| 300 |
# dogs.append((cropped_image, confidence, xyxy))
|
| 301 |
# return dogs
|
| 302 |
|
| 303 |
+
# async def detect_multiple_dogs(image, conf_threshold=0.2, iou_threshold=0.5):
|
| 304 |
+
# results = model_yolo(image, conf=conf_threshold, iou=iou_threshold)[0]
|
| 305 |
+
# dogs = []
|
| 306 |
+
# for box in results.boxes:
|
| 307 |
+
# if box.cls == 16: # COCO 資料集中狗的類別是 16
|
| 308 |
+
# xyxy = box.xyxy[0].tolist()
|
| 309 |
+
# confidence = box.conf.item()
|
| 310 |
+
# cropped_image = image.crop((xyxy[0], xyxy[1], xyxy[2], xyxy[3]))
|
| 311 |
+
# dogs.append((cropped_image, confidence, xyxy))
|
| 312 |
+
# return dogs
|
| 313 |
+
# Fallback version kept in case the tuned implementation below does not work out
|
| 314 |
+
|
| 315 |
+
async def detect_multiple_dogs(image, conf_threshold=0.2, iou_threshold=0.45):
    """Detect dogs in *image* with YOLO and return de-duplicated crops.

    Args:
        image: PIL.Image to run detection on.
        conf_threshold: minimum YOLO confidence for a detection.
        iou_threshold: IoU above which two candidate boxes are treated as
            duplicates of the same dog.

    Returns:
        List of ``(cropped_image, confidence, [x1, y1, x2, y2])`` tuples,
        ordered by descending confidence. Empty list if no dog is found
        even after the lower-threshold retry.
    """

    def _collect_dog_boxes(results):
        # Class 16 is "dog" in the COCO label set YOLO was trained on.
        boxes = []
        for box in results.boxes:
            if box.cls == 16:
                boxes.append((box.xyxy[0].tolist(), box.conf.item()))
        return boxes

    def _suppress_and_crop(candidates):
        # Greedy non-maximum suppression: keep boxes highest-confidence
        # first, dropping any later box that overlaps a kept one beyond
        # iou_threshold, and crop the surviving regions.
        kept = []
        for xyxy, confidence in sorted(candidates, key=lambda c: c[1], reverse=True):
            if not is_box_overlapping(xyxy, [d[2] for d in kept], iou_threshold):
                cropped = image.crop((xyxy[0], xyxy[1], xyxy[2], xyxy[3]))
                kept.append((cropped, confidence, xyxy))
        return kept

    results = model_yolo(image, conf=conf_threshold, iou=iou_threshold)[0]
    dogs = _suppress_and_crop(_collect_dog_boxes(results))

    # Nothing found: retry once at half the confidence threshold.
    # Fix: route the retry through the same suppression path — the
    # original fallback appended every box unfiltered, so it could return
    # overlapping duplicate detections of the same dog.
    if not dogs:
        results = model_yolo(image, conf=conf_threshold / 2, iou=iou_threshold)[0]
        dogs = _suppress_and_crop(_collect_dog_boxes(results))

    return dogs
|
| 347 |
|
| 348 |
+
def is_box_overlapping(box, existing_boxes, iou_threshold):
    """Return True iff *box* overlaps any box in *existing_boxes* with an
    IoU strictly greater than *iou_threshold*."""
    return any(
        calculate_iou(box, kept_box) > iou_threshold for kept_box in existing_boxes
    )
|
| 353 |
+
|
| 354 |
+
def calculate_iou(box1, box2):
    """Compute the Intersection-over-Union of two axis-aligned boxes.

    Args:
        box1: ``[x1, y1, x2, y2]`` corner coordinates.
        box2: ``[x1, y1, x2, y2]`` corner coordinates.

    Returns:
        float IoU in ``[0.0, 1.0]``. Returns 0.0 when the union area is
        zero (both boxes degenerate) — the original raised
        ZeroDivisionError in that case.
    """
    # Intersection rectangle; the max(0, ...) terms make it empty when
    # the boxes do not overlap.
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])
    intersection = max(0, x2 - x1) * max(0, y2 - y1)

    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = float(area1 + area2 - intersection)

    # Guard: zero-area union (both boxes degenerate) would divide by zero.
    if union <= 0:
        return 0.0
    return intersection / union
|
| 368 |
+
|
| 369 |
|
| 370 |
# async def predict(image):
|
| 371 |
# if image is None:
|
|
|
|
| 487 |
if isinstance(image, np.ndarray):
|
| 488 |
image = Image.fromarray(image)
|
| 489 |
|
| 490 |
+
dogs = await detect_multiple_dogs(image, conf_threshold=0.05, iou_threshold=0.45)
|
| 491 |
|
| 492 |
+
if len(dogs) == 0:
|
| 493 |
+
# 沒有檢測到狗,使用原始圖像進行單狗處理
|
| 494 |
return await process_single_dog(image)
|
| 495 |
+
elif len(dogs) == 1:
|
| 496 |
+
# 只檢測到一隻狗,使用裁剪後的圖像進行處理
|
| 497 |
+
return await process_single_dog(dogs[0][0])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 498 |
else:
|
| 499 |
+
# 多狗情境
|
| 500 |
+
color_list = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#00FFFF', '#FF00FF', '#800080', '#FFA500']
|
| 501 |
+
explanations = []
|
| 502 |
+
buttons = []
|
| 503 |
+
annotated_image = image.copy()
|
| 504 |
+
draw = ImageDraw.Draw(annotated_image)
|
| 505 |
+
font = ImageFont.load_default()
|
| 506 |
+
|
| 507 |
+
for i, (cropped_image, confidence, box) in enumerate(dogs):
|
| 508 |
+
top1_prob, topk_breeds, topk_probs_percent = await predict_single_dog(cropped_image)
|
| 509 |
+
color = color_list[i % len(color_list)]
|
| 510 |
+
draw.rectangle(box, outline=color, width=3)
|
| 511 |
+
draw.text((box[0], box[1]), f"Dog {i+1}", fill=color, font=font)
|
| 512 |
+
|
| 513 |
+
breed = topk_breeds[0]
|
| 514 |
+
if top1_prob >= 0.5:
|
| 515 |
+
description = get_dog_description(breed)
|
| 516 |
+
formatted_description = format_description(description, breed)
|
| 517 |
+
explanations.append(f"Dog {i+1}: {formatted_description}")
|
| 518 |
+
elif top1_prob >= 0.2:
|
| 519 |
+
dog_explanation = f"Dog {i+1}: Top 3 possible breeds:\n"
|
| 520 |
+
dog_explanation += "\n".join([f"{j+1}. **{breed}** ({prob} confidence)" for j, (breed, prob) in enumerate(zip(topk_breeds[:3], topk_probs_percent[:3]))])
|
| 521 |
+
explanations.append(dog_explanation)
|
| 522 |
+
buttons.extend([gr.update(visible=True, value=f"Dog {i+1}: More about {breed}") for breed in topk_breeds[:3]])
|
| 523 |
+
else:
|
| 524 |
+
explanations.append(f"Dog {i+1}: The image is unclear or the breed is not in the dataset.")
|
| 525 |
+
|
| 526 |
+
final_explanation = "\n\n".join(explanations)
|
| 527 |
+
if buttons:
|
| 528 |
+
final_explanation += "\n\nClick on a button to view more information about the breed."
|
| 529 |
+
initial_state = {
|
| 530 |
+
"explanation": final_explanation,
|
| 531 |
+
"buttons": buttons,
|
| 532 |
+
"show_back": True
|
| 533 |
+
}
|
| 534 |
+
return (final_explanation, annotated_image,
|
| 535 |
+
buttons[0] if len(buttons) > 0 else gr.update(visible=False),
|
| 536 |
+
buttons[1] if len(buttons) > 1 else gr.update(visible=False),
|
| 537 |
+
buttons[2] if len(buttons) > 2 else gr.update(visible=False),
|
| 538 |
+
gr.update(visible=True),
|
| 539 |
+
initial_state)
|
| 540 |
+
else:
|
| 541 |
+
initial_state = {
|
| 542 |
+
"explanation": final_explanation,
|
| 543 |
+
"buttons": [],
|
| 544 |
+
"show_back": False
|
| 545 |
+
}
|
| 546 |
+
return final_explanation, annotated_image, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), initial_state
|
| 547 |
|
| 548 |
except Exception as e:
|
| 549 |
error_msg = f"An error occurred: {str(e)}"
|