Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -6,7 +6,7 @@ import gradio as gr
 from torchvision.models import efficientnet_v2_m, EfficientNet_V2_M_Weights
 import torch.nn.functional as F
 from torchvision import transforms
-from PIL import Image, ImageDraw, ImageFont
+from PIL import Image, ImageDraw, ImageFont, ImageFilter
 from data_manager import get_dog_description
 from urllib.parse import quote
 from ultralytics import YOLO
@@ -312,7 +312,7 @@ def _predict_single_dog(image):
     # return dogs
     # Fallback version to use if the later tuning doesn't work out
 
-async def detect_multiple_dogs(image, conf_threshold=0.2, iou_threshold=0.3):
+async def detect_multiple_dogs(image, conf_threshold=0.1, iou_threshold=0.3):
     results = model_yolo(image, conf=conf_threshold, iou=iou_threshold)[0]
     dogs = []
     for box in results.boxes:
@@ -321,7 +321,7 @@ async def detect_multiple_dogs(image, conf_threshold=0.2, iou_threshold=0.3):
         confidence = box.conf.item()
         area = (xyxy[2] - xyxy[0]) * (xyxy[3] - xyxy[1])
         image_area = image.width * image.height
-        if area > 0.
+        if area > 0.005 * image_area:  # lower the area threshold to detect more dogs
             cropped_image = image.crop((xyxy[0], xyxy[1], xyxy[2], xyxy[3]))
             dogs.append((cropped_image, confidence, xyxy))
 
@@ -334,7 +334,7 @@ async def detect_multiple_dogs(image, conf_threshold=0.2, iou_threshold=0.3):
         confidence = box.conf.item()
         area = (xyxy[2] - xyxy[0]) * (xyxy[3] - xyxy[1])
         image_area = image.width * image.height
-        if area > 0.
+        if area > 0.005 * image_area and not is_box_duplicate(xyxy, [d[2] for d in dogs]):
             cropped_image = image.crop((xyxy[0], xyxy[1], xyxy[2], xyxy[3]))
             dogs.append((cropped_image, confidence, xyxy))
 
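For reference, both hunks above replace the truncated absolute check with an area threshold relative to the whole image. A minimal standalone sketch of that filter (the helper name and the example numbers are illustrative; only the 0.005 ratio comes from the new code):

# Sketch of the relative-area filter applied to each detection; not part of app.py.
def keep_box(xyxy, image_width, image_height, min_area_ratio=0.005):
    box_area = (xyxy[2] - xyxy[0]) * (xyxy[3] - xyxy[1])
    image_area = image_width * image_height
    return box_area > min_area_ratio * image_area

# A 100x80 box covers 0.8% of a 1000x1000 image, above the 0.5% cutoff, so it is kept.
print(keep_box((10, 10, 110, 90), 1000, 1000))  # True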
@@ -347,7 +347,6 @@ def is_box_duplicate(new_box, existing_boxes, iou_threshold=0.5):
     return False
 
 def calculate_iou(box1, box2):
-    # Compute the intersection area of the two bounding boxes
     x1 = max(box1[0], box2[0])
     y1 = max(box1[1], box2[1])
     x2 = min(box1[2], box2[2])
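The hunk above only shows the opening of calculate_iou. A standard IoU computation of this shape would continue roughly as follows; the body beyond the intersection corners is an assumption, not copied from app.py:

# Sketch of a complete IoU function in the same style as the snippet above.
def calculate_iou(box1, box2):
    # Intersection rectangle corners
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])
    intersection = max(0, x2 - x1) * max(0, y2 - y1)
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = area1 + area2 - intersection
    return intersection / union if union > 0 else 0.0

# Two half-overlapping 2x2 boxes give IoU = 1/3.
print(round(calculate_iou((0, 0, 2, 2), (1, 0, 3, 2)), 3))  # 0.333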
@@ -473,7 +472,6 @@ def calculate_iou(box1, box2):
 # if __name__ == "__main__":
 # iface.launch()
 
-
 async def predict(image):
     if image is None:
         return "Please upload an image to start.", None, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), None
@@ -482,65 +480,15 @@ async def predict(image):
         if isinstance(image, np.ndarray):
             image = Image.fromarray(image)
 
-
-
-
+        # First use YOLO to detect whether there are multiple dogs
+        dogs = await detect_multiple_dogs(image)
+
+        if len(dogs) <= 1:
+            # Single-dog case, or no dog detected: predict on the whole image
             return await process_single_dog(image)
-        elif len(dogs) == 1:
-            # Only one dog was detected, but the image may contain more; try detection again
-            if has_multiple_dogs(image):
-                dogs = await detect_multiple_dogs(image, conf_threshold=0.1, iou_threshold=0.2)
-                if len(dogs) == 1:
-                    return await process_single_dog(dogs[0][0])
         else:
-
-
-            explanations = []
-            buttons = []
-            annotated_image = image.copy()
-            draw = ImageDraw.Draw(annotated_image)
-            font = ImageFont.load_default()
-
-            for i, (cropped_image, confidence, box) in enumerate(dogs):
-                top1_prob, topk_breeds, topk_probs_percent = await predict_single_dog(cropped_image)
-                color = color_list[i % len(color_list)]
-                draw.rectangle(box, outline=color, width=3)
-                draw.text((box[0], box[1]), f"Dog {i+1}", fill=color, font=font)
-
-                breed = topk_breeds[0]
-                if top1_prob >= 0.5:
-                    description = get_dog_description(breed)
-                    formatted_description = format_description(description, breed)
-                    explanations.append(f"Dog {i+1}: {formatted_description}")
-                elif top1_prob >= 0.2:
-                    dog_explanation = f"Dog {i+1}: Top 3 possible breeds:\n"
-                    dog_explanation += "\n".join([f"{j+1}. **{breed}** ({prob} confidence)" for j, (breed, prob) in enumerate(zip(topk_breeds[:3], topk_probs_percent[:3]))])
-                    explanations.append(dog_explanation)
-                    buttons.extend([gr.update(visible=True, value=f"Dog {i+1}: More about {breed}") for breed in topk_breeds[:3]])
-                else:
-                    explanations.append(f"Dog {i+1}: The image is unclear or the breed is not in the dataset.")
-
-            final_explanation = "\n\n".join(explanations)
-            if buttons:
-                final_explanation += "\n\nClick on a button to view more information about the breed."
-                initial_state = {
-                    "explanation": final_explanation,
-                    "buttons": buttons,
-                    "show_back": True
-                }
-                return (final_explanation, annotated_image,
-                        buttons[0] if len(buttons) > 0 else gr.update(visible=False),
-                        buttons[1] if len(buttons) > 1 else gr.update(visible=False),
-                        buttons[2] if len(buttons) > 2 else gr.update(visible=False),
-                        gr.update(visible=True),
-                        initial_state)
-            else:
-                initial_state = {
-                    "explanation": final_explanation,
-                    "buttons": [],
-                    "show_back": False
-                }
-                return final_explanation, annotated_image, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), initial_state
+            # Multi-dog case
+            return await process_multiple_dogs(image, dogs)
 
     except Exception as e:
         error_msg = f"An error occurred: {str(e)}"
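The net effect of this hunk is that predict now only routes: at most one detection goes to process_single_dog, anything more goes to the new process_multiple_dogs. A stub-based sketch of that control flow (the *_stub functions stand in for the real detection and classification calls):

import asyncio

# Stubs standing in for the real YOLO detection and breed-classification calls;
# only the routing logic mirrors the new predict().
async def detect_multiple_dogs_stub(image):
    return [("crop_a", 0.9, (0, 0, 50, 50)), ("crop_b", 0.8, (60, 0, 110, 50))]

async def process_single_dog_stub(image):
    return "single-dog path"

async def process_multiple_dogs_stub(image, dogs):
    return f"multi-dog path with {len(dogs)} dogs"

async def predict_stub(image):
    dogs = await detect_multiple_dogs_stub(image)
    if len(dogs) <= 1:
        # Single dog, or nothing detected: classify the whole image.
        return await process_single_dog_stub(image)
    # Two or more detections: annotate and classify each crop.
    return await process_multiple_dogs_stub(image, dogs)

print(asyncio.run(predict_stub("dummy image")))  # multi-dog path with 2 dogs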
@@ -548,23 +496,14 @@ async def predict(image):
         return error_msg, None, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), None
 
 def has_multiple_dogs(image):
-    # Use a simple heuristic to check whether the image may contain multiple dogs
-    # A more sophisticated approach, such as feature extraction or a lightweight model, could be used here
     gray = image.convert('L')
     edges = gray.filter(ImageFilter.FIND_EDGES)
     edge_pixels = np.array(edges)
-    return np.sum(edge_pixels > 128) > image.width * image.height * 0.1
+    return np.sum(edge_pixels > 128) > image.width * image.height * 0.1
 
 async def process_single_dog(image):
     top1_prob, topk_breeds, topk_probs_percent = await predict_single_dog(image)
-
-    initial_state = {
-        "explanation": "The image is unclear or the breed is not in the dataset. Please upload a clearer image of a dog.",
-        "buttons": [],
-        "show_back": False
-    }
-    return initial_state["explanation"], None, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), initial_state
-
+
     breed = topk_breeds[0]
     description = get_dog_description(breed)
 
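The comments removed above described has_multiple_dogs as a simple edge-density heuristic. The retained function can be exercised on its own roughly like this (the synthetic striped test image is purely illustrative):

import numpy as np
from PIL import Image, ImageDraw, ImageFilter

def has_multiple_dogs(image):
    # Flag images whose strong-edge pixel count exceeds 10% of the pixel count.
    gray = image.convert('L')
    edges = gray.filter(ImageFilter.FIND_EDGES)
    edge_pixels = np.array(edges)
    return np.sum(edge_pixels > 128) > image.width * image.height * 0.1

# A densely striped pattern produces many edge pixels and should trip the 10% threshold.
img = Image.new('RGB', (64, 64), 'white')
draw = ImageDraw.Draw(img)
for x in range(0, 64, 4):
    draw.line([(x, 0), (x, 63)], fill='black', width=1)
print(has_multiple_dogs(img))  # True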
@@ -576,7 +515,7 @@ async def process_single_dog(image):
             "show_back": False
         }
         return formatted_description, image, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), initial_state
-
+    elif top1_prob >= 0.2:
         explanation = (
             f"The model couldn't confidently identify the breed. Here are the top 3 possible breeds:\n\n"
             f"1. **{topk_breeds[0]}** ({topk_probs_percent[0]} confidence)\n"
@@ -595,20 +534,74 @@ async def process_single_dog(image):
             "show_back": True
         }
         return explanation, image, buttons[0], buttons[1], buttons[2], gr.update(visible=True), initial_state
+    else:
+        initial_state = {
+            "explanation": "The image is unclear or the breed is not in the dataset. Please upload a clearer image of a dog.",
+            "buttons": [],
+            "show_back": False
+        }
+        return initial_state["explanation"], None, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), initial_state
+
+async def process_multiple_dogs(image, dogs):
+    color_list = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#00FFFF', '#FF00FF', '#800080', '#FFA500']
+    explanations = []
+    buttons = []
+    annotated_image = image.copy()
+    draw = ImageDraw.Draw(annotated_image)
+    font = ImageFont.load_default()
+
+    for i, (cropped_image, _, box) in enumerate(dogs):
+        top1_prob, topk_breeds, topk_probs_percent = await predict_single_dog(cropped_image)
+        color = color_list[i % len(color_list)]
+        draw.rectangle(box, outline=color, width=3)
+        draw.text((box[0], box[1]), f"Dog {i+1}", fill=color, font=font)
+
+        breed = topk_breeds[0]
+        if top1_prob >= 0.5:
+            description = get_dog_description(breed)
+            formatted_description = format_description(description, breed)
+            explanations.append(f"Dog {i+1}: {formatted_description}")
+        elif top1_prob >= 0.2:
+            dog_explanation = f"Dog {i+1}: Top 3 possible breeds:\n"
+            dog_explanation += "\n".join([f"{j+1}. **{breed}** ({prob} confidence)" for j, (breed, prob) in enumerate(zip(topk_breeds[:3], topk_probs_percent[:3]))])
+            explanations.append(dog_explanation)
+            buttons.extend([gr.update(visible=True, value=f"Dog {i+1}: More about {breed}") for breed in topk_breeds[:3]])
+        else:
+            explanations.append(f"Dog {i+1}: The image is unclear or the breed is not in the dataset.")
+
+    final_explanation = "\n\n".join(explanations)
+    if buttons:
+        final_explanation += "\n\nClick on a button to view more information about the breed."
+        initial_state = {
+            "explanation": final_explanation,
+            "buttons": buttons,
+            "show_back": True
+        }
+        return (final_explanation, annotated_image,
+                buttons[0] if len(buttons) > 0 else gr.update(visible=False),
+                buttons[1] if len(buttons) > 1 else gr.update(visible=False),
+                buttons[2] if len(buttons) > 2 else gr.update(visible=False),
+                gr.update(visible=True),
+                initial_state)
+    else:
+        initial_state = {
+            "explanation": final_explanation,
+            "buttons": [],
+            "show_back": False
+        }
+        return final_explanation, annotated_image, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), initial_state
 
-def show_details(choice, previous_output
+def show_details(choice, previous_output):
     if not choice:
-        return previous_output, gr.update(visible=True)
+        return previous_output, gr.update(visible=True)
 
     try:
-        breed = choice.split("More about ")[-1]
+        breed = choice.split("More about ")[-1].split(": ")[-1]
         description = get_dog_description(breed)
-
-        return formatted_description, gr.update(visible=True), initial_state
+        return format_description(description, breed), gr.update(visible=True)
     except Exception as e:
-
-
-        return error_msg, gr.update(visible=True), initial_state
+        return f"An error occurred while showing details: {e}", gr.update(visible=True)
+
 
 # Interface section
 with gr.Blocks() as iface:
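The updated show_details parses the breed back out of the button label, which in the multi-dog case now carries a "Dog N:" prefix. A quick check of that parsing (the labels are made-up examples):

# Keeps the text after the last "More about ", then after the last ": "
# (a no-op when no colon remains), so both label styles resolve to the breed.
def breed_from_label(choice):
    return choice.split("More about ")[-1].split(": ")[-1]

print(breed_from_label("Dog 2: More about Beagle"))   # Beagle
print(breed_from_label("More about Border Collie"))   # Border Collie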
|