Spaces: Running on Zero
Update app.py
app.py CHANGED

@@ -503,9 +503,6 @@ import traceback
 # iface.launch()


-model_yolo = YOLO('yolov8l.pt')
-
-history_manager = UserHistoryManager()

 dog_breeds = ["Afghan_Hound", "African_Hunting_Dog", "Airedale", "American_Staffordshire_Terrier",
               "Appenzeller", "Australian_Terrier", "Bedlington_Terrier", "Bernese_Mountain_Dog", "Bichon_Frise",
@@ -537,6 +534,8 @@ dog_breeds = ["Afghan_Hound", "African_Hunting_Dog", "Airedale", "American_Staff

 device_mgr = DeviceManager()

+history_manager = UserHistoryManager()
+
 class MultiHeadAttention(nn.Module):

     def __init__(self, in_dim, num_heads=8):
@@ -597,15 +596,18 @@ num_classes = len(dog_breeds)

 # Initialize base model
 model = BaseModel(num_classes=num_classes)
-
+model = device_mgr.to_device(model)
 # Load model path
 model_path = '124_best_model_dog.pth'
-checkpoint = torch.load(model_path, map_location=device_mgr.
+checkpoint = torch.load(model_path, map_location=device_mgr.get_device(), weights_only=True)

 # Load model state
 model.load_state_dict(checkpoint['base_model'], strict=False)
 model.eval()

+model_yolo = YOLO('yolov8l.pt')
+model_yolo = device_mgr.to_device(model_yolo)
+
 # Image preprocessing function
 def preprocess_image(image):
     # If the image is numpy.ndarray turn into PIL.Image
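
Note on the hunk above: `weights_only=True` restricts `torch.load` to deserialising tensors and primitive containers rather than arbitrary pickled objects, and `device_mgr` is a `DeviceManager` instance whose definition sits outside the changed lines. A minimal sketch of the interface these calls appear to assume; the constructor logic and CPU fallback are assumptions, not the Space's actual implementation:

import torch

class DeviceManager:
    """Hypothetical sketch: resolve a torch device once and move objects onto it."""

    def __init__(self):
        # Assumption: prefer CUDA when available, otherwise fall back to CPU.
        self._current_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def get_device(self):
        return self._current_device

    def to_device(self, obj):
        # nn.Module, torch.Tensor and the Ultralytics YOLO wrapper all expose .to();
        # anything without it is returned unchanged.
        return obj.to(self._current_device) if hasattr(obj, 'to') else obj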
@@ -621,74 +623,59 @@ def preprocess_image(image):

     return transform(image).unsqueeze(0)

+
+@adaptive_gpu(duration=30)
 async def predict_single_dog(image):
-    """
-
-
-
-
-
-
-
-        probs = F.softmax(logits, dim=1)
-
-        top5_prob, top5_idx = torch.topk(probs, k=5)
-        breeds = [dog_breeds[idx.item()] for idx in top5_idx[0]]
-        probabilities = [prob.item() for prob in top5_prob[0]]
-
-        sum_probs = sum(probabilities[:3])
-        relative_probs = [f"{(prob/sum_probs * 100):.2f}%" for prob in probabilities[:3]]
-
-        print("\nClassifier Predictions:")
-        for breed, prob in zip(breeds[:5], probabilities[:5]):
-            print(f"{breed}: {prob:.4f}")
-
-        return probabilities[0], breeds[:3], relative_probs
-
-    except RuntimeError as e:
-        if "out of memory" in str(e):
-            logger.warning("GPU memory exceeded, falling back to CPU")
-            device_mgr._current_device = torch.device('cpu')
-            return await predict_single_dog(image)
-        raise e
+    """Prediction function for a single dog"""
+    image_tensor = preprocess_image(image)
+    image_tensor = device_mgr.to_device(image_tensor)
+
+    with torch.no_grad():
+        outputs = model(image_tensor)
+        logits = outputs[0] if isinstance(outputs, tuple) else outputs
+        probs = F.softmax(logits, dim=1)
+
+        top5_prob, top5_idx = torch.topk(probs, k=5)
+        breeds = [dog_breeds[idx.item()] for idx in top5_idx[0]]
+        probabilities = [prob.item() for prob in top5_prob[0]]
+
+        sum_probs = sum(probabilities[:3])
+        relative_probs = [f"{(prob/sum_probs * 100):.2f}%" for prob in probabilities[:3]]
+
+        print("\nClassifier Predictions:")
+        for breed, prob in zip(breeds[:5], probabilities[:5]):
+            print(f"{breed}: {prob:.4f}")
+
+        return probabilities[0], breeds[:3], relative_probs

-
-
-
-
-
-    for box in results.boxes:
-        if box.cls == 16:  # COCO dataset class for dog is 16
-            xyxy = box.xyxy[0].tolist()
-            confidence = box.conf.item()
-            boxes.append((xyxy, confidence))
-
-    if not boxes:
-        dogs.append((image, 1.0, [0, 0, image.width, image.height]))
-    else:
-        nms_boxes = non_max_suppression(boxes, iou_threshold)
-
-        for box, confidence in nms_boxes:
-            x1, y1, x2, y2 = box
-            w, h = x2 - x1, y2 - y1
-            x1 = max(0, x1 - w * 0.05)
-            y1 = max(0, y1 - h * 0.05)
-            x2 = min(image.width, x2 + w * 0.05)
-            y2 = min(image.height, y2 + h * 0.05)
-            cropped_image = image.crop((x1, y1, x2, y2))
-            dogs.append((cropped_image, confidence, [x1, y1, x2, y2]))
-
-    return dogs
+@adaptive_gpu(duration=30)
+async def detect_multiple_dogs(image, conf_threshold=0.3, iou_threshold=0.55):
+    """Prediction function for multiple dogs"""
+    results = model_yolo(image, conf=conf_threshold, iou=iou_threshold)[0]
+    dogs = []
+    boxes = []
+    for box in results.boxes:
+        if box.cls == 16:  # COCO dataset class for dog is 16
+            xyxy = box.xyxy[0].tolist()
+            confidence = box.conf.item()
+            boxes.append((xyxy, confidence))
+
+    if not boxes:
+        dogs.append((image, 1.0, [0, 0, image.width, image.height]))
+    else:
+        nms_boxes = non_max_suppression(boxes, iou_threshold)
+
+        for box, confidence in nms_boxes:
+            x1, y1, x2, y2 = box
+            w, h = x2 - x1, y2 - y1
+            x1 = max(0, x1 - w * 0.05)
+            y1 = max(0, y1 - h * 0.05)
+            x2 = min(image.width, x2 + w * 0.05)
+            y2 = min(image.height, y2 + h * 0.05)
+            cropped_image = image.crop((x1, y1, x2, y2))
+            dogs.append((cropped_image, confidence, [x1, y1, x2, y2]))
+
+    return dogs

-
-
-
-
-
-
-


 def non_max_suppression(boxes, iou_threshold):
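
Both new functions are decorated with `@adaptive_gpu(duration=30)`, which is not defined anywhere in this diff. On ZeroGPU hardware the usual building block for such a wrapper is `spaces.GPU(duration=...)`; a hedged sketch of one way the decorator could be written, with the name `adaptive_gpu` and the local fallback being assumptions about the rest of app.py:

try:
    import spaces  # ZeroGPU runtime available on Hugging Face Spaces hardware
except ImportError:
    spaces = None  # e.g. running locally without ZeroGPU

def adaptive_gpu(duration=30):
    """Hypothetical sketch: request ZeroGPU time when available, no-op otherwise."""
    def decorator(fn):
        if spaces is not None:
            # spaces.GPU(duration=...) returns a decorator that schedules GPU time.
            return spaces.GPU(duration=duration)(fn)
        return fn  # plain pass-through when no ZeroGPU runtime is present
    return decorator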
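
The new `detect_multiple_dogs` hands its `(xyxy, confidence)` pairs to `non_max_suppression`, whose definition appears in this diff only as the trailing context line. For readability, a sketch of a standard greedy NMS over such pairs, assuming each box is a plain [x1, y1, x2, y2] list; the Space's own helper may differ:

def non_max_suppression(boxes, iou_threshold):
    """Greedy NMS over [(xyxy, confidence), ...] pairs (illustrative sketch)."""
    def iou(a, b):
        # Intersection-over-union of two [x1, y1, x2, y2] boxes.
        x1, y1 = max(a[0], b[0]), max(a[1], b[1])
        x2, y2 = min(a[2], b[2]), min(a[3], b[3])
        inter = max(0, x2 - x1) * max(0, y2 - y1)
        area_a = (a[2] - a[0]) * (a[3] - a[1])
        area_b = (b[2] - b[0]) * (b[3] - b[1])
        return inter / (area_a + area_b - inter + 1e-9)

    kept = []
    # Consider boxes in descending confidence order, dropping heavy overlaps.
    for box, conf in sorted(boxes, key=lambda b: b[1], reverse=True):
        if all(iou(box, k[0]) < iou_threshold for k in kept):
            kept.append((box, conf))
    return kept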
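
For context, one way the two async functions might be chained from a synchronous caller such as a Gradio event handler; the handler name and flow below are purely illustrative assumptions and not part of this commit:

import asyncio

def classify_upload(image):
    """Hypothetical caller: detect dogs first, then classify each crop."""
    async def run():
        results = []
        for cropped, det_conf, box in await detect_multiple_dogs(image):
            top_prob, top3_breeds, relative_probs = await predict_single_dog(cropped)
            results.append((box, det_conf, top3_breeds, relative_probs, top_prob))
        return results
    return asyncio.run(run())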