JointTaggerProject-Inference-Beta-AttnVis

Sleeping

drhead committed on May 1

Commit

ff9a3d4

verified ·

1 Parent(s): 3b3f560

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -11,6 +11,8 @@ from torchvision.transforms import transforms
 from torchvision.transforms import InterpolationMode
 import torchvision.transforms.functional as TF
 from huggingface_hub import hf_hub_download
 class Fit(torch.nn.Module):
     def __init__(
@@ -198,7 +200,8 @@ def hook_forward(module, input, output):
 def hook_backward(module, grad_in, grad_out):
     gradients['value'] = grad_out[0]
-def cam_inference(target_tag, threshold):
     print(f"target_tag: {target_tag}")
     global input_image, sorted_tag_score, target_tag_index, gradients, activations
     img = input_image
@@ -268,7 +271,7 @@ def create_cam_visualization_pil(cam, alpha=0.6, vis_threshold=0.2):
     w, h = image_pil.size
     # Resize CAM to match image
-    cam_resized = np.array(Image.fromarray(cam).resize((w, h), resample=Image.BILINEAR))
     # Normalize CAM to [0, 1]
     cam_norm = (cam_resized - cam_resized.min()) / (cam_resized.ptp() + 1e-8)
@@ -335,7 +338,7 @@ with gr.Blocks(css=".output-class { display: none; }") as demo:
     label_box.select(
         fn=cam_inference,
-        inputs=[label_box, threshold_slider],
         outputs=[image_input]
     )

 from torchvision.transforms import InterpolationMode
 import torchvision.transforms.functional as TF
 from huggingface_hub import hf_hub_download
+import numpy as np
+import matplotlib.cm as cm
 class Fit(torch.nn.Module):
     def __init__(
 def hook_backward(module, grad_in, grad_out):
     gradients['value'] = grad_out[0]
+def cam_inference(threshold, evt: gr.SelectData):
+    target_tag = evt.value
     print(f"target_tag: {target_tag}")
     global input_image, sorted_tag_score, target_tag_index, gradients, activations
     img = input_image
     w, h = image_pil.size
     # Resize CAM to match image
+    cam_resized = np.array(Image.fromarray(cam).resize((w, h), resample=Image.Resampling.BILINEAR))
     # Normalize CAM to [0, 1]
     cam_norm = (cam_resized - cam_resized.min()) / (cam_resized.ptp() + 1e-8)
     label_box.select(
         fn=cam_inference,
+        inputs=[threshold_slider],
         outputs=[image_input]
     )