Spaces:

rdesai2
/

FaceEnhance

Paused

App Files Files Community

Rishi Desai committed on Apr 16

Commit

8308bbd

1 Parent(s): 632672e

init dump

Browse files

Files changed (3) hide show

.env +0 -0
main.py +43 -0
utils.py +190 -0

.env ADDED Viewed

File without changes

main.py ADDED Viewed

	@@ -0,0 +1,43 @@

+import argparse
+import os
+from utils import crop_face, upscale_image
+def parse_args():
+    parser = argparse.ArgumentParser(description='Face Enhancement Tool')
+    parser.add_argument('--input', type=str, required=True, help='Path to the input image')
+    parser.add_argument('--crop', action='store_true', help='Whether to crop the image')
+    parser.add_argument('--upscale', action='store_true', help='Whether to upscale the image')
+    parser.add_argument('--output', type=str, required=True, help='Path to save the output image')
+    args = parser.parse_args()
+    # Validate input file exists
+    if not os.path.exists(args.input):
+        parser.error(f"Input file does not exist: {args.input}")
+    # Validate output directory exists
+    output_dir = os.path.dirname(args.output)
+    if output_dir and not os.path.exists(output_dir):
+        parser.error(f"Output directory does not exist: {output_dir}")
+    return args
+def main():
+    args = parse_args()
+    print(f"Processing image: {args.input}")
+    print(f"Crop enabled: {args.crop}")
+    print(f"Upscale enabled: {args.upscale}")
+    print(f"Output will be saved to: {args.output}")
+    face_image = args.input
+    if args.crop:
+        crop_face(args.input, "./scratch/cropped_face.png")
+        face_image = "./scratch/cropped_face.png"
+    if args.upscale:
+        upscale_image(face_image, "./scratch/upscaled_face.png")
+        face_image = "./scratch/upscaled_face.png"
+if __name__ == "__main__":
+    main()

utils.py ADDED Viewed

	@@ -0,0 +1,190 @@

+import os
+import torch
+import numpy as np
+from PIL import Image
+import sys
+import cv2
+import base64
+import aiohttp
+from fal import Client as FalClient
+sys.path.append('./ComfyUI_AutoCropFaces')
+from dotenv import load_dotenv
+load_dotenv()
+from Pytorch_Retinaface.pytorch_retinaface import Pytorch_RetinaFace
+from transformers import AutoProcessor, AutoModelForCausalLM
+from transformers import CLIPProcessor, CLIPModel
+import gc
+CACHE_DIR = '/workspace/huggingface_cache'
+os.environ["HF_HOME"] = CACHE_DIR
+os.makedirs(CACHE_DIR, exist_ok=True)
+device = "cuda"
+def clear_cuda_memory():
+    """Aggressively clear CUDA memory"""
+    gc.collect()
+    torch.cuda.empty_cache()
+    torch.cuda.synchronize()
+def load_vision_models():
+    print("Loading CLIP and Florence models...")
+    # Load CLIP
+    clip_model = CLIPModel.from_pretrained(
+        "openai/clip-vit-large-patch14",
+        cache_dir=CACHE_DIR
+    ).to(device)
+    clip_processor = CLIPProcessor.from_pretrained(
+        "openai/clip-vit-large-patch14",
+        cache_dir=CACHE_DIR
+    )
+    # Load Florence
+    florence_model = AutoModelForCausalLM.from_pretrained(
+        "microsoft/Florence-2-large",
+        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+        trust_remote_code=True,
+        cache_dir=CACHE_DIR
+    ).to(device)
+    florence_processor = AutoProcessor.from_pretrained(
+        "microsoft/Florence-2-large",
+        trust_remote_code=True,
+        cache_dir=CACHE_DIR
+    )
+    return {
+        'clip_model': clip_model,
+        'clip_processor': clip_processor,
+        'florence_model': florence_model,
+        'florence_processor': florence_processor,
+    }
+def generate_caption(image):
+    vision_models = load_vision_models()
+    # Ensure the image is a PIL Image
+    if not isinstance(image, Image.Image):
+        image = Image.fromarray(image)
+    # Convert the image to RGB if it has an alpha channel
+    if image.mode == 'RGBA':
+        image = image.convert('RGB')
+    prompt = "<DETAILED_CAPTION>"
+    inputs = vision_models['florence_processor'](
+        text=prompt,
+        images=image,
+        return_tensors="pt"
+    ).to(device, torch.float16 if torch.cuda.is_available() else torch.float32)
+    generated_ids = vision_models['florence_model'].generate(
+        input_ids=inputs["input_ids"],
+        pixel_values=inputs["pixel_values"],
+        max_new_tokens=1024,
+        num_beams=3,
+        do_sample=False
+    )
+    generated_text = vision_models['florence_processor'].batch_decode(generated_ids, skip_special_tokens=True)[0]
+    parsed_answer = vision_models['florence_processor'].post_process_generation(
+        generated_text, task="<DETAILED_CAPTION>",
+        image_size=(image.width, image.height)
+    )
+    clear_cuda_memory()
+    return parsed_answer['<DETAILED_CAPTION>']
+def crop_face(image_path, output_dir, output_name, scale_factor=4.0):
+    image = Image.open(image_path).convert("RGB")
+    img_raw = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+    img_raw = img_raw.astype(np.float32)
+    rf = Pytorch_RetinaFace(
+        cfg='mobile0.25',
+        pretrained_path='./weights/mobilenet0.25_Final.pth',
+        confidence_threshold=0.02,
+        nms_threshold=0.4,
+        vis_thres=0.6
+    )
+    dets = rf.detect_faces(img_raw)
+    print("Dets: ", dets)
+    # Instead of asserting, handle multiple faces gracefully
+    if len(dets) == 0:
+        print("No faces detected!")
+        return False
+    # If multiple faces detected, use the one with highest confidence
+    if len(dets) > 1:
+        print(f"Warning: {len(dets)} faces detected, using the one with highest confidence")
+        # Assuming dets is a list of [bbox, landmark, score] and we want to sort by score
+        dets = sorted(dets, key=lambda x: x[2], reverse=True)  # Sort by confidence score
+        # Just keep the highest confidence detection
+        dets = [dets[0]]
+    # Pass the scale_factor to center_and_crop_rescale for adjustable crop size
+    try:
+        # Unpack the tuple correctly - the function returns (cropped_imgs, bbox_infos)
+        cropped_imgs, bbox_infos = rf.center_and_crop_rescale(img_raw, dets, shift_factor=0.45, scale_factor=scale_factor)
+        # Check if we got any cropped images
+        if not cropped_imgs or len(cropped_imgs) == 0:
+            print("No cropped images returned")
+            return False
+        # Use the first cropped face image directly - it's not nested
+        img_to_save = cropped_imgs[0]
+        os.makedirs(output_dir, exist_ok=True)
+        cv2.imwrite(os.path.join(output_dir, output_name), img_to_save)
+        print(f"Saved: {output_name}")
+        return True
+    except Exception as e:
+        print(f"Error during face cropping: {e}")
+        return False
+async def upscale_image(image_path, output_path):
+    """Upscale an image using fal.ai's RealESRGAN model"""
+    fal_client = FalClient()
+    # Read and encode the image
+    with open(image_path, "rb") as image_file:
+        encoded_image = base64.b64encode(image_file.read()).decode('utf-8')
+        data_uri = f"data:image/jpeg;base64,{encoded_image}"
+    try:
+        # Submit the upscaling request
+        handler = await fal_client.submit_async(
+            "fal-ai/real-esrgan",
+            arguments={
+                "image_url": data_uri,
+                "scale": 2,
+                "model": "RealESRGAN_x4plus",
+                "output_format": "png",
+                "face": True
+            },
+        )
+        result = await handler.get()
+        # Download and save the upscaled image
+        image_url = result['image_url']
+        async with aiohttp.ClientSession() as session:
+            async with session.get(image_url) as response:
+                if response.status == 200:
+                    with open(output_path, 'wb') as f:
+                        f.write(await response.read())
+                    return True
+                else:
+                    print(f"Failed to download upscaled image: {response.status}")
+                    return False
+    except Exception as e:
+        print(f"Error during upscaling: {e}")
+        return False