Spaces:

Bordoglor
/

Ramzes

Configuration error

File size: 7,308 Bytes

302920f

# Copyright 2023-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# The implementation is based on "Parameter-Efficient Orthogonal Finetuning
# via Butterfly Factorization" (https://huggingface.co/papers/2311.06243) in ICLR 2024.

import glob
import os
from pathlib import Path

import cv2
import face_alignment
import numpy as np
import torch
from accelerate import Accelerator
from skimage.io import imread
from torchvision.utils import save_image
from tqdm import tqdm
from transformers import AutoTokenizer
from utils.args_loader import parse_args
from utils.dataset import make_dataset


# Determine the best available device
if torch.cuda.is_available():
    device = "cuda:0"
else:
    # TODO: xpu support in facealignment will be ready after this PR is merged:https://github.com/1adrianb/face-alignment/pull/371
    device = "cpu"

detect_model = face_alignment.FaceAlignment(face_alignment.LandmarksType.TWO_D, device=device, flip_input=False)
# with open('./data/celebhq-text/prompt_val_blip_full.json', 'rt') as f:    # fill50k, COCO
#     for line in f:
#         val_data = json.loads(line)

end_list = np.array([17, 22, 27, 42, 48, 31, 36, 68], dtype=np.int32) - 1


def count_txt_files(directory):
    pattern = os.path.join(directory, "*.txt")
    txt_files = glob.glob(pattern)
    return len(txt_files)


def plot_kpts(image, kpts, color="g"):
    """Draw 68 key points
    Args:
        image: the input image
        kpt: (68, 3).
    """
    if color == "r":
        c = (255, 0, 0)
    elif color == "g":
        c = (0, 255, 0)
    elif color == "b":
        c = (255, 0, 0)
    image = image.copy()
    kpts = kpts.copy()
    radius = max(int(min(image.shape[0], image.shape[1]) / 200), 1)
    for i in range(kpts.shape[0]):
        st = kpts[i, :2]
        if kpts.shape[1] == 4:
            if kpts[i, 3] > 0.5:
                c = (0, 255, 0)
            else:
                c = (0, 0, 255)
        image = cv2.circle(image, (int(st[0]), int(st[1])), radius, c, radius * 2)
        if i in end_list:
            continue
        ed = kpts[i + 1, :2]
        image = cv2.line(image, (int(st[0]), int(st[1])), (int(ed[0]), int(ed[1])), (255, 255, 255), radius)
    return image


def generate_landmark2d(dataset, input_dir, pred_lmk_dir, gt_lmk_dir, vis=False):
    print("Generate 2d landmarks ...")
    os.makedirs(pred_lmk_dir, exist_ok=True)

    imagepath_list = sorted(glob.glob(f"{input_dir}/pred*.png"))

    for imagepath in tqdm(imagepath_list):
        name = Path(imagepath).stem
        idx = int(name.split("_")[-1])
        pred_txt_path = os.path.join(pred_lmk_dir, f"{idx}.txt")
        gt_lmk_path = os.path.join(gt_lmk_dir, f"{idx}_gt_lmk.jpg")
        gt_txt_path = os.path.join(gt_lmk_dir, f"{idx}.txt")
        gt_img_path = os.path.join(gt_lmk_dir, f"{idx}_gt_img.jpg")

        if (not os.path.exists(pred_txt_path)) or (not os.path.exists(gt_txt_path)):
            image = imread(imagepath)  # [:, :, :3]
            out = detect_model.get_landmarks(image)
            if out is None:
                continue

            pred_kpt = out[0].squeeze()
            np.savetxt(pred_txt_path, pred_kpt)

            # Your existing code for obtaining the image tensor
            gt_lmk_img = dataset[idx]["conditioning_pixel_values"]
            save_image(gt_lmk_img, gt_lmk_path)

            gt_img = (dataset[idx]["pixel_values"]) * 0.5 + 0.5
            save_image(gt_img, gt_img_path)

            gt_img = (gt_img.permute(1, 2, 0) * 255).type(torch.uint8).cpu().numpy()
            out = detect_model.get_landmarks(gt_img)
            if out is None:
                continue

            gt_kpt = out[0].squeeze()
            np.savetxt(gt_txt_path, gt_kpt)
            # gt_image = cv2.resize(cv2.imread(gt_lmk_path), (512, 512))

            if vis:
                gt_lmk_image = cv2.imread(gt_lmk_path)

                # visualize predicted landmarks
                vis_path = os.path.join(pred_lmk_dir, f"{idx}_overlay.jpg")
                image = cv2.imread(imagepath)
                image_point = plot_kpts(image, pred_kpt)
                cv2.imwrite(vis_path, np.concatenate([image_point, gt_lmk_image], axis=1))

                # visualize gt landmarks
                vis_path = os.path.join(gt_lmk_dir, f"{idx}_overlay.jpg")
                image = cv2.imread(gt_img_path)
                image_point = plot_kpts(image, gt_kpt)
                cv2.imwrite(vis_path, np.concatenate([image_point, gt_lmk_image], axis=1))


def landmark_comparison(val_dataset, lmk_dir, gt_lmk_dir):
    print("Calculating reprojection error")
    lmk_err = []

    pbar = tqdm(range(len(val_dataset)))
    for i in pbar:
        # line = val_dataset[i]
        # img_name = line["image"].split(".")[0]
        lmk1_path = os.path.join(gt_lmk_dir, f"{i}.txt")
        lmk1 = np.loadtxt(lmk1_path)
        lmk2_path = os.path.join(lmk_dir, f"{i}.txt")

        if not os.path.exists(lmk2_path):
            print(f"{lmk2_path} not exist")
            continue

        lmk2 = np.loadtxt(lmk2_path)
        lmk_err.append(np.mean(np.linalg.norm(lmk1 - lmk2, axis=1)))
        pbar.set_description(f"lmk_err: {np.mean(lmk_err):.5f}")

    print("Reprojection error:", np.mean(lmk_err))
    np.save(os.path.join(lmk_dir, "lmk_err.npy"), lmk_err)


def main(args):
    logging_dir = Path(args.output_dir, args.logging_dir)

    accelerator = Accelerator(
        gradient_accumulation_steps=args.gradient_accumulation_steps,
        mixed_precision=args.mixed_precision,
        log_with=args.report_to,
        project_dir=logging_dir,
    )

    # Load the tokenizer
    if args.tokenizer_name:
        tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name, revision=args.revision, use_fast=False)
    elif args.pretrained_model_name_or_path:
        tokenizer = AutoTokenizer.from_pretrained(
            args.pretrained_model_name_or_path,
            subfolder="tokenizer",
            revision=args.revision,
            use_fast=False,
        )

    val_dataset = make_dataset(args, tokenizer, accelerator, "test")

    gt_lmk_dir = os.path.join(args.output_dir, "gt_lmk")
    if not os.path.exists(gt_lmk_dir):
        os.makedirs(gt_lmk_dir, exist_ok=True)

    pred_lmk_dir = os.path.join(args.output_dir, "pred_lmk")
    if not os.path.exists(pred_lmk_dir):
        os.makedirs(pred_lmk_dir, exist_ok=True)

    input_dir = os.path.join(args.output_dir, "results")

    generate_landmark2d(val_dataset, input_dir, pred_lmk_dir, gt_lmk_dir, args.vis_overlays)

    if count_txt_files(pred_lmk_dir) == len(val_dataset) and count_txt_files(gt_lmk_dir) == len(val_dataset):
        landmark_comparison(val_dataset, pred_lmk_dir, gt_lmk_dir)


if __name__ == "__main__":
    args = parse_args()
    main(args)