nvidia
/

nemoretriever-page-elements-v3

Object Detection

Model card Files Files and versions

nemoretriever-page-elements-v3 / yolox /boxes.py

BoLiu's picture

update SPDX

5facae9 14 days ago

history blame contribute delete

2.15 kB

	#!/usr/bin/env python3

	# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
	# SPDX-License-Identifier: Apache-2.0

	# Copyright (c) Megvii Inc. All rights reserved.

	import torch
	import torchvision


	def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agnostic=False):
	"""
	Copied from YOLOX/yolox/utils/boxes.py
	"""
	box_corner = prediction.new(prediction.shape)
	box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
	box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
	box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
	box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
	prediction[:, :, :4] = box_corner[:, :, :4]

	output = [None for _ in range(len(prediction))]
	for i, image_pred in enumerate(prediction):

	# If none are remaining => process next image
	if not image_pred.size(0):
	continue
	# Get score and class with highest confidence
	class_conf, class_pred = torch.max(image_pred[:, 5: 5 + num_classes], 1, keepdim=True)

	conf_mask = (image_pred[:, 4] * class_conf.squeeze() >= conf_thre).squeeze()
	# Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
	detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1)
	detections = detections[conf_mask]
	if not detections.size(0):
	continue

	if class_agnostic:
	nms_out_index = torchvision.ops.nms(
	detections[:, :4],
	detections[:, 4] * detections[:, 5],
	nms_thre,
	)
	else:
	nms_out_index = torchvision.ops.batched_nms(
	detections[:, :4],
	detections[:, 4] * detections[:, 5],
	detections[:, 6],
	nms_thre,
	)

	detections = detections[nms_out_index]
	if output[i] is None:
	output[i] = detections
	else:
	output[i] = torch.cat((output[i], detections))

	return output