Source code for akida_models.centernet.centernet_processing

#!/usr/bin/env python
# ******************************************************************************
# Copyright 2023 Brainchip Holdings Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
"""
Processing tools for CenterNet data handling.
"""

__all__ = ["decode_output"]

import numpy as np
from tensorflow.nn import max_pool2d

from ..detection.processing import BoundingBox


def decode_output(output, nb_classes, obj_threshold=0.1, max_detections=100, kernel=5):
    """ Decodes a CenterNet model output into a list of bounding boxes.

    The last axis of ``output`` is read as ``nb_classes`` center heatmap logits,
    followed by 2 box size values (w, h) and 2 center offset values (x, y).
    The first two axes are the grid height and width.

    Args:
        output (tf.Tensor or np.ndarray): model output to decode, indexed as
            (grid_h, grid_w, nb_classes + 4).
        nb_classes (int): number of classes.
        obj_threshold (float, optional): confidence threshold for a box. Defaults to 0.1.
        max_detections (int, optional): maximum number of boxes the model is allowed to produce.
            Values larger than the number of heatmap cells are clamped. Defaults to 100.
        kernel (int, optional): max pool kernel size. Defaults to 5.

    Returns:
        List: `BoundingBox` objects with a score strictly above ``obj_threshold``,
        ordered by decreasing score, each with its ``label`` attribute set.
    """
    def _sigmoid(x):
        return 1. / (1. + np.exp(-x))

    # Work on a NumPy view so that slicing and fancy indexing below behave the
    # same whether the caller passed a tensor or an array.
    output = np.asarray(output)
    grid_h, grid_w = output.shape[:2]
    grid_size = grid_h * grid_w

    # Decode the output of the network. The sigmoid allocates a new array, so
    # the in-place suppression below never touches the caller's data.
    center_heatmap_pred = _sigmoid(output[..., :nb_classes])
    wh_pred = output[..., nb_classes:nb_classes + 2]
    offset_pred = output[..., nb_classes + 2:nb_classes + 4]

    # Keep local maxima only: a cell survives when it equals the maximum of its
    # kernel x kernel neighbourhood, every other cell is zeroed.
    hmax = max_pool2d(center_heatmap_pred[None, ...],
                      ksize=[kernel, kernel],
                      strides=1,
                      padding='SAME',
                      data_format='NHWC')
    local_max = np.asarray(hmax[0])
    center_heatmap_pred[local_max != center_heatmap_pred] = 0

    # Flatten class-major so that index = label * grid_size + y * grid_w + x
    flattened_heatmap = np.reshape(np.transpose(center_heatmap_pred, (2, 0, 1)), (-1))

    # Never ask for more candidates than there are cells: partitioning with an
    # out-of-range kth raises.
    nb_candidates = min(max_detections, flattened_heatmap.size)
    if nb_candidates <= 0:
        return []

    # Get top k from the heatmap, sorted by decreasing score
    topk_inds = np.argpartition(flattened_heatmap, -nb_candidates)[-nb_candidates:]
    topk_inds = topk_inds[np.argsort(flattened_heatmap[topk_inds])][::-1]
    topk_scores = flattened_heatmap[topk_inds]

    topk_labels = topk_inds // grid_size
    topk_inds = topk_inds % grid_size
    # Rows are grid_w cells long, so both coordinates are derived from grid_w
    # (dividing by grid_h is only correct on square grids).
    topk_ys = topk_inds // grid_w
    topk_xs = topk_inds % grid_w

    # Gather the WH and OFFSET features at the selected cells. No transpose is
    # needed: (grid_h, grid_w, 2) flattens to the same y * grid_w + x order.
    wh = np.reshape(wh_pred, [-1, wh_pred.shape[-1]])[topk_inds, ...]
    offset = np.reshape(offset_pred, [-1, offset_pred.shape[-1]])[topk_inds, ...]

    # Refine the centers with the predicted offsets
    topk_xs = topk_xs + offset[..., 0]
    topk_ys = topk_ys + offset[..., 1]

    # Corners are divided by the grid size.
    # NOTE(review): the upper clip bound is the grid size although the values
    # were just divided by it, so it only limits very large boxes - confirm
    # whether 1 was intended. Kept as is to preserve the existing behaviour.
    half_w = wh[..., 0] / 2
    half_h = wh[..., 1] / 2
    tl_x = np.clip((topk_xs - half_w) / grid_w, a_min=0, a_max=grid_w)
    tl_y = np.clip((topk_ys - half_h) / grid_h, a_min=0, a_max=grid_h)
    br_x = np.clip((topk_xs + half_w) / grid_w, a_min=0, a_max=grid_w)
    br_y = np.clip((topk_ys + half_h) / grid_h, a_min=0, a_max=grid_h)

    boxes = []
    for i, score in enumerate(topk_scores):
        # Scores are sorted, so the first one failing the threshold ends the scan
        if not score > obj_threshold:
            break
        box = BoundingBox(tl_x[i], tl_y[i], br_x[i], br_y[i], score=score)
        box.label = topk_labels[i]
        boxes.append(box)

    return boxes