Source code for quantizeml.layers.output_observer

#!/usr/bin/env python
# ******************************************************************************
# Copyright 2023 Brainchip Holdings Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************

__all__ = ["OutputObserver"]

import tensorflow as tf

from keras.layers import Layer


@tf.keras.utils.register_keras_serializable()
class OutputObserver(Layer):
    """ Calibration layer.

    This layer is used to compute the future `range_max` of the equivalent OutputQuantizer in the
    quantized model. It is placed where the OutputQuantizer will be inserted (end of blocks) and
    accumulates the observed maximum values (with momentum) for input in the float model.

    Args:
        axis (str): the quantization range is a scalar ('per-tensor') or a vector corresponding to
            the last axis ('per-axis'). Defaults to 'per-tensor'.
        momentum (float): the momentum for the moving average. Defaults to 0.9.

    Raises:
        ValueError: if `axis` is not one of 'per-tensor' or 'per-axis'.
    """

    def __init__(self, axis="per-tensor", momentum=0.9, **kwargs):
        super().__init__(**kwargs)
        if not (isinstance(axis, str) and axis in ["per-tensor", "per-axis"]):
            raise ValueError(f"Only support reduction 'per-tensor' or 'per-axis'. Given {axis}.")
        self.axis = axis
        self.momentum = momentum
        # The moving average is applied as: new = old - (old - observed) * decay
        self._decay = tf.convert_to_tensor(1.0 - momentum, name="decay")

    def build(self, input_shape):
        """Build the layer.

        Resolves the reduction axes from `axis` and creates the non-trainable `range_max`
        variable, initialized to ones.

        Args:
            input_shape (list): the shape of input tensor.

        Raises:
            ValueError: if `axis` is 'per-axis' and the input has fewer than 3 dimensions.
        """
        super().build(input_shape)

        # Convert axis to a list of int
        if self.axis == "per-axis":
            ndims = len(input_shape)
            if ndims < 3:
                raise ValueError("OutputObserver cannot quantize per-axis tensors "
                                 " with 2 dimensions or less.")
            # Reduce over every axis but the last one
            self._axis = list(range(ndims - 1))
        else:
            # None makes the reduction span all axes, yielding a scalar
            self._axis = None

        # Declares the constant/vector that will store the maximum values of the input.
        self.range_max = self.add_weight(
            name="range_max",
            shape=input_shape[-1] if self._axis is not None else (),
            dtype=tf.float32,
            initializer="ones",
            synchronization=tf.VariableSynchronization.ON_READ,
            trainable=False,
            aggregation=tf.VariableAggregation.MEAN,
            experimental_autocast=False,
        )

    def call(self, inputs):
        """ Observe inputs and update the maximum value with momentum.

        Args:
            inputs (tf.Tensor): the inputs tensor.

        Returns:
            tf.Tensor: unchanged inputs
        """
        # Compute the new range_max from inputs
        range_max = tf.math.reduce_max(tf.math.abs(inputs), self._axis)
        # Cast once, up front, so that both branches below produce a value in the dtype of the
        # stored variable (the inputs are not guaranteed to share the variable dtype).
        range_max = tf.cast(range_max, self.range_max.dtype)

        # If range_max was never updated (still equal to its 'ones' initial value) set the newly
        # computed values, otherwise update with the moving average algorithm
        if tf.reduce_all(tf.math.equal(self.range_max, tf.constant(1.))):
            new_range_max = range_max
        else:
            # Move the stored value towards the observed one by a fraction given by decay
            old_value = self.range_max
            update_delta = (old_value - range_max) * self._decay
            new_range_max = old_value - update_delta
        self.range_max.assign(new_range_max)
        return inputs

    def get_config(self):
        """Get the config of the layer.

        Returns:
            dict: the config of the layer.
        """
        config = super().get_config()
        config.update({"axis": self.axis, "momentum": self.momentum})
        return config