# Source code for quantizeml.layers.batch_normalization

#!/usr/bin/env python
# ******************************************************************************
# Copyright 2023 Brainchip Holdings Ltd.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
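# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.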
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************

# Names exported by ``from <this module> import *``.
__all__ = ["QuantizedBatchNormalization"]

import tensorflow as tf
import keras

from .layers_base import (register_quantize_target, rescale_outputs,
                          tensor_inputs, apply_buffer_bitwidth, init_quant_config)
from .quantizers import WeightQuantizer, AlignedWeightQuantizer, OutputQuantizer
from ..tensors import QTensor

@register_quantize_target(keras.layers.BatchNormalization)
@tf.keras.utils.register_keras_serializable()
class QuantizedBatchNormalization(keras.layers.Layer):
    r"""Layer that normalizes its inputs, on the last axis.

    The normalization is applied like this:

    .. math::

        y = \frac{(x - \mu) \cdot \gamma}{\sigma} + \beta
          = \frac{x \cdot \gamma}{\sigma}
            - \frac{\mu \cdot \gamma}{\sigma} + \beta

    if we consider:

    .. math:: a = \frac{\gamma}{\sigma}

    and

    .. math:: b = -\frac{\mu \cdot \gamma}{\sigma} + \beta

    The normalization can be re-written as:

    .. math:: y = a \cdot x + b

    Note that this layer will hold variables with names gamma, beta,
    moving_mean (:math:`\mu`), and moving_variance
    (:math:`\sigma = \sqrt{moving\_variance + \epsilon}`), so they can be
    converted from a BatchNormalization layer. However, it's a and b that are
    going to be quantized.

    Args:
        quant_config (dict, optional): the serialized quantization
            configuration. Recognized keys are "output_quantizer",
            "a_quantizer" (defaults to ``{"bitwidth": 8}`` when absent) and
            "b_quantizer". Defaults to None.
        axis (int, optional): The axis that was normalized on the
            BatchNormalization layer. The only supported value is the last
            dimension. Defaults to -1.
        epsilon (float, optional): Small value to avoid dividing by zero.
            Defaults to 1e-3.
    """

    # BatchNormalization constructor arguments that have no counterpart here.
    # NOTE(review): presumably consumed by the quantization tooling when
    # converting a float layer configuration; confirm against the caller.
    ignored_args = [
        "momentum",
        "center",
        "scale",
        "beta_initializer",
        "gamma_initializer",
        "moving_mean_initializer",
        "moving_variance_initializer",
        "beta_regularizer",
        "gamma_regularizer",
        "beta_constraint",
        "gamma_constraint",
        "renorm",
        "renorm_clipping",
        "renorm_momentum",
        "fused",
        "trainable",
        "virtual_batch_size",
        "adjustment",
    ]

    def __init__(self, *args, quant_config=None, axis=-1, epsilon=1e-3, **kwargs):
        super().__init__(*args, **kwargs)
        self.quant_config = init_quant_config(quant_config)

        # The output quantizer is optional: it only exists when configured.
        out_quant_cfg = self.quant_config.get("output_quantizer", False)
        if out_quant_cfg:
            self.out_quantizer = OutputQuantizer(
                name="output_quantizer", **out_quant_cfg)
        else:
            self.out_quantizer = None

        # The default "a" configuration is written back into quant_config so
        # that get_config() serializes the value actually in use.
        if "a_quantizer" not in self.quant_config:
            self.quant_config["a_quantizer"] = {"bitwidth": 8}
        a_quantizer_cfg = self.quant_config["a_quantizer"]
        self.a_quantizer = WeightQuantizer(name="a_quantizer", **a_quantizer_cfg)

        b_quantizer_cfg = self.quant_config.get("b_quantizer", {})
        self.b_quantizer = AlignedWeightQuantizer(name="b_quantizer", **b_quantizer_cfg)

        self.buffer_bitwidth = apply_buffer_bitwidth(self.quant_config, signed=True)

        # Define a small float number to avoid dividing by zero.
        self.epsilon = epsilon

        # Axis on which operation is applied
        self.axis = axis

    def build(self, input_shape):
        """Validate the input rank and axis, then create the BN variables.

        Args:
            input_shape: shape of the input, anything accepted by
                ``tf.TensorShape``.

        Raises:
            ValueError: if the input is not a 3D or 4D tensor, or if ``axis``
                does not designate the last dimension only.
        """
        input_shape = tf.TensorShape(input_shape)
        rank = input_shape.rank

        if rank not in (3, 4):
            raise ValueError(
                "QuantizedBatchNormalization only supports 3D or 4D tensors. "
                f"Received tensor with shape: {tuple(input_shape)}.")

        # Normalize axis (after this call self.axis is a sequence of indices)
        self.axis = keras.utils.tf_utils.validate_axis(self.axis, input_shape)

        # Check selected axis is valid: exactly one axis, and it must be the
        # last one. Both conditions are mandatory, hence the `or`: with `and`,
        # a single non-last axis would be silently accepted while the
        # variables below are always shaped for the last dimension.
        if len(self.axis) != 1 or self.axis[0] != rank - 1:
            raise ValueError("QuantizedBatchNormalization only supports axis "
                             "argument set to the last dimension.")

        # Shape for variables is always as if it was applied on the
        # last dimension.
        param_shape = input_shape[-1]

        # Add BN compatible weights
        # Gamma
        self.gamma = self.add_weight(
            name="gamma",
            shape=param_shape,
            dtype=tf.float32,
            initializer="ones",
            regularizer=None,
            constraint=None,
            trainable=True,
            experimental_autocast=False,
        )

        # Beta
        self.beta = self.add_weight(
            name="beta",
            shape=param_shape,
            dtype=tf.float32,
            initializer="zeros",
            regularizer=None,
            constraint=None,
            trainable=True,
            experimental_autocast=False,
        )

        # Mu = moving mean
        self.moving_mean = self.add_weight(
            name="moving_mean",
            shape=param_shape,
            dtype=tf.float32,
            initializer="zeros",
            regularizer=None,
            constraint=None,
            trainable=False,
            experimental_autocast=False,
        )

        # Sigma² = moving variance
        self.moving_variance = self.add_weight(
            name="moving_variance",
            shape=param_shape,
            dtype=tf.float32,
            initializer="ones",
            regularizer=None,
            constraint=None,
            trainable=False,
            experimental_autocast=False,
        )

    @property
    def sigma_rec(self):
        """Reciprocal of sigma: 1 / sqrt(moving_variance + epsilon)."""
        sigma_rec = tf.math.rsqrt(self.moving_variance + self.epsilon)
        return sigma_rec

    @property
    def a(self):
        """Quantized multiplicative term: a = gamma / sigma."""
        a_var = self.gamma * self.sigma_rec
        q_a = self.a_quantizer(a_var)
        return q_a

    def b(self, inputs):
        """Quantized additive term: b = -mu * gamma / sigma + beta.

        Args:
            inputs: the tensor b will be added to; it is handed to the
                b quantizer along with the float value of b so that the
                result is aligned on it.

        Returns:
            the output of the b quantizer.
        """
        sigma_rec = self.sigma_rec
        b_var = -self.moving_mean * self.gamma * sigma_rec + self.beta
        q_b = self.b_quantizer(b_var, inputs)
        return q_b

    @tensor_inputs([QTensor])
    @rescale_outputs
    def call(self, inputs):
        """Evaluate y = a * x + b on the inputs.

        Args:
            inputs: the layer inputs (x).

        Returns:
            the sum of ``a * inputs`` and the aligned ``b``.
        """
        # Calculation is equivalent to
        #   y = (x - mu) * gamma / sigma + beta
        #     = x * gamma / sigma - mu * gamma / sigma + beta
        #
        # So if we consider
        #   a = gamma / sigma
        #   b = -mu * gamma / sigma + beta
        # Then the evaluation is just y = a * x + b.

        # outputs = a * x
        outputs = tf.multiply(inputs, self.a)

        # quantize and retrieve b, aligned on the outputs to allow sum
        b = self.b(outputs)

        # y = outputs + b
        return tf.add(outputs, b)

    def get_config(self):
        """Return the base layer config extended with this layer's arguments.

        Returns:
            dict: the config, including "quant_config", "epsilon" and "axis".
        """
        config = super().get_config()
        config.update({
            "quant_config": self.quant_config,
            "epsilon": self.epsilon,
            "axis": self.axis,
        })
        return config