Source code for cnn2snn.calibration.bias_correction

# ******************************************************************************
# Copyright 2020 Brainchip Holdings Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
"""Implementation of the Bias Correction algorithm from:
   Data-Free Quantization Through Weight Equalization and Bias Correction
   Markus Nagel, Mart van Baalen, Tijmen Blankevoort, Max Welling
   https://arxiv.org/abs/1906.04721
"""
import keras
import numpy as np
import tensorflow as tf

from .calibration import QuantizationSampler, is_quantized_neural
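

# Conceptual sketch of the correction in plain NumPy (illustrative only; the
# actual algorithm below works batch-by-batch through a QuantizationSampler):
# the corrective bias is the mean of the error between the float and quantized
# layer outputs, reduced over every axis except the last (channel) one, and it
# is added to the existing bias.
def _bias_correction_sketch(float_outputs, quantized_outputs, bias):
    error = float_outputs - quantized_outputs
    axes = tuple(range(error.ndim - 1))
    return bias + error.mean(axis=axes)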


def get_bias_index(layer):
    """Get the index of the bias weights

    Args:
        layer (:obj:`keras.Layer`): a Keras neural layer.

    Returns:
        int: the index of the bias in the weights list
    """
    if isinstance(layer, keras.layers.SeparableConv2D):
        return 2
    if isinstance(layer, (keras.layers.Dense, keras.layers.Conv2D)):
        return 1
    raise ValueError("{} is not a neural layer".format(layer.name))
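

# Usage sketch (hypothetical helper, not part of the cnn2snn API): the bias
# index reflects the layer weight ordering, e.g. [kernel, bias] for Conv2D and
# Dense, or [depthwise, pointwise, bias] for SeparableConv2D, so the bias is
# always the last entry of the weights list.
def _example_get_bias_index():
    conv = keras.layers.Conv2D(4, 3, use_bias=True)
    conv.build((None, 8, 8, 3))
    sep = keras.layers.SeparableConv2D(4, 3, use_bias=True)
    sep.build((None, 8, 8, 3))
    assert get_bias_index(conv) == len(conv.get_weights()) - 1 == 1
    assert get_bias_index(sep) == len(sep.get_weights()) - 1 == 2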


def clone_model_with_biases(model):
    """Clones the model and add zero biases if needed

    The cloned model is identical to the input model, except that zero biases
    are added to neural layers that did not use them.

    Args:
        model (:obj:`keras.Model`): a Sequential Keras model.

    Returns:
        :obj:`keras.Model`: the cloned Sequential Keras model, with zero biases
        added where needed.
    """

    assert isinstance(model, keras.Sequential)

    clone_model = keras.Sequential()
    clone_model.add(keras.layers.Input(model.input_shape[1:]))
    for layer in model.layers:
        config = layer.get_config()
        if is_quantized_neural(layer) and not config['use_bias']:
            # Modify configuration to use a bias
            config['use_bias'] = True
            # Create a cloned layer
            clone_layer = layer.__class__.from_config(config)
            clone_model.add(clone_layer)
            # Get original weights
            w_list = layer.get_weights()
            # Insert zero bias
            bias_index = get_bias_index(layer)
            bias_shape = clone_layer.get_weights()[bias_index].shape
            w_list.insert(bias_index, np.zeros(bias_shape, dtype=np.float32))
            # Update cloned layer weights
            clone_layer.set_weights(w_list)
        else:
            # Simply clone the layer
            clone_layer = layer.__class__.from_config(config)
            clone_model.add(clone_layer)
            clone_layer.set_weights(layer.get_weights())
    return clone_model
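

# Usage sketch (hypothetical helper): assuming `qmodel` is a Sequential model
# whose neural layers were quantized with cnn2snn (so that is_quantized_neural
# returns True for them) and were built with use_bias=False, the clone exposes
# an all-zero bias for each of them while producing the same outputs.
def _example_clone_model_with_biases(qmodel):
    clone = clone_model_with_biases(qmodel)
    for layer, clone_layer in zip(qmodel.layers, clone.layers):
        if is_quantized_neural(layer) and not layer.get_config()['use_bias']:
            bias = clone_layer.get_weights()[get_bias_index(clone_layer)]
            assert np.all(bias == 0)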


def bias_correction(model, samples, batch_size=None):
    """Apply a corrective bias to quantized layers.

    This implements the Bias Correction algorithm described in:

        Data-Free Quantization Through Weight Equalization and Bias Correction
        Markus Nagel, Mart van Baalen, Tijmen Blankevoort, Max Welling
        https://arxiv.org/abs/1906.04721

    The original paper demonstrates empirically that weight quantization can
    introduce a biased error in the activations that is quite significant for
    low bitwidth weights (i.e. lower than 8-bit).

    This algorithm simply estimates the quantization bias on a set of samples
    and subtracts it from the layer bias variable.

    If the accuracy of the quantized model drops sharply compared to the
    original model, this simple correction can recover most of the drop, but
    not all of it.

    Nothing is required to optimize a model but a set of samples for
    calibration (typically taken from the training dataset).

    Depending on the model and dataset your mileage may vary, but it has been
    observed empirically that there is no significant difference between
    models corrected with very few samples (16) and those corrected with a
    much larger number of samples (1024).

    Args:
        model (:obj:`keras.Model`): a quantized Keras Model
        samples (:obj:`np.ndarray`): a set of samples used for calibration
        batch_size (int): the batch size used when evaluating samples

    Returns:
        :obj:`keras.Model`: a quantized Keras model whose biases have been
        corrected
    """
    # Clone the model, adding biases to layers that don't use them
    bc_model = clone_model_with_biases(model)
    m = keras.metrics.MeanSquaredError()
    sampler = QuantizationSampler(bc_model, samples, batch_size)
    # Adjust the bias of each layer iteratively
    for layer in bc_model.layers:
        if is_quantized_neural(layer):
            print("Adjusting bias for {}".format(layer.name))
            sampler.select_layer(layer, include_activation=False)
            # Evaluate the quantization error before correction
            err_before = sampler.quantization_error(m)
            # Iterate over sample batches to evaluate the raw error
            q_bias = 0
            n_batches = sampler.n_batches
            float_outputs = []
            for i in range(n_batches):
                # Evaluate the quantized layer outputs
                outputs = sampler.quantized_outputs(i)
                # Evaluate and store the layer float outputs
                float_outputs.append(sampler.float_outputs(i))
                # Evaluate the quantization error
                error = float_outputs[i] - outputs
                # Reduce over every axis except the last (channel) one
                axis = tf.range(tf.rank(error) - 1)
                # Add the contribution of this batch to the corrective bias
                q_bias += tf.math.reduce_mean(error, axis=axis) / n_batches
            # Adjust the quantized layer bias
            w_list = layer.get_weights()
            w_list[get_bias_index(layer)] += q_bias
            layer.set_weights(w_list)
            # Evaluate the quantization error against the stored float outputs
            m.reset_state()
            for i in range(n_batches):
                # Evaluate the corrected quantized layer outputs
                outputs = sampler.quantized_outputs(i)
                m.update_state(float_outputs[i], outputs)
            err_after = m.result().numpy()
            print(f"quantization error: {err_before:.4f} -> {err_after:.4f}")
    return bc_model
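

# Minimal usage sketch (hypothetical names): `qmodel` is assumed to be a
# quantized Sequential Keras model (e.g. obtained with cnn2snn quantization)
# and `x_train` data drawn from the training set. As noted in the docstring
# above, a handful of calibration samples (e.g. 16) is typically enough.
def _example_bias_correction(qmodel, x_train):
    calibration_samples = x_train[:16]
    return bias_correction(qmodel, calibration_samples, batch_size=16)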