Source code for cnn2snn.quantization_ops

#!/usr/bin/env python
# ******************************************************************************
# Copyright 2020 Brainchip Holdings Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
"""Weight quantizers API"""

# -*- coding: utf-8 -*-
from __future__ import absolute_import
import keras.backend as K
import tensorflow as tf
from keras.layers import Layer


class WeightQuantizer(Layer):
    """The base class for all weight quantizers.

    This base class must be overloaded, as well as the two functions
    `quantize` and `scale_factor`.

    Quantizers derived from this class must be symmetric uniform mid-tread
    quantizers, in order to be compatible with the conversion into an Akida
    model.

    Quantization is usually done in two steps:

    #. The weights are first quantized to integer values in the range imposed
       by the bitwidth, e.g. from -7 to 7 for a 4-bit quantization.
    #. These integer weights are then reconstructed as float discretized
       values, in the range of the original weights. For example, 4-bit
       integer weights are reconstructed on a grid from -7*qstep to 7*qstep,
       where qstep is the quantization step size between two levels of the
       uniform grid.

    For a full explanation of mid-tread uniform quantization, one can take a
    look at `the Wikipedia page
    <https://en.wikipedia.org/wiki/Quantization_(signal_processing)#Example>`_.

    The `quantize` function takes the original weights as input and must
    return the reconstructed float values after quantization.

    The `scale_factor` function must return the factor used to transform the
    reconstructed float weights into the integer values obtained after
    step 1. In other words, given a set of float weights "w", quantize(w) *
    scale_factor(w) is a set of integer weights.

    The bitwidth defines the number of quantization levels on which the
    weights will be quantized. For instance, a 4-bit quantization gives
    integer values between -7 and 7. More generally, for an n-bit
    quantization, values range from -kmax to kmax, where kmax is
    (2^(n-1) - 1).

    Args:
        bitwidth (int): the quantization bitwidth.

    """

    def __init__(self, bitwidth, **kwargs):
        if bitwidth <= 1:
            raise ValueError("The quantization bitwidth should be higher "
                             f"than 1. Received bitwidth {bitwidth}.")
        self.bitwidth_ = int(bitwidth)
        self.kmax_ = (2.**(bitwidth - 1) - 1)
        super().__init__(**kwargs)

    def quantize(self, w):
        """Quantizes the specified weights Tensor.

        This function must return a tf.Tensor containing float weights
        discretized on a uniform grid based on the scale factor "sf". In
        other words, the discretized weights must be values among:

        -kmax/sf, ..., -2/sf, -1/sf, 0, 1/sf, 2/sf, ..., kmax/sf

        Args:
            w (:obj:`tensorflow.Tensor`): the weights Tensor to quantize.

        Returns:
            :obj:`tensorflow.Tensor`: a Tensor of quantized weights.

        """
        raise NotImplementedError()

    def scale_factor(self, w):
        """Evaluates the scale factor for the specified weights tf.Tensor.

        This function returns the scale factor used to get the quantized
        integer weights from the reconstructed float weights. It is equal to
        the inverse of the quantization step size.

        The scale factor can be a scalar that is applied to the whole tensor
        of weights. It can also be a vector whose length is the number of
        filters, where each value applies to the weights of the corresponding
        output filter. This is called per-axis quantization, as opposed to
        per-tensor quantization. The number of filters is usually the last
        dimension of the weights tensor. More details are given `here
        <https://www.tensorflow.org/lite/performance/quantization_spec?hl=sv#per-axis_vs_per-tensor>`__.

        Note that the `quantizer_dw` of a depthwise convolution in a
        QuantizedSeparableConv2D layer must imperatively return a scalar
        scale factor.

        Args:
            w (:obj:`tensorflow.Tensor`): the weights Tensor to quantize.

        Returns:
            :obj:`tensorflow.Tensor`: a Tensor containing a list of scalar
            values (1 or more).

        """
        raise NotImplementedError()

    @property
    def bitwidth(self):
        """Returns the bitwidth of the quantizer"""
        return self.bitwidth_

    def get_config(self):
        config = super().get_config()
        config.update({'bitwidth': self.bitwidth_})
        return config
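
# Illustrative sketch (not part of the original module): the contract between
# `quantize` and `scale_factor` for any concrete subclass, shown here with
# the MaxQuantizer defined further below, assuming eager TensorFlow execution:
#
#   >>> quantizer = MaxQuantizer(bitwidth=4)   # kmax = 7
#   >>> w = tf.constant([0.1, -0.2, 0.35])
#   >>> wq = quantizer.quantize(w)             # floats on the uniform grid
#   >>> wq * quantizer.scale_factor(w)         # ~[2., -4., 7.]: integers in [-7, 7]
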
class LinearWeightQuantizer(WeightQuantizer):
    """An abstract linear weight quantizer.

    This abstract class proposes a linear, symmetric and uniform quantization
    function. The "linear" term here means that there is no non-linear
    transformation of the weights before the uniform quantization.

    The `scale_factor` function must be overloaded.

    """

    def scale_factor(self, w):
        raise NotImplementedError()

    def quantize(self, w):
        """Linearly quantizes the input weights on a symmetric uniform grid
        based on the scale factor.

        The input weights are directly rounded to the closest discretized
        value, without any transformation of the input weights. The gradient
        is estimated using the Straight-Through Estimator (STE), i.e. the
        gradient is computed as if there were no quantization.

        """
        sf = self.scale_factor(w)
        return tf.clip_by_value(round_through(w * sf), -self.kmax_,
                                self.kmax_) / sf
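
# Illustrative sketch (not part of the original module): the STE at work in
# `round_through` (defined at the end of this module), which `quantize` uses.
# The forward pass rounds, while the backward pass behaves as the identity:
#
#   >>> x = tf.Variable([0.4, 1.6])
#   >>> with tf.GradientTape() as tape:
#   ...     y = tf.reduce_sum(round_through(x))
#   >>> y.numpy()              # forward: round(0.4) + round(1.6) = 0 + 2
#   2.0
#   >>> tape.gradient(y, x)    # gradient of ones, as if no rounding occurred
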
class StdWeightQuantizer(LinearWeightQuantizer):
    """A uniform quantizer based on the weights standard deviation.

    Quantizes the specified weights into 2^bitwidth - 1 values centered on
    zero. E.g. with bitwidth = 4, there are 15 quantization levels: from
    -7 * qstep to 7 * qstep, with qstep being the quantization step.

    The quantization step is defined by:

        qstep = threshold * std(W) / max_value

    with max_value being 2^(bitwidth-1) - 1. E.g. with bitwidth = 4,
    max_value = 7.

    All values below -threshold * std(W) (resp. above threshold * std(W)) are
    automatically assigned to the min (resp. max) value.

    Args:
        threshold (int): the standard deviation multiplier used to exclude
            outliers.
        bitwidth (int): the quantizer bitwidth defining the number of
            quantized values.

    """

    def __init__(self, threshold=3, bitwidth=4, **kwargs):
        # Having a cast guarantees a check when the parameters are not
        # numbers (e.g. None)
        self.threshold_ = float(threshold)
        # Initialize the parent to store the bitwidth
        super().__init__(bitwidth, **kwargs)

    @staticmethod
    def sigma_(w):
        """Returns the standard deviation(s) of a set of weights"""
        return tf.math.reduce_std(w)

    @property
    def threshold(self):
        """Returns the threshold of the std quantizer"""
        return self.threshold_

    def scale_factor(self, w):
        sigma_scaled = self.sigma_(w) * self.threshold_
        return clip_scale_factor(self.kmax_ / sigma_scaled)

    def get_config(self):
        config = super().get_config()
        config.update({'threshold': self.threshold_})
        return config
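
# Illustrative sketch (not part of the original module), with threshold = 3
# and bitwidth = 4: the grid spans [-3 * std(W), 3 * std(W)] in 15 levels,
# and any weight beyond that range saturates. For example, if std(W) = 0.1:
#
#   qstep        = threshold * std(W) / max_value = 3 * 0.1 / 7 ~= 0.0429
#   scale_factor = 1 / qstep ~= 23.3
#
# so a weight of 0.5 (an outlier, beyond 3 * std(W) = 0.3) is clipped to
# 7 * qstep = 0.3, while a weight of 0.05 is rounded to 1 * qstep ~= 0.043.
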
class StdPerAxisQuantizer(StdWeightQuantizer):
    """A quantizer that relies on the weights standard deviation per axis.

    Quantizes the specified weights into 2^bitwidth - 1 values centered on
    zero. E.g. with bitwidth = 4, there are 15 quantization levels: from
    -7 * qstep to 7 * qstep, with qstep being the quantization step.

    The quantization step is defined, per axis, by:

        qstep = threshold * std(W) / max_value

    with max_value being 2^(bitwidth-1) - 1. E.g. with bitwidth = 4,
    max_value = 7.

    This is an evolution of the StdWeightQuantizer that computes the standard
    deviation per axis. The last dimension is used as the axis, meaning that
    the scale factor is a vector with as many values as "filters", or
    "neurons".

    Note: for a DepthwiseConv2D layer that has a single filter, this
    quantizer is strictly equivalent to the StdWeightQuantizer.

    """

    def sigma_(self, w):
        # Reduce over all dimensions except the last one (the filters axis)
        red_range = tf.range(tf.rank(w) - 1)
        return tf.math.reduce_std(w, red_range)
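
# Illustrative sketch (not part of the original module): for a Conv2D kernel
# of shape (3, 3, 8, 16), `sigma_` reduces over the first three dimensions
# and returns one standard deviation per output filter:
#
#   >>> quantizer = StdPerAxisQuantizer(threshold=3, bitwidth=4)
#   >>> w = tf.random.normal((3, 3, 8, 16))
#   >>> quantizer.sigma_(w).shape          # one value per filter
#   TensorShape([16])
#   >>> quantizer.scale_factor(w).shape    # hence a vector scale factor
#   TensorShape([16])
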
class MaxQuantizer(LinearWeightQuantizer):
    """A quantizer that relies on the maximum range.

    Quantizes the specified weights into 2^bitwidth - 1 values centered on
    zero. E.g. with bitwidth = 4, there are 15 quantization levels: from
    -7 * qstep to 7 * qstep, with qstep being the quantization step.

    The quantization step is defined by:

        qstep = max_range / max_value

    with:

    - max_range = max(abs(W))
    - max_value = 2^(bitwidth-1) - 1. E.g. with bitwidth = 4, max_value = 7.

    Args:
        bitwidth (int): the quantizer bitwidth defining the number of
            quantized values.

    """

    def __init__(self, bitwidth=4, **kwargs):
        # Initialize the parent to store the bitwidth
        super().__init__(bitwidth, **kwargs)

    @staticmethod
    def max_range_(w):
        """Get the range on which the weights are quantized.

        This quantizer discretizes weights in the range:
        [-max(abs(weights)) ; max(abs(weights))]

        """
        return tf.math.reduce_max(tf.math.abs(w))

    def scale_factor(self, w):
        return clip_scale_factor(self.kmax_ / self.max_range_(w))
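
# Illustrative sketch (not part of the original module), with bitwidth = 4
# and weights whose largest magnitude is 0.7: the grid covers the weights
# exactly, with no saturation:
#
#   qstep        = max(abs(W)) / max_value = 0.7 / 7 = 0.1
#   scale_factor = 1 / qstep = 10
#
# so every weight is rounded to the nearest multiple of 0.1, and the extreme
# weight 0.7 itself lands exactly on the top level 7 * qstep.
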
class MaxPerAxisQuantizer(MaxQuantizer):
    """A quantizer that relies on the maximum range per axis.

    Quantizes the specified weights into 2^bitwidth - 1 values centered on
    zero. E.g. with bitwidth = 4, there are 15 quantization levels: from
    -7 * qstep to 7 * qstep, with qstep being the quantization step.

    The quantization step is defined, per axis, by:

        qstep = max_range / max_value

    with:

    - max_range = max(abs(W))
    - max_value = 2^(bitwidth-1) - 1. E.g. with bitwidth = 4, max_value = 7.

    This is an evolution of the MaxQuantizer that computes the max_range per
    axis. The last dimension is used as the axis, meaning that the scale
    factor is a vector with as many values as "filters", or "neurons".

    Note: for a DepthwiseConv2D layer that has a single filter, this
    quantizer is strictly equivalent to the MaxQuantizer.

    """

    def max_range_(self, w):
        # Reduce over all dimensions except the last one (the filters axis)
        red_range = tf.range(tf.rank(w) - 1)
        # Clip the per-axis range to a small positive value so that a filter
        # whose weights are all zeros does not yield an infinite scale factor
        low_limit_range = 1e-10
        return tf.clip_by_value(
            tf.math.reduce_max(tf.math.abs(w), red_range), low_limit_range,
            tf.float32.max)
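
# Illustrative sketch (not part of the original module): the per-axis
# reduction yields one range per output filter (last dimension), and the low
# clip value protects filters whose weights are all zeros from an infinite
# scale factor:
#
#   >>> quantizer = MaxPerAxisQuantizer(bitwidth=4)
#   >>> w = tf.constant([[0.5, 0.0], [-1.0, 0.0]])  # second filter all zeros
#   >>> quantizer.max_range_(w)      # per-filter max |w|: [1.0, 1e-10]
#   >>> quantizer.scale_factor(w)    # 7 / max_range, clipped: [7.0, 1000.0]
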
def get(identifier):
    """Returns the weight quantizer corresponding to the identifier.

    The 'identifier' input can take two types: either a weight quantizer
    instance, or a dictionary corresponding to the config serialization of a
    weight quantizer.

    Args:
        identifier (WeightQuantizer or dict): either a WeightQuantizer
            instance or a configuration dictionary to deserialize.

    Returns:
        :obj:`cnn2snn.WeightQuantizer`: a weight quantizer

    """
    if isinstance(identifier, WeightQuantizer):
        return identifier
    if isinstance(identifier, dict):
        return tf.keras.utils.deserialize_keras_object(
            identifier,
            custom_objects={
                'StdWeightQuantizer': StdWeightQuantizer,
                'StdPerAxisQuantizer': StdPerAxisQuantizer,
                'MaxQuantizer': MaxQuantizer,
                'MaxPerAxisQuantizer': MaxPerAxisQuantizer
            })
    raise ValueError(f"Could not interpret identifier {identifier} "
                     "for a weight quantizer object")


def round_through(x):
    """Element-wise rounding to the closest integer, with full gradient
    propagation.

    A trick from `Sergey Ioffe <http://stackoverflow.com/a/36480182>`_.

    """
    rounded = K.round(x)
    return x + K.stop_gradient(rounded - x)


def ceil_through(x):
    """Element-wise ceiling operation (to the closest greater integer), with
    full gradient propagation.

    """
    ceiling_value = tf.math.ceil(x)
    return x + K.stop_gradient(ceiling_value - x)


def clip_scale_factor(scale_factor):
    """Clips the scale factor(s) to 1000, in order to avoid very large scale
    factors and thus very large Akida thresholds after conversion.

    """
    return tf.clip_by_value(scale_factor, 0, 1e3)
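
# Illustrative sketch (not part of the original module): a quantizer survives
# a config round-trip through `get`, assuming the standard Keras
# serialization helpers:
#
#   >>> quantizer = StdPerAxisQuantizer(threshold=3, bitwidth=4)
#   >>> config = tf.keras.utils.serialize_keras_object(quantizer)
#   >>> restored = get(config)
#   >>> restored.bitwidth, restored.threshold
#   (4, 3.0)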