Source code for quantizeml.layers.quantization_params

#!/usr/bin/env python
# ******************************************************************************
# Copyright 2023 Brainchip Holdings Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************

__all__ = ["QuantizationParams", "get_quantization_params", "quantization"]

import numpy as np
from contextlib import contextmanager


class QuantizationParams:
    """ Class that holds quantization parameters.

    This is a read-only data class.

    Args:
        activation_bits (int, optional): activations quantization bitwidth. Defaults to 8.
        per_tensor_activations (bool, optional): whether to quantize activations per-tensor or
            per-axis. Defaults to False.
        weight_bits (int, optional): weights quantization bitwidth. Defaults to 8.
        output_bits (int, optional): outputs quantization bitwidth. Defaults to 8.
        input_weight_bits (int, optional): weights quantization bitwidth for the first layer.
            Defaults to 8.
        input_dtype (np.dtype or str, optional): expected model input format. If given as a
            string, it should follow numpy string type requirements. Defaults to 'uint8'.
        buffer_bits (int, optional): maximal buffer bitwidth allowed in operations.
            Defaults to 32.
    """

    def __init__(self, activation_bits=8, per_tensor_activations=False, weight_bits=8,
                 output_bits=8, input_weight_bits=8, input_dtype='uint8', buffer_bits=32):
        self._activation_bits = activation_bits
        self._per_tensor_activations = per_tensor_activations
        self._weight_bits = weight_bits
        self._output_bits = output_bits
        self._input_weight_bits = input_weight_bits
        self._buffer_bits = buffer_bits
        self._input_dtype = np.dtype(input_dtype)
        if not np.issubdtype(self._input_dtype, np.integer):
            raise ValueError(f"Unsupported {self._input_dtype} input dtype: "
                             "it should be an integer subdtype.")

    @property
    def activation_bits(self):
        return self._activation_bits

    @property
    def per_tensor_activations(self):
        return self._per_tensor_activations

    @property
    def weight_bits(self):
        return self._weight_bits

    @property
    def output_bits(self):
        return self._output_bits

    @property
    def input_weight_bits(self):
        return self._input_weight_bits

    @property
    def input_dtype(self):
        return self._input_dtype

    @property
    def buffer_bits(self):
        return self._buffer_bits

    def __repr__(self) -> str:
        return (f"QuantizationParams("
                f"activation_bits={self.activation_bits}, "
                f"per_tensor_activations={self.per_tensor_activations}, "
                f"weight_bits={self.weight_bits}, "
                f"output_bits={self.output_bits}, "
                f"input_weight_bits={self.input_weight_bits}, "
                f"input_dtype={str(self.input_dtype)}, "
                f"buffer_bits={self.buffer_bits})")

    def __str__(self) -> str:
        return (f"Activation bits: {self.activation_bits}, "
                f"Per tensor activations: {self.per_tensor_activations}, "
                f"Weight bits: {self.weight_bits}, "
                f"Output bits: {self.output_bits}, "
                f"Input weight bits: {self.input_weight_bits}, "
                f"Input dtype: {str(self.input_dtype)}, "
                f"Buffer bits: {self.buffer_bits}")
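The class is read-only by construction: every value is stored on a private attribute and exposed through a property, and the input dtype is validated eagerly in the constructor. A minimal usage sketch (the import path is assumed from this page's module name):

from quantizeml.layers.quantization_params import QuantizationParams

# 4-bit weights and activations, first layer kept at 8-bit weights
qparams = QuantizationParams(weight_bits=4, activation_bits=4, input_weight_bits=8)
print(qparams.weight_bits)  # 4
print(repr(qparams))

# The input dtype must be an integer subdtype: a float dtype raises a ValueError
try:
    QuantizationParams(input_dtype='float32')
except ValueError as e:
    print(e)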
_quantization = QuantizationParams()
def get_quantization_params():
    """ Returns the global quantization parameters.

    Returns:
        QuantizationParams: the quantization parameters
    """
    return _quantization
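Since the module-level `_quantization` is built with default arguments, calling `get_quantization_params()` outside of any `quantization` context returns the 8-bit defaults. A short sketch, with the expected values shown as comments:

from quantizeml.layers.quantization_params import get_quantization_params

qparams = get_quantization_params()
print(qparams.activation_bits)    # 8 (the default)
print(str(qparams.input_dtype))   # uint8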
@contextmanager
def quantization(qparams):
    """ Sets quantization parameters in a context.

    Args:
        qparams (QuantizationParams): quantization parameters
    """
    # Use of global parameters
    global _quantization
    previous_state = _quantization
    try:
        # Set provided values
        _quantization = qparams
        yield
    finally:
        # Restore previous state
        _quantization = previous_state
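The context manager swaps the module-level `_quantization` object and restores it in a `finally` clause, so the override stays scoped to the `with` block even if the body raises. A sketch of the intended pattern, assuming the global defaults are in effect beforehand:

from quantizeml.layers.quantization_params import (
    QuantizationParams, get_quantization_params, quantization)

four_bit = QuantizationParams(weight_bits=4, activation_bits=4)

with quantization(four_bit):
    # Inside the context, code reading the global parameters sees 4-bit settings
    assert get_quantization_params().weight_bits == 4

# Outside, the previous (default) parameters are restored
assert get_quantization_params().weight_bits == 8

Because each context restores whatever state it captured on entry, such blocks can also be nested, with the innermost parameters taking precedence.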