Source code for quantizeml.analysis.quantization_error_api

#!/usr/bin/env python
# ******************************************************************************
# Copyright 2024 Brainchip Holdings Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
__all__ = ["measure_layer_quantization_error", "measure_cumulative_quantization_error"]

import keras
import onnx

from .quantization_error import (keras_layer_quantization_error, onnx_node_quantization_error,
                                 keras_cumulative_quantization_error,
                                 onnx_cumulative_quantization_error)


def measure_layer_quantization_error(fmodel, qmodel, target_layer=None, batch_size=16, seed=None):
    """Measures the layer quantization error.

    Returns a dictionary where the keys are the names of the layers and the values are a
    dictionary composed of the following metrics:

        * Symmetrical Mean Absolute Percentage Error (SMAPE): :func:`tools.metrics.SMAPE`
        * Saturation: the percentage of values in the quantized layer that saturate

    Example:
        >>> summary = measure_layer_quantization_error(fmodel, qmodel)
        >>> assert isinstance(summary[a_layer_name], dict)
        >>> assert "SMAPE" in summary[a_layer_name]

    Args:
        fmodel (onnx.ModelProto or tf.keras.Model): the float model.
        qmodel (onnx.ModelProto or tf.keras.Model): the quantized version of ``fmodel``.
        target_layer (str, optional): when provided, error computation is performed only on the
            target layer/node, expanding the analysis to each output channel. Defaults to None.
        batch_size (int, optional): the batch size of the samples to be generated. A larger batch
            improves metric generalization but consumes more resources. Defaults to 16.
        seed (int, optional): a random seed. Defaults to None.

    Returns:
        dict: the quantization error for each layer

    Notes:
        * Layers/nodes that do not produce quantization errors (e.g. QuantizedReshape) are not
          taken into account.
    """
    keras_model_types = (keras.Sequential, keras.Model)

    # Check that both models have the same type
    if isinstance(fmodel, onnx.ModelProto) and isinstance(qmodel, onnx.ModelProto):
        summary = onnx_node_quantization_error(fmodel, qmodel, target_node=target_layer,
                                               batch_size=batch_size, seed=seed)
    elif isinstance(fmodel, keras_model_types) and isinstance(qmodel, keras_model_types):
        summary = keras_layer_quantization_error(fmodel, qmodel, target_layer=target_layer,
                                                 batch_size=batch_size, seed=seed)
    else:
        model_types = (onnx.ModelProto, *keras_model_types)
        raise TypeError(f"Both models should be the same type, one of {model_types}. "
                        f"Received: {type(fmodel)} and {type(qmodel)}.")
    return summary


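# Usage sketch (illustrative only, not part of the module): how the per-layer helper might be
# called on a Keras float/quantized model pair. Obtaining `qmodel` through
# `quantizeml.models.quantize` and importing from `quantizeml.analysis` are assumptions about the
# wider quantizeml API; `fmodel` stands for any float Keras model you already have.
#
#   from quantizeml.models import quantize
#   from quantizeml.analysis import measure_layer_quantization_error
#
#   qmodel = quantize(fmodel)  # assumed quantization entry point
#   summary = measure_layer_quantization_error(fmodel, qmodel, batch_size=16, seed=0)
#   for name, metrics in summary.items():
#       print(name, metrics["SMAPE"])  # saturation is reported alongside SMAPE, as documented above

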
def measure_cumulative_quantization_error(fmodel, qmodel, target_layer=None, batch_size=16,
                                          seed=None):
    """Measures the cumulative quantization error.

    Returns a dictionary where the keys are the names of the layers and the values are a
    dictionary composed of the following metrics:

        * Symmetrical Mean Absolute Percentage Error (SMAPE): :func:`tools.metrics.SMAPE`
        * Saturation: the percentage of values in the quantized layer that saturate

    Each metric measures the quantization error accumulated from the model input up to the layer.

    Example:
        >>> summary = measure_cumulative_quantization_error(fmodel, qmodel)
        >>> assert isinstance(summary[a_layer_name], dict)
        >>> assert "SMAPE" in summary[a_layer_name]

    Args:
        fmodel (onnx.ModelProto or tf.keras.Model): the float model.
        qmodel (onnx.ModelProto or tf.keras.Model): the quantized version of ``fmodel``.
        target_layer (str, optional): when provided, error computation is performed only on the
            target layer/node, expanding the analysis to each output channel. Defaults to None.
        batch_size (int, optional): the batch size of the samples to be generated. A larger batch
            improves metric generalization but consumes more resources. Defaults to 16.
        seed (int, optional): a random seed. Defaults to None.

    Returns:
        dict: the quantization error for each layer

    Notes:
        * Layers/nodes that do not produce quantization errors (e.g. QuantizedReshape) are not
          taken into account.
    """
    keras_model_types = (keras.Sequential, keras.Model)

    # Check that both models have the same type
    if isinstance(fmodel, onnx.ModelProto) and isinstance(qmodel, onnx.ModelProto):
        summary = onnx_cumulative_quantization_error(fmodel, qmodel, target_node=target_layer,
                                                     batch_size=batch_size, seed=seed)
    elif isinstance(fmodel, keras_model_types) and isinstance(qmodel, keras_model_types):
        summary = keras_cumulative_quantization_error(fmodel, qmodel, target_layer=target_layer,
                                                      batch_size=batch_size, seed=seed)
    else:
        model_types = (onnx.ModelProto, *keras_model_types)
        raise TypeError(f"Both models should be the same type, one of {model_types}. "
                        f"Received: {type(fmodel)} and {type(qmodel)}.")
    return summary


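# Usage sketch (illustrative only, not part of the module): restricting the cumulative analysis
# to a single layer via `target_layer` expands the metrics to each output channel, as documented
# above. The layer name "conv_0" is a hypothetical placeholder; use a name from your own model.
#
#   from quantizeml.analysis import measure_cumulative_quantization_error
#
#   summary = measure_cumulative_quantization_error(fmodel, qmodel, target_layer="conv_0", seed=0)
#   print(summary)  # per-channel errors accumulated from the model input up to "conv_0"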