Source code for akida_models.imagenet.preprocessing

# ******************************************************************************
# Copyright 2022 Brainchip Holdings Ltd.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
Preprocessing tools for ImageNet dataset.

__all__ = ["RandomColorJitter", "ThreeAugment", "preprocess_image", "index_to_label"]

import numpy as np
import keras
import tensorflow as tf
from scipy.ndimage import gaussian_filter

from .imagenet_labels2names import imagenet_labels

class RandomColorJitter(keras.layers.Layer):
    """RandomColorJitter class.

    Randomly adds color jitter to an image. Color jitter means to add random brightness, contrast,
    saturation, and hue to an image. There is a 80% chance that an image will be randomly
    color-jittered. Taken on

        proba(float, optional): Probability of applying the color jitter. Defaults to 0.8.

    def __init__(self, *args, proba=0.8, **kwargs):
        super().__init__(*args, **kwargs)
        self.proba = proba

    def call(self, image):
        def _color_jitter(image):
            image = tf.image.random_brightness(image, 0.8)
            image = tf.image.random_contrast(image, 0.4, 1.6)
            image = tf.image.random_saturation(image, 0.4, 1.6)
            image = tf.image.random_hue(image, 0.2)
            return image
        return tf.cond(
            tf.random.uniform([]) < self.proba, lambda: _color_jitter(image), lambda: image)

class ThreeAugment(keras.layers.Layer):
    """Define a simple data augmentation pipeline of three augmentations, following the explaining
    in the paper: For that, this augmentation choses one of:
        - GrayScale: This favors color invariance and give more focus on shapes.
        - Solarization: This adds strong noise on the colour to be more robust to the variation
          of colour intensity and so focus more on shape
        - Gaussian Blur: In order to slightly alter details in the image.
    def call(self, image):
        def _to_gray():
            return tf.image.grayscale_to_rgb(tf.image.rgb_to_grayscale(image))

        def _solarize():
            # Taken of:
            return tf.where(image < 10, image, 255 - image)

        def _gaussian_blur():
            # Taken of:
            s = np.random.random()
            return gaussian_filter(input=image, sigma=s)

        proba = tf.random.uniform([])
        cases = [(proba < 1 / 3, _to_gray), (proba < 2 / 3, _solarize)]
        return, default=_gaussian_blur, name='3-Augment', exclusive=False)

DATA_AUGMENTATION = keras.Sequential([RandomColorJitter(), ThreeAugment()])

[docs]@tf.function def preprocess_image(image, image_size, training=False, data_aug=None): """ ImageNet data preprocessing. Preprocessing includes cropping, and resizing for both training and validation images. Training preprocessing introduces some random distortion of the image to improve accuracy. Args: image (tf.Tensor): input image as a 3-D tensor image_size (tuple): desired image size training (bool, optional): True for training preprocessing, False for validation and inference. Defaults to False. data_aug (keras.Sequential, optional): data augmentation. Defaults to None. Returns: :obj:`tensorflow.Tensor`: preprocessed image """ assert len(image_size) == 2, f"image_size should have 2 elements (H, W), received {image_size}." shape = tf.shape(image) if training: # For training: crop, flip and resize bbox_begin, bbox_size, _ = tf.image.sample_distorted_bounding_box( shape, tf.zeros([0, 0, 4], tf.float32), # force using whole image use_image_if_no_bounding_boxes=True, min_object_covered=0.1, aspect_ratio_range=[0.75, 1.33], area_range=[0.05, 1.0], max_attempts=100) image = tf.slice(image, bbox_begin, bbox_size) image = tf.image.resize(image, image_size) # Make all data augmentation after resize to decrease computational cost image = tf.image.random_flip_left_right(image) if data_aug is not None: image = data_aug(image) else: # For validation/inference: aspect preserving resize and central crop height = tf.cast(shape[0], tf.float32) width = tf.cast(shape[1], tf.float32) # Scale image before cropping, keeping aspect ratio resize_min_h = np.round(image_size[0] * 1.143).astype(np.float32) resize_min_w = np.round(image_size[1] * 1.143).astype(np.float32) min_dim = tf.minimum(height, width) scale_ratio = (resize_min_h / min_dim, resize_min_w / min_dim) # Convert back to int for TF ops new_height = tf.cast(height * scale_ratio[0], tf.int32) new_width = tf.cast(width * scale_ratio[1], tf.int32) image = tf.image.resize(image, [new_height, new_width]) # Second: central crop to desired image_size image = tf.image.resize_with_crop_or_pad(image, image_size[0], image_size[1]) return tf.cast(image, tf.float32)
[docs]def index_to_label(index): """ Function to get an ImageNet label from an index. Args: index: between 0 and 999 Returns: str: a string of comma separated labels """ return imagenet_labels[index]