# data_preprocessing.py

# Copyright (C) 2020 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html

import numpy as np
import os
import tensorflow as tf
import tensorflow_addons as tfa


def _get_label(file_path, one_hot, num_classes, class_names):
    """Derive the label of a sample from the name of its file.

    The file name is expected to follow the format generated by
    'source/generate_source_file.py': the text before the first underscore
    is the class name.

    Args:
        file_path: Str, path to label file.
        one_hot: Bool, if true, return the label as a one-hot encoding.
        num_classes: Int, number of classes in total (depth of the one-hot
            encoding).
        class_names: List of Str, list of all class names.

    Returns:
        If one_hot is true: tf.Tensor, one-hot encoding of the label, for
            example '[0,0,1,0,0]'.
        Otherwise: tf.Tensor, label index, for example '2'.
    """
    # The last path component is the file name.
    path_parts = tf.strings.split(file_path, os.path.sep)
    base_name = path_parts[-1]

    # Everything before the first underscore is the class name.
    name_parts = tf.strings.split(base_name, '_')
    class_name = name_parts[0]

    # Smallest position at which class_names matches the class name.
    matches = tf.where(tf.equal(class_names, class_name))
    idx = tf.reduce_min(matches)

    if not one_hot:
        return idx
    return tf.one_hot(idx, num_classes)


def _decode_img(img):
    """Turn the raw contents of a PNG file into a float image tensor.

    Args:
        img: tf.Tensor, PNG-encoded image of type String.

    Returns:
        img: tf.Tensor, decoded image of type float32.
    """
    # Decode the compressed PNG string into an image tensor.
    decoded = tf.io.decode_png(img)
    # Convert the pixel values to float32.
    as_float = tf.image.convert_image_dtype(decoded, tf.float32)
    return as_float


def process_path(file_path, one_hot, num_classes, class_names):
    """Load the image stored at a path together with its label.

    Args:
        file_path: Str, path to image file.
        one_hot: Bool, if true, return labels in form of one-hot encoding.
        num_classes: Int, number of classes in total.
        class_names: List of Str, list of all class names.

    Returns:
        img: tf.Tensor, image tensor in type tf.float32.
        label: tf.Tensor, label tensor; a one-hot encoding when one_hot is
            true, otherwise the label index.
    """
    # The label is derived from the file name alone.
    label = _get_label(file_path, one_hot, num_classes, class_names)
    # Read the raw file contents and decode them into an image tensor.
    img = _decode_img(tf.io.read_file(file_path))
    return img, label


def process_img_path(file_path):
    """Load the image stored at a path, labelled with its own file name.

    Works like process_path but needs no class information: the last
    component of the path is used as the label.

    Args:
        file_path: Str, path to image file.

    Returns:
        img: tf.Tensor, image tensor in type tf.float32.
        label: tf.Tensor, label tensor holding the file name.
    """
    # The last path component (the file name) serves as the label.
    path_parts = tf.strings.split(file_path, os.path.sep)
    label = path_parts[-1]
    # Read the raw file contents and decode them into an image tensor.
    raw_bytes = tf.io.read_file(file_path)
    return _decode_img(raw_bytes), label


def convert_format(img, label, grayscale_in, grayscale_out):
    """Convert an image between grayscale and RGB when the formats differ.

    When the input and output formats are the same, the image is returned
    unchanged.

    Args:
        img: tf.Tensor, image tensor in type tf.float32.
        label: tf.Tensor, label tensor. Passed through untouched.
        grayscale_in: Bool, format of the input image. If true, handled as
            grayscale image. If false, handled as RGB image.
        grayscale_out: Bool, format of the output image. If true, produce a
            grayscale image. If false, produce an RGB image.

    Returns:
        img: tf.Tensor, image tensor in the requested format.
        label: tf.Tensor, label tensor.
    """
    if grayscale_in:
        if not grayscale_out:
            # Grayscale input, RGB output.
            img = tf.image.grayscale_to_rgb(img)
    elif grayscale_out:
        # RGB input, grayscale output.
        img = tf.image.rgb_to_grayscale(img)
    return img, label


def random_rotate(img, label, stddev):
    """Rotate image by x degrees. Variable x follows a normal distribution
    with mean 0 and 'stddev' as standard deviation. x is truncated to lie
    within plus minus 2 standard deviations (re-sampled if not within
    range) and rounded according to bankers rounding.

    Args:
        img: tf.Tensor, image tensor.
        label: tf.Tensor, label tensor. Passed through untouched.
        stddev: Float, standard deviation of the normal distribution, in
            degrees.

    Returns:
        img: tf.Tensor, image after rotation.
        label: tf.Tensor, label tensor.
    """
    # Samples whose magnitude is more than 2 standard deviations from the
    # mean are dropped and re-picked.
    degree = tf.random.truncated_normal(shape=[], stddev=stddev)
    # Rounds half to even. Also known as bankers rounding.
    degree = tf.math.round(degree)
    # tfa.image.rotate interprets its angle argument in radians, so the
    # sampled whole number of degrees has to be converted before use.
    radians = degree * (np.pi / 180.0)

    return tfa.image.rotate(img, radians), label


def random_zoom(img, label, max_percent, stddev, img_height, img_width):
    """Crop to zoom in on the center of the image by x percent. x is a whole
    number between 0 and 'max_percent'. It is drawn from a normal
    distribution with 'stddev' as standard deviation, truncated to plus
    minus 2 standard deviations (re-sampled if not within range), turned
    into its absolute value, rounded according to bankers rounding and
    finally capped at the largest available zoom level.

    Args:
        img: tf.Tensor, image tensor.
        label: tf.Tensor, label tensor. Passed through untouched.
        max_percent: Float, the maximum percent to zoom in. For example, if
            max_percent = 4.0, at most, zoom in at 96%. Fractional parts
            are ignored; values below 0 are treated as 0.
        stddev: Float, standard deviation of the normal distribution.
        img_height: Int, height of the returned image tensor.
        img_width: Int, width of the returned image tensor.

    Returns:
        img: tf.Tensor, zoomed image of size (img_height, img_width).
        label: tf.Tensor, label tensor.
    """
    # One crop setting per whole percent: 0%, 1%, ..., max_percent%.
    # Integer steps keep the number of settings predictable (np.arange with
    # a fractional step may or may not include the end point), and listing
    # 0% first makes index 0, the most likely draw, the unzoomed image.
    max_level = max(int(max_percent), 0)
    scales = 1.0 - np.arange(max_level + 1) / 100.0
    # Centered boxes in normalized coordinates. The same bounds are used for
    # both axes, so the boxes are valid in [y1, x1, y2, x2] order.
    lower = 0.5 - 0.5 * scales
    upper = 0.5 + 0.5 * scales
    boxes = np.stack([lower, lower, upper, upper], axis=1).astype(np.float32)
    # Get one cropped image per setting, all taken from the single input
    # image (box index 0) and resized to the requested size.
    crops = tf.image.crop_and_resize(
        [img], boxes=boxes,
        box_indices=np.zeros(len(scales), dtype=np.int32),
        crop_size=(img_height, img_width))
    # TODO: Change distribution here
    idx = tf.random.truncated_normal(shape=[], stddev=stddev)
    idx = tf.math.abs(idx)  # idx >= 0
    idx = tf.math.round(idx)  # Bankers rounding
    idx = tf.cast(idx, tf.dtypes.int32)
    # A large stddev can produce an index beyond the last crop setting;
    # cap it so the lookup below never goes out of range.
    idx = tf.math.minimum(idx, max_level)
    img = crops[idx]

    return img, label


def _apply_random_rotate(img, label, stddev):
    """Call the module-level random_rotate function."""
    return random_rotate(img, label, stddev)


def _apply_random_zoom(img, label, max_percent, stddev, height, width):
    """Call the module-level random_zoom function."""
    return random_zoom(img, label, max_percent, stddev, height, width)


def augment(img, label, random_rotate, rotate_stddev, random_zoom,
            zoom_percent, zoom_stddev, height, width):
    """Data augmentation. Two augmentation methods are carried out:
        1. Random rotate: randomly rotate image by x degree. x follows
            normal distribution with 0 as mean as described in config file.
        2. Random zoom: crop (zoom in) on the image by x percent. x follows
            normal distribution with 0 as mean as described in config file.

    Args:
        img: tf.Tensor, image tensor in type tf.float32.
        label: tf.Tensor, label tensor.
        random_rotate: Bool, if true, randomly rotate image. Image dimension
            won't change.
        rotate_stddev: Float, standard deviation of rotation angle in degree,
            which follows normal distribution.
        random_zoom: Bool, if true, randomly zoom in on the image. Image
            dimension won't change.
        zoom_percent: Float, the maximum percentage to zoom in on the image.
        zoom_stddev: Float, standard deviation of zoom-in percentage, which
            follows half-normal distribution.
        height: Int, height of the image.
        width: Int, width of the image.

    Returns:
        img: tf.Tensor: image tensor in type tf.float32.
        label: tf.Tensor: label tensor.
    """
    # The boolean parameters 'random_rotate' and 'random_zoom' hide the
    # module-level functions of the same name inside this body, so the
    # functions are reached through the private helpers above. Both return
    # an (img, label) pair, which is unpacked here.

    # Randomly rotate by -2 to +2 standard deviations.
    if random_rotate:
        img, label = _apply_random_rotate(img, label, rotate_stddev)

    # Randomly zoom in on image by maximum zoom_percent.
    if random_zoom:
        img, label = _apply_random_zoom(img, label, zoom_percent,
                                        zoom_stddev, height, width)

    return img, label


def resize(img, label, height, width):
    """Resize image to the size expected by the Keras model.
    TODO: Add custom CNN models to avoid resizing

    Args:
        img: tf.Tensor, image tensor in type tf.float32.
        label: tf.Tensor, label tensor. Passed through untouched.
        height: Int, intended output height of the image.
        width: Int, intended output width of the image.

    Returns:
        img: tf.Tensor, resized image tensor.
        label: tf.Tensor, label tensor.
    """
    target_size = (height, width)
    resized = tf.image.resize(img, target_size)
    return resized, label