# Source code for pydeep.base.corruptor

""" This module provides implementations for corrupting the training data. 

    :Implemented:
        - Identity
        - Sampling Binary
        - BinaryNoise
        - Additive Gauss Noise
        - Multiplicative Gauss Noise
        - Dropout
        - Random Permutation
        - KeepKWinner
        - KWinnerTakesAll

    :Info: 
        http://ufldl.stanford.edu/wiki/index.php/Sparse_Coding:_Autoencoder_Interpretation
   
    :Version:
        1.1.0

    :Date:
        13.03.2017

    :Author:
        Jan Melchior

    :Contact:
        JanMelchior@gmx.de

    :License:

        Copyright (C) 2017 Jan Melchior

        This file is part of the Python library PyDeep.

        PyDeep is free software: you can redistribute it and/or modify
        it under the terms of the GNU General Public License as published by
        the Free Software Foundation, either version 3 of the License, or
        (at your option) any later version.

        This program is distributed in the hope that it will be useful,
        but WITHOUT ANY WARRANTY; without even the implied warranty of
        MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
        GNU General Public License for more details.

        You should have received a copy of the GNU General Public License
        along with this program.  If not, see <http://www.gnu.org/licenses/>.
            
"""
import numpy as numx


class Identity(object):
    """ Dummy corruptor object that leaves the data untouched.
    """

    @classmethod
    def corrupt(cls, data):
        """ Returns the data unchanged (no copy is made).

        :param data: Input of the layer.
        :type data: numpy array [num samples, layer dim]

        :return: The very same object that was passed in.
        :rtype: numpy array [num samples, layer dim]
        """
        return data


class AdditiveGaussNoise(object):
    """ An object that corrupts data by adding Gauss noise.
    """

    def __init__(self, mean, std):
        """ Corruptor constructor.

        :param mean: Constant by which the data is shifted.
        :type mean: float

        :param std: Standard deviation of the noise added to the data.
        :type std: float
        """
        self.mean = mean
        self.std = std

    def corrupt(self, data):
        """ The function corrupts the data by adding the constant shift plus
            standard normal noise scaled by the standard deviation.

        :param data: Input of the layer.
        :type data: numpy array [num samples, layer dim]

        :return: Corrupted data.
        :rtype: numpy array [num samples, layer dim]
        """
        noise = numx.random.standard_normal(data.shape) * self.std
        return data + self.mean + noise


class MultiGaussNoise(object):
    """ An object that corrupts data by multiplying it with Gauss noise.
    """

    def __init__(self, mean, std):
        """ Corruptor constructor.

        :param mean: Mean of the multiplicative noise factor.
        :type mean: float

        :param std: Standard deviation of the multiplicative noise factor.
        :type std: float
        """
        self.mean = mean
        self.std = std

    def corrupt(self, data):
        """ The function corrupts the data by multiplying every entry with its
            own factor ``mean + std * n``, where ``n`` is standard normal noise.

        :param data: Input of the layer.
        :type data: numpy array [num samples, layer dim]

        :return: Corrupted data.
        :rtype: numpy array [num samples, layer dim]
        """
        factor = self.mean + numx.random.standard_normal(data.shape) * self.std
        return data * factor


class SamplingBinary(object):
    """ Sample binary states corruption. Every entry is compared against its
        own uniform random number from [0, 1).
    """

    @classmethod
    def corrupt(cls, data):
        """ The function corrupts the data by sampling binary states. An entry
            becomes True if its value exceeds a uniform random number drawn
            from [0, 1), so values in [0, 1] act as activation probabilities.

        :param data: Input of the layer.
        :type data: numpy array [num samples, layer dim]

        :return: Corrupted data (boolean states).
        :rtype: numpy array [num samples, layer dim]
        """
        thresholds = numx.random.random(data.shape)
        return data > thresholds

    
class BinaryNoise(object):
    """ Binary noise corruption. A random 0/1 mask is subtracted from the data
        and the absolute value is returned, which flips the selected states of
        0/1 valued data.
    """

    def __init__(self, percentage):
        """ Corruptor constructor.

        :param percentage: Probability with which each pixel/state is chosen.
        :type percentage: float [0,1]
        """
        self.percentage = percentage

    def corrupt(self, data):
        """ The function corrupts the data.

        :param data: Input of the layer.
        :type data: numpy array [num samples, layer dim]

        :return: Corrupted data, i.e. ``|data - mask|`` for a Bernoulli mask.
        :rtype: numpy array [num samples, layer dim]
        """
        # One Bernoulli draw per entry; a 1 marks an entry to be flipped.
        flip_mask = numx.random.binomial(1, self.percentage, data.shape)
        return numx.abs(data - flip_mask)

    
class Dropout(object):
    """ Dropout (zero out) corruption. Surviving entries are rescaled by
        1 / (1 - dropout_percentage).
    """

    def __init__(self, dropout_percentage=0.2):
        """ Corruptor constructor.

        :param dropout_percentage: Probability with which each entry is set to zero.
        :type dropout_percentage: float
        """
        self.dropout_percentage = dropout_percentage

    def corrupt(self, data):
        """ The function corrupts the data.

        :param data: Input of the layer.
        :type data: numpy array [num samples, layer dim]

        :return: Corrupted data.
        :rtype: numpy array [num samples, layer dim]
        """
        keep_probability = 1.0 - self.dropout_percentage
        if keep_probability == 0.0:
            # Everything is dropped. Without this guard the rescaling below
            # would compute 0 / 0 and return NaN for every entry.
            return numx.zeros(data.shape)
        keep_mask = numx.random.binomial(1, keep_probability, data.shape)
        return data * keep_mask / keep_probability


class RandomPermutation(object):
    """ RandomPermutation corruption, a fix number of units change their activation values.
    """

    def __init__(self, permutation_percentage=0.2):
        """ Corruptor constructor.

        :param permutation_percentage: Percentage of states to permute.
        :type permutation_percentage: float
        """
        self.permutation_percentage = permutation_percentage

    def corrupt(self, data):
        """ The function corrupts the data. For every sample two disjoint
            random index sets of equal size are drawn and the values at these
            positions are exchanged. The input array is not modified.

        :param data: Input of the layer.
        :type data: numpy array [num samples, layer dim]

        :return: Corrupted data.
        :rtype: numpy array [num samples, layer dim]
        """
        result = numx.copy(data)
        # Each switch moves two values, hence the factor 0.5.
        num_switches = int(data.shape[1] * self.permutation_percentage * 0.5)
        for d in range(data.shape[0]):
            # Two consecutive slices of one random permutation are guaranteed
            # to be disjoint, so no index is swapped twice.
            order = numx.random.permutation(data.shape[1])
            first = order[:num_switches]
            second = order[num_switches:2 * num_switches]
            result[d, first] = data[d, second]
            result[d, second] = data[d, first]
        return result


class KeepKWinner(object):
    """ Implements K Winner stay. Keep the k max values and set the rest to 0.
    """

    def __init__(self, k=10, axis=0):
        """ Corruptor constructor.

        :param k: Keep the k max values and set the rest to 0.
        :type k: int

        :param axis: Axis = 0 across mini batch, axis = 1 across hidden units
        :type axis: int
        """
        self.k = k
        self.axis = axis

    def _winner_mask(self, data):
        """ Computes the boolean mask of the k largest values along the axis.

        :param data: Input of the layer.
        :type data: numpy array [num samples, layer dim]

        :return: Mask that is True for every entry that is a winner.
        :rtype: numpy array [num samples, layer dim]
        """
        size = data.shape[self.axis]
        # Asking for more winners than there are entries keeps all of them.
        k = min(self.k, size)
        if k <= 0:
            # No winners requested. Indexing the sorted values with -0 would
            # select the minimum and wrongly keep every entry.
            return numx.zeros(data.shape, dtype=bool)
        ordered = numx.sort(data, axis=self.axis)
        # The k-th largest value along the axis; take() with a list index keeps
        # the axis, so the threshold broadcasts against data.
        threshold = numx.take(ordered, [size - k], axis=self.axis)
        # Entries tied with the threshold are all kept.
        return data >= threshold

    def corrupt(self, data):
        """ The function corrupts the data. Winners keep their value, all
            other entries are set to zero. If k is zero or negative nothing
            is kept, if k exceeds the axis length everything is kept.

        :param data: Input of the layer.
        :type data: numpy array [num samples, layer dim]

        :return: Corrupted data.
        :rtype: numpy array [num samples, layer dim]
        """
        return data * self._winner_mask(data)


class KWinnerTakesAll(object):
    """ Implements K Winner takes all. Set the k max values to 1 and the rest to 0.
    """

    def __init__(self, k=10, axis=0):
        """ Corruptor constructor.

        :param k: Number of max values that are set to 1, the rest is set to 0.
        :type k: int

        :param axis: Axis = 0 across mini batch, axis = 1 across hidden units
        :type axis: int
        """
        self.k = k
        self.axis = axis

    def _winner_mask(self, data):
        """ Computes the boolean mask of the k largest values along the axis.

        :param data: Input of the layer.
        :type data: numpy array [num samples, layer dim]

        :return: Mask that is True for every entry that is a winner.
        :rtype: numpy array [num samples, layer dim]
        """
        size = data.shape[self.axis]
        # Asking for more winners than there are entries makes all of them win.
        k = min(self.k, size)
        if k <= 0:
            # No winners requested. Indexing the sorted values with -0 would
            # select the minimum and wrongly mark every entry as a winner.
            return numx.zeros(data.shape, dtype=bool)
        ordered = numx.sort(data, axis=self.axis)
        # The k-th largest value along the axis; take() with a list index keeps
        # the axis, so the threshold broadcasts against data.
        threshold = numx.take(ordered, [size - k], axis=self.axis)
        # Entries tied with the threshold all count as winners.
        return data >= threshold

    def corrupt(self, data):
        """ The function corrupts the data. Winners are set to 1.0, all other
            entries to 0.0. If k is zero or negative there are no winners, if
            k exceeds the axis length every entry wins.

        :param data: Input of the layer.
        :type data: numpy array [num samples, layer dim]

        :return: Corrupted data.
        :rtype: numpy array [num samples, layer dim]
        """
        return 1.0 * self._winner_mask(data)