""" This module provides implementations for corrupting the training data.
:Implemented:
- Identity
- Sampling Binary
- BinaryNoise
- Additive Gauss Noise
- Multiplicative Gauss Noise
- Dropout
- Random Permutation
- KeepKWinner
- KWinnerTakesAll
:Info:
http://ufldl.stanford.edu/wiki/index.php/Sparse_Coding:_Autoencoder_Interpretation
:Version:
1.1.0
:Date:
13.03.2017
:Author:
Jan Melchior
:Contact:
JanMelchior@gmx.de
:License:
Copyright (C) 2017 Jan Melchior
This file is part of the Python library PyDeep.
PyDeep is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import numpy as numx
class Identity(object):
    """ Dummy corruptor object that leaves the data untouched.

    """

    @classmethod
    def corrupt(cls, data):
        """ Returns the data unchanged (no copy is made).

        :param data: Input of the layer.
        :type data: numpy array [num samples, layer dim]

        :return: The very same object that was passed in.
        :rtype: numpy array [num samples, layer dim]
        """
        return data
class AdditiveGaussNoise(object):
    """ An object that corrupts data by adding Gauss noise.

    """

    def __init__(self, mean, std):
        """ Corruptor constructor.

        :param mean: Constant offset that is added to the data.
        :type mean: float

        :param std: Standard deviation of the Gauss noise added to the data.
        :type std: float
        """
        self.mean = mean
        self.std = std

    def corrupt(self, data):
        """ Corrupts the data by adding ``mean`` plus zero-mean Gauss noise scaled by ``std``.

        :param data: Input of the layer.
        :type data: numpy array [num samples, layer dim]

        :return: Corrupted data.
        :rtype: numpy array [num samples, layer dim]
        """
        # One independent standard-normal draw per element of data.
        noise = numx.random.standard_normal(data.shape)
        return data + self.mean + noise * self.std
class MultiGaussNoise(object):
    """ An object that corrupts data by multiplying it with Gauss noise.

    """

    def __init__(self, mean, std):
        """ Corruptor constructor.

        :param mean: Mean of the multiplicative noise factor.
        :type mean: float

        :param std: Standard deviation of the multiplicative noise factor.
        :type std: float
        """
        self.mean = mean
        self.std = std

    def corrupt(self, data):
        """ Corrupts the data by multiplying every element with an independent
            Gauss distributed factor (mean ``mean``, standard deviation ``std``).

        :param data: Input of the layer.
        :type data: numpy array [num samples, layer dim]

        :return: Corrupted data.
        :rtype: numpy array [num samples, layer dim]
        """
        # One independent noise factor per element of data.
        factor = self.mean + numx.random.standard_normal(data.shape) * self.std
        return data * factor
class SamplingBinary(object):
    """ Sample binary states corruption: every value is used as the probability
        of the corresponding state being active.

    """

    @classmethod
    def corrupt(cls, data):
        """ Samples binary states by comparing the data element-wise with
            uniform random numbers drawn from [0, 1).

        :param data: Input of the layer.
        :type data: numpy array [num samples, layer dim]

        :return: Sampled states, True where the data value exceeds the random draw.
        :rtype: boolean numpy array [num samples, layer dim]
        """
        thresholds = numx.random.random(data.shape)
        return data > thresholds
class BinaryNoise(object):
    """ Binary Noise: randomly chosen states are flipped.

    """

    def __init__(self, percentage):
        """ Corruptor constructor.

        :param percentage: Probability with which each pixel/state is chosen to be flipped.
        :type percentage: float [0,1]
        """
        self.percentage = percentage

    def corrupt(self, data):
        """ Corrupts the data by computing ``abs(data - mask)``, where mask is a
            Bernoulli sample with success probability ``percentage``. For binary
            data this flips the chosen states (0 -> 1, 1 -> 0).

        :param data: Input of the layer.
        :type data: numpy array [num samples, layer dim]

        :return: Corrupted data.
        :rtype: numpy array [num samples, layer dim]
        """
        # mask is 1 where a state is selected for flipping and 0 elsewhere.
        mask = numx.random.binomial(1, self.percentage, data.shape)
        return numx.abs(data - mask)
class Dropout(object):
    """ Dropout (zero out) corruption.

    """

    def __init__(self, dropout_percentage=0.2):
        """ Corruptor constructor.

        :param dropout_percentage: Probability with which each unit is set to zero.
        :type dropout_percentage: float
        """
        self.dropout_percentage = dropout_percentage

    def corrupt(self, data):
        """ Corrupts the data by zeroing each element with probability
            ``dropout_percentage`` and dividing the surviving elements by the
            keep probability ``1 - dropout_percentage``.

        :param data: Input of the layer.
        :type data: numpy array [num samples, layer dim]

        :return: Corrupted data.
        :rtype: numpy array [num samples, layer dim]
        """
        keep_probability = 1.0 - self.dropout_percentage
        if keep_probability == 0.0:
            # Everything is dropped. The general formula would evaluate 0 / 0
            # and return NaN for every element, so return zeros explicitly.
            return numx.zeros(data.shape)
        keep_mask = numx.random.binomial(1, keep_probability, data.shape)
        return data * keep_mask / keep_probability
class RandomPermutation(object):
    """ RandomPermutation corruption, a fix number of units change their activation values.

    """

    def __init__(self, permutation_percentage=0.2):
        """ Corruptor constructor.

        :param permutation_percentage: Percentage of states to permute.
        :type permutation_percentage: float
        """
        self.permutation_percentage = permutation_percentage

    def corrupt(self, data):
        """ Corrupts the data by swapping, independently for every sample, the
            values of randomly chosen disjoint pairs of units. The number of
            pairs is ``int(layer dim * permutation_percentage * 0.5)``, so about
            ``permutation_percentage`` of the units take part in a swap.

        :param data: Input of the layer.
        :type data: numpy array [num samples, layer dim]

        :return: Corrupted data (a copy, the input is not modified).
        :rtype: numpy array [num samples, layer dim]
        """
        result = numx.copy(data)
        num_units = data.shape[1]
        # Each swap involves two units, hence the factor 0.5.
        num_switches = int(num_units * self.permutation_percentage * 0.5)
        for sample in range(data.shape[0]):
            # A fresh random order per sample; its first two chunks of length
            # num_switches are disjoint index sets that exchange their values.
            order = numx.random.permutation(numx.arange(num_units))
            first = order[0:num_switches]
            second = order[num_switches:2 * num_switches]
            result[sample][first] = data[sample][second]
            result[sample][second] = data[sample][first]
        return result
class KeepKWinner(object):
    """ Implements K Winner stay. Keep the k max values and set the rest to 0.

    """

    def __init__(self, k=10, axis=0):
        """ Corruptor constructor.

        :param k: Keep the k max values and set the rest to 0.
        :type k: int

        :param axis: Axis = 0 across mini batch, axis = 1 across hidden units.
        :type axis: int
        """
        self.k = k
        self.axis = axis

    def corrupt(self, data):
        """ Corrupts the data by keeping the k largest values along the chosen
            axis and setting all other values to 0. Values equal to the k-th
            largest value are kept as well, so more than k values can survive
            when there are ties. An IndexError is raised if k exceeds the size
            of the data along the chosen axis.

        :param data: Input of the layer.
        :type data: numpy array [num samples, layer dim]

        :return: Corrupted data.
        :rtype: numpy array [num samples, layer dim]
        """
        if self.k <= 0:
            # Index -0 would select the smallest value as threshold and keep
            # everything; no winners means that nothing is kept.
            return numx.zeros_like(data)
        ordered = numx.sort(data, axis=self.axis)
        # The k-th largest value along the axis, shaped to broadcast against data.
        if self.axis == 0:
            threshold = numx.atleast_2d(ordered[-self.k, :])
        else:
            threshold = numx.atleast_2d(ordered[:, -self.k]).T
        return data * (data >= threshold)
class KWinnerTakesAll(object):
    """ Implements K Winner takes all. Set the k max values to 1 and the rest to 0.

    """

    def __init__(self, k=10, axis=0):
        """ Corruptor constructor.

        :param k: Number of max values that are set to 1, the rest is set to 0.
        :type k: int

        :param axis: Axis = 0 across mini batch, axis = 1 across hidden units.
        :type axis: int
        """
        self.k = k
        self.axis = axis

    def corrupt(self, data):
        """ Corrupts the data by setting the k largest values along the chosen
            axis to 1.0 and all other values to 0.0. Values equal to the k-th
            largest value are set to 1.0 as well, so more than k winners are
            possible when there are ties. An IndexError is raised if k exceeds
            the size of the data along the chosen axis.

        :param data: Input of the layer.
        :type data: numpy array [num samples, layer dim]

        :return: Corrupted data, a float array of zeros and ones.
        :rtype: numpy array [num samples, layer dim]
        """
        if self.k <= 0:
            # Index -0 would select the smallest value as threshold and mark
            # everything as winner; with no winners the result is all zeros.
            return numx.zeros(numx.shape(data))
        ordered = numx.sort(data, axis=self.axis)
        # The k-th largest value along the axis, shaped to broadcast against data.
        if self.axis == 0:
            threshold = numx.atleast_2d(ordered[-self.k, :])
        else:
            threshold = numx.atleast_2d(ordered[:, -self.k]).T
        return 1.0 * (data >= threshold)