Source code for pydeep.ae.trainer

''' This module provides implementations for training different variants of Auto-encoders.
    Modifications of standard gradient descent are provided (centering, denoising, dropout,
    sparseness, contractiveness, slowness, L1-decay, L2-decay, momentum, gradient restriction).

    :Implemented:
        - GDTrainer

    :Info:
        http://ufldl.stanford.edu/wiki/index.php/Sparse_Coding:_Autoencoder_Interpretation

    :Version:
        1.0

    :Date:
        21.01.2018

    :Author:
        Jan Melchior

    :Contact:
        JanMelchior@gmx.de

    :License:

        Copyright (C) 2018 Jan Melchior

        This program is free software: you can redistribute it and/or modify
        it under the terms of the GNU General Public License as published by
        the Free Software Foundation, either version 3 of the License, or
        (at your option) any later version.

        This program is distributed in the hope that it will be useful,
        but WITHOUT ANY WARRANTY; without even the implied warranty of
        MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
        GNU General Public License for more details.

        You should have received a copy of the GNU General Public License
        along with this program.  If not, see <http://www.gnu.org/licenses/>.

'''
import numpy as numx
import pydeep.base.numpyextension as npExt
import pydeep.ae.model as MODEL
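
# Sketch of the per-parameter update rule applied in GDTrainer._train below
# (gradient descent with momentum; the L1/L2 decay terms and the optional norm
# restriction only act on the weight matrix, i.e. the first parameter):
#
#     update[i]  = momentum[i] * update[i] - epsilon[i] * gradient[i]
#     update[w] -= epsilon[w] * (reg_L1Norm * sign(w) + reg_L2Norm * w)
#     parameters = parameters + update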

class GDTrainer(object):
    ''' Auto encoder trainer using gradient descent. '''
    def __init__(self, model):
        ''' The constructor takes the model as input.

        :Parameters:
            model: An auto-encoder object which should be trained.
                   -type: AutoEncoder

        '''
        # Store the passed model
        if isinstance(model, MODEL.AutoEncoder):
            self.model = model
        else:
            raise Exception("Model has to be an Auto-encoder object!")

        # Count the number of parameters
        parameters = self.model.get_parameters()
        self.num_parameters = len(parameters)

        # Storage variables for the gradients
        self.parameter_updates = []
        for i in range(self.num_parameters):
            self.parameter_updates.append(numx.zeros((parameters[i].shape[0],
                                                       parameters[i].shape[1]),
                                                      dtype=model.dtype))
    def _train(self,
               data,
               epsilon,
               momentum,
               update_visible_offsets,
               update_hidden_offsets,
               corruptor,
               reg_L1Norm,
               reg_L2Norm,
               reg_sparseness,
               desired_sparseness,
               reg_contractive,
               reg_slowness,
               data_next,
               restrict_gradient,
               restriction_norm):
        ''' The training for one batch is performed using gradient descent.

        :Parameters:
            data:                    The training data.
                                     -type: numpy array [num samples, input dim]

            epsilon:                 The learning rate.
                                     -type: numpy array [num parameters]

            momentum:                The momentum term.
                                     -type: numpy array [num parameters]

            update_visible_offsets:  The update step size for the model's visible offsets.
                                     Good value if functionality is used: 0.001
                                     -type: float

            update_hidden_offsets:   The update step size for the model's hidden offsets.
                                     Good value if functionality is used: 0.001
                                     -type: float

            corruptor:               Defines if and how the data gets corrupted
                                     (e.g. Gauss noise, dropout, Max out).
                                     -type: corruptor

            reg_L1Norm:              The parameter for the L1 regularization.
                                     -type: float

            reg_L2Norm:              The parameter for the L2 regularization,
                                     also known as weight decay.
                                     -type: float

            reg_sparseness:          The parameter (epsilon) for the sparseness regularization.
                                     -type: float

            desired_sparseness:      Desired average hidden activation.
                                     -type: float

            reg_contractive:         The parameter (epsilon) for the contractive regularization.
                                     -type: float

            reg_slowness:            The parameter (epsilon) for the slowness regularization.
                                     -type: float

            data_next:               The next training data in the sequence.
                                     -type: numpy array [num samples, input dim]

            restrict_gradient:       If a scalar is given, the norm of the weight gradient
                                     is restricted to stay below this value.
                                     -type: None, float

            restriction_norm:        Restricts the column norm, row norm or matrix norm.
                                     -type: string: 'Cols', 'Rows', 'Mat'

        '''
        x_next = None
        h_next = None
        a_h_next = None

        # Forward propagation; if a corruptor is given, the data gets corrupted
        if corruptor is None:
            x = data
            x_next = data_next
            a_h, h = self.model._encode(x)
            a_y, y = self.model._decode(h)
            if reg_slowness > 0.0 and data_next is not None:
                a_h_next, h_next = self.model._encode(x_next)
        else:
            if isinstance(corruptor, list):
                # Separate corruptors for input, hidden and output layer
                x = corruptor[0].corrupt(data)
                a_h, h = self.model._encode(x)
                h = corruptor[1].corrupt(h)
                a_y, y = self.model._decode(h)
                y = corruptor[2].corrupt(y)
                if reg_slowness > 0.0 and data_next is not None:
                    x_next = corruptor[0].corrupt(data_next)
                    a_h_next, h_next = self.model._encode(x_next)
            else:
                # Same corruptor for all layers
                x = corruptor.corrupt(data)
                a_h, h = self.model._encode(x)
                h = corruptor.corrupt(h)
                a_y, y = self.model._decode(h)
                y = corruptor.corrupt(y)
                if reg_slowness > 0.0 and data_next is not None:
                    x_next = corruptor.corrupt(data_next)
                    a_h_next, h_next = self.model._encode(x_next)

        # Update offsets
        mean_x = 0.0
        mean_h = 0.0
        if update_visible_offsets > 0.0:
            mean_x = numx.mean(x, axis=0).reshape(1, self.model.input_dim)
        if update_hidden_offsets > 0.0:
            mean_h = numx.mean(h, axis=0).reshape(1, self.model.output_dim)
        self.model.update_offsets(mean_x,
                                  mean_h,
                                  update_visible_offsets,
                                  update_hidden_offsets)

        # Get the gradients for the model
        gradients = self.model._get_gradients(data, a_h, h, a_y, y,
                                              reg_contractive,
                                              reg_sparseness,
                                              desired_sparseness,
                                              reg_slowness,
                                              x_next,
                                              a_h_next,
                                              h_next)

        # Adapt the parameters: momentum term plus scaled gradient
        for i in range(self.num_parameters):
            self.parameter_updates[i] *= momentum[i]
            self.parameter_updates[i] -= epsilon[i] * gradients[i]

        # Add weight decay, L1 norm
        if reg_L1Norm != 0:
            self.parameter_updates[0] -= (epsilon[0] * reg_L1Norm
                                          * numx.sign(self.model.w))

        # Add weight decay, L2 norm
        if reg_L2Norm != 0:
            self.parameter_updates[0] -= (epsilon[0] * reg_L2Norm * self.model.w)

        # Restrict the norm of the weight gradient if requested
        if numx.isscalar(restrict_gradient):
            if restrict_gradient > 0:
                if restriction_norm == 'Cols':
                    typ = 0
                if restriction_norm == 'Rows':
                    typ = 1
                if restriction_norm == 'Mat':
                    typ = None
                self.parameter_updates[0] = npExt.restrict_norms(
                    self.parameter_updates[0], restrict_gradient, typ)

        # Update the parameters with the calculated gradient
        self.model.update_parameters(self.parameter_updates)
    def train(self,
              data,
              num_epochs=1,
              epsilon=0.1,
              momentum=0.0,
              update_visible_offsets=0.0,
              update_hidden_offsets=0.0,
              corruptor=None,
              reg_L1Norm=0.0,
              reg_L2Norm=0.0,
              reg_sparseness=0.0,
              desired_sparseness=0.01,
              reg_contractive=0.0,
              reg_slowness=0.0,
              data_next=None,
              restrict_gradient=False,
              restriction_norm='Mat'):
        ''' Trains the model for the given number of epochs using gradient descent.

        :Parameters:
            data:                    The data used for training.
                                     -type: numpy array or list of numpy arrays
                                            [num samples, input dim]

            num_epochs:              Number of epochs to train.
                                     -type: int

            epsilon:                 The learning rate.
                                     -type: float or numpy array [num parameters]

            momentum:                The momentum term.
                                     -type: float or numpy array [num parameters]

            update_visible_offsets:  The update step size for the model's visible offsets.
                                     Good value if functionality is used: 0.001
                                     -type: float

            update_hidden_offsets:   The update step size for the model's hidden offsets.
                                     Good value if functionality is used: 0.001
                                     -type: float

            corruptor:               Defines if and how the data gets corrupted.
                                     -type: corruptor

            reg_L1Norm:              The parameter for the L1 regularization.
                                     -type: float

            reg_L2Norm:              The parameter for the L2 regularization,
                                     also known as weight decay.
                                     -type: float

            reg_sparseness:          The parameter (epsilon) for the sparseness regularization.
                                     -type: float

            desired_sparseness:      Desired average hidden activation.
                                     -type: float

            reg_contractive:         The parameter (epsilon) for the contractive regularization.
                                     -type: float

            reg_slowness:            The parameter (epsilon) for the slowness regularization.
                                     -type: float

            data_next:               The next training data in the sequence.
                                     -type: numpy array [num samples, input dim]

            restrict_gradient:       If a scalar is given, the norm of the weight gradient
                                     is restricted to stay below this value.
                                     -type: None, float

            restriction_norm:        Restricts the column norm, row norm or matrix norm.
                                     -type: string: 'Cols', 'Rows', 'Mat'

        '''
        # Set learning rates (expand a scalar to one value per parameter)
        if numx.isscalar(epsilon):
            epsilon = numx.zeros(self.num_parameters) + epsilon

        # Set momenta (expand a scalar to one value per parameter)
        if numx.isscalar(momentum):
            momentum = numx.zeros(self.num_parameters) + momentum

        if isinstance(data, list):
            for _ in range(num_epochs):
                # Gradient update for all batches
                for batch in data:
                    self._train(data=batch,
                                epsilon=epsilon,
                                momentum=momentum,
                                update_visible_offsets=update_visible_offsets,
                                update_hidden_offsets=update_hidden_offsets,
                                corruptor=corruptor,
                                reg_L1Norm=reg_L1Norm,
                                reg_L2Norm=reg_L2Norm,
                                reg_sparseness=reg_sparseness,
                                desired_sparseness=desired_sparseness,
                                reg_contractive=reg_contractive,
                                reg_slowness=reg_slowness,
                                data_next=data_next,
                                restrict_gradient=restrict_gradient,
                                restriction_norm=restriction_norm)
        else:
            for _ in range(num_epochs):
                self._train(data=data,
                            epsilon=epsilon,
                            momentum=momentum,
                            update_visible_offsets=update_visible_offsets,
                            update_hidden_offsets=update_hidden_offsets,
                            corruptor=corruptor,
                            reg_L1Norm=reg_L1Norm,
                            reg_L2Norm=reg_L2Norm,
                            reg_sparseness=reg_sparseness,
                            desired_sparseness=desired_sparseness,
                            reg_contractive=reg_contractive,
                            reg_slowness=reg_slowness,
                            data_next=data_next,
                            restrict_gradient=restrict_gradient,
                            restriction_norm=restriction_norm)
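

# Minimal usage sketch: train a sparse auto-encoder with weight decay and momentum
# on random toy data. The AutoEncoder constructor arguments below (number of
# visible/hidden units) are assumptions about pydeep.ae.model.AutoEncoder; see
# that module for the exact signature and further options.
if __name__ == '__main__':
    numx.random.seed(42)

    # Random toy data: 100 samples with 16 input dimensions.
    train_data = numx.random.rand(100, 16)

    # Build a small auto-encoder (16 visible, 8 hidden units; assumed signature).
    ae = MODEL.AutoEncoder(number_visibles=16, number_hiddens=8)

    # Train for a few epochs with sparseness, L2 weight decay, momentum and
    # a restriction on the norm of the weight gradient.
    trainer = GDTrainer(ae)
    trainer.train(data=train_data,
                  num_epochs=10,
                  epsilon=0.1,
                  momentum=0.9,
                  reg_sparseness=0.1,
                  desired_sparseness=0.01,
                  reg_L2Norm=0.0002,
                  restrict_gradient=0.1,
                  restriction_norm='Mat')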