multilayerffnn.h

/***************************************************************************
 *   Copyright (C) 2005-2011 LpzRobots development team                    *
 *    Georg Martius  <georg dot martius at web dot de>                     *
 *    Frank Guettler <guettler at informatik dot uni-leipzig dot de>       *
 *    Frank Hesse    <frank at nld dot ds dot mpg dot de>                  *
 *    Ralf Der       <ralfder at mis dot mpg dot de>                       *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 *                                                                         *
 ***************************************************************************/
#ifndef __MULTILAYERFFNN_H
#define __MULTILAYERFFNN_H

#include <vector>

#include "feedforwardnn.h"
#include "invertablemodel.h"
#include "layer.h"

/// multilayer neural network with configurable activation functions
class MultiLayerFFNN : public FeedForwardNN {
public:

  /**
     @param eps learning rate
     @param layers layer description (the input layer is not specified; it is always linear)
     @param useBypass if true, a direct connection from the input to the output layer is added
     @param someInternalParams if true, only a few parameters are sent to plotting
  */
  MultiLayerFFNN(double eps, const std::vector<Layer>& layers, bool useBypass=false,
                 bool someInternalParams=true);
  virtual ~MultiLayerFFNN(){ }

  /** initialises the network with the given number of input and output units.
      The dimensionality of the output layer is adjusted automatically.
      @param unit_map defines the approximate response of the network
       after initialisation (if unit_map=1 the weights are unit matrices)
      @param randGen pointer to a random generator; if 0, a new one is used
   */
  virtual void init(unsigned int inputDim, unsigned int outputDim,
                    double unit_map = 0.0, RandGen* randGen = 0);
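
  /* Usage sketch (not part of the original documentation): construct a
     network with one tanh hidden layer and a linear output layer, then
     initialise it. This assumes the Layer constructor from layer.h takes
     (size, factor_bias, actfun) and the static activation functions
     declared in FeedForwardNN:

       std::vector<Layer> layers;
       layers.push_back(Layer(10, 0.1, FeedForwardNN::tanh)); // hidden layer
       layers.push_back(Layer(1)); // output layer; size is adjusted by init()
       MultiLayerFFNN net(0.01, layers);
       net.init(4, 2); // 4 inputs, 2 outputs; output layer resized to 2
  */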

  /// passive processing of the input
  virtual const matrix::Matrix process (const matrix::Matrix& input);

  /** performs one learning step and returns the network output before learning
      (process() should be called beforehand) */
  virtual const matrix::Matrix learn (const matrix::Matrix& input,
                                      const matrix::Matrix& nom_output,
                                      double learnRateFactor = 1);
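
  /* Training sketch (illustrative): learn() reuses the activations cached by
     the last forward pass, hence the preceding process() call.

       matrix::Matrix x(4, 1);      // input column vector
       matrix::Matrix target(2, 1); // desired (nominal) output
       net.process(x);              // forward pass, caches activations
       net.learn(x, target);        // one gradient step with rate eps
  */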

  /** response matrix of the neural network at the given input

  \f[ J_{ij} = \frac{\partial y_i}{\partial x_j} \f]
  \f[ J = G_n' W_n G_{n-1}' W_{n-1} \dots G_1' W_1 \f]
  where \f$W_n\f$ is the weight matrix of layer n and
  \f$G'\f$ is a diagonal matrix with \f$G'_{ii} = g'_i\f$ on the diagonal.
  ATTENTION: the input argument is ignored! Call process() before!
  */
  virtual const matrix::Matrix response(const matrix::Matrix& input) const;
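
  /* For a single hidden layer (n=2) the formula above reads
     J = G_2' W_2 G_1' W_1: the derivative of the output nonlinearity, the
     output weights, the derivative of the hidden nonlinearity, and the
     input weights, multiplied in that order (the bypass connection
     presumably contributes a further term when useBypass is set).

       net.process(x);                     // fill the activation caches
       matrix::Matrix J = net.response(x); // outputDim x inputDim Jacobian
  */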

  /** calculates the input shift v for a given output shift xsi via pseudoinversion:

      \f[ o + \xi = \psi(i + v) \f]

      The result is a vector of dimension inputDim.
      ATTENTION: the input argument is ignored! Call process() before!
   */
  virtual const matrix::Matrix inversion(const matrix::Matrix& input, const matrix::Matrix& xsi) const;
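
  /* Sketch (illustrative): move the output towards a target by shifting the input.

       matrix::Matrix y   = net.process(x);        // forward pass (required)
       matrix::Matrix xsi = target - y;            // desired output shift
       matrix::Matrix v   = net.inversion(x, xsi); // input shift via pseudoinverse
       // afterwards psi(x + v) should be approximately y + xsi
  */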


  /// returns the number of input neurons
  virtual unsigned int getInputDim() const {
    return weights[0].getN();
  }
  /// returns the number of output neurons
  virtual unsigned int getOutputDim() const {
    return (weights.rbegin())->getM();
  }

  /// returns activation of the given layer. Layer 0 is the first hidden layer
  virtual const matrix::Matrix& getLayerOutput(unsigned int layer) const {
    assert(layer < layers.size());
    return ys[layer];
  }

  /// damps the weights and the biases by multiplying with (1-damping)
  virtual void damp(double damping);
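
  /* E.g. net.damp(1e-4) scales every weight and bias by (1 - 1e-4),
     a plain weight-decay step, typically applied once per learning step. */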

  /// total number of layers (1 means no hidden units)
  virtual unsigned int getLayerNum() const {
    return layers.size();
  }

  /// layer 0 is the first hidden layer
  virtual const Layer& getLayer(unsigned int layer) const {
    assert(layer < layers.size());
    return layers[layer];
  }

  /// layer 0 is the first hidden layer
  virtual Layer& getLayer(unsigned int layer) {
    assert(layer < layers.size());
    return layers[layer];
  }

  /// weight matrix 0 connects the input with the first hidden layer
  virtual const matrix::Matrix& getWeights(unsigned int to_layer) const {
    assert(to_layer < weights.size());
    return weights[to_layer];
  }

  /// weight matrix 0 connects the input with the first hidden layer
  virtual matrix::Matrix& getWeights(unsigned int to_layer) {
    assert(to_layer < weights.size());
    return weights[to_layer];
  }

  /// Note: layer 0 is the first hidden layer
  virtual const matrix::Matrix& getBias(unsigned int of_layer) const {
    assert(of_layer < bias.size());
    return bias[of_layer];
  }

  /// Note: layer 0 is the first hidden layer
  virtual matrix::Matrix& getBias(unsigned int of_layer) {
    assert(of_layer < bias.size());
    return bias[of_layer];
  }

  /**************  STOREABLE **********************************/
  /// stores the network binary into the file stream
  bool store(FILE* f) const;
  /// restores the network binary from the file stream
  bool restore(FILE* f);


  /// writes the network ASCII into the file stream (not part of the Storeable interface)
  bool write(FILE* f) const;
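
  /* Persistence sketch (file name purely illustrative):

       FILE* f = fopen("net.dat", "wb");
       if (f) { net.store(f); fclose(f); }   // save a binary snapshot
       f = fopen("net.dat", "rb");
       if (f) { net.restore(f); fclose(f); } // load it back
  */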

  /************** Inspectable **********************************/
  virtual iparamkeylist getInternalParamNames() const;
  virtual iparamvallist getInternalParams() const;
  virtual ilayerlist getStructuralLayers() const;
  virtual iconnectionlist getStructuralConnections() const;


  virtual void setSomeInternalParams(bool someInternalParams){
    assert(!initialised);
    this->someInternalParams = someInternalParams;
  }

public:
  double eps; ///< learning rate

  /**
   * sets the activation function (and its derivative and inverse) for ALL layers!
   * @param actfun the activation function to be used
   * @return the activation functions that were used until now
   */
  virtual std::vector<ActivationFunction> setActivationFunction(ActivationFunction actfun);

  /**
   * sets the activation functions (and their derivatives and inverses) for all layers.
   * @note normally you call setActivationFunction() first, get the list of the previously
   * used activation functions, and restore them later with this function
   * @param actfunList the list of activation functions to be used
   */
  virtual void setActivationFunctions(std::vector<ActivationFunction> actfunList);
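
  /* Sketch of the swap-and-restore pattern described above:

       std::vector<ActivationFunction> old =
         net.setActivationFunction(FeedForwardNN::linear); // linearise all layers
       // ... use the temporarily linear network ...
       net.setActivationFunctions(old); // restore the per-layer functions
  */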


protected:
  std::vector<Layer> layers;
  std::vector<matrix::Matrix> weights;
  std::vector<matrix::Matrix> bias;
  std::vector<matrix::Matrix> smallids; // small unit matrices for pseudoinversion
  bool useBypass;
  matrix::Matrix bypassWeights;
  bool someInternalParams;

  matrix::Matrix input;
  std::vector<matrix::Matrix> ys; // activations
  std::vector<matrix::Matrix> zs; // potentials

  double lambda; // regularisation value for the pseudoinverse
  bool initialised;
};

#endif