multilayerffnn.h

/***************************************************************************
 *   Copyright (C) 2005 by Robot Group Leipzig                             *
 *    martius@informatik.uni-leipzig.de                                    *
 *    fhesse@informatik.uni-leipzig.de                                     *
 *    der@informatik.uni-leipzig.de                                        *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************
 *                                                                         *
 *  DESCRIPTION                                                            *
 *                                                                         *
 *   $Log: multilayerffnn.h,v $
 *   Revision 1.24  2008/05/30 11:57:12  martius
 *   do processing of inputs in learning function if network is not
 *    activated with this input before (was a very probable mistake)
 *
 *   Revision 1.23  2008/05/27 13:23:42  guettler
 *   multilayerffnn: inserted function for setting activation function of all layers
 *   from outside and back
 *
 *   Revision 1.22  2008/05/02 17:20:04  martius
 *   *** empty log message ***
 *
 *   Revision 1.21  2008/04/17 14:54:45  martius
 *   randomGen added, which is a random generator with long period and an
 *    internal state. Each Agent has an instance and passes it to the controller
 *    and the wiring. This is good for
 *   a) repeatability on agent basis,
 *   b) parallel execution as done in ode_robots
 *
 *   Revision 1.20  2008/02/08 13:38:05  der
 *   abstract model is inspectable
 *
 *   Revision 1.19  2007/12/13 16:46:49  martius
 *   new actfuns
 *
 *   Revision 1.18  2007/12/11 14:23:32  martius
 *   some internal params
 *
 *   Revision 1.17  2007/12/09 16:24:25  martius
 *   direct access to layers and weights possible
 *
 *   Revision 1.16  2007/09/06 18:52:03  martius
 *   write (ascii store)
 *
 *   Revision 1.15  2007/08/24 12:03:37  martius
 *   getLayerNum
 *
 *   Revision 1.14  2007/06/08 15:47:17  martius
 *   eps public
 *
 *   Revision 1.13  2007/04/03 11:21:40  martius
 *   layer has inverse of actfun
 *   new constructor
 *
 *   Revision 1.12  2007/04/02 15:24:23  der
 *   inversion
 *
 *   Revision 1.11  2007/02/23 15:14:17  martius
 *   *** empty log message ***
 *
 *   Revision 1.10  2007/02/23 09:40:46  der
 *   regularisation used from regularisation.h
 *
 *   Revision 1.9  2007/02/20 15:41:06  martius
 *   big model stuff, elman and co
 *
 *   Revision 1.8  2006/12/21 11:44:17  martius
 *   commenting style for doxygen //< -> ///<
 *   FOREACH and FOREACHC are macros for collection iteration
 *
 *   Revision 1.7  2006/11/23 08:43:46  martius
 *   bugfix in store and restore
 *
 *   Revision 1.6  2006/07/27 15:22:18  martius
 *   activations stored (processing must precede learning and response calculation)
 *
 *
 *                                                                         *
 ***************************************************************************/
#ifndef __MULTILAYERFFNN_H
#define __MULTILAYERFFNN_H

#include <vector>

#include "feedforwardnn.h"
#include "invertablemodel.h"

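/** describes one layer of a MultiLayerFFNN: its size, bias factor,
    and activation function (with derivative and inverse).

    A minimal construction sketch (the sizes and the tanh activation are
    illustrative; see FeedForwardNN for the activation functions that are
    actually provided):
    @code
    Layer hidden(10, 0.1, FeedForwardNN::tanh); // 10 neurons, bias factor 0.1
    Layer output(2);                            // 2 linear neurons (defaults)
    @endcode
*/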
class Layer : public Storeable {
public:
  /** constructor for Layer.
      The derivative and inverse of the activation function are derived automatically.
      @param size number of neurons
      @param factor_bias size of the bias neuron; if 0, no bias is used
      @param actfun activation function, see also FeedForwardNN
  */
  Layer(int size, double factor_bias=0.1,
        ActivationFunction actfun = FeedForwardNN::linear);

  /** obsolete, use the other constructor for Layer.
      @param size number of neurons
      @param factor_bias size of the bias neuron; if 0, no bias is used
      @param actfun activation function, see also FeedForwardNN
      @param dactfun derivative of the activation function (should be consistent with actfun)
  */
  Layer(int size, double factor_bias,
        ActivationFunction actfun,
        ActivationFunction dactfun) {
    fprintf(stderr, "%s %s\n", "MultiLayerFFNN, Layer::Layer(): this constructor is obsolete!",
            "Please use the one without dactfun now!");
    exit(1);
  }

  /***STOREABLE ******/
  /// stores the layer binary into the given file stream
  bool store(FILE* f) const;
  /// restores the layer binary from the given file stream
  bool restore(FILE* f);

  /// sets the activation function of the layer
  void setActFun(ActivationFunction actfun);

  int size;
  double factor_bias;
  ActivationFunction actfun;  ///< callback activation function
  ActivationFunction dactfun; ///< first derivative of the activation function
  InvActivationFunction invactfun; ///< inverse of the activation function

  /// prints the Layer data structure
  friend std::ostream& operator<<(std::ostream& , const Layer&);
};

/// multilayer neural network with configurable activation functions
class MultiLayerFFNN : public FeedForwardNN {
public:
  /**
     @param eps learning rate
     @param layers layer description (the input layer is not specified; it is always linear)
     @param useBypass if true, a connection from the input to the output layer is included
     @param someInternalParams if true, only a few parameters are sent for plotting
  */
  MultiLayerFFNN(double eps, const std::vector<Layer>& layers, bool useBypass=false,
                 bool someInternalParams=true);
  virtual ~MultiLayerFFNN(){ }

  /** initialisation of the network with the given number of input and output units.
      The dimensionality of the output layer is adjusted automatically.
      @param unit_map defines the approximate response of the network
       after initialisation (if unit_map=1 the weights are unit matrices).
      @param randGen pointer to a random generator; if 0, a new one is used
   */
  virtual void init(unsigned int inputDim, unsigned int outputDim,
                    double unit_map = 0.0, RandGen* randGen = 0);

  /// passive processing of the input
  virtual const matrix::Matrix process (const matrix::Matrix& input);

  /** performs one learning step and returns the network output before learning
      (process should be called beforehand) */
  virtual const matrix::Matrix learn (const matrix::Matrix& input,
                                      const matrix::Matrix& nom_output,
                                      double learnRateFactor = 1);

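  /* A minimal usage sketch (the dimensions, learning rate, and the tanh
     activation are illustrative; x and y stand for an input matrix and a
     target output matrix of matching dimensions):

       std::vector<Layer> layers;
       layers.push_back(Layer(5, 0.1, FeedForwardNN::tanh)); // hidden layer
       layers.push_back(Layer(1));          // output layer (resized by init)
       MultiLayerFFNN net(0.01, layers);
       net.init(3, 2);                      // 3 inputs, 2 outputs
       net.process(x);                      // forward pass, stores activations
       net.learn(x, y);                     // one learning step towards y
  */
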
  /** response matrix of the neural network at the given input

  \f[ J_{ij} = \frac{\partial y_i}{\partial x_j} \f]
  \f[ J = G_n' W_n G_{n-1}' W_{n-1} \dots G_1' W_1 \f]
  where \f$ W_n \f$ is the weight matrix of layer \f$ n \f$ and
  \f$ G' \f$ is a diagonal matrix with \f$ G'_{ii} = g'_i \f$ as values on the diagonal.
  ATTENTION: input is ignored! Use process() before!
  */
  virtual const matrix::Matrix response(const matrix::Matrix& input) const;

  /** calculates the input shift v for a given output shift xsi via pseudoinversion.

      \f[ o + \xi = \psi(i+v) \f]

      The result is a vector of dimension inputdim.
      ATTENTION: input is ignored! Use process() before!
   */
  virtual const matrix::Matrix inversion(const matrix::Matrix& input, const matrix::Matrix& xsi) const;

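  /* Both response() and inversion() use the activations stored by the
     last forward pass, so process() must be called first. A sketch
     (x and xsi are illustrative matrices of fitting dimensions):

       net.process(x);                           // stores activations for x
       matrix::Matrix J = net.response(x);       // Jacobian at x
       matrix::Matrix v = net.inversion(x, xsi); // input shift for output shift xsi
  */
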
  /// returns the number of input neurons
  virtual unsigned int getInputDim() const {
    return weights[0].getN();
  }
  /// returns the number of output neurons
  virtual unsigned int getOutputDim() const {
    return (weights.rbegin())->getM();
  }

  /// returns the activation of the given layer. Layer 0 is the first hidden layer
  virtual matrix::Matrix getLayerOutput(unsigned int layer){
    assert(layer < layers.size());
    return ys[layer];
  }

  /// damps the weights and the biases by multiplying with (1-damping)
  virtual void damp(double damping);

  /// total number of layers (1 means no hidden units)
  virtual unsigned int getLayerNum() const {
    return layers.size();
  }

  /// layer 0 is the first hidden layer
  virtual const Layer& getLayer(unsigned int layer) const {
    assert(layer < layers.size());
    return layers[layer];
  }

  /// layer 0 is the first hidden layer
  virtual Layer& getLayer(unsigned int layer) {
    assert(layer < layers.size());
    return layers[layer];
  }

  /// weight matrix 0 connects the input with the first hidden layer
  virtual const matrix::Matrix& getWeights(unsigned int to_layer) const {
    assert(to_layer < weights.size());
    return weights[to_layer];
  }

  /// weight matrix 0 connects the input with the first hidden layer
  virtual matrix::Matrix& getWeights(unsigned int to_layer) {
    assert(to_layer < weights.size());
    return weights[to_layer];
  }

  /// layer 0 is the first hidden layer
  virtual const matrix::Matrix& getBias(unsigned int of_layer) const {
    assert(of_layer < bias.size());
    return bias[of_layer];
  }

  /**************  STOREABLE **********************************/
  /// stores the network binary into the given file stream
  bool store(FILE* f) const;
  /// restores the network binary from the given file stream
  bool restore(FILE* f);


  /// writes the network in ASCII to the given file stream (not part of the Storeable interface)
  bool write(FILE* f) const;

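  /* Persistence sketch (the file name is illustrative):

       FILE* f = fopen("mlffnn.net", "wb");
       if (f) { net.store(f); fclose(f); }    // binary store
       FILE* g = fopen("mlffnn.net", "rb");
       if (g) { net.restore(g); fclose(g); }  // binary restore
  */
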
  /************** Inspectable **********************************/
  virtual iparamkeylist getInternalParamNames() const;
  virtual iparamvallist getInternalParams() const;
  virtual ilayerlist getStructuralLayers() const;
  virtual iconnectionlist getStructuralConnections() const;


  virtual void setSomeInternalParams(bool someInternalParams){
    assert(!initialised);
    this->someInternalParams = someInternalParams;
  }
public:
  double eps; ///< learning rate

  /**
   * sets the activation function (and its derivative and inverse) for ALL layers!
   * @param actfun the activation function to be used
   * @return the activation functions which were used until now
   */
  virtual std::vector<ActivationFunction> setActivationFunction(ActivationFunction actfun);

  /**
   * sets the activation functions (and their derivatives and inverses) for all layers.
   * @note normally you call setActivationFunction() first and get a list of the
   * previously used activation functions, which can be set back with this function
   * (see the sketch below)
   * @param actfunList the list of activation functions to be used
   */
  virtual void setActivationFunctions(std::vector<ActivationFunction> actfunList);

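  /* Sketch of the intended interplay of the two functions above (the
     sigmoid activation is illustrative): switch all layers at once and
     restore the previous functions afterwards.

       std::vector<ActivationFunction> old =
           net.setActivationFunction(FeedForwardNN::sigmoid);
       // ... use the network with the new activation function ...
       net.setActivationFunctions(old);
  */
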
protected:
  std::vector<Layer> layers;
  std::vector<matrix::Matrix> weights;
  std::vector<matrix::Matrix> bias;
  std::vector<matrix::Matrix> smallids; // small unit matrices for pseudoinversion
  bool useBypass;
  matrix::Matrix bypassWeights;
  bool someInternalParams;

  matrix::Matrix input;
  std::vector<matrix::Matrix> ys; // activations
  std::vector<matrix::Matrix> zs; // potentials

  bool initialised;
};

#endif
