elman.h

Go to the documentation of this file.
00001 /***************************************************************************
00002  *   Copyright (C) 2005-2011 LpzRobots development team                    *
00003  *    Georg Martius  <georg dot martius at web dot de>                     *
00004  *    Frank Guettler <guettler at informatik dot uni-leipzig dot de>       *
00005  *    Frank Hesse    <frank at nld dot ds dot mpg dot de>                  *
00006  *    Ralf Der       <ralfder at mis dot mpg dot de>                       *
00007  *                                                                         *
00008  *   This program is free software; you can redistribute it and/or modify  *
00009  *   it under the terms of the GNU General Public License as published by  *
00010  *   the Free Software Foundation; either version 2 of the License, or     *
00011  *   (at your option) any later version.                                   *
00012  *                                                                         *
00013  *   This program is distributed in the hope that it will be useful,       *
00014  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
00015  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
00016  *   GNU General Public License for more details.                          *
00017  *                                                                         *
00018  *   You should have received a copy of the GNU General Public License     *
00019  *   along with this program; if not, write to the                         *
00020  *   Free Software Foundation, Inc.,                                       *
00021  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
00022  *                                                                         *
00023  ***************************************************************************/
00024 #ifndef __ELMAN_H
00025 #define __ELMAN_H
00026 
00027 #include <vector>
00028 
00029 #include "multilayerffnn.h"
00030 
00031 /// updates for network
00032 class NetUpdate {
00033 public:
00034   NetUpdate(){}
00035   NetUpdate(int numweights, int numbias, int numothers)
00036     : weights(numweights), bias(numweights), other(numothers) {}
00037   std::vector<matrix::Matrix> weights;
00038   std::vector<matrix::Matrix> bias;
00039   std::vector<matrix::Matrix> other;
00040 };
00041 
00042 /** Multilayer Neural Network with context neurons (after Elman and Jordan)
00043 Example of 2 hidden layer network with both, elman and jordan context units.
00044 \pre{
00045 +--<-----O O O
00046 |        | | |
00047 |        H H H 
00048 |        | | |
00049 |        | | |
00050 |        | | |
00051 |        H H H ----->-----+ 1:1 fixed connections (time delayed)
00052 |   >->-/| | |\-<-<       |
00053 |  / / / | | | \ \ \      |  
00054 | J J J  I I I  E E E     | 
00055 +-^-^-^         ^-^-^--<--+
00056 }
00057  */
00058 class Elman : public MultiLayerFFNN {
00059 public: 
00060   /**
00061      @param eps learning rate
00062      @param layers Layer description (the input layer is not specified (always linear))
00063      @param lambda self-recurrent feedback strength of context neurons
00064   */
00065   Elman(double eps, const std::vector<Layer>& layers, 
00066         bool useElman, bool useJordan=false, bool useBypass=false)
00067     : MultiLayerFFNN(eps,layers,useBypass), useElman(useElman), useJordan(useJordan) { 
00068     
00069     initialised = false;  
00070   }
00071 
00072   virtual ~Elman(){ }
00073 
00074   /// initialisation of the network with the given number of input and output units
00075   virtual void init(unsigned int inputDim, unsigned  int outputDim, 
00076                     double unit_map = 0.0, RandGen* randGen = 0); 
00077 
00078   /** passive processing of the input 
00079       (this will be different for every input, since it is a recurrent network)
00080   */
00081   virtual const matrix::Matrix process (const matrix::Matrix& input); 
00082 
00083   /// performs learning and returns the network output before learning
00084   virtual const matrix::Matrix learn (const matrix::Matrix& input, 
00085                                       const matrix::Matrix& nom_output, 
00086                                       double learnRateFactor = 1);
00087 
00088   /** determines the weight and bias updates
00089    */
00090   virtual NetUpdate weightIncrement(const matrix::Matrix& xsi);
00091 
00092   /** like weightIncrement but with blocked backprop flow for some neurons.
00093       @param blockedlayer index of layer with blocked neurons 
00094       @param blockfrom index of neuron in blockedlayer to start blocking
00095       @param blockto index of neuron in blockedlayer to end blocking (if -1 then to end)
00096       (not included)
00097    */
00098   virtual NetUpdate weightIncrementBlocked(const matrix::Matrix& xsi_, 
00099                                            int blockedlayer, 
00100                                            int blockfrom, int blockto);
00101 
00102 
00103   /** applies the weight increments to the weight (and bias) matrices 
00104       with the learningrate and the learnRateFactor */
00105   virtual void updateWeights(const NetUpdate& updates);
00106 
00107 
00108   /* Is implemented in multilayerfnn
00109      virtual const matrix::Matrix response(const matrix::Matrix& input) const;     
00110    */
00111 
00112   void damp(double damping);
00113   
00114   /**************  STOREABLE **********************************/
00115   /// stores the layer binary into file stream
00116   bool store(FILE* f) const;
00117   /// restores the layer binary from file stream
00118   bool restore(FILE* f);
00119 
00120 
00121   /************** CONFIGURABLE INTERFACE ************************/
00122   virtual paramkey getName() const {
00123     return std::string("elmanNN");
00124   }
00125 
00126 
00127 
00128   /************** Inspectable INTERFACE ************************/
00129   virtual iparamkeylist getInternalParamNames() const;
00130   virtual iparamvallist getInternalParams() const;
00131   virtual ilayerlist getStructuralLayers() const;
00132   virtual iconnectionlist getStructuralConnections() const;
00133 
00134 protected:
00135   matrix::Matrix elmanWeights;
00136   matrix::Matrix elmanContext;
00137   matrix::Matrix jordanWeights;
00138   matrix::Matrix jordanContext;
00139   bool useElman;
00140   bool useJordan;
00141 
00142 };
00143 
00144 #endif