/***************************************************************************
 *   Copyright (C) 2005-2011 LpzRobots development team                    *
 *   Georg Martius  <georg dot martius at web dot de>                      *
 *   Frank Guettler <guettler at informatik dot uni-leipzig dot de>        *
 *   Frank Hesse    <frank at nld dot ds dot mpg dot de>                   *
 *   Ralf Der       <ralfder at mis dot mpg dot de>                        *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 *                                                                         *
 ***************************************************************************/
#ifndef __MULTILAYERFFNN_H
#define __MULTILAYERFFNN_H

#include <vector>

#include "feedforwardnn.h"
#include "invertablemodel.h"
#include "layer.h"


/// multi-layer neural network with configurable activation functions
class MultiLayerFFNN : public FeedForwardNN {
public:

  /**
     @param eps learning rate
     @param layers layer description (the input layer is not specified; it is always linear)
     @param useBypass if true, a direct connection from the input to the output layer is included
     @param someInternalParams if true, only a few parameters are sent for plotting
  */
  MultiLayerFFNN(double eps, const std::vector<Layer>& layers, bool useBypass=false,
                 bool someInternalParams=true);
  virtual ~MultiLayerFFNN(){ }

  /** initialisation of the network with the given number of input and output units.
      The dimensionality of the output layer is adjusted automatically.
      @param unit_map defines the approximate response of the network
       after initialisation (if unit_map=1 the weights are unit matrices)
      @param randGen pointer to random generator; if 0, a new one is used
  */
  virtual void init(unsigned int inputDim, unsigned int outputDim,
                    double unit_map = 0.0, RandGen* randGen = 0);

  /// passive processing of the input
  virtual const matrix::Matrix process (const matrix::Matrix& input);

  /** performs learning and returns the network output before learning
      (process should be called before) */
  virtual const matrix::Matrix learn (const matrix::Matrix& input,
                                      const matrix::Matrix& nom_output,
                                      double learnRateFactor = 1);

  /** response matrix of the neural network at the given input

      \f[ J_{ij} = \frac{\partial y_i}{\partial x_j} \f]
      \f[ J = G_n' W_n G_{n-1}' W_{n-1} \cdots G_1' W_1 \f]
      where \f$ W_n \f$ is the weight matrix of layer \f$ n \f$ and
      \f$ G' \f$ is a diagonal matrix with \f$ G'_{ii} = g'_i \f$ on the diagonal.
      ATTENTION: the input argument is ignored! Call process() before!
  */
  virtual const matrix::Matrix response(const matrix::Matrix& input) const;
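  /* Example (a sketch; `net` and `x` are illustrative names): the Jacobian
     is evaluated at the activations stored by the last forward pass, so
     process() must be called first, exactly as the ATTENTION note above says.

       net.process(x);                      // forward pass stores the activations
       matrix::Matrix J = net.response(x);  // J(i,j) = dy_i/dx_j at that input
  */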
  /** calculates the input shift v for a given output shift xsi via pseudo-inversion.

      \f[ o + \xi = \psi(i + v) \f]

      The result is a vector of dimension inputdim.
      ATTENTION: the input argument is ignored! Call process() before!
  */
  virtual const matrix::Matrix inversion(const matrix::Matrix& input, const matrix::Matrix& xsi) const;
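  /* Example (a sketch; `net`, `x`, and `xi` are illustrative names, and the
     Matrix arithmetic operators are assumed from the matrix library):
     ask for the input change that would move the output by xi.

       net.process(x);                           // forward pass first (input argument is ignored)
       matrix::Matrix v = net.inversion(x, xi);  // input shift with o + xi ~= psi(i + v)
       net.process(x + v);                       // should now be close to o + xi
  */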
  /// returns the number of input neurons
  virtual unsigned int getInputDim() const {
    return weights[0].getN();
  }
  /// returns the number of output neurons
  virtual unsigned int getOutputDim() const {
    return (weights.rbegin())->getM();
  }

  /// returns the activation of the given layer. Layer 0 is the first hidden layer.
  virtual const matrix::Matrix& getLayerOutput(unsigned int layer) const {
    assert(layer < layers.size());
    return ys[layer];
  }

  /// damps the weights and the biases by multiplying them with (1-damping)
  virtual void damp(double damping);

  /// total number of layers (1 means no hidden units)
  virtual unsigned int getLayerNum() const {
    return layers.size();
  }

  /// layer 0 is the first hidden layer
  virtual const Layer& getLayer(unsigned int layer) const {
    assert(layer < layers.size());
    return layers[layer];
  }

  /// layer 0 is the first hidden layer
  virtual Layer& getLayer(unsigned int layer) {
    assert(layer < layers.size());
    return layers[layer];
  }

  /// weight matrix 0 connects the input with the first hidden layer
  virtual const matrix::Matrix& getWeights(unsigned int to_layer) const {
    assert(to_layer < weights.size());
    return weights[to_layer];
  }

  /// weight matrix 0 connects the input with the first hidden layer
  virtual matrix::Matrix& getWeights(unsigned int to_layer) {
    assert(to_layer < weights.size());
    return weights[to_layer];
  }

  /// Note: layer 0 is the first hidden layer
  virtual const matrix::Matrix& getBias(unsigned int of_layer) const {
    assert(of_layer < bias.size());
    return bias[of_layer];
  }

  /// Note: layer 0 is the first hidden layer
  virtual matrix::Matrix& getBias(unsigned int of_layer) {
    assert(of_layer < bias.size());
    return bias[of_layer];
  }

  /************** STOREABLE **********************************/
  /// stores the layer binary into file stream
  bool store(FILE* f) const;
  /// restores the layer binary from file stream
  bool restore(FILE* f);


  /// writes the layer ASCII into file stream (not in the Storeable interface)
  bool write(FILE* f) const;

  /************** Inspectable **********************************/
  virtual iparamkeylist getInternalParamNames() const;
  virtual iparamvallist getInternalParams() const;
  virtual ilayerlist getStructuralLayers() const;
  virtual iconnectionlist getStructuralConnections() const;


  virtual void setSomeInternalParams(bool someInternalParams){
    assert(!initialised); this->someInternalParams = someInternalParams;
  }

public:
  double eps; ///< learning rate

  /**
   * sets the activation function (and its derivative and inversion too) for ALL layers!
   * @param actfun the activation function to be used
   * @return the activation functions that were used until now
   */
  virtual std::vector<ActivationFunction> setActivationFunction(ActivationFunction actfun);

  /**
   * sets the activation functions (and their derivatives and inversions too) for all layers.
   * @note normally you call setActivationFunction() first to get a list of the
   *  activation functions in use, which can be set back with this function
   * @param actfunList the list of activation functions to be used
   */
  virtual void setActivationFunctions(std::vector<ActivationFunction> actfunList);


protected:
  std::vector<Layer> layers;
  std::vector<matrix::Matrix> weights;
  std::vector<matrix::Matrix> bias;
  std::vector<matrix::Matrix> smallids; // small unit matrices for pseudo-inversion
  bool useBypass;
  matrix::Matrix bypassWeights;
  bool someInternalParams;

  matrix::Matrix input;
  std::vector<matrix::Matrix> ys; // activations
  std::vector<matrix::Matrix> zs; // potentials

  double lambda; // regularisation value for the pseudo-inverse
  bool initialised;
};

#endif
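/* Usage sketch (illustrative only, not part of the original header).
   The Layer constructor arguments and FeedForwardNN::tanh are assumptions
   based on layer.h and feedforwardnn.h; check those headers for the exact
   signatures.

     std::vector<Layer> layers;
     layers.push_back(Layer(4));            // hidden layer with 4 units
     layers.push_back(Layer(2));            // output layer (size adjusted by init)
     MultiLayerFFNN net(0.01, layers);      // eps = 0.01 learning rate
     net.init(3, 2);                        // 3 input, 2 output neurons
     net.setActivationFunction(FeedForwardNN::tanh); // same nonlinearity everywhere

     matrix::Matrix x(3, 1);                // input column vector
     matrix::Matrix target(2, 1);           // desired output
     net.process(x);                        // forward pass
     net.learn(x, target);                  // one learning step towards target
*/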