/***************************************************************************
 *   Copyright (C) 2005 by Robot Group Leipzig                             *
 *    martius@informatik.uni-leipzig.de                                    *
 *    fhesse@informatik.uni-leipzig.de                                     *
 *    der@informatik.uni-leipzig.de                                        *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify *
 *   it under the terms of the GNU General Public License as published by *
 *   the Free Software Foundation; either version 2 of the License, or    *
 *   (at your option) any later version.                                  *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,      *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of       *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        *
 *   GNU General Public License for more details.                         *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License    *
 *   along with this program; if not, write to the                        *
 *   Free Software Foundation, Inc.,                                      *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.            *
 ***************************************************************************
 *                                                                         *
 *   DESCRIPTION                                                           *
 *                                                                         *
 *   $Log: multilayerffnn.h,v $
 *   Revision 1.24  2008/05/30 11:57:12  martius
 *   do processing of inputs in the learning function if the network was not
 *   activated with this input before (was a very probable mistake)
 *
 *   Revision 1.23  2008/05/27 13:23:42  guettler
 *   multilayerffnn: inserted function for setting the activation function of
 *   all layers from outside and back
 *
 *   Revision 1.22  2008/05/02 17:20:04  martius
 *   *** empty log message ***
 *
 *   Revision 1.21  2008/04/17 14:54:45  martius
 *   randomGen added, which is a random generator with a long period and an
 *   internal state. Each Agent has an instance and passes it to the controller
 *   and the wiring. This is good for
 *   a) repeatability on agent basis,
 *   b) parallel execution as done in ode_robots
 *
 *   Revision 1.20  2008/02/08 13:38:05  der
 *   abstract model is inspectable
 *
 *   Revision 1.19  2007/12/13 16:46:49  martius
 *   new actfuns
 *
 *   Revision 1.18  2007/12/11 14:23:32  martius
 *   some internal params
 *
 *   Revision 1.17  2007/12/09 16:24:25  martius
 *   direct access to layers and weights possible
 *
 *   Revision 1.16  2007/09/06 18:52:03  martius
 *   write (ascii store)
 *
 *   Revision 1.15  2007/08/24 12:03:37  martius
 *   getLayerNum
 *
 *   Revision 1.14  2007/06/08 15:47:17  martius
 *   eps public
 *
 *   Revision 1.13  2007/04/03 11:21:40  martius
 *   layer has inverse of actfun
 *   new constructor
 *
 *   Revision 1.12  2007/04/02 15:24:23  der
 *   inversion
 *
 *   Revision 1.11  2007/02/23 15:14:17  martius
 *   *** empty log message ***
 *
 *   Revision 1.10  2007/02/23 09:40:46  der
 *   regularisation used from regularisation.h
 *
 *   Revision 1.9  2007/02/20 15:41:06  martius
 *   big model stuff, elman and co
 *
 *   Revision 1.8  2006/12/21 11:44:17  martius
 *   commenting style for doxygen //< -> ///<
 *   FOREACH and FOREACHC are macros for collection iteration
 *
 *   Revision 1.7  2006/11/23 08:43:46  martius
 *   bugfix in store and restore
 *
 *   Revision 1.6  2006/07/27 15:22:18  martius
 *   activations stored (processing must precede learning and response calculation)
 *
 *                                                                         *
 ***************************************************************************/
#ifndef __MULTILAYERFFNN_H
#define __MULTILAYERFFNN_H

#include <vector>

#include "feedforwardnn.h"
#include "invertablemodel.h"

/// a single layer of a MultiLayerFFNN: size, bias factor and activation function
class Layer : public Storeable {
public:
  /** constructor for Layer.
      The derivative and the inverse of the activation function are derived automatically.
      @param size number of neurons
      @param factor_bias size of the bias neuron; if 0, no bias is used
      @param actfun activation function, see also FeedForwardNN
  */
  Layer(int size, double factor_bias = 0.1,
        ActivationFunction actfun = FeedForwardNN::linear);

  /** obsolete, use the other constructor for Layer.
      @param size number of neurons
      @param factor_bias size of the bias neuron; if 0, no bias is used
      @param actfun activation function, see also FeedForwardNN
      @param dactfun derivative of the activation function (should be consistent with actfun)
  */
  Layer(int size, double factor_bias,
        ActivationFunction actfun,
        ActivationFunction dactfun) {
    fprintf(stderr, "%s %s\n", "MultiLayerFFNN, Layer::Layer(): this constructor is obsolete!",
            "Please use the one without dactfun now!\n");
    exit(1);
  }

  /*** STOREABLE ******/
  /// stores the layer binary into the file stream
  bool store(FILE* f) const;
  /// restores the layer binary from the file stream
  bool restore(FILE* f);

  /// sets the activation function of the layer
  void setActFun(ActivationFunction actfun);

  int size;
  double factor_bias;
  ActivationFunction actfun;       ///< callback activation function
  ActivationFunction dactfun;      ///< first derivative of the activation function
  InvActivationFunction invactfun; ///< inverse of the activation function

  /// prints the Layer data structure
  friend std::ostream& operator<<(std::ostream&, const Layer&);
};
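
/* A minimal usage sketch for Layer (illustrative only, not part of the original
   header). It assumes that FeedForwardNN::tanh is one of the activation functions
   declared in feedforwardnn.h; FeedForwardNN::linear is the default used above.

     std::vector<Layer> layers;
     layers.push_back(Layer(10, 0.1, FeedForwardNN::tanh)); // hidden layer: 10 tanh units with bias
     layers.push_back(Layer(2));                            // output layer: linear, resized by init()
*/
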
/// multilayer feed-forward neural network with configurable activation functions
class MultiLayerFFNN : public FeedForwardNN {
public:
  /**
     @param eps learning rate
     @param layers layer description (the input layer is not specified; it is always linear)
     @param useBypass if true, a direct connection from the input to the output layer is added
     @param someInternalParams if true, only a few parameters are sent to plotting
  */
  MultiLayerFFNN(double eps, const std::vector<Layer>& layers, bool useBypass = false,
                 bool someInternalParams = true);
  virtual ~MultiLayerFFNN() { }

  /** initialises the network with the given number of input and output units.
      The dimensionality of the output layer is adjusted automatically.
      @param unit_map defines the approximate response of the network
        after initialisation (if unit_map=1 the weights are unit matrices)
      @param randGen pointer to a random generator; if 0, a new one is created
  */
  virtual void init(unsigned int inputDim, unsigned int outputDim,
                    double unit_map = 0.0, RandGen* randGen = 0);

  /// passive processing of the input
  virtual const matrix::Matrix process(const matrix::Matrix& input);

  /** performs learning and returns the network output before learning
      (process should be called beforehand) */
  virtual const matrix::Matrix learn(const matrix::Matrix& input,
                                     const matrix::Matrix& nom_output,
                                     double learnRateFactor = 1);

  /** response matrix (Jacobian) of the neural network at the given input

      \f[ J_{ij} = \frac{\partial y_i}{\partial x_j} \f]
      \f[ J = G_n' W_n G_{n-1}' W_{n-1} \ldots G_1' W_1 \f]
      where \f$ W_n \f$ is the weight matrix of layer n and
      \f$ G' \f$ is a diagonal matrix with \f$ G'_{ii} = g'_i \f$ on the diagonal.
      ATTENTION: the input argument is ignored; call process() with the input first!
  */
  virtual const matrix::Matrix response(const matrix::Matrix& input) const;

  /** calculates the input shift v for a given output shift xsi via pseudo-inversion.

      \f[ o + \xi = \psi(i + v) \f]

      The result is a vector of dimension inputdim.
      ATTENTION: the input argument is ignored; call process() with the input first!
  */
  virtual const matrix::Matrix inversion(const matrix::Matrix& input, const matrix::Matrix& xsi) const;
  /// returns the number of input neurons
  virtual unsigned int getInputDim() const {
    return weights[0].getN();
  }
  /// returns the number of output neurons
  virtual unsigned int getOutputDim() const {
    return (weights.rbegin())->getM();
  }

  /// returns the activation of the given layer; layer 0 is the first hidden layer
  virtual matrix::Matrix getLayerOutput(unsigned int layer) {
    assert(layer < layers.size());
    return ys[layer];
  }

  /// damps the weights and the biases by multiplying them with (1-damping)
  virtual void damp(double damping);

  /// total number of layers (1 means no hidden units)
  virtual unsigned int getLayerNum() const {
    return layers.size();
  }

  /// layer 0 is the first hidden layer
  virtual const Layer& getLayer(unsigned int layer) const {
    assert(layer < layers.size());
    return layers[layer];
  }

  /// layer 0 is the first hidden layer
  virtual Layer& getLayer(unsigned int layer) {
    assert(layer < layers.size());
    return layers[layer];
  }

  /// weight matrix 0 connects the input with the first hidden layer
  virtual const matrix::Matrix& getWeights(unsigned int to_layer) const {
    assert(to_layer < weights.size());
    return weights[to_layer];
  }

  /// weight matrix 0 connects the input with the first hidden layer
  virtual matrix::Matrix& getWeights(unsigned int to_layer) {
    assert(to_layer < weights.size());
    return weights[to_layer];
  }

  /// bias 0 belongs to the first hidden layer
  virtual const matrix::Matrix& getBias(unsigned int of_layer) const {
    assert(of_layer < bias.size());
    return bias[of_layer];
  }

  /************** STOREABLE **********************************/
  /// stores the network binary into the file stream
  bool store(FILE* f) const;
  /// restores the network binary from the file stream
  bool restore(FILE* f);

  /// writes the network as ASCII into the file stream (not part of the Storeable interface)
  bool write(FILE* f) const;

  /************** Inspectable **********************************/
  virtual iparamkeylist getInternalParamNames() const;
  virtual iparamvallist getInternalParams() const;
  virtual ilayerlist getStructuralLayers() const;
  virtual iconnectionlist getStructuralConnections() const;

  /// selects whether only a few internal parameters are plotted (must be called before init())
  virtual void setSomeInternalParams(bool someInternalParams) {
    assert(!initialised);
    this->someInternalParams = someInternalParams;
  }

public:
  double eps; ///< learning rate

  /**
   * sets the activation function (and its derivative and inverse) for ALL layers!
   * @param actfun the activation function to be used
   * @return the activation functions that were used until now
   */
  virtual std::vector<ActivationFunction> setActivationFunction(ActivationFunction actfun);

  /**
   * sets the activation functions (and their derivatives and inverses) for all layers.
   * @note normally you call setActivationFunction() first, keep the returned list of
   *       previously used activation functions, and set them back with this function
   * @param actfunList the list of activation functions to be used
   */
  virtual void setActivationFunctions(std::vector<ActivationFunction> actfunList);
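
  /* Sketch of the intended use of the two functions above (illustrative only, not
     part of the original header; "net" is a hypothetical MultiLayerFFNN instance):
     setActivationFunction() switches all layers to one function and returns the
     previous ones, which can later be restored with setActivationFunctions().
     FeedForwardNN::linear is the same function used as the Layer default above.

       std::vector<ActivationFunction> old = net.setActivationFunction(FeedForwardNN::linear);
       // ... use the temporarily linear network ...
       net.setActivationFunctions(old);   // restore the original activation functions
  */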

protected:
  std::vector<Layer> layers;
  std::vector<matrix::Matrix> weights;
  std::vector<matrix::Matrix> bias;
  std::vector<matrix::Matrix> smallids; ///< small unit matrices for the pseudo-inversion
  bool useBypass;
  matrix::Matrix bypassWeights;
  bool someInternalParams;

  matrix::Matrix input;
  std::vector<matrix::Matrix> ys; ///< activations
  std::vector<matrix::Matrix> zs; ///< potentials

  bool initialised;
};

#endif
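
/* Example usage of MultiLayerFFNN (a minimal sketch against the interface above,
   not part of the original header; FeedForwardNN::tanh is assumed to be declared
   in feedforwardnn.h):

     std::vector<Layer> layers;
     layers.push_back(Layer(5, 0.1, FeedForwardNN::tanh));  // one hidden layer
     layers.push_back(Layer(2));                            // linear output layer
     MultiLayerFFNN net(0.01, layers);                      // eps = 0.01, no bypass

     net.init(3, 2);                                        // 3 inputs, 2 outputs

     matrix::Matrix x(3, 1);                                // input column vector
     matrix::Matrix target(2, 1);                           // desired output
     matrix::Matrix y   = net.process(x);                   // forward pass (stores activations)
     matrix::Matrix out = net.learn(x, target);             // one learning step towards target
     matrix::Matrix J   = net.response(x);                  // Jacobian dy/dx at the processed input
*/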