Robot Simulator of the Robotics Group for Self-Organization of Control: invertmotorbigmodel.h Source File

00001 /***************************************************************************
00002  *   Copyright (C) 2005 by Robot Group Leipzig                             *
00003  *    martius@informatik.uni-leipzig.de                                    *
00004  *    fhesse@informatik.uni-leipzig.de                                     *
00005  *    der@informatik.uni-leipzig.de                                        *
00006  *                                                                         *
00007  *   ANY COMMERCIAL USE FORBIDDEN!                                         *
00008  *   LICENSE:                                                              *
00009  *   This work is licensed under the Creative Commons                      *
00010  *   Attribution-NonCommercial-ShareAlike 2.5 License. To view a copy of   *
00011  *   this license, visit http://creativecommons.org/licenses/by-nc-sa/2.5/ *
00012  *   or send a letter to Creative Commons, 543 Howard Street, 5th Floor,   *
00013  *   San Francisco, California, 94105, USA.                                *
00014  *                                                                         *
00015  *   This program is distributed in the hope that it will be useful,       *
00016  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
00017  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.                  *
00018  *                                                                         *
00019  *   $Log: invertmotorbigmodel.h,v $
00020  *   Revision 1.7  2008/05/30 11:58:27  martius
00021  *   use cmath instead of math.h
00022  *
00023  *   Revision 1.6  2008/04/17 14:54:44  martius
00024  *   randomGen added, which is a random generator with long period and an
00025  *    internal state. Each Agent has an instance and passed it to the controller
00026  *    and the wiring. This is good for
00027  *   a) repeatability on agent basis,
00028  *   b) parallel execution as done in ode_robots
00029  *
00030  *   Revision 1.5  2007/02/23 09:40:45  der
00031  *   regularisation used from regularisation.h
00032  *
00033  *   Revision 1.4  2007/02/20 15:41:06  martius
00034  *   big model stuff, elman and co
00035  *
00036  *   Revision 1.3  2006/12/21 11:44:17  martius
00037  *   commenting style for doxygen //< -> ///<
00038  *   FOREACH and FOREACHC are macros for collection iteration
00039  *
00040  *   Revision 1.2  2006/11/29 16:22:43  martius
00041  *   name is a variable of configurable and is used as such
00042  *
00043  *   Revision 1.1  2006/07/20 17:15:25  martius
00044  *   controller with arbitrary invertable model
00045  *
00046  *
00047  ***************************************************************************/
00048 #ifndef __INVERTMOTORBIGMODEL_H
00049 #define __INVERTMOTORBIGMODEL_H
00050 
00051 #include "invertmotorcontroller.h"
00052 
00053 #include <assert.h>
00054 #include <cmath>
00055 
00056 #include "matrix.h"
00057 #include "noisegenerator.h"
00058 #include "invertablemodel.h"
00059 
00060 typedef struct InvertMotorBigModelConf {
00061   int buffersize;  ///< buffersize size of the time-buffer for x,y,eta
00062   double cInit;    ///< cInit size of the C matrix to initialised with.
00063   double cNonDiag; ///< cNonDiag is the size of the nondiagonal elements in respect to the diagonal (cInit) ones
00064   bool modelInit;  ///< size of the unit-map strenght of the model 
00065   bool useS;    ///< useS decides whether to use the S matrix in addition to the A matrix
00066   bool someInternalParams;  ///< someInternalParams if true only some internal parameters are exported, otherwise all 
00067   
00068   double modelCompliant; ///< learning factor for model (or sensor) compliant learning
00069   
00070   InvertableModel* model;   ///< model used as world model
00071 } InvertMotorBigModelConf;
00072 
00073 /**
00074  * class for robot controller is based on InvertMotorNStep
00075  *  
00076  * - direct inversion 
00077  *
00078  * - motor space
00079  *
00080  * - multilayer,nonlinear model
00081  */
00082 class InvertMotorBigModel : public InvertMotorController {
00083 
00084 public:
00085   InvertMotorBigModel(const InvertMotorBigModelConf& conf = getDefaultConf());
00086   virtual void init(int sensornumber, int motornumber, RandGen* randGen = 0);
00087 
00088   virtual ~InvertMotorBigModel();
00089 
00090   /// returns the number of sensors the controller was initialised with or 0 if not initialised
00091   virtual int getSensorNumber() const { return number_sensors; }
00092   /// returns the mumber of motors the controller was initialised with or 0 if not initialised
00093   virtual int getMotorNumber() const  { return number_motors; }
00094 
00095   /// performs one step (includes learning). 
00096   /// Calulates motor commands from sensor inputs.
00097   virtual void step(const sensor* , int number_sensors, motor* , int number_motors);
00098 
00099   /// performs one step without learning. Calulates motor commands from sensor inputs.
00100   virtual void stepNoLearning(const sensor* , int number_sensors, 
00101                               motor* , int number_motors);
00102 
00103 
00104   /**************  STOREABLE **********************************/
00105   /** stores the controller values to a given file. */
00106   virtual bool store(FILE* f) const;
00107   /** loads the controller values from a given file. */
00108   virtual bool restore(FILE* f);  
00109 
00110   /************** INSPECTABLE ********************************/
00111   virtual iparamkeylist getInternalParamNames() const;
00112   virtual iparamvallist getInternalParams() const;  
00113   virtual ilayerlist getStructuralLayers() const;
00114   virtual iconnectionlist getStructuralConnections() const;
00115   
00116   /************** CONFIGURABLE ********************************/
00117   virtual paramval getParam(const paramkey& key) const;
00118   virtual bool setParam(const paramkey& key, paramval val);
00119   virtual paramlist getParamList() const;
00120 
00121   /**** TEACHING ****/
00122   /** The given motor teaching signal is used for this timestep. 
00123       It is used as a feed forward teaching signal for the controller.
00124       Please note, that the teaching signal has to be given each timestep 
00125        for a continuous teaching process.
00126    */
00127   virtual void setMotorTeachingSignal(const motor* teaching, int len);
00128 
00129   /** The given sensor teaching signal (distal learning) is used for this timestep. 
00130       First the belonging motor teachung signal is calculated by the inverse model.
00131       See setMotorTeachingSignal
00132    */
00133   virtual void setSensorTeachingSignal(const sensor* teaching, int len);  
00134 
00135 
00136   static InvertMotorBigModelConf getDefaultConf(){
00137     InvertMotorBigModelConf c;
00138     c.buffersize = 50;
00139     c.cInit = 1.0;
00140     c.cNonDiag = 0;
00141     c.modelInit  = 1.0;
00142     c.someInternalParams = true;
00143     c.useS = false;
00144     c.modelCompliant = 0;
00145     c.model = 0;
00146     return c;
00147   }
00148 
00149   void getLastMotors(motor* motors, int len);
00150 
00151 protected:
00152   unsigned short number_sensors;
00153   unsigned short number_motors;
00154   
00155   matrix::Matrix A; ///< Model Matrix (motors to sensors)
00156   matrix::Matrix S; ///< additional Model Matrix (sensors derivatives to sensors) 
00157   matrix::Matrix C; ///< Controller Matrix
00158   matrix::Matrix H; ///< Controller Bias
00159   NoiseGenerator* BNoiseGen; ///< Noisegenerator for noisy bias
00160   matrix::Matrix R; ///< C*A
00161   matrix::Matrix SmallID; ///< small identity matrix in the dimension of R
00162   matrix::Matrix xsi; ///< current output error
00163   double xsi_norm; ///< norm of matrix
00164   double xsi_norm_avg; ///< average norm of xsi (used to define whether Modell learns)
00165   double pain;         ///< if the modelling error (xsi) is too high we have a pain signal
00166   matrix::Matrix* x_buffer;
00167   matrix::Matrix* y_buffer;
00168   matrix::Matrix* eta_buffer;
00169   matrix::Matrix zero_eta; // zero initialised eta
00170   matrix::Matrix x_smooth;
00171   //   matrix::Matrix z; ///< membrane potential
00172   matrix::Matrix y_teaching; ///< teaching motor signal
00173   bool useTeaching; ///< flag whether there is an actual teachning signal or not
00174   int t_rand; ///< initial random time to avoid syncronous management of all controllers
00175 
00176 
00177   int managementInterval; ///< interval between subsequent management function calls
00178   paramval inhibition; ///< inhibition strength for sparce kwta strategy (is scaled with epsC)
00179   paramval kwta;       ///< (int) number of synapses that get strengthend
00180   paramval limitRF;    ///< (int) receptive field of motor neurons (number of offcenter sensors) if null then no limitation. Mutual exclusive with inhibition 
00181   paramval dampS;     ///< damping of S matrix
00182 
00183   InvertMotorBigModelConf conf;
00184 
00185   /// puts the sensors in the ringbuffer, generate controller values and put them in the 
00186   //  ringbuffer as well
00187   virtual void fillBuffersAndControl(const sensor* x_, int number_sensors, 
00188                              motor* y_, int number_motors);
00189 
00190   /// calculates the first shift into the motor space useing delayed motor values. 
00191   //  @param delay 0 for no delay and n>0 for n timesteps delay in the time loop
00192   virtual void calcEtaAndBufferIt(int delay);
00193 
00194   /// learn H,C with motors y and corresponding sensors x
00195   virtual void learnController();
00196 
00197   /// calculates the Update for C and H 
00198   // @param y_delay timesteps to delay the y-values.  (usually 0)
00199   //  Please note that the delayed values are NOT used for the error calculation 
00200   //  (this is done in calcXsi())
00201   virtual void calcCandHUpdates(matrix::Matrix& C_update, matrix::Matrix& H_update, int y_delay);
00202 
00203   /// updates the matrix C and H
00204   virtual void updateCandH(const matrix::Matrix& C_update, const matrix::Matrix& H_update, double squashSize);
00205 
00206   /// learn A, (and S) using motors y and corresponding sensors x
00207   //  @param delay 0 for no delay and n>0 for n timesteps delay in the time loop 
00208   virtual void learnModel(int delay);
00209 
00210   /// handles inhibition damping etc.
00211   virtual void management();
00212 
00213   /// returns controller output for given sensor values
00214   virtual matrix::Matrix calculateControllerValues(const matrix::Matrix& x_smooth);   
00215  
00216   /** Calculates first and second derivative and returns both in on matrix (above). 
00217       We use simple discrete approximations:
00218       \f[ f'(x) = (f(x) - f(x-1)) / 2 \f]
00219       \f[ f''(x) = f(x) - 2f(x-1) + f(x-2) \f]
00220       where we have to go into the past because we do not have f(x+1). The scaling can be neglegted.
00221   */
00222   matrix::Matrix calcDerivatives(const matrix::Matrix* buffer, int delay);
00223 
00224 public:
00225   /** k-winner take all inhibition for synapses. k largest synapses are strengthed and the rest are inhibited.
00226       strong synapes are scaled by 1+(damping/k) and weak synapses are scaled by 1-(damping/(n-k)) where n is the 
00227       number of synapes
00228       @param weightmatrix reference to weight matrix. Synapses for a neuron are in one row. 
00229              The inhibition is done for all rows independently
00230       @param k number of synapes to strengthen
00231       @param damping strength of supression and exitation (typically 0.001)
00232    */
00233   void kwtaInhibition(matrix::Matrix& weightmatrix, unsigned int k, double damping);
00234 
00235   /** sets all connections to zero which are further away then rfSize.
00236       If rfSize == 1 then only main diagonal is left. 
00237       If rfSize = 2: main diagonal and upper and lower side diagonal are kept and so on and so forth.
00238    */
00239   void limitC(matrix::Matrix& weightmatrix, unsigned int rfSize);   
00240  
00241 };
00242 
00243 #endif