/***************************************************************************
 *   Copyright (C) 2005 by Robot Group Leipzig                             *
 *    martius@informatik.uni-leipzig.de                                    *
 *    fhesse@informatik.uni-leipzig.de                                     *
 *    der@informatik.uni-leipzig.de                                        *
 *                                                                         *
 *   ANY COMMERCIAL USE FORBIDDEN!                                         *
 *   LICENSE:                                                              *
 *   This work is licensed under the Creative Commons                      *
 *   Attribution-NonCommercial-ShareAlike 2.5 License. To view a copy of   *
 *   this license, visit http://creativecommons.org/licenses/by-nc-sa/2.5/ *
 *   or send a letter to Creative Commons, 543 Howard Street, 5th Floor,   *
 *   San Francisco, California, 94105, USA.                                *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.                  *
 *                                                                         *
 *   $Log: invertmotornstep.h,v $
 *   Revision 1.32  2008/05/30 11:58:27  martius
 *   use cmath instead of math.h
 *
 *   Revision 1.31  2008/04/17 14:54:44  martius
 *   randomGen added, which is a random generator with long period and an
 *   internal state. Each Agent has an instance and passed it to the controller
 *   and the wiring. This is good for
 *   a) repeatability on agent basis,
 *   b) parallel execution as done in ode_robots
 *
 *   Revision 1.30  2008/01/17 10:02:00  der
 *   inserted some parameters.
 *
 *   Revision 1.29  2007/12/13 16:42:52  martius
 *   continuity
 *
 *   Revision 1.28  2007/12/07 10:58:02  der
 *   NoiseGenerator: method signature of add and generate changed!
 *   IMNS: added cfactor parameter
 *
 *   Revision 1.27  2007/11/07 13:36:36  martius
 *   context sensor definition, that are inputs to S
 *
 *   Revision 1.26  2007/04/02 08:21:36  martius
 *   simple reinforcement
 *
 *   Revision 1.25  2007/02/23 19:36:17  martius
 *   S and SD matrixes separated
 *
 *   Revision 1.24  2007/02/12 13:18:46  martius
 *   teaching changed. Use standard backprop
 *   distal learning
 *   model compliant learning (untested)
 *
 *   Revision 1.23  2007/01/26 12:03:11  martius
 *   kwta inhibition and limitRF implemented
 *
 *   Revision 1.22  2006/12/21 11:44:17  martius
 *   commenting style for doxygen //< -> ///<
 *   FOREACH and FOREACHC are macros for collection iteration
 *
 *   Revision 1.21  2006/12/01 16:18:26  martius
 *   S uses first and second derivative. Not really tested yet
 *
 *   Revision 1.20  2006/11/29 16:22:43  martius
 *   name is a variable of configurable and is used as such
 *
 *   Revision 1.19  2006/10/23 10:47:30  martius
 *   model as function
 *
 *   Revision 1.18  2006/08/02 09:31:16  martius
 *   calcErrorFactor moved to invertmotorcontroller
 *
 *   Revision 1.17  2006/07/20 17:14:34  martius
 *   removed std namespace from matrix.h
 *   storable interface
 *   abstract model and invertablemodel as superclasses for networks
 *
 *   Revision 1.16  2006/07/18 15:17:16  martius
 *   buffer operations like delayed values and smoothed values moved to invertmotorcontroller
 *
 *   Revision 1.15  2006/07/18 14:47:45  martius
 *   new regularisation of g' and g''/g' and squashP moved to invertmotorcontroller
 *
 *   Revision 1.14  2006/07/14 12:23:58  martius
 *   selforg becomes HEAD
 *
 *   Revision 1.12.6.12  2006/07/10 13:05:16  martius
 *   NON-COMMERICAL LICENSE added to controllers
 *
 *   Revision 1.12.6.11  2006/07/10 11:59:23  martius
 *   Matrixlib now in selforg
 *   no namespace std in header files
 *
 *   Revision 1.12.6.10  2006/02/24 14:46:44  martius
 *   removed gR
 *
 *   Revision 1.12.6.9  2006/02/08 15:16:24  martius
 *   *** empty log message ***
 *
 *   Revision 1.12.6.8  2006/01/31 15:30:34  martius
 *   model does not learn if too large error.
 *
 *   Revision 1.12.6.7  2006/01/18 16:49:34  martius
 *   new Update algo (simpler and hopefully better)
 *
 *   Revision 1.12.6.6  2006/01/17 16:58:40  martius
 *   loading and storing
 *
 *   Revision 1.12.6.5  2006/01/13 12:24:39  martius
 *   motor teaching included
 *
 *   Revision 1.12.6.4  2005/12/15 17:04:39  martius
 *   min, max and so on are template functions now
 *
 *   Revision 1.12.6.3  2005/11/21 17:02:46  martius
 *   calcCandHUpdates as separat function
 *   better formula for g' (q') using Mittelwertsatz
 *   works great!
 *
 *   Revision 1.12.6.2  2005/11/16 11:24:26  martius
 *   moved to selforg
 *
 *   Revision 1.12.6.1  2005/11/14 14:54:02  martius
 *   cpp file is seperate now
 *
 *   Revision 1.12  2005/10/27 15:46:38  martius
 *   inspectable interface is expanded to structural information for network visualiser
 *
 *   Revision 1.11  2005/10/24 11:10:27  martius
 *   disabled exact formula for shift
 *
 *   Revision 1.10  2005/10/21 16:02:49  martius
 *   added additional model matrix S (switch in contructor)
 *
 *   Revision 1.9  2005/10/21 11:55:13  martius
 *   id is now toId
 *   relativeE can be switched on
 *   uses an symmetric but exact formula for shift calcuation through neuron
 *   However g'/g'' stays -2g because implementation was making trouble
 *
 *   Revision 1.8  2005/10/06 17:08:08  martius
 *   switched to stl lists
 *   Bias noise
 *   correct error_factor for controller learning
 *
 *   Revision 1.7  2005/09/27 10:59:47  martius
 *   error_factor (from logaE or rootE) is member now
 *
 *   Revision 1.6  2005/09/22 07:22:59  martius
 *   proper destruction
 *
 *   Revision 1.5  2005/09/21 07:31:10  martius
 *   v does not go through g
 *
 *   Revision 1.4  2005/07/28 11:16:08  martius
 *   there is a switch for exporting just some of the internal parameters
 *
 *   Revision 1.3  2005/07/27 15:44:23  martius
 *   learning rate adaptation
 *
 *   Revision 1.2  2005/07/21 15:07:59  martius
 *   working version
 *   squashing normalised
 *
 *   Revision 1.1  2005/07/19 09:35:35  martius
 *   invert motor space controller that iterates
 *   over multiple time steps in past
 *
 *
 ***************************************************************************/
#ifndef __INVERTMOTORNSTEP_H
#define __INVERTMOTORNSTEP_H

#include "invertmotorcontroller.h"

#include <assert.h>
#include <cmath>

#include <selforg/matrix.h>
#include <selforg/noisegenerator.h>

typedef struct InvertMotorNStepConf {
  int buffersize;           ///< size of the time-buffer for x, y and eta
  double cInit;             ///< value the diagonal elements of the C matrix are initialised with
  double cNonDiag;          ///< size of the nondiagonal elements of C relative to the diagonal (cInit) ones
  double cNonDiagAbs;       ///< absolute value of the nondiagonal elements of C
  bool useS;                ///< whether to use the S matrix in addition to the A matrix (sees sensors)
  /** whether to use the SD matrix in addition to the A matrix
      (sees first and second derivatives) */
  bool useSD;
  /** number of context sensors (considered to be at the end of the sensor
      vector). If not 0, then S gets only these as input */
  int numberContext;
  bool someInternalParams;  ///< if true only some internal parameters are exported, otherwise all
} InvertMotorNStepConf;
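
/* Minimal usage sketch (illustrative only, not part of this header): the
   controller is configured via InvertMotorNStepConf, initialised with the
   numbers of sensors and motors, and then stepped once per control cycle.
   The names sensornumber, motornumber, sensors and motors are hypothetical
   placeholders for values provided by the agent/robot.

     InvertMotorNStepConf conf = InvertMotorNStep::getDefaultConf();
     conf.useS = true;   // additionally learn the sensor-to-sensor model matrix S
     InvertMotorNStep* controller = new InvertMotorNStep(conf);
     controller->init(sensornumber, motornumber);
     // in the control loop: read sensors, compute motors (with learning)
     controller->step(sensors, sensornumber, motors, motornumber);
*/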

/**
 * class for robot controllers that use Georg's matrixlib for
 * direct matrix inversion for n channels
 * (simple one-layer networks)
 *
 * Implements standard parameters: eps, rho, mu, stepnumber4avg, stepnumber4delay
 */
class InvertMotorNStep : public InvertMotorController {

public:
  InvertMotorNStep(const InvertMotorNStepConf& conf = getDefaultConf());
  virtual void init(int sensornumber, int motornumber, RandGen* randGen = 0);

  virtual ~InvertMotorNStep();

  /// returns the number of sensors the controller was initialised with or 0 if not initialised
  virtual int getSensorNumber() const { return number_sensors; }
  /// returns the number of motors the controller was initialised with or 0 if not initialised
  virtual int getMotorNumber() const { return number_motors; }

  /// performs one step (includes learning).
  /// Calculates motor commands from sensor inputs.
  virtual void step(const sensor*, int number_sensors, motor*, int number_motors);

  /// performs one step without learning. Calculates motor commands from sensor inputs.
  virtual void stepNoLearning(const sensor*, int number_sensors,
                              motor*, int number_motors);

  /**** STOREABLE ****/
  /** stores the controller values to a given file. */
  virtual bool store(FILE* f) const;
  /** loads the controller values from a given file. */
  virtual bool restore(FILE* f);

  /**** INSPECTABLE ****/
  virtual std::list<iparamkey> getInternalParamNames() const;
  virtual std::list<iparamval> getInternalParams() const;
  virtual std::list<ILayer> getStructuralLayers() const;
  virtual std::list<IConnection> getStructuralConnections() const;

  /**** TEACHING ****/
  /** The given motor teaching signal is used for this timestep.
      It is used as a feed-forward teaching signal for the controller.
      Please note that the teaching signal has to be given every timestep
      for a continuous teaching process.
  */
  virtual void setMotorTeachingSignal(const motor* teaching, int len);

  /** The given sensor teaching signal (distal learning) is used for this timestep.
      First the corresponding motor teaching signal is calculated by the inverse model.
      See setMotorTeachingSignal.
  */
  virtual void setSensorTeachingSignal(const sensor* teaching, int len);

  // UNUSED! OLD IMPLEMENTATION which had some consistency arguments
  void calcCandHUpdatesTeaching(matrix::Matrix& C_update, matrix::Matrix& H_update, int y_delay);
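
  /* Illustrative sketch (not part of the API): feeding a motor teaching signal
     from the main loop. Because the teaching signal is only valid for one
     timestep, it is set again before every call to step(). The names desired,
     sensors, motors, sensornumber and motornumber are hypothetical placeholders.

       std::vector<motor> desired(motornumber);
       // ... fill desired with the wanted motor values ...
       controller->setMotorTeachingSignal(&desired[0], motornumber);
       controller->step(sensors, sensornumber, motors, motornumber);
  */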

  /**** REINFORCEMENT ****/
  /** sets the reinforcement signal for this timestep.
      It is used to calculate a factor for the update:
      factor = 1 - 0.95*reinforcement.
      @param reinforcement value between -1 and 1 (-1 bad, 0 neutral, 1 good)
  */
  virtual void setReinforcement(double reinforcement);
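
  /* Worked example for the factor above (pure arithmetic, for illustration):
     reinforcement = 1 (good) gives factor 1 - 0.95*1 = 0.05, so the update is
     strongly scaled down; reinforcement = -1 (bad) gives factor 1.95, so the
     update is nearly doubled; reinforcement = 0 leaves the update unchanged.
     The internal reinforcement factor is reset after each learning step, so
     the signal is typically set again before every call to step(), e.g.:

       controller->setReinforcement(reward);  // reward in [-1,1], hypothetical variable
       controller->step(sensors, sensornumber, motors, motornumber);
  */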

  static InvertMotorNStepConf getDefaultConf(){
    InvertMotorNStepConf c;
    c.buffersize  = 50;
    c.cInit       = 1.0;
    c.cNonDiag    = 0;
    c.useS        = false;
    c.useSD       = false;
    c.numberContext = 0;
    c.someInternalParams = true;
    c.cNonDiagAbs = 0.0;
    return c;
  }

  void getLastMotors(motor* motors, int len);
  void getLastSensors(sensor* sensors, int len);

  /// sets the sensor channel weights (matrix should be of size getSensorNumber() x 1)
  void setSensorWeights(const matrix::Matrix& weights);
  matrix::Matrix getSensorWeights() const { return sensorweights; }
  /// reference to the C matrix
  matrix::Matrix& getC(){ return C; };

  double getE() const { return v.multTM().val(0,0); }

protected:
  unsigned short number_sensors;
  unsigned short number_motors;

public:
  matrix::Matrix A;  ///< Model Matrix (motors to sensors)
  matrix::Matrix S;  ///< additional Model Matrix (sensors to sensors)
  matrix::Matrix SD; ///< additional Model Matrix (sensor derivatives to sensors)
  matrix::Matrix C;  ///< Controller Matrix
  matrix::Matrix H;  ///< Controller Bias
  matrix::Matrix B;  ///< Model Bias
protected:
  NoiseGenerator* BNoiseGen; ///< Noisegenerator for noisy bias
  NoiseGenerator* YNoiseGen; ///< Noisegenerator for noisy motor output
  matrix::Matrix R;          ///< C*A
  matrix::Matrix SmallID;    ///< small identity matrix in the dimension of R
  matrix::Matrix xsi;        ///< current output error
  matrix::Matrix v;          ///< current reconstructed error
  double xsi_norm;           ///< norm of xsi
  double xsi_norm_avg;       ///< average norm of xsi (used to decide whether the model learns)
  double pain;               ///< if the modelling error (xsi) is too high we have a pain signal
  matrix::Matrix* x_buffer;
  matrix::Matrix* y_buffer;
  matrix::Matrix* eta_buffer;
  matrix::Matrix zero_eta;   // zero-initialised eta
  matrix::Matrix x_smooth;
  // matrix::Matrix z;       ///< membrane potential
  matrix::Matrix y_teaching; ///< teaching motor signal
  bool useTeaching;          ///< flag whether there is an actual teaching signal or not
  double reinforcement;      ///< reinforcement value (set via setReinforcement())
  double reinforcefactor;    ///< reinforcement factor (set to 1 every step after learning)
  int t_rand;                ///< initial random time offset to avoid synchronous management of all controllers

  matrix::Matrix sensorweights; ///< sensor channel weights (each channel gets a certain importance)

  /** factor to teach for continuity: subsequent motor commands
      should not differ too much */
  double continuity;
  double modelCompliant;   ///< learning factor for model (or sensor) compliant learning
  int managementInterval;  ///< interval between subsequent management function calls
  paramval inhibition;     ///< inhibition strength for the sparse kwta strategy (is scaled with epsC)
  paramval kwta;           ///< (int) number of synapses that get strengthened
  paramval limitRF;        ///< (int) receptive field of motor neurons (number of off-center sensors); if 0 then no limitation. Mutually exclusive with inhibition
  paramval dampS;          ///< damping of the S matrix
  paramval dampC;          ///< damping of the C matrix
  paramval activeExplore;  ///< decides whether and how strongly the backpropagated error is used as a control signal
  paramval cfactor;
  paramval cnondiagabs;
  paramval cdiagabs;

  paramval noiseY;         ///< noise strength for y

  InvertMotorNStepConf conf;

  /** puts the sensors in the ringbuffer, generates controller values and puts
      them in the ringbuffer as well */
  virtual void fillBuffersAndControl(const sensor* x_, int number_sensors,
                                     motor* y_, int number_motors);

  /** calculates the first shift into the motor space using delayed motor values.
      @param delay 0 for no delay and n>0 for n timesteps delay in the time loop */
  virtual void calcEtaAndBufferIt(int delay);

  /** calculates xsi for the current timestep using the delayed y values
      and x delayed by one
      @param delay 0 for no delay and n>0 for n timesteps delay in the time loop */
  virtual void calcXsi(int delay);

  /// learns H and C with motors y and corresponding sensors x
  virtual void learnController();

  /** calculates the update for C and H
      @param y_delay timesteps to delay the y values (usually 0).
      Please note that the delayed values are NOT used for the error calculation
      (this is done in calcXsi()) */
  virtual void calcCandHUpdates(matrix::Matrix& C_update, matrix::Matrix& H_update, int y_delay);

  /// updates the matrices C and H
  virtual void updateCandH(const matrix::Matrix& C_update, const matrix::Matrix& H_update, double squashSize);

  /** learns A (and S) using motors y and corresponding sensors x
      @param delay 0 for no delay and n>0 for n timesteps delay in the time loop */
  virtual void learnModel(int delay);

  /// calculates the predicted sensor values
  virtual matrix::Matrix model(const matrix::Matrix* x_buffer, int delay, const matrix::Matrix& y);

  /// handles inhibition, damping etc.
  virtual void management();

  /// returns the controller output for the given sensor values
  virtual matrix::Matrix calculateControllerValues(const matrix::Matrix& x_smooth);

  /** Calculates the first and second derivative and returns both stacked in one matrix.
      We use simple discrete approximations:
      \f[ f'(x) = (f(x) - f(x-1)) / 2 \f]
      \f[ f''(x) = f(x) - 2f(x-1) + f(x-2) \f]
      where we have to go into the past because we do not have f(x+1). The scaling can be neglected.
  */
  matrix::Matrix calcDerivatives(const matrix::Matrix* buffer, int delay);

public:
  /** k-winner-take-all inhibition for synapses. The k largest synapses are strengthened
      and the rest are inhibited. Strong synapses are scaled by 1+(damping/k) and weak
      synapses are scaled by 1-(damping/(n-k)), where n is the number of synapses.
      @param weightmatrix reference to the weight matrix. The synapses of a neuron are in one row;
      the inhibition is done for each row independently
      @param k number of synapses to strengthen
      @param damping strength of suppression and excitation (typically 0.001)
  */
  void kwtaInhibition(matrix::Matrix& weightmatrix, unsigned int k, double damping);
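
  /* Numeric illustration of the scaling above (pure arithmetic, for
     illustration): for a row with n = 10 synapses, k = 2 and damping = 0.001,
     the two largest synapses are scaled by 1 + 0.001/2 = 1.0005 and the
     remaining eight by 1 - 0.001/8 = 0.999875.
  */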

  /** sets all connections to zero which are further away than rfSize
      from the main diagonal.
      If rfSize == 1 only the main diagonal is kept;
      if rfSize == 2 the main diagonal and the first upper and lower side diagonals are kept, and so on.
  */
  void limitC(matrix::Matrix& weightmatrix, unsigned int rfSize);

};

#endif