Robot Simulator of the Robotics Group for Self-Organization of Control: invertmotornstep.h Source File

00001 /***************************************************************************
00002  *   Copyright (C) 2005 by Robot Group Leipzig                             *
00003  *    martius@informatik.uni-leipzig.de                                    *
00004  *    fhesse@informatik.uni-leipzig.de                                     *
00005  *    der@informatik.uni-leipzig.de                                        *
00006  *    joergweide84@aol.com (robot12)                                       *
00007  *                                                                         *
00008  *   ANY COMMERCIAL USE FORBIDDEN!                                         *
00009  *   LICENSE:                                                              *
00010  *   This work is licensed under the Creative Commons                      *
00011  *   Attribution-NonCommercial-ShareAlike 2.5 License. To view a copy of   *
00012  *   this license, visit http://creativecommons.org/licenses/by-nc-sa/2.5/ *
00013  *   or send a letter to Creative Commons, 543 Howard Street, 5th Floor,   *
00014  *   San Francisco, California, 94105, USA.                                *
00015  *                                                                         *
00016  *   This program is distributed in the hope that it will be useful,       *
00017  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
00018  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.                  *
00019  *                                                                         *
00020  *   $Log: invertmotornstep.h,v $
00021  *   Revision 1.37  2009/10/23 12:39:03  martius
00022  *   added description for inspectable elements
00023  *
00024  *   Revision 1.36  2009/08/07 14:40:23  martius
00025  *   added new teaching interface
00026  *
00027  *   Revision 1.35  2009/08/04 17:23:08  martius
00028  *   removed additional constructor again and added a initial matrix into the conf
00029  *
00030  *   Revision 1.34  2009/07/15 13:10:11  robot12
00031  *   insert a new constructor to create the class with a giving neuron matrix
00032  *
00033  *   Revision 1.33  2009/03/20 17:52:33  martius
00034  *   modified teaching
00035  *
00036  *   Revision 1.32  2008/05/30 11:58:27  martius
00037  *   use cmath instead of math.h
00038  *
00039  *   Revision 1.31  2008/04/17 14:54:44  martius
00040  *   randomGen added, which is a random generator with long period and an
00041  *    internal state. Each Agent has an instance and passed it to the controller
00042  *    and the wiring. This is good for
00043  *   a) repeatability on agent basis,
00044  *   b) parallel execution as done in ode_robots
00045  *
00046  *   Revision 1.30  2008/01/17 10:02:00  der
00047  *   inserted some parameters.
00048  *
00049  *   Revision 1.29  2007/12/13 16:42:52  martius
00050  *   continuity
00051  *
00052  *   Revision 1.28  2007/12/07 10:58:02  der
00053  *   NoiseGenerator: method signature of add and generate changed!
00054  *   IMNS: added cfactor parameter
00055  *
00056  *   Revision 1.27  2007/11/07 13:36:36  martius
00057  *   context sensor definition, that are inputs to S
00058  *
00059  *   Revision 1.26  2007/04/02 08:21:36  martius
00060  *   simple reinforcement
00061  *
00062  *   Revision 1.25  2007/02/23 19:36:17  martius
00063  *   S and SD matrixes separated
00064  *
00065  *   Revision 1.24  2007/02/12 13:18:46  martius
00066  *   teaching changed. Use standard backprop
00067  *   distal learning
00068  *   model compliant learning (untested)
00069  *
00070  *   Revision 1.23  2007/01/26 12:03:11  martius
00071  *   kwta inhibition and limitRF implemented
00072  *
00073  *   Revision 1.22  2006/12/21 11:44:17  martius
00074  *   commenting style for doxygen //< -> ///<
00075  *   FOREACH and FOREACHC are macros for collection iteration
00076  *
00077  *   Revision 1.21  2006/12/01 16:18:26  martius
00078  *   S uses first and second derivative. Not really tested yet
00079  *
00080  *   Revision 1.20  2006/11/29 16:22:43  martius
00081  *   name is a variable of configurable and is used as such
00082  *
00083  *   Revision 1.19  2006/10/23 10:47:30  martius
00084  *   model as function
00085  *
00086  *   Revision 1.18  2006/08/02 09:31:16  martius
00087  *   calcErrorFactor moved to invertmotorcontroller
00088  *
00089  *   Revision 1.17  2006/07/20 17:14:34  martius
00090  *   removed std namespace from matrix.h
00091  *   storable interface
00092  *   abstract model and invertablemodel as superclasses for networks
00093  *
00094  *   Revision 1.16  2006/07/18 15:17:16  martius
00095  *   buffer operations like delayed values and smoothed values moved to invertmotorcontroller
00096  *
00097  *   Revision 1.15  2006/07/18 14:47:45  martius
00098  *   new regularisation of g' and g''/g' and squashP moved to invertmotorcontroller
00099  *
00100  *   Revision 1.14  2006/07/14 12:23:58  martius
00101  *   selforg becomes HEAD
00102  *
00103  *   Revision 1.12.6.12  2006/07/10 13:05:16  martius
00104  *   NON-COMMERICAL LICENSE added to controllers
00105  *
00106  *   Revision 1.12.6.11  2006/07/10 11:59:23  martius
00107  *   Matrixlib now in selforg
00108  *   no namespace std in header files
00109  *
00110  *   Revision 1.12.6.10  2006/02/24 14:46:44  martius
00111  *   removed gR
00112  *
00113  *   Revision 1.12.6.9  2006/02/08 15:16:24  martius
00114  *   *** empty log message ***
00115  *
00116  *   Revision 1.12.6.8  2006/01/31 15:30:34  martius
00117  *   model does not learn if too large error.
00118  *
00119  *   Revision 1.12.6.7  2006/01/18 16:49:34  martius
00120  *   new Update algo (simpler and hopefully better)
00121  *
00122  *   Revision 1.12.6.6  2006/01/17 16:58:40  martius
00123  *   loading and storing
00124  *
00125  *   Revision 1.12.6.5  2006/01/13 12:24:39  martius
00126  *   motor teaching included
00127  *
00128  *   Revision 1.12.6.4  2005/12/15 17:04:39  martius
00129  *   min, max and so on are template functions now
00130  *
00131  *   Revision 1.12.6.3  2005/11/21 17:02:46  martius
00132  *   calcCandHUpdates as separat function
00133  *   better formula for g' (q') using Mittelwertsatz
00134  *    works great!
00135  *
00136  *   Revision 1.12.6.2  2005/11/16 11:24:26  martius
00137  *   moved to selforg
00138  *
00139  *   Revision 1.12.6.1  2005/11/14 14:54:02  martius
00140  *   cpp file is seperate now
00141  *
00142  *   Revision 1.12  2005/10/27 15:46:38  martius
00143  *   inspectable interface is expanded to structural information for network visualiser
00144  *
00145  *   Revision 1.11  2005/10/24 11:10:27  martius
00146  *   disabled exact formula for shift
00147  *
00148  *   Revision 1.10  2005/10/21 16:02:49  martius
00149  *   added additional model matrix S (switch in contructor)
00150  *
00151  *   Revision 1.9  2005/10/21 11:55:13  martius
00152  *   id is now toId
00153  *   relativeE can be switched on
00154  *   uses an symmetric but exact formula for shift calcuation through neuron
00155  *   However g'/g'' stays -2g because implementation was making trouble
00156  *
00157  *   Revision 1.8  2005/10/06 17:08:08  martius
00158  *   switched to stl lists
00159  *   Bias noise
00160  *   correct error_factor for controller learning
00161  *
00162  *   Revision 1.7  2005/09/27 10:59:47  martius
00163  *   error_factor (from logaE or rootE) is member now
00164  *
00165  *   Revision 1.6  2005/09/22 07:22:59  martius
00166  *   proper destruction
00167  *
00168  *   Revision 1.5  2005/09/21 07:31:10  martius
00169  *   v does not go through g
00170  *
00171  *   Revision 1.4  2005/07/28 11:16:08  martius
00172  *   there is a switch for exporting just some of the internal parameters
00173  *
00174  *   Revision 1.3  2005/07/27 15:44:23  martius
00175  *   learning rate adaptation
00176  *
00177  *   Revision 1.2  2005/07/21 15:07:59  martius
00178  *   working version
00179  *   squashing normalised
00180  *
00181  *   Revision 1.1  2005/07/19 09:35:35  martius
00182  *   invert motor space controller that iterates
00183  *    over multiple time steps in past
00184  *
00185  *
00186  ***************************************************************************/
00187 #ifndef __INVERTMOTORNSTEP_H
00188 #define __INVERTMOTORNSTEP_H
00189 
00190 #include "invertmotorcontroller.h"
00191 #include "teachable.h"
00192 
00193 #include <assert.h>
00194 #include <cmath>
00195 
00196 #include <selforg/matrix.h>
00197 #include <selforg/noisegenerator.h>
00198 
00199 typedef struct InvertMotorNStepConf {
00200   int buffersize; ///< buffersize size of the time-buffer for x,y,eta
00201   matrix::Matrix initialC; ///< initial controller matrix C (if not given then randomly chosen, see cInit, cNonDiag...)
00202   double cInit; ///< cInit size of the C matrix to initialised with.
00203   double cNonDiag; ///< cNonDiag is the size of the nondiagonal elements in respect to the diagonal (cInit) ones
00204   double cNonDiagAbs; ///< cNonDiag is the value of the nondiagonal elements
00205   bool useS;    ///< useS decides whether to use the S matrix in addition to the A matrix (sees sensors)
00206   /** useSD decides whether to use the SD matrix in addition to the A
00207       matrix (sees first and second derivatives)  */
00208   bool useSD;
00209   /** number of context sensors(considered at the end of the sensor
00210       vector. If not 0, then S will only get them as input  */
00211   int numberContext;
00212   bool someInternalParams;  ///< someInternalParams if true only some internal parameters are exported, all otherwise
00213 } InvertMotorNStepConf;
00214 
00215 /**
00216  * class for robot controller that uses the georg's matrixlib for
00217  *  direct matrix inversion for n channels
00218  * (simple one layer networks)
00219  *
00220  * Implements standart parameters: eps, rho, mu, stepnumber4avg, stepnumber4delay
00221  */
00222 class InvertMotorNStep : public InvertMotorController, public Teachable {
00223 
00224 public:
00225   InvertMotorNStep(const InvertMotorNStepConf& conf = getDefaultConf());
00226 
00227   virtual void init(int sensornumber, int motornumber, RandGen* randGen = 0);
00228 
00229   virtual ~InvertMotorNStep();
00230 
00231   /// returns the number of sensors the controller was initialised with or 0 if not initialised
00232   virtual int getSensorNumber() const { return number_sensors; }
00233   /// returns the mumber of motors the controller was initialised with or 0 if not initialised
00234   virtual int getMotorNumber() const  { return number_motors; }
00235 
00236   /// performs one step (includes learning).
00237   /// Calulates motor commands from sensor inputs.
00238   virtual void step(const sensor* , int number_sensors, motor* , int number_motors);
00239 
00240   /// performs one step without learning. Calulates motor commands from sensor inputs.
00241   virtual void stepNoLearning(const sensor* , int number_sensors,
00242                               motor* , int number_motors);
00243 
00244   /**** STOREABLE ****/
00245   /** stores the controller values to a given file. */
00246   virtual bool store(FILE* f) const;
00247   /** loads the controller values from a given file. */
00248   virtual bool restore(FILE* f);
00249 
00250   /**** INSPECTABLE ****/
00251   virtual std::list<ILayer> getStructuralLayers() const;
00252   virtual std::list<IConnection> getStructuralConnections() const;
00253 
00254   /**** TEACHING ****/
00255   /** The given motor teaching signal is used for this timestep.
00256       It is used as a feed forward teaching signal for the controller.
00257       Please note, that the teaching signal has to be given each timestep
00258        for a continuous teaching process.
00259    */
00260   virtual void setMotorTeachingSignal(const motor* teaching, int len);
00261 
00262   /** The given sensor teaching signal (distal learning) is used for this timestep.
00263       First the belonging motor teachung signal is calculated by the inverse model.
00264       See setMotorTeachingSignal
00265    */
00266   virtual void setSensorTeachingSignal(const sensor* teaching, int len);
00267   void getLastMotors(motor* motors, int len);
00268   void getLastSensors(sensor* sensors, int len);
00269 
00270 
00271   /**** New TEACHING interface ****/
00272   /** The given motor teaching signal is used for this timestep. 
00273       It is used as a feed forward teaching signal for the controller.
00274       Please note, that the teaching signal has to be given each timestep 
00275        for a continuous teaching process.
00276      @param teaching: matrix with dimensions (motornumber,1)
00277    */
00278   virtual void setMotorTeaching(const matrix::Matrix& teaching);
00279 
00280   /** The given sensor teaching signal (distal learning) is used for this timestep. 
00281       The belonging motor teachung signal is calculated by the inverse model.
00282       See setMotorTeaching
00283      @param teaching: matrix with dimensions (motorsensors,1)
00284    */
00285   virtual void setSensorTeaching(const matrix::Matrix& teaching);  
00286   /// returns the last motor values (useful for cross motor coupling)
00287   virtual matrix::Matrix getLastMotorValues();
00288   /// returns the last sensor values (useful for cross sensor coupling)
00289   virtual matrix::Matrix getLastSensorValues();
00290 
00291   
00292 
00293   // UNUSED! OLD IMPLEMENTATION which hat some consistency arguments
00294   void calcCandHUpdatesTeaching(matrix::Matrix& C_update, matrix::Matrix& H_update, int y_delay);
00295 
00296   /**** REINFORCEMENT ****/
00297   /** set the reinforcement signal for this timestep.
00298       It is used to calculate a factor for the update.
00299       Factor = 1-0.95*reinforcement.
00300       @param reinforcement value between -1 and 1 (-1 bad, 0 neutral, 1 good)
00301    */
00302   virtual void setReinforcement(double reinforcement);
00303 
00304 
00305 
00306   static InvertMotorNStepConf getDefaultConf(){
00307     InvertMotorNStepConf c;
00308     c.buffersize = 50;
00309     c.cInit = 1.0;
00310     c.cNonDiag = 0;
00311     c.useS  = false;
00312     c.useSD  = false;
00313     c.numberContext  = 0;
00314     c.someInternalParams = true;
00315     c.cNonDiagAbs=0.0;
00316     return c;
00317   }
00318 
00319   /// sets the sensor channel weights (matrix should be (getSensorNumber() x 1)
00320   void setSensorWeights(const matrix::Matrix& weights);
00321   matrix::Matrix getSensorWeights() const {return sensorweights; }
00322   /// reference to C-matrix
00323   matrix::Matrix& getC(){return C;};
00324 
00325   double getE() const {return v.multTM().val(0,0);}
00326 
00327 protected:
00328   unsigned short number_sensors;
00329   unsigned short number_motors;
00330 
00331  public:
00332   matrix::Matrix A; ///< Model Matrix (motors to sensors)
00333   matrix::Matrix S; ///< additional Model Matrix (sensors to sensors)
00334   matrix::Matrix SD; ///< additional Model Matrix (sensors derivatives to sensors)
00335   matrix::Matrix C; ///< Controller Matrix
00336   matrix::Matrix H; ///< Controller Bias
00337   matrix::Matrix B; ///< Model Bias
00338 protected:
00339   NoiseGenerator* BNoiseGen; ///< Noisegenerator for noisy bias
00340   NoiseGenerator* YNoiseGen; ///< Noisegenerator for noisy motor output
00341   matrix::Matrix R; ///< C*A
00342   matrix::Matrix SmallID; ///< small identity matrix in the dimension of R
00343   matrix::Matrix xsi; ///< current output error
00344   matrix::Matrix v;   ///< current reconstructed error
00345   double E_val; ///< value of Error function
00346   double xsi_norm; ///< norm of matrix
00347   double xsi_norm_avg; ///< average norm of xsi (used to define whether Modell learns)
00348   double pain;         ///< if the modelling error (xsi) is too high we have a pain signal
00349   matrix::Matrix* x_buffer;
00350   matrix::Matrix* y_buffer;
00351   matrix::Matrix* eta_buffer;
00352   matrix::Matrix zero_eta; // zero initialised eta
00353   matrix::Matrix x_smooth;
00354   //   matrix::Matrix z; ///< membrane potential
00355   matrix::Matrix y_teaching; ///< teaching motor signal
00356   bool useTeaching; ///< flag whether there is an actual teachning signal or not
00357   double reinforcement; ///< reinforcement value (set via setReinforcement())
00358   double reinforcefactor; ///< reinforcement factor (set to 1 every step after learning)
00359   int t_rand; ///< initial random time to avoid syncronous management of all controllers
00360 
00361   matrix::Matrix sensorweights; ///< sensor channel weight (each channel gets a certain importance)
00362 
00363   /** factor to teach for continuity: subsequent motor commands
00364       should not differ too much */
00365   double continuity;
00366   double modelCompliant; ///< learning factor for model (or sensor) compliant learning
00367   int managementInterval; ///< interval between subsequent management function calls
00368   paramval inhibition; ///< inhibition strength for sparce kwta strategy (is scaled with epsC)
00369   paramval kwta;       ///< (int) number of synapses that get strengthend
00370   paramval limitRF;    ///< (int) receptive field of motor neurons (number of offcenter sensors) if null then no limitation. Mutual exclusive with inhibition
00371   paramval dampS;     ///< damping of S matrix
00372   paramval dampC;     ///< damping of C matrix
00373   paramval activeExplore; ///< decides whether and how strong the backpropagated error is used as a control signal
00374   paramval cfactor;
00375   paramval cnondiagabs;
00376   paramval cdiagabs;
00377 
00378 
00379   paramval noiseY; ///< noise strength for y
00380 
00381   InvertMotorNStepConf conf;
00382 
00383   /// puts the sensors in the ringbuffer, generate controller values and put them in the
00384   //  ringbuffer as well
00385   virtual void fillBuffersAndControl(const sensor* x_, int number_sensors,
00386                              motor* y_, int number_motors);
00387 
00388   /// calculates the first shift into the motor space useing delayed motor values.
00389   //  @param delay 0 for no delay and n>0 for n timesteps delay in the time loop
00390   virtual void calcEtaAndBufferIt(int delay);
00391   /// calculates xsi for the current time step using the delayed y values
00392   //  and x delayed by one
00393   //  @param delay 0 for no delay and n>0 for n timesteps delay in the time loop
00394   virtual void calcXsi(int delay);
00395 
00396   /// learn H,C with motors y and corresponding sensors x
00397   //  @param delay 0 for no delay and n>0 for n timesteps delay in the time loop
00398   virtual void learnController(int delay);
00399 
00400   /// calculates the Update for C and H
00401   // @param delay timesteps to delay the y-values.  (usually 0)
00402   //  Please note that the delayed values are NOT used for the error calculation
00403   //  (this is done in calcXsi())
00404   virtual void calcCandHUpdates(matrix::Matrix& C_update, matrix::Matrix& H_update, int delay);
00405 
00406   /// updates the matrix C and H
00407   virtual void updateCandH(const matrix::Matrix& C_update, const matrix::Matrix& H_update, double squashSize);
00408 
00409   /// learn A, (and S) using motors y and corresponding sensors x
00410   //  @param delay 0 for no delay and n>0 for n timesteps delay in the time loop
00411   virtual void learnModel(int delay);
00412 
00413   /// calculates the predicted sensor values
00414   virtual matrix::Matrix model(const matrix::Matrix* x_buffer, int delay, const matrix::Matrix& y);
00415 
00416   /// handles inhibition damping etc.
00417   virtual void management();
00418 
00419   /// returns controller output for given sensor values
00420   virtual matrix::Matrix calculateControllerValues(const matrix::Matrix& x_smooth);
00421 
00422   /** Calculates first and second derivative and returns both in on matrix (above).
00423       We use simple discrete approximations:
00424       \f[ f'(x) = (f(x) - f(x-1)) / 2 \f]
00425       \f[ f''(x) = f(x) - 2f(x-1) + f(x-2) \f]
00426       where we have to go into the past because we do not have f(x+1). The scaling can be neglegted.
00427   */
00428   matrix::Matrix calcDerivatives(const matrix::Matrix* buffer, int delay);
00429 
00430 public:
00431   /** k-winner take all inhibition for synapses. k largest synapses are strengthed and the rest are inhibited.
00432       strong synapes are scaled by 1+(damping/k) and weak synapses are scaled by 1-(damping/(n-k)) where n is the
00433       number of synapes
00434       @param weightmatrix reference to weight matrix. Synapses for a neuron are in one row.
00435              The inhibition is done for all rows independently
00436       @param k number of synapes to strengthen
00437       @param damping strength of supression and exitation (typically 0.001)
00438    */
00439   void kwtaInhibition(matrix::Matrix& weightmatrix, unsigned int k, double damping);
00440 
00441   /** sets all connections to zero which are further away then rfSize
00442       from the diagonal.
00443       If rfSize == 1 then only main diagonal is left.
00444       If rfSize = 2: main diagonal and upper and lower side diagonal are kept and so on and so forth.
00445    */
00446   void limitC(matrix::Matrix& weightmatrix, unsigned int rfSize);
00447 
00448 
00449   static double clip095(double x);
00450   static double regularizedInverse(double v);
00451 
00452 };
00453 
00454 #endif