invertmotornstep.h

Go to the documentation of this file.
00001 /***************************************************************************
00002  *   Copyright (C) 2005-2011 by                                            *
00003  *    Georg Martius  <georg dot martius at web dot de>                     *
00004  *    Frank Hesse    <frank at nld dot ds dot mpg dot de>                  *
00005  *    Ralf Der       <ralfder at mis dot mpg dot de>                       *
00006  *                                                                         *
00007  *   ANY COMMERCIAL USE FORBIDDEN!                                         *
00008  *   LICENSE:                                                              *
00009  *   This work is licensed under the Creative Commons                      *
00010  *   Attribution-NonCommercial-ShareAlike 2.5 License. To view a copy of   *
00011  *   this license, visit http://creativecommons.org/licenses/by-nc-sa/2.5/ *
00012  *   or send a letter to Creative Commons, 543 Howard Street, 5th Floor,   *
00013  *   San Francisco, California, 94105, USA.                                *
00014  *                                                                         *
00015  *   This program is distributed in the hope that it will be useful,       *
00016  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
00017  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.                  *
00018  *                                                                         *
00019  ***************************************************************************/
00020 #ifndef __INVERTMOTORNSTEP_H
00021 #define __INVERTMOTORNSTEP_H
00022 
00023 #include "invertmotorcontroller.h"
00024 #include "teachable.h"
00025 
00026 #include <assert.h>
00027 #include <cmath>
00028 
00029 #include <selforg/matrix.h>
00030 #include <selforg/noisegenerator.h>
00031 
00032 typedef struct InvertMotorNStepConf {
00033   int buffersize; ///< buffersize size of the time-buffer for x,y,eta
00034   matrix::Matrix initialC; ///< initial controller matrix C (if not given then randomly chosen, see cInit, cNonDiag...)
00035   double cInit; ///< cInit size of the C matrix to initialised with.
00036   double cNonDiag; ///< cNonDiag is the size of the nondiagonal elements in respect to the diagonal (cInit) ones
00037   double cNonDiagAbs; ///< cNonDiag is the value of the nondiagonal elements
00038   bool useS;    ///< useS decides whether to use the S matrix in addition to the A matrix (sees sensors)
00039   /** useSD decides whether to use the SD matrix in addition to the A
00040       matrix (sees first and second derivatives)  */
00041   bool useSD;
00042   /** number of context sensors(considered at the end of the sensor
00043       vector. If not 0, then S will only get them as input  */
00044   int numberContext;
00045   bool someInternalParams;  ///< someInternalParams if true only some internal parameters are exported, all otherwise
00046 } InvertMotorNStepConf;
00047 
00048 /**
00049  * class for robot controller that uses the georg's matrixlib for
00050  *  direct matrix inversion for n channels
00051  * (simple one layer networks)
00052  *
00053  * Implements standart parameters: eps, rho, mu, stepnumber4avg, stepnumber4delay
00054  */
00055 class InvertMotorNStep : public InvertMotorController, public Teachable {
00056 
00057 public:
00058   InvertMotorNStep(const InvertMotorNStepConf& conf = getDefaultConf());
00059 
00060   virtual void init(int sensornumber, int motornumber, RandGen* randGen = 0);
00061 
00062   virtual ~InvertMotorNStep();
00063 
00064   /// returns the number of sensors the controller was initialised with or 0 if not initialised
00065   virtual int getSensorNumber() const { return number_sensors; }
00066   /// returns the mumber of motors the controller was initialised with or 0 if not initialised
00067   virtual int getMotorNumber() const  { return number_motors; }
00068 
00069   /// performs one step (includes learning).
00070   /// Calulates motor commands from sensor inputs.
00071   virtual void step(const sensor* , int number_sensors, motor* , int number_motors);
00072 
00073   /// performs one step without learning. Calulates motor commands from sensor inputs.
00074   virtual void stepNoLearning(const sensor* , int number_sensors,
00075                               motor* , int number_motors);
00076 
00077   /**** STOREABLE ****/
00078   /** stores the controller values to a given file. */
00079   virtual bool store(FILE* f) const;
00080   /** loads the controller values from a given file. */
00081   virtual bool restore(FILE* f);
00082 
00083   /**** INSPECTABLE ****/
00084   virtual std::list<ILayer> getStructuralLayers() const;
00085   virtual std::list<IConnection> getStructuralConnections() const;
00086 
00087   /**** TEACHING ****/
00088   /** The given motor teaching signal is used for this timestep.
00089       It is used as a feed forward teaching signal for the controller.
00090       Please note, that the teaching signal has to be given each timestep
00091        for a continuous teaching process.
00092    */
00093   virtual void setMotorTeachingSignal(const motor* teaching, int len);
00094 
00095   /** The given sensor teaching signal (distal learning) is used for this timestep.
00096       First the belonging motor teachung signal is calculated by the inverse model.
00097       See setMotorTeachingSignal
00098    */
00099   virtual void setSensorTeachingSignal(const sensor* teaching, int len);
00100   void getLastMotors(motor* motors, int len);
00101   void getLastSensors(sensor* sensors, int len);
00102 
00103 
00104   /**** New TEACHING interface ****/
00105   /** The given motor teaching signal is used for this timestep. 
00106       It is used as a feed forward teaching signal for the controller.
00107       Please note, that the teaching signal has to be given each timestep 
00108        for a continuous teaching process.
00109      @param teaching: matrix with dimensions (motornumber,1)
00110    */
00111   virtual void setMotorTeaching(const matrix::Matrix& teaching);
00112 
00113   /** The given sensor teaching signal (distal learning) is used for this timestep. 
00114       The belonging motor teachung signal is calculated by the inverse model.
00115       See setMotorTeaching
00116      @param teaching: matrix with dimensions (motorsensors,1)
00117    */
00118   virtual void setSensorTeaching(const matrix::Matrix& teaching);  
00119   /// returns the last motor values (useful for cross motor coupling)
00120   virtual matrix::Matrix getLastMotorValues();
00121   /// returns the last sensor values (useful for cross sensor coupling)
00122   virtual matrix::Matrix getLastSensorValues();
00123 
00124   
00125 
00126   // UNUSED! OLD IMPLEMENTATION which hat some consistency arguments
00127   void calcCandHUpdatesTeaching(matrix::Matrix& C_update, matrix::Matrix& H_update, int y_delay);
00128 
00129   /**** REINFORCEMENT ****/
00130   /** set the reinforcement signal for this timestep.
00131       It is used to calculate a factor for the update.
00132       Factor = 1-0.95*reinforcement.
00133       @param reinforcement value between -1 and 1 (-1 bad, 0 neutral, 1 good)
00134    */
00135   virtual void setReinforcement(double reinforcement);
00136 
00137 
00138 
00139   static InvertMotorNStepConf getDefaultConf(){
00140     InvertMotorNStepConf c;
00141     c.buffersize = 50;
00142     c.cInit = 1.0;
00143     c.cNonDiag = 0;
00144     c.useS  = false;
00145     c.useSD  = false;
00146     c.numberContext  = 0;
00147     c.someInternalParams = true;
00148     c.cNonDiagAbs=0.0;
00149     return c;
00150   }
00151 
00152   /// sets the sensor channel weights (matrix should be (getSensorNumber() x 1)
00153   void setSensorWeights(const matrix::Matrix& weights);
00154   matrix::Matrix getSensorWeights() const {return sensorweights; }
00155   /// reference to C-matrix
00156   matrix::Matrix& getC(){return C;};
00157 
00158   double getE() const {return v.multTM().val(0,0);}
00159 
00160 protected:
00161   unsigned short number_sensors;
00162   unsigned short number_motors;
00163 
00164  public:
00165   matrix::Matrix A; ///< Model Matrix (motors to sensors)
00166   matrix::Matrix S; ///< additional Model Matrix (sensors to sensors)
00167   matrix::Matrix SD; ///< additional Model Matrix (sensors derivatives to sensors)
00168   matrix::Matrix C; ///< Controller Matrix
00169   matrix::Matrix H; ///< Controller Bias
00170   matrix::Matrix B; ///< Model Bias
00171 protected:
00172   NoiseGenerator* BNoiseGen; ///< Noisegenerator for noisy bias
00173   NoiseGenerator* YNoiseGen; ///< Noisegenerator for noisy motor output
00174   matrix::Matrix R; ///< C*A
00175   matrix::Matrix SmallID; ///< small identity matrix in the dimension of R
00176   matrix::Matrix xsi; ///< current output error
00177   matrix::Matrix v;   ///< current reconstructed error
00178   double E_val; ///< value of Error function
00179   double xsi_norm; ///< norm of matrix
00180   double xsi_norm_avg; ///< average norm of xsi (used to define whether Modell learns)
00181   double pain;         ///< if the modelling error (xsi) is too high we have a pain signal
00182   matrix::Matrix* x_buffer;
00183   matrix::Matrix* y_buffer;
00184   matrix::Matrix* eta_buffer;
00185   matrix::Matrix zero_eta; // zero initialised eta
00186   matrix::Matrix x_smooth;
00187   //   matrix::Matrix z; ///< membrane potential
00188   matrix::Matrix y_teaching; ///< teaching motor signal
00189   bool useTeaching; ///< flag whether there is an actual teachning signal or not
00190   double reinforcement; ///< reinforcement value (set via setReinforcement())
00191   double reinforcefactor; ///< reinforcement factor (set to 1 every step after learning)
00192   int t_rand; ///< initial random time to avoid syncronous management of all controllers
00193 
00194   matrix::Matrix sensorweights; ///< sensor channel weight (each channel gets a certain importance)
00195 
00196   /** factor to teach for continuity: subsequent motor commands
00197       should not differ too much */
00198   double continuity;
00199   double modelCompliant; ///< learning factor for model (or sensor) compliant learning
00200   int managementInterval; ///< interval between subsequent management function calls
00201   paramval inhibition; ///< inhibition strength for sparce kwta strategy (is scaled with epsC)
00202   paramval kwta;       ///< (int) number of synapses that get strengthend
00203   paramval limitRF;    ///< (int) receptive field of motor neurons (number of offcenter sensors) if null then no limitation. Mutual exclusive with inhibition
00204   paramval dampS;     ///< damping of S matrix
00205   paramval dampC;     ///< damping of C matrix
00206   paramval activeExplore; ///< decides whether and how strong the backpropagated error is used as a control signal
00207   paramval cfactor;
00208   paramval cnondiagabs;
00209   paramval cdiagabs;
00210 
00211 
00212   paramval noiseY; ///< noise strength for y
00213 
00214   InvertMotorNStepConf conf;
00215 
00216   /// puts the sensors in the ringbuffer, generate controller values and put them in the
00217   //  ringbuffer as well
00218   virtual void fillBuffersAndControl(const sensor* x_, int number_sensors,
00219                              motor* y_, int number_motors);
00220 
00221   /// calculates the first shift into the motor space useing delayed motor values.
00222   //  @param delay 0 for no delay and n>0 for n timesteps delay in the time loop
00223   virtual void calcEtaAndBufferIt(int delay);
00224   /// calculates xsi for the current time step using the delayed y values
00225   //  and x delayed by one
00226   //  @param delay 0 for no delay and n>0 for n timesteps delay in the time loop
00227   virtual void calcXsi(int delay);
00228 
00229   /// learn H,C with motors y and corresponding sensors x
00230   //  @param delay 0 for no delay and n>0 for n timesteps delay in the time loop
00231   virtual void learnController(int delay);
00232 
00233   /// calculates the Update for C and H
00234   // @param delay timesteps to delay the y-values.  (usually 0)
00235   //  Please note that the delayed values are NOT used for the error calculation
00236   //  (this is done in calcXsi())
00237   virtual void calcCandHUpdates(matrix::Matrix& C_update, matrix::Matrix& H_update, int delay);
00238 
00239   /// updates the matrix C and H
00240   virtual void updateCandH(const matrix::Matrix& C_update, const matrix::Matrix& H_update, double squashSize);
00241 
00242   /// learn A, (and S) using motors y and corresponding sensors x
00243   //  @param delay 0 for no delay and n>0 for n timesteps delay in the time loop
00244   virtual void learnModel(int delay);
00245 
00246   /// calculates the predicted sensor values
00247   virtual matrix::Matrix model(const matrix::Matrix* x_buffer, int delay, const matrix::Matrix& y);
00248 
00249   /// handles inhibition damping etc.
00250   virtual void management();
00251 
00252   /// returns controller output for given sensor values
00253   virtual matrix::Matrix calculateControllerValues(const matrix::Matrix& x_smooth);
00254 
00255   /** Calculates first and second derivative and returns both in on matrix (above).
00256       We use simple discrete approximations:
00257       \f[ f'(x) = (f(x) - f(x-1)) / 2 \f]
00258       \f[ f''(x) = f(x) - 2f(x-1) + f(x-2) \f]
00259       where we have to go into the past because we do not have f(x+1). The scaling can be neglegted.
00260   */
00261   matrix::Matrix calcDerivatives(const matrix::Matrix* buffer, int delay);
00262 
00263 public:
00264   /** k-winner take all inhibition for synapses. k largest synapses are strengthed and the rest are inhibited.
00265       strong synapes are scaled by 1+(damping/k) and weak synapses are scaled by 1-(damping/(n-k)) where n is the
00266       number of synapes
00267       @param weightmatrix reference to weight matrix. Synapses for a neuron are in one row.
00268              The inhibition is done for all rows independently
00269       @param k number of synapes to strengthen
00270       @param damping strength of supression and exitation (typically 0.001)
00271    */
00272   void kwtaInhibition(matrix::Matrix& weightmatrix, unsigned int k, double damping);
00273 
00274   /** sets all connections to zero which are further away then rfSize
00275       from the diagonal.
00276       If rfSize == 1 then only main diagonal is left.
00277       If rfSize = 2: main diagonal and upper and lower side diagonal are kept and so on and so forth.
00278    */
00279   void limitC(matrix::Matrix& weightmatrix, unsigned int rfSize);
00280 
00281 
00282   static double clip095(double x);
00283   static double regularizedInverse(double v);
00284 
00285 };
00286 
00287 #endif