dinvert3channelcontroller.hpp

Go to the documentation of this file.
00001 
00002 double** allocateMatrix(int m, int n){
00003   double** mat=(double**)malloc(m*sizeof(double*));
00004   for(int i=0; i < m; i++){
00005     mat[i]=(double*)malloc(n*sizeof(double));
00006   }
00007   return mat;
00008 }
00009 
00010 void freeMatrix(double** mat, int m){
00011   for(int i=0; i < m; i++){
00012     free(mat[i]);
00013   }
00014   free(mat);
00015 }
00016 
00017 
00018 public: 
00019 
00020 /// return the name of the object (with version number)
00021 virtual constparamkey getName() const {  
00022   return name; 
00023 }
00024 
00025 
00026 virtual void init(int sensornumber, int motornumber){
00027   
00028 }
00029 
00030 
00031 DInvert3ChannelController(int numberchannels, int buffersize){
00032   
00033   eps=0.7;
00034   rho=0.0; 
00035   s4delay=1;
00036   s4avg=1;
00037   delta=0.01;
00038   factor_a=1.0;
00039   t=0;
00040   m=0.0;
00041     
00042   NUMBER_CHANNELS=numberchannels;
00043   BUFFER_SIZE=buffersize;
00044   // allocate field;
00045   A = allocateMatrix(NUMBER_CHANNELS, NUMBER_CHANNELS);
00046   C = allocateMatrix(NUMBER_CHANNELS, NUMBER_CHANNELS);
00047   h = (double*)malloc(NUMBER_CHANNELS*sizeof(double));
00048  
00049   x_buffer = allocateMatrix(BUFFER_SIZE, NUMBER_CHANNELS);
00050   y_buffer = allocateMatrix(BUFFER_SIZE, NUMBER_CHANNELS);
00051 
00052   xsi4E = (double*)malloc(NUMBER_CHANNELS*sizeof(double));
00053   xsi4Model = (double*)malloc(NUMBER_CHANNELS*sizeof(double));
00054 
00055   // internals
00056   x_smooth = (double*)malloc(NUMBER_CHANNELS*sizeof(double));
00057   x_effective = (double*)malloc(NUMBER_CHANNELS*sizeof(double));
00058   y_effective = (double*)malloc(NUMBER_CHANNELS*sizeof(double));
00059   Q_buf1 = allocateMatrix(NUMBER_CHANNELS, NUMBER_CHANNELS);
00060   Q_buf2 = allocateMatrix(NUMBER_CHANNELS, NUMBER_CHANNELS);
00061   L = allocateMatrix(NUMBER_CHANNELS, NUMBER_CHANNELS);
00062   z = (double*)malloc(NUMBER_CHANNELS*sizeof(double));
00063 
00064 
00065   for (int i = 0; i < NUMBER_CHANNELS; i++)
00066     {
00067       h[i] = 0.0;
00068       for (int j = 0; j < NUMBER_CHANNELS; j++)
00069         {
00070           if (i == j)
00071             {
00072               A[i][j] = (rand()/10.0)/RAND_MAX;
00073               C[i][j] = (rand()/10.0)/RAND_MAX;
00074             }
00075           else
00076             {
00077               A[i][j] = (rand()/10.0)/RAND_MAX;
00078               C[i][j] = (rand()/10.0)/RAND_MAX;
00079             }
00080         }
00081     }
00082   for (int i = 0; i < NUMBER_CHANNELS; i++)
00083     {
00084       for (int k = 0; k < BUFFER_SIZE; k++)
00085         {
00086           x_buffer[k][i] = 0;
00087           y_buffer[k][i] = 0;   
00088         }
00089     }
00090   // prepare name;
00091   Configurable::insertCVSInfo(name, "$RCSfile: dinvert3channelcontroller.hpp,v $", 
00092                                     "$Revision: 1.3 $");
00093     
00094   /*    // print initial values
00095         std::cout<<"Constructor of RobotLearnControl:"<<std::endl;
00096         std::cout<<"init: epsilon="<<eps<<std::endl;     
00097         std::cout<<"init: rho="<<rho<<std::endl;         
00098         std::cout<<"init: steps4delay="<<steps4delay<<std::endl; 
00099         std::cout<<"init: steps4avg="<<steps4avg<<std::endl;     
00100         std::cout<<"init: delta="<<delta<<std::endl; 
00101         std::cout<<"init: m (for calculation of E)="<<m<<std::endl; 
00102   */    
00103 };
00104 
00105 /// performs one step (includes learning). Calulates motor commands from sensor inputs.
00106 virtual void step(const sensor* x_, int sensornumber, motor* y_, int motornumber){
00107   for (int i = 0; i < NUMBER_CHANNELS; i++)
00108     {
00109       x_smooth[i] = 0.0;
00110       x_effective[i] = 0.0;
00111       y_effective[i] = 0.0;
00112     }
00113 
00114   // put new input value in ring buffer x_buffer
00115   putInBuffer(x_buffer, x_);
00116 
00117   // averaging over the last steps4avg values of x_buffer
00118   calculateSmoothValues(x_buffer, s4avg, x_smooth);
00119     
00120   // calculate controller values based on smoothed input values
00121   calculateControllerValues(x_smooth, y_);
00122 
00123   // put new output value in ring buffer y_buffer
00124   putInBuffer(y_buffer, y_);
00125 
00126   // calculate effective input/output, which is (actual-steps4delay) element of buffer
00127   calculateDelayedValues(x_buffer, s4delay, x_effective);    
00128   calculateDelayedValues(y_buffer, s4delay, y_effective);
00129  
00130   // learn controller with effective input/output
00131   learn(x_, x_effective, y_effective);
00132     
00133   // learn model with actual input and effective output (that produced the actual input)
00134   learnModel(x_, y_effective);
00135 
00136   // update step counter
00137   t++;
00138 };
00139 
00140 
00141 /// performs one step without learning. Calulates motor commands from sensor inputs.
00142 virtual void stepNoLearning(const sensor* x_, int sensornumber, motor* y_, int motornumber){
00143   for (int i = 0; i < NUMBER_CHANNELS; i++)
00144     {
00145       x_smooth[i] = 0.0;
00146     }
00147 
00148 
00149   // put new input value in ring buffer x_buffer
00150   putInBuffer(x_buffer, x_);
00151 
00152   // averaging over the last steps4avg values of x_buffer
00153   calculateSmoothValues(x_buffer, s4avg, x_smooth);
00154     
00155   // calculate controller values based on smoothed input values
00156   calculateControllerValues(x_smooth, y_);
00157 
00158   // put new output value in ring buffer y_buffer
00159   putInBuffer(y_buffer, y_);
00160 
00161   // update step counter
00162   t++;
00163 };
00164   
00165 virtual int getInternalParamNames(paramkey*& keylist){
00166   return 0;
00167 }
00168 
00169 virtual int getInternalParams(paramval* vallist, int length){
00170   int len = 0;
00171   return len;
00172 }
00173  
00174 protected:
00175 virtual void inverseMatrix(double** Q, double** Q_1){
00176   // Berechne Inverse von Q
00177 
00178   // only if NUMBER_CHANNELS<4
00179   assert(NUMBER_CHANNELS<4);
00180   if (NUMBER_CHANNELS==2){
00181 
00182     double det = Q[0][0] * Q[1][1] - Q[0][1] * Q[1][0];
00183     Q_1[0][0] = Q[1][1] / det;
00184     Q_1[1][1] = Q[0][0] / det;
00185     Q_1[0][1] = -Q[0][1] / det;
00186     Q_1[1][0] = -Q[1][0] / det;
00187       
00188   }
00189     
00190     
00191   if (NUMBER_CHANNELS==3){  
00192       
00193     double  detQ=0  ;
00194     double** Q_adjoint=Q_buf1;
00195       
00196     //calculate the inverse of Q
00197     Q_adjoint[0][0]=Q[1][1]*Q[2][2]-Q[1][2]*Q[2][1] ;
00198     Q_adjoint[0][1]=(Q[1][2]*Q[2][0]-Q[1][0]*Q[2][2]) ;
00199     Q_adjoint[0][2]=Q[1][0]*Q[2][1]-Q[1][1]*Q[2][0] ;
00200     Q_adjoint[1][0]=(Q[2][1]*Q[0][2]-Q[0][1]*Q[2][2]) ;
00201     Q_adjoint[1][1]=Q[0][0]*Q[2][2]-Q[0][2]*Q[2][0] ;
00202     Q_adjoint[1][2]=(Q[0][1]*Q[2][0]-Q[0][0]*Q[2][1]) ;
00203     Q_adjoint[2][0]=Q[0][1]*Q[1][2]-Q[1][1]*Q[0][2] ;
00204     Q_adjoint[2][1]=(Q[1][0]*Q[0][2]-Q[0][0]*Q[1][2]) ;
00205     Q_adjoint[2][2]=Q[0][0]*Q[1][1]-Q[0][1]*Q[1][0] ;
00206     detQ=Q[0][0]*Q_adjoint[0][0]+Q[0][1]*Q_adjoint[0][1]+Q[0][2]*Q_adjoint[0][2] ;
00207     for(int i=0; i<NUMBER_CHANNELS; i++){
00208       for(int j=0; j<NUMBER_CHANNELS; j++) {                                                     
00209         Q_1[i][j]=(Q_adjoint[j][i])/detQ  ;
00210       }
00211     }       
00212   } 
00213 };
00214 
00215 
00216 virtual double calculateE(const double *x_, const double *x_delay, const  double *y_delay) {
00217   double** Q = Q_buf1;
00218   double** Q_1= Q_buf2;
00219     
00220 
00221   // Calculate z based on the delayed inputs since the present input x is
00222   // produced by the outputs tau time steps before
00223   // which on their hand are y = K(x_D)
00224   // due to the delay in the feed back loop.
00225 
00226   for (int i = 0; i < NUMBER_CHANNELS; i++)
00227     {
00228       z[i] = h[i];
00229       for (int j = 0; j < NUMBER_CHANNELS; j++)
00230         {
00231           z[i] += C[i][j] * x_delay[j];
00232         }
00233     }
00234 
00235   // Berechne Matrix L
00236   for (int i = 0; i < NUMBER_CHANNELS; i++)
00237     {
00238       for (int j = 0; j < NUMBER_CHANNELS; j++)
00239         {
00240           L[i][j] = 0.0;
00241           for (int k = 0; k < NUMBER_CHANNELS; k++)
00242             {
00243               L[i][j] += A[i][k] * g_s(z[k]) * C[k][j];
00244             }
00245         }
00246     }
00247 
00248   // Berechne Q=LL^T
00249   for (int i = 0; i < NUMBER_CHANNELS; i++)
00250     {
00251       for (int j = 0; j < NUMBER_CHANNELS; j++)
00252         {
00253           Q[i][j] = 0.0;
00254           for (int k = 0; k < NUMBER_CHANNELS; k++)
00255             {
00256               Q[i][j] += L[i][k] * L[j][k];
00257             }
00258           if (i == j)
00259             Q[i][j] += rho / NUMBER_CHANNELS; // Regularisation
00260         }
00261     }
00262 
00263   // Berechne Inverse von Q
00264   inverseMatrix(Q, Q_1);
00265 
00266   // Berechne xsi
00267   for (int i = 0; i < NUMBER_CHANNELS; i++)
00268     {
00269       xsi4E[i] = x_[i];
00270       for (int j = 0; j < NUMBER_CHANNELS; j++)
00271         {
00272           xsi4E[i] -= A[i][j] * y_delay[j];  // using old y value -> no influence of changes (adding delta) in controller
00273           // very very strange!!!!
00274           //xsi4E[i] -= A[i][j] * g(z[j]);     // using recalculating y -> influence of changes (adding delta) in controller
00275         }
00276     }
00277   double E = 0;
00278   for (int i = 0; i < NUMBER_CHANNELS; i++)
00279     {
00280       for (int j = 0; j < NUMBER_CHANNELS; j++)
00281         {
00282           E += xsi4E[i] * Q_1[i][j] * xsi4E[j];
00283         }
00284     }
00285 
00286   double E_s=0;
00287   for (int i = 0; i < NUMBER_CHANNELS; i++)
00288     {
00289       for (int j = 0; j < NUMBER_CHANNELS; j++)
00290         {
00291           E_s += (A[i][j]*g(z[j]) - x_[i]) * (A[i][j]*g(z[j]) - x_[i]);
00292         }
00293     }
00294 
00295   E=(1-m)*E+ m*E_s;
00296   return (E);
00297 
00298 };
00299 
00300 
00301 virtual void learn(const double *x_, const double *x_delay, const double *y_delay)
00302 {
00303   //    double A_update[NUMBER_CHANNELS][NUMBER_CHANNELS];
00304   double** C_update=allocateMatrix(NUMBER_CHANNELS,NUMBER_CHANNELS);
00305   double* h_update=(double*)malloc(NUMBER_CHANNELS*sizeof(double));
00306 
00307   double E_0 = calculateE(x_,x_delay, y_delay);
00308 
00309   // calculate updates for h,C,A
00310   for (int i = 0; i < NUMBER_CHANNELS; i++)
00311     {
00312       h[i] += delta;
00313       h_update[i] = -eps * (calculateE(x_,x_delay, y_delay) - E_0) / delta;
00314       h[i] -= delta;
00315       for (int j = 0; j < NUMBER_CHANNELS; j++)
00316         {
00317           C[i][j] += delta;
00318           C_update[i][j] = -eps * (calculateE(x_,x_delay, y_delay) - E_0) / delta;
00319           C[i][j] -= delta;
00320           //A[i][j] += delta;
00321           //A_update[i][j] = -eps * a_factor * (calculateE(x_delay, y_delay) - E_0) / delta;
00322           //A[i][j] -= delta;
00323         }
00324     }
00325 
00326   // apply updates to h,C,A
00327   for (int i = 0; i < NUMBER_CHANNELS; i++)
00328     {
00329       h[i] += squash(h_update[i]);
00330       for (int j = 0; j < NUMBER_CHANNELS; j++)
00331         {
00332           C[i][j] += squash(C_update[i][j]);
00333           //A[i][j] += squash(A_update[i][j]);
00334         }
00335     }
00336   freeMatrix(C_update, NUMBER_CHANNELS);
00337   free(h_update);
00338 };
00339 
00340   
00341 virtual void learnModel(const double *x_actual, double *y_effective){
00342   /*    double z[N_output];    
00343         for(int i=0; i<N_output; i++){
00344         z[i]=h[i];
00345         for(int j=0; j<N_input; j++) {
00346         z[i]+=C[i][j]*x_D[j];
00347         }
00348         }
00349   */
00350   // Berechne xsi
00351   for(int i=0; i<NUMBER_CHANNELS; i++){
00352     xsi4Model[i]=x_actual[i];  
00353     for(int j=0; j<NUMBER_CHANNELS; j++){
00354       xsi4Model[i]-= A[i][j]*y_effective[j];
00355     }    
00356   }
00357 
00358   for(int i=0; i<NUMBER_CHANNELS; i++){
00359     for (int j=0; j<NUMBER_CHANNELS; j++){   
00360       A[i][j]+=squash( (factor_a*eps*0.2) *xsi4Model[i] * y_effective[j]) ;
00361     }
00362   }
00363 };
00364 
00365   
00366 
00367   
00368   
00369 /// calculate delayed values
00370 virtual void calculateDelayedValues(double** source, paramval number_steps_of_delay_, double *target)  
00371 {
00372   // number_steps_of_delay_ must not be larger than BUFFER_SIZE
00373   assert ((int)number_steps_of_delay_ < BUFFER_SIZE);
00374 
00375   // get delayed value from ring buffer
00376 
00377   for (int i = 0; i < NUMBER_CHANNELS; i++)
00378     {
00379       target[i] = source[(t - (int)number_steps_of_delay_ + BUFFER_SIZE) % BUFFER_SIZE][i];
00380     }
00381 };
00382 
00383 virtual void calculateSmoothValues(double** source, paramval number_steps_for_averaging_, double *target)
00384 {
00385   // number_steps_for_averaging_ must not be larger than BUFFER_SIZE
00386   assert ((int)number_steps_for_averaging_ <= BUFFER_SIZE);
00387 
00388   for (int i = 0; i < NUMBER_CHANNELS; i++)
00389     {
00390       target[i] = 0.0;
00391       for (int k = 0; k < (int)number_steps_for_averaging_; k++)
00392         {
00393           target[i] += source[(t - k + BUFFER_SIZE) % BUFFER_SIZE][i]/ (double) (number_steps_for_averaging_);
00394         }
00395     }
00396 };
00397 
00398 
00399 
00400 /// calculate controller ouptus
00401 virtual void calculateControllerValues(double *x_smooth, double *y)  
00402 {
00403   double z[NUMBER_CHANNELS];
00404 
00405   for (int i = 0; i < NUMBER_CHANNELS; i++)
00406     {
00407       z[i] = h[i];
00408       for (int j = 0; j < NUMBER_CHANNELS; j++)
00409         {
00410           z[i] += C[i][j] * x_smooth[j];
00411         }
00412       y[i] = g(z[i]);
00413     }
00414 };
00415   
00416   
00417 // put new value in ring buffer
00418 virtual void putInBuffer(double** buffer, const double *values){  
00419   for (int i = 0; i < NUMBER_CHANNELS; i++)
00420     {
00421       buffer[(t+BUFFER_SIZE)% BUFFER_SIZE][i] = values[i];
00422     }
00423 };
00424 

Generated on Tue Apr 4 19:05:03 2006 for Robotsystem from Robot Group Leipzig by  doxygen 1.4.5