Robotsystem of the Robot Group Leipzig: invert3channelcontroller.h Source File

00001 /*************************************************************************** 00002 * Copyright (C) 2005 by Robot Group Leipzig * 00003 * martius@informatik.uni-leipzig.de * 00004 * fhesse@informatik.uni-leipzig.de * 00005 * der@informatik.uni-leipzig.de * 00006 * * 00007 * LICENSE: * 00008 * This work is licensed under the Creative Commons * 00009 * Attribution-NonCommercial-ShareAlike 2.5 License. To view a copy of * 00010 * this license, visit http://creativecommons.org/licenses/by-nc-sa/2.5/ * 00011 * or send a letter to Creative Commons, 543 Howard Street, 5th Floor, * 00012 * San Francisco, California, 94105, USA. * 00013 * * 00014 * This program is distributed in the hope that it will be useful, * 00015 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 00016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * 00017 * * 00018 * $Log: invert3channelcontroller.h,v $ 00019 * Revision 1.4 2006/07/14 12:23:58 martius 00020 * selforg becomes HEAD 00021 * 00022 * Revision 1.2.6.2 2006/07/10 13:05:16 martius 00023 * NON-COMMERICAL LICENSE added to controllers 00024 * 00025 * * 00026 ***************************************************************************/ 00027 00028 #ifndef __INVERT3CHANNELCONTROLLER_H 00029 #define __INVERT3CHANNELCONTROLLER_H 00030 00031 #include "invertcontroller.h" 00032 00033 #include <cassert> 00034 #include <math.h> 00035 00036 00037 /** 00038 * class for robot controller that use naglaa's direct matrix inversion for n channels 00039 * (simple one layer networks) 00040 * 00041 * Implements standart parameters: eps, rho, mu, stepnumber4avg, stepnumber4delay 00042 */ 00043 template <int NUMBER_CHANNELS, int BUFFER_SIZE=2> class Invert3ChannelController : public InvertController { 00044 00045 public: 00046 00047 /* 00048 Invert3ChannelController(); 00049 00050 //virtual ~Invert3ChannelController(){} 00051 00052 00053 /// performs one step (includes learning). Calulates motor commands from sensor inputs. 00054 virtual void step(const sensor*, motor*); 00055 00056 /// performs one step without learning. Calulates motor commands from sensor inputs. 00057 virtual void stepNoLearning(const sensor*, motor*); 00058 */ 00059 00060 00061 //protected: 00062 public: 00063 double A[NUMBER_CHANNELS][NUMBER_CHANNELS]; ///< model matrix 00064 double C[NUMBER_CHANNELS][NUMBER_CHANNELS]; ///< controller matrix 00065 double h[NUMBER_CHANNELS]; ///< bias vector 00066 00067 double x_buffer[BUFFER_SIZE][NUMBER_CHANNELS]; ///< buffer for input values, x[t%buffersize]=actual value, x[(t-1+buffersize)%buffersize]=x(t-1) 00068 double y_buffer[BUFFER_SIZE][NUMBER_CHANNELS]; ///< buffer for output values, y[t%buffersize]=actual value(if already calculated!), y[(t-1+buffersize)%buffersize]=y(t-1) 00069 00070 double xsi4E[NUMBER_CHANNELS]; 00071 double xsi4Model[NUMBER_CHANNELS]; 00072 00073 int t; ///< number of steps, needed for ringbuffer x_buffer 00074 00075 00076 00077 00078 00079 /* 00080 virtual void inverseMatrix(double Q[NUMBER_CHANNELS][NUMBER_CHANNELS], double Q_1[NUMBER_CHANNELS][NUMBER_CHANNELS]); 00081 00082 virtual double calculateE(double *x_delay, double *y_delay); 00083 00084 /// learn values h,C //,A 00085 virtual void learn(double *x_delay, double *y_delay); 00086 00087 /// learn model parameter (matrix A) by gradient descent 00088 virtual void learnModel(double *x_actual, double *y_effective); 00089 00090 00091 /// calculate delayed values 00092 virtual void calculateDelayedValues(double source[BUFFER_SIZE][NUMBER_CHANNELS], int number_steps_of_delay_, double *target); 00093 00094 virtual void calculateSmoothValues(double source[BUFFER_SIZE][NUMBER_CHANNELS], int number_steps_for_averaging_, double *target); 00095 00096 /// calculate controller ouptus 00097 virtual void calculateControllerValues(double *x_smooth, double *y); 00098 00099 // put new value in ring buffer 00100 virtual void putInBuffer(double buffer[BUFFER_SIZE][NUMBER_CHANNELS], double *values); 00101 00102 */ 00103 /// neuron transfer function 00104 virtual double g(double z) 00105 { 00106 return tanh(z); 00107 }; 00108 00109 00110 00111 /// 00112 virtual double g_s(double z) 00113 { 00114 return 1.0 - tanh(z) * tanh(z); 00115 }; 00116 00117 00118 00119 /// squashing function, to protect against to large weight updates 00120 virtual double squash(double z) 00121 { 00122 return 0.1 * tanh(10.0 * z); 00123 }; 00124 00125 00126 //template <int NUMBER_CHANNELS, int BUFFER_SIZES> 00127 // Invert3ChannelController<int NUMBER_CHANNELS, int BUFFER_SIZES>:: 00128 Invert3ChannelController(){ 00129 00130 t=0; 00131 00132 for (int i = 0; i < NUMBER_CHANNELS; i++) 00133 { 00134 h[i] = 0.0; 00135 for (int j = 0; j < NUMBER_CHANNELS; j++) 00136 { 00137 if (i == j) 00138 { 00139 A[i][j] = 1.0; 00140 C[i][j] = 0.1; 00141 } 00142 else 00143 { 00144 A[i][j] = 0.0; 00145 C[i][j] = 0.0; 00146 } 00147 } 00148 } 00149 for (int i = 0; i < NUMBER_CHANNELS; i++) 00150 { 00151 for (int k = 0; k < BUFFER_SIZE; k++) 00152 { 00153 x_buffer[k][i] = 0; 00154 y_buffer[k][i] = 0; 00155 } 00156 } 00157 00158 /* // print initial values 00159 std::cout<<"Constructor of RobotLearnControl:"<<std::endl; 00160 std::cout<<"init: epsilon="<<eps<<std::endl; 00161 std::cout<<"init: rho="<<rho<<std::endl; 00162 std::cout<<"init: stepnumber4delay="<<stepnumber4delay<<std::endl; 00163 std::cout<<"init: stepnumber4avg="<<stepnumber4avg<<std::endl; 00164 std::cout<<"init: delta="<<delta<<std::endl; 00165 std::cout<<"init: m (for calculation of E)="<<m<<std::endl; 00166 */ 00167 }; 00168 00169 /// performs one step (includes learning). Calulates motor commands from sensor inputs. 00170 virtual void step(const sensor* x_, int sensornumber, 00171 motor* y_, int motornumber) { 00172 double x_smooth[NUMBER_CHANNELS]; 00173 double x_effective[NUMBER_CHANNELS]; 00174 double y_effective[NUMBER_CHANNELS]; 00175 for (int i = 0; i < NUMBER_CHANNELS; i++) 00176 { 00177 x_smooth[i] = 0.0; 00178 x_effective[i] = 0.0; 00179 y_effective[i] = 0.0; 00180 } 00181 00182 // put new input value in ring buffer x_buffer 00183 putInBuffer(x_buffer, x_); 00184 00185 // averaging over the last stepnumber4avg values of x_buffer 00186 calculateSmoothValues(x_buffer, stepnumber4avg, x_smooth); 00187 00188 // calculate controller values based on smoothed input values 00189 calculateControllerValues(x_smooth, y_); 00190 00191 // put new output value in ring buffer y_buffer 00192 putInBuffer(y_buffer, y_); 00193 00194 // calculate effective input/output, which is (actual-stepnumber4delay) element of buffer 00195 calculateDelayedValues(x_buffer, stepnumber4delay, x_effective); 00196 calculateDelayedValues(y_buffer, stepnumber4delay, y_effective); 00197 00198 // learn controller with effective input/output 00199 learn(x_, x_effective, y_effective); 00200 00201 // learn model with actual input and effective output (that produced the actual input) 00202 learnModel(x_, y_effective); 00203 00204 // update step counter 00205 t++; 00206 }; 00207 00208 00209 /// performs one step without learning. Calulates motor commands from sensor inputs. 00210 virtual void stepNoLearning(const sensor* x_, int sensornumber, 00211 motor* y_, int motornumber){ 00212 double x_smooth[NUMBER_CHANNELS]; 00213 for (int i = 0; i < NUMBER_CHANNELS; i++) 00214 { 00215 x_smooth[i] = 0.0; 00216 } 00217 00218 00219 // put new input value in ring buffer x_buffer 00220 putInBuffer(x_buffer, x_); 00221 00222 // averaging over the last stepnumber4avg values of x_buffer 00223 calculateSmoothValues(x_buffer, stepnumber4avg, x_smooth); 00224 00225 // calculate controller values based on smoothed input values 00226 calculateControllerValues(x_smooth, y_); 00227 00228 // put new output value in ring buffer y_buffer 00229 putInBuffer(y_buffer, y_); 00230 00231 // update step counter 00232 t++; 00233 }; 00234 00235 00236 protected: 00237 virtual void inverseMatrix(double Q[NUMBER_CHANNELS][NUMBER_CHANNELS], double Q_1[NUMBER_CHANNELS][NUMBER_CHANNELS]){ 00238 // Berechne Inverse von Q 00239 00240 // only if NUMBER_CHANNELS<4 00241 assert(NUMBER_CHANNELS<4); 00242 if (NUMBER_CHANNELS==2){ 00243 00244 double det = Q[0][0] * Q[1][1] - Q[0][1] * Q[1][0]; 00245 Q_1[0][0] = Q[1][1] / det; 00246 Q_1[1][1] = Q[0][0] / det; 00247 Q_1[0][1] = -Q[0][1] / det; 00248 Q_1[1][0] = -Q[1][0] / det; 00249 00250 } 00251 00252 00253 if (NUMBER_CHANNELS==3){ 00254 00255 double Q_adjoint[NUMBER_CHANNELS][NUMBER_CHANNELS] ; 00256 double detQ=0 ; 00257 00258 //calculate the inverse of Q 00259 Q_adjoint[0][0]=Q[1][1]*Q[2][2]-Q[1][2]*Q[2][1] ; 00260 Q_adjoint[0][1]=(Q[1][2]*Q[2][0]-Q[1][0]*Q[2][2]) ; 00261 Q_adjoint[0][2]=Q[1][0]*Q[2][1]-Q[1][1]*Q[2][0] ; 00262 Q_adjoint[1][0]=(Q[2][1]*Q[0][2]-Q[0][1]*Q[2][2]) ; 00263 Q_adjoint[1][1]=Q[0][0]*Q[2][2]-Q[0][2]*Q[2][0] ; 00264 Q_adjoint[1][2]=(Q[0][1]*Q[2][0]-Q[0][0]*Q[2][1]) ; 00265 Q_adjoint[2][0]=Q[0][1]*Q[1][2]-Q[1][1]*Q[0][2] ; 00266 Q_adjoint[2][1]=(Q[1][0]*Q[0][2]-Q[0][0]*Q[1][2]) ; 00267 Q_adjoint[2][2]=Q[0][0]*Q[1][1]-Q[0][1]*Q[1][0] ; 00268 detQ=Q[0][0]*Q_adjoint[0][0]+Q[0][1]*Q_adjoint[0][1]+Q[0][2]*Q_adjoint[0][2] ; 00269 for(int i=0; i<NUMBER_CHANNELS; i++){ 00270 for(int j=0; j<NUMBER_CHANNELS; j++) { 00271 Q_1[i][j]=(Q_adjoint[j][i])/detQ ; 00272 } 00273 } 00274 } 00275 }; 00276 00277 00278 virtual double calculateE(const double *x_, const double *x_delay, const double *y_delay) { 00279 double L[NUMBER_CHANNELS][NUMBER_CHANNELS]; 00280 double Q[NUMBER_CHANNELS][NUMBER_CHANNELS]; 00281 double Q_1[NUMBER_CHANNELS][NUMBER_CHANNELS]; 00282 double z[NUMBER_CHANNELS]; 00283 00284 00285 // Calculate z based on the delayed inputs since the present input x is 00286 // produced by the outputs tau time steps before 00287 // which on their hand are y = K(x_D) 00288 // due to the delay in the feed back loop. 00289 00290 for (int i = 0; i < NUMBER_CHANNELS; i++) 00291 { 00292 z[i] = h[i]; 00293 for (int j = 0; j < NUMBER_CHANNELS; j++) 00294 { 00295 z[i] += C[i][j] * x_delay[j]; 00296 } 00297 } 00298 00299 // Berechne Matrix L 00300 for (int i = 0; i < NUMBER_CHANNELS; i++) 00301 { 00302 for (int j = 0; j < NUMBER_CHANNELS; j++) 00303 { 00304 L[i][j] = 0.0; 00305 for (int k = 0; k < NUMBER_CHANNELS; k++) 00306 { 00307 L[i][j] += A[i][k] * g_s(z[k]) * C[k][j]; 00308 } 00309 } 00310 } 00311 00312 // Berechne Q=LL^T 00313 for (int i = 0; i < NUMBER_CHANNELS; i++) 00314 { 00315 for (int j = 0; j < NUMBER_CHANNELS; j++) 00316 { 00317 Q[i][j] = 0.0; 00318 for (int k = 0; k < NUMBER_CHANNELS; k++) 00319 { 00320 Q[i][j] += L[i][k] * L[j][k]; 00321 } 00322 if (i == j) 00323 Q[i][j] += rho / NUMBER_CHANNELS; // Regularisation 00324 } 00325 } 00326 00327 // Berechne Inverse von Q 00328 inverseMatrix(Q, Q_1); 00329 00330 // Berechne xsi 00331 for (int i = 0; i < NUMBER_CHANNELS; i++) 00332 { 00333 xsi4E[i] = x_[i]; 00334 for (int j = 0; j < NUMBER_CHANNELS; j++) 00335 { 00336 xsi4E[i] -= A[i][j] * y_delay[j]; // using old y value -> no influence of changes (adding delta) in controller 00337 // very very strange!!!! 00338 //xsi4E[i] -= A[i][j] * g(z[j]); // using recalculating y -> influence of changes (adding delta) in controller 00339 } 00340 } 00341 double E = 0; 00342 for (int i = 0; i < NUMBER_CHANNELS; i++) 00343 { 00344 for (int j = 0; j < NUMBER_CHANNELS; j++) 00345 { 00346 E += xsi4E[i] * Q_1[i][j] * xsi4E[j]; 00347 } 00348 } 00349 00350 double E_s=0; 00351 for (int i = 0; i < NUMBER_CHANNELS; i++) 00352 { 00353 for (int j = 0; j < NUMBER_CHANNELS; j++) 00354 { 00355 E_s += (A[i][j]*g(z[j]) - x_[i]) * (A[i][j]*g(z[j]) - x_[i]); 00356 } 00357 } 00358 00359 E=(1-m)*E+ m*E_s; 00360 return (E); 00361 00362 }; 00363 00364 00365 virtual void learn(const double *x_, const double *x_delay, const double *y_delay) 00366 { 00367 // double A_update[NUMBER_CHANNELS][NUMBER_CHANNELS]; 00368 double C_update[NUMBER_CHANNELS][NUMBER_CHANNELS]; 00369 double h_update[NUMBER_CHANNELS]; 00370 00371 double E_0 = calculateE(x_,x_delay, y_delay); 00372 00373 // calculate updates for h,C,A 00374 for (int i = 0; i < NUMBER_CHANNELS; i++) 00375 { 00376 h[i] += delta; 00377 h_update[i] = -eps * (calculateE(x_,x_delay, y_delay) - E_0) / delta; 00378 h[i] -= delta; 00379 for (int j = 0; j < NUMBER_CHANNELS; j++) 00380 { 00381 C[i][j] += delta; 00382 C_update[i][j] = -eps * (calculateE(x_,x_delay, y_delay) - E_0) / delta; 00383 C[i][j] -= delta; 00384 //A[i][j] += delta; 00385 //A_update[i][j] = -eps * a_factor * (calculateE(x_delay, y_delay) - E_0) / delta; 00386 //A[i][j] -= delta; 00387 } 00388 } 00389 00390 // apply updates to h,C,A 00391 for (int i = 0; i < NUMBER_CHANNELS; i++) 00392 { 00393 h[i] += squash(h_update[i]); 00394 for (int j = 0; j < NUMBER_CHANNELS; j++) 00395 { 00396 C[i][j] += squash(C_update[i][j]); 00397 //A[i][j] += squash(A_update[i][j]); 00398 } 00399 } 00400 }; 00401 00402 00403 virtual void learnModel(const double *x_actual, double *y_effective){ 00404 /* double z[N_output]; 00405 for(int i=0; i<N_output; i++){ 00406 z[i]=h[i]; 00407 for(int j=0; j<N_input; j++) { 00408 z[i]+=C[i][j]*x_D[j]; 00409 } 00410 } 00411 */ 00412 // Berechne xsi 00413 for(int i=0; i<NUMBER_CHANNELS; i++){ 00414 xsi4Model[i]=x_actual[i]; 00415 for(int j=0; j<NUMBER_CHANNELS; j++){ 00416 xsi4Model[i]-= A[i][j]*y_effective[j]; 00417 } 00418 } 00419 00420 for(int i=0; i<NUMBER_CHANNELS; i++){ 00421 for (int j=0; j<NUMBER_CHANNELS; j++){ 00422 A[i][j]+=squash( (factor_a*eps*0.2) *xsi4Model[i] * y_effective[j]) ; 00423 } 00424 } 00425 }; 00426 00427 00428 00429 00430 00431 /// calculate delayed values 00432 virtual void calculateDelayedValues(double source[BUFFER_SIZE][NUMBER_CHANNELS], paramval number_steps_of_delay_, double *target) 00433 { 00434 // number_steps_of_delay_ must not be larger than BUFFER_SIZE 00435 assert ((int)number_steps_of_delay_ < BUFFER_SIZE); 00436 00437 // get delayed value from ring buffer 00438 00439 for (int i = 0; i < NUMBER_CHANNELS; i++) 00440 { 00441 target[i] = source[(t - (int)number_steps_of_delay_ + BUFFER_SIZE) % BUFFER_SIZE][i]; 00442 } 00443 }; 00444 00445 virtual void calculateSmoothValues(double source[BUFFER_SIZE][NUMBER_CHANNELS], paramval number_steps_for_averaging_, double *target) 00446 { 00447 // number_steps_for_averaging_ must not be larger than BUFFER_SIZE 00448 assert ((int)number_steps_for_averaging_ <= BUFFER_SIZE); 00449 00450 for (int i = 0; i < NUMBER_CHANNELS; i++) 00451 { 00452 target[i] = 0.0; 00453 for (int k = 0; k < (int)number_steps_for_averaging_; k++) 00454 { 00455 target[i] += source[(t - k + BUFFER_SIZE) % BUFFER_SIZE][i]/ (double) (number_steps_for_averaging_); 00456 } 00457 } 00458 }; 00459 00460 00461 00462 /// calculate controller ouptus 00463 virtual void calculateControllerValues(double *x_smooth, double *y) 00464 { 00465 double z[NUMBER_CHANNELS]; 00466 00467 for (int i = 0; i < NUMBER_CHANNELS; i++) 00468 { 00469 z[i] = h[i]; 00470 for (int j = 0; j < NUMBER_CHANNELS; j++) 00471 { 00472 z[i] += C[i][j] * x_smooth[j]; 00473 } 00474 y[i] = g(z[i]); 00475 } 00476 }; 00477 00478 00479 // put new value in ring buffer 00480 virtual void putInBuffer(double buffer[BUFFER_SIZE][NUMBER_CHANNELS], const double *values){ 00481 for (int i = 0; i < NUMBER_CHANNELS; i++) 00482 { 00483 buffer[(t+BUFFER_SIZE)% BUFFER_SIZE][i] = values[i]; 00484 } 00485 }; 00486 00487 00488 00489 virtual int getInternalParamNames(paramkey*& keylist){ 00490 keylist=(paramkey*)malloc(sizeof(paramkey)*10); 00491 keylist[0]="C00"; 00492 keylist[1]="C01"; 00493 keylist[2]="C10"; 00494 keylist[3]="C11"; 00495 keylist[4]="H0"; 00496 keylist[5]="H1"; 00497 keylist[6]="A00"; 00498 keylist[7]="A01"; 00499 keylist[8]="A10"; 00500 keylist[9]="A11"; 00501 return 10; 00502 } 00503 00504 virtual int getInternalParams(paramval* vallist, int length) { 00505 if(length < 10) return 0; 00506 vallist[0]=C[0][0]; 00507 vallist[1]=C[0][1]; 00508 vallist[2]=C[1][0]; 00509 vallist[3]=C[1][1]; 00510 vallist[4]=h[0]; 00511 vallist[5]=h[1]; 00512 vallist[6]=A[0][0]; 00513 vallist[7]=A[0][1]; 00514 vallist[8]=A[1][0]; 00515 vallist[9]=A[1][1]; 00516 return 10; 00517 } 00518 00519 00520 }; 00521 00522 AbstractController* getController(int sensornumber, int motornumber, 00523 int param1/*=0*/, double param2/*=0*/) 00524 { 00525 if(param1 < 2) param1 = 2; 00526 return new Invert3ChannelController<2, 4>; 00527 } 00528 00529 00530 00531 #endif