Robotsystem of the Robot Group Leipzig: dinvert3channelcontroller.hpp Source File

00001 /* 00002 * LICENSE: * 00003 * This work is licensed under the Creative Commons * 00004 * Attribution-NonCommercial-ShareAlike 2.5 License. To view a copy of * 00005 * this license, visit http://creativecommons.org/licenses/by-nc-sa/2.5/ * 00006 * or send a letter to Creative Commons, 543 Howard Street, 5th Floor, * 00007 * San Francisco, California, 94105, USA. * 00008 * * 00009 * This program is distributed in the hope that it will be useful, * 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * 00012 */ 00013 00014 double** allocateMatrix(int m, int n){ 00015 double** mat=(double**)malloc(m*sizeof(double*)); 00016 for(int i=0; i < m; i++){ 00017 mat[i]=(double*)malloc(n*sizeof(double)); 00018 } 00019 return mat; 00020 } 00021 00022 void freeMatrix(double** mat, int m){ 00023 for(int i=0; i < m; i++){ 00024 free(mat[i]); 00025 } 00026 free(mat); 00027 } 00028 00029 00030 public: 00031 00032 /// return the name of the object (with version number) 00033 virtual constparamkey getName() const { 00034 return name; 00035 } 00036 00037 00038 virtual void init(int sensornumber, int motornumber){ 00039 00040 } 00041 00042 00043 DInvert3ChannelController(int numberchannels, int buffersize){ 00044 00045 eps=0.7; 00046 rho=0.0; 00047 s4delay=1; 00048 s4avg=1; 00049 delta=0.01; 00050 factor_a=1.0; 00051 t=0; 00052 m=0.0; 00053 00054 NUMBER_CHANNELS=numberchannels; 00055 BUFFER_SIZE=buffersize; 00056 // allocate field; 00057 A = allocateMatrix(NUMBER_CHANNELS, NUMBER_CHANNELS); 00058 C = allocateMatrix(NUMBER_CHANNELS, NUMBER_CHANNELS); 00059 h = (double*)malloc(NUMBER_CHANNELS*sizeof(double)); 00060 00061 x_buffer = allocateMatrix(BUFFER_SIZE, NUMBER_CHANNELS); 00062 y_buffer = allocateMatrix(BUFFER_SIZE, NUMBER_CHANNELS); 00063 00064 xsi4E = (double*)malloc(NUMBER_CHANNELS*sizeof(double)); 00065 xsi4Model = (double*)malloc(NUMBER_CHANNELS*sizeof(double)); 00066 00067 // internals 00068 x_smooth = (double*)malloc(NUMBER_CHANNELS*sizeof(double)); 00069 x_effective = (double*)malloc(NUMBER_CHANNELS*sizeof(double)); 00070 y_effective = (double*)malloc(NUMBER_CHANNELS*sizeof(double)); 00071 Q_buf1 = allocateMatrix(NUMBER_CHANNELS, NUMBER_CHANNELS); 00072 Q_buf2 = allocateMatrix(NUMBER_CHANNELS, NUMBER_CHANNELS); 00073 L = allocateMatrix(NUMBER_CHANNELS, NUMBER_CHANNELS); 00074 z = (double*)malloc(NUMBER_CHANNELS*sizeof(double)); 00075 00076 00077 for (int i = 0; i < NUMBER_CHANNELS; i++) 00078 { 00079 h[i] = 0.0; 00080 for (int j = 0; j < NUMBER_CHANNELS; j++) 00081 { 00082 if (i == j) 00083 { 00084 A[i][j] = (rand()/10.0)/RAND_MAX; 00085 C[i][j] = (rand()/10.0)/RAND_MAX; 00086 } 00087 else 00088 { 00089 A[i][j] = (rand()/10.0)/RAND_MAX; 00090 C[i][j] = (rand()/10.0)/RAND_MAX; 00091 } 00092 } 00093 } 00094 for (int i = 0; i < NUMBER_CHANNELS; i++) 00095 { 00096 for (int k = 0; k < BUFFER_SIZE; k++) 00097 { 00098 x_buffer[k][i] = 0; 00099 y_buffer[k][i] = 0; 00100 } 00101 } 00102 // prepare name; 00103 Configurable::insertCVSInfo(name, "$RCSfile: dinvert3channelcontroller.hpp,v $", 00104 "$Revision: 1.5 $"); 00105 00106 /* // print initial values 00107 std::cout<<"Constructor of RobotLearnControl:"<<std::endl; 00108 std::cout<<"init: epsilon="<<eps<<std::endl; 00109 std::cout<<"init: rho="<<rho<<std::endl; 00110 std::cout<<"init: steps4delay="<<steps4delay<<std::endl; 00111 std::cout<<"init: steps4avg="<<steps4avg<<std::endl; 00112 std::cout<<"init: delta="<<delta<<std::endl; 00113 std::cout<<"init: m (for calculation of E)="<<m<<std::endl; 00114 */ 00115 }; 00116 00117 /// performs one step (includes learning). Calulates motor commands from sensor inputs. 00118 virtual void step(const sensor* x_, int sensornumber, motor* y_, int motornumber){ 00119 for (int i = 0; i < NUMBER_CHANNELS; i++) 00120 { 00121 x_smooth[i] = 0.0; 00122 x_effective[i] = 0.0; 00123 y_effective[i] = 0.0; 00124 } 00125 00126 // put new input value in ring buffer x_buffer 00127 putInBuffer(x_buffer, x_); 00128 00129 // averaging over the last steps4avg values of x_buffer 00130 calculateSmoothValues(x_buffer, s4avg, x_smooth); 00131 00132 // calculate controller values based on smoothed input values 00133 calculateControllerValues(x_smooth, y_); 00134 00135 // put new output value in ring buffer y_buffer 00136 putInBuffer(y_buffer, y_); 00137 00138 // calculate effective input/output, which is (actual-steps4delay) element of buffer 00139 calculateDelayedValues(x_buffer, s4delay, x_effective); 00140 calculateDelayedValues(y_buffer, s4delay, y_effective); 00141 00142 // learn controller with effective input/output 00143 learn(x_, x_effective, y_effective); 00144 00145 // learn model with actual input and effective output (that produced the actual input) 00146 learnModel(x_, y_effective); 00147 00148 // update step counter 00149 t++; 00150 }; 00151 00152 00153 /// performs one step without learning. Calulates motor commands from sensor inputs. 00154 virtual void stepNoLearning(const sensor* x_, int sensornumber, motor* y_, int motornumber){ 00155 for (int i = 0; i < NUMBER_CHANNELS; i++) 00156 { 00157 x_smooth[i] = 0.0; 00158 } 00159 00160 00161 // put new input value in ring buffer x_buffer 00162 putInBuffer(x_buffer, x_); 00163 00164 // averaging over the last steps4avg values of x_buffer 00165 calculateSmoothValues(x_buffer, s4avg, x_smooth); 00166 00167 // calculate controller values based on smoothed input values 00168 calculateControllerValues(x_smooth, y_); 00169 00170 // put new output value in ring buffer y_buffer 00171 putInBuffer(y_buffer, y_); 00172 00173 // update step counter 00174 t++; 00175 }; 00176 00177 virtual int getInternalParamNames(paramkey*& keylist){ 00178 return 0; 00179 } 00180 00181 virtual int getInternalParams(paramval* vallist, int length){ 00182 int len = 0; 00183 return len; 00184 } 00185 00186 protected: 00187 virtual void inverseMatrix(double** Q, double** Q_1){ 00188 // Berechne Inverse von Q 00189 00190 // only if NUMBER_CHANNELS<4 00191 assert(NUMBER_CHANNELS<4); 00192 if (NUMBER_CHANNELS==2){ 00193 00194 double det = Q[0][0] * Q[1][1] - Q[0][1] * Q[1][0]; 00195 Q_1[0][0] = Q[1][1] / det; 00196 Q_1[1][1] = Q[0][0] / det; 00197 Q_1[0][1] = -Q[0][1] / det; 00198 Q_1[1][0] = -Q[1][0] / det; 00199 00200 } 00201 00202 00203 if (NUMBER_CHANNELS==3){ 00204 00205 double detQ=0 ; 00206 double** Q_adjoint=Q_buf1; 00207 00208 //calculate the inverse of Q 00209 Q_adjoint[0][0]=Q[1][1]*Q[2][2]-Q[1][2]*Q[2][1] ; 00210 Q_adjoint[0][1]=(Q[1][2]*Q[2][0]-Q[1][0]*Q[2][2]) ; 00211 Q_adjoint[0][2]=Q[1][0]*Q[2][1]-Q[1][1]*Q[2][0] ; 00212 Q_adjoint[1][0]=(Q[2][1]*Q[0][2]-Q[0][1]*Q[2][2]) ; 00213 Q_adjoint[1][1]=Q[0][0]*Q[2][2]-Q[0][2]*Q[2][0] ; 00214 Q_adjoint[1][2]=(Q[0][1]*Q[2][0]-Q[0][0]*Q[2][1]) ; 00215 Q_adjoint[2][0]=Q[0][1]*Q[1][2]-Q[1][1]*Q[0][2] ; 00216 Q_adjoint[2][1]=(Q[1][0]*Q[0][2]-Q[0][0]*Q[1][2]) ; 00217 Q_adjoint[2][2]=Q[0][0]*Q[1][1]-Q[0][1]*Q[1][0] ; 00218 detQ=Q[0][0]*Q_adjoint[0][0]+Q[0][1]*Q_adjoint[0][1]+Q[0][2]*Q_adjoint[0][2] ; 00219 for(int i=0; i<NUMBER_CHANNELS; i++){ 00220 for(int j=0; j<NUMBER_CHANNELS; j++) { 00221 Q_1[i][j]=(Q_adjoint[j][i])/detQ ; 00222 } 00223 } 00224 } 00225 }; 00226 00227 00228 virtual double calculateE(const double *x_, const double *x_delay, const double *y_delay) { 00229 double** Q = Q_buf1; 00230 double** Q_1= Q_buf2; 00231 00232 00233 // Calculate z based on the delayed inputs since the present input x is 00234 // produced by the outputs tau time steps before 00235 // which on their hand are y = K(x_D) 00236 // due to the delay in the feed back loop. 00237 00238 for (int i = 0; i < NUMBER_CHANNELS; i++) 00239 { 00240 z[i] = h[i]; 00241 for (int j = 0; j < NUMBER_CHANNELS; j++) 00242 { 00243 z[i] += C[i][j] * x_delay[j]; 00244 } 00245 } 00246 00247 // Berechne Matrix L 00248 for (int i = 0; i < NUMBER_CHANNELS; i++) 00249 { 00250 for (int j = 0; j < NUMBER_CHANNELS; j++) 00251 { 00252 L[i][j] = 0.0; 00253 for (int k = 0; k < NUMBER_CHANNELS; k++) 00254 { 00255 L[i][j] += A[i][k] * g_s(z[k]) * C[k][j]; 00256 } 00257 } 00258 } 00259 00260 // Berechne Q=LL^T 00261 for (int i = 0; i < NUMBER_CHANNELS; i++) 00262 { 00263 for (int j = 0; j < NUMBER_CHANNELS; j++) 00264 { 00265 Q[i][j] = 0.0; 00266 for (int k = 0; k < NUMBER_CHANNELS; k++) 00267 { 00268 Q[i][j] += L[i][k] * L[j][k]; 00269 } 00270 if (i == j) 00271 Q[i][j] += rho / NUMBER_CHANNELS; // Regularisation 00272 } 00273 } 00274 00275 // Berechne Inverse von Q 00276 inverseMatrix(Q, Q_1); 00277 00278 // Berechne xsi 00279 for (int i = 0; i < NUMBER_CHANNELS; i++) 00280 { 00281 xsi4E[i] = x_[i]; 00282 for (int j = 0; j < NUMBER_CHANNELS; j++) 00283 { 00284 xsi4E[i] -= A[i][j] * y_delay[j]; // using old y value -> no influence of changes (adding delta) in controller 00285 // very very strange!!!! 00286 //xsi4E[i] -= A[i][j] * g(z[j]); // using recalculating y -> influence of changes (adding delta) in controller 00287 } 00288 } 00289 double E = 0; 00290 for (int i = 0; i < NUMBER_CHANNELS; i++) 00291 { 00292 for (int j = 0; j < NUMBER_CHANNELS; j++) 00293 { 00294 E += xsi4E[i] * Q_1[i][j] * xsi4E[j]; 00295 } 00296 } 00297 00298 double E_s=0; 00299 for (int i = 0; i < NUMBER_CHANNELS; i++) 00300 { 00301 for (int j = 0; j < NUMBER_CHANNELS; j++) 00302 { 00303 E_s += (A[i][j]*g(z[j]) - x_[i]) * (A[i][j]*g(z[j]) - x_[i]); 00304 } 00305 } 00306 00307 E=(1-m)*E+ m*E_s; 00308 return (E); 00309 00310 }; 00311 00312 00313 virtual void learn(const double *x_, const double *x_delay, const double *y_delay) 00314 { 00315 // double A_update[NUMBER_CHANNELS][NUMBER_CHANNELS]; 00316 double** C_update=allocateMatrix(NUMBER_CHANNELS,NUMBER_CHANNELS); 00317 double* h_update=(double*)malloc(NUMBER_CHANNELS*sizeof(double)); 00318 00319 double E_0 = calculateE(x_,x_delay, y_delay); 00320 00321 // calculate updates for h,C,A 00322 for (int i = 0; i < NUMBER_CHANNELS; i++) 00323 { 00324 h[i] += delta; 00325 h_update[i] = -eps * (calculateE(x_,x_delay, y_delay) - E_0) / delta; 00326 h[i] -= delta; 00327 for (int j = 0; j < NUMBER_CHANNELS; j++) 00328 { 00329 C[i][j] += delta; 00330 C_update[i][j] = -eps * (calculateE(x_,x_delay, y_delay) - E_0) / delta; 00331 C[i][j] -= delta; 00332 //A[i][j] += delta; 00333 //A_update[i][j] = -eps * a_factor * (calculateE(x_delay, y_delay) - E_0) / delta; 00334 //A[i][j] -= delta; 00335 } 00336 } 00337 00338 // apply updates to h,C,A 00339 for (int i = 0; i < NUMBER_CHANNELS; i++) 00340 { 00341 h[i] += squash(h_update[i]); 00342 for (int j = 0; j < NUMBER_CHANNELS; j++) 00343 { 00344 C[i][j] += squash(C_update[i][j]); 00345 //A[i][j] += squash(A_update[i][j]); 00346 } 00347 } 00348 freeMatrix(C_update, NUMBER_CHANNELS); 00349 free(h_update); 00350 }; 00351 00352 00353 virtual void learnModel(const double *x_actual, double *y_effective){ 00354 /* double z[N_output]; 00355 for(int i=0; i<N_output; i++){ 00356 z[i]=h[i]; 00357 for(int j=0; j<N_input; j++) { 00358 z[i]+=C[i][j]*x_D[j]; 00359 } 00360 } 00361 */ 00362 // Berechne xsi 00363 for(int i=0; i<NUMBER_CHANNELS; i++){ 00364 xsi4Model[i]=x_actual[i]; 00365 for(int j=0; j<NUMBER_CHANNELS; j++){ 00366 xsi4Model[i]-= A[i][j]*y_effective[j]; 00367 } 00368 } 00369 00370 for(int i=0; i<NUMBER_CHANNELS; i++){ 00371 for (int j=0; j<NUMBER_CHANNELS; j++){ 00372 A[i][j]+=squash( (factor_a*eps*0.2) *xsi4Model[i] * y_effective[j]) ; 00373 } 00374 } 00375 }; 00376 00377 00378 00379 00380 00381 /// calculate delayed values 00382 virtual void calculateDelayedValues(double** source, paramval number_steps_of_delay_, double *target) 00383 { 00384 // number_steps_of_delay_ must not be larger than BUFFER_SIZE 00385 assert ((int)number_steps_of_delay_ < BUFFER_SIZE); 00386 00387 // get delayed value from ring buffer 00388 00389 for (int i = 0; i < NUMBER_CHANNELS; i++) 00390 { 00391 target[i] = source[(t - (int)number_steps_of_delay_ + BUFFER_SIZE) % BUFFER_SIZE][i]; 00392 } 00393 }; 00394 00395 virtual void calculateSmoothValues(double** source, paramval number_steps_for_averaging_, double *target) 00396 { 00397 // number_steps_for_averaging_ must not be larger than BUFFER_SIZE 00398 assert ((int)number_steps_for_averaging_ <= BUFFER_SIZE); 00399 00400 for (int i = 0; i < NUMBER_CHANNELS; i++) 00401 { 00402 target[i] = 0.0; 00403 for (int k = 0; k < (int)number_steps_for_averaging_; k++) 00404 { 00405 target[i] += source[(t - k + BUFFER_SIZE) % BUFFER_SIZE][i]/ (double) (number_steps_for_averaging_); 00406 } 00407 } 00408 }; 00409 00410 00411 00412 /// calculate controller ouptus 00413 virtual void calculateControllerValues(double *x_smooth, double *y) 00414 { 00415 double z[NUMBER_CHANNELS]; 00416 00417 for (int i = 0; i < NUMBER_CHANNELS; i++) 00418 { 00419 z[i] = h[i]; 00420 for (int j = 0; j < NUMBER_CHANNELS; j++) 00421 { 00422 z[i] += C[i][j] * x_smooth[j]; 00423 } 00424 y[i] = g(z[i]); 00425 } 00426 }; 00427 00428 00429 // put new value in ring buffer 00430 virtual void putInBuffer(double** buffer, const double *values){ 00431 for (int i = 0; i < NUMBER_CHANNELS; i++) 00432 { 00433 buffer[(t+BUFFER_SIZE)% BUFFER_SIZE][i] = values[i]; 00434 } 00435 }; 00436