Robot Simulator of the Robotics Group for Self-Organization of Control  0.8.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
multireinforce.h
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2005-2011 by *
3  * Georg Martius <georg dot martius at web dot de> *
4  * Ralf Der <ralfder at mis dot mpg dot de> *
5  * *
6  * ANY COMMERCIAL USE FORBIDDEN! *
7  * LICENSE: *
8  * This work is licensed under the Creative Commons *
9  * Attribution-NonCommercial-ShareAlike 2.5 License. To view a copy of *
10  * this license, visit http://creativecommons.org/licenses/by-nc-sa/2.5/ *
11  * or send a letter to Creative Commons, 543 Howard Street, 5th Floor, *
12  * San Francisco, California, 94105, USA. *
13  * *
14  * This program is distributed in the hope that it will be useful, *
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
17  * *
18  ***************************************************************************/
19 #ifndef __MULTIREINFORCE_H
20 #define __MULTIREINFORCE_H
21 
22 #include <selforg/abstractcontroller.h>
23 #include <selforg/multilayerffnn.h>
24 
25 #include <assert.h>
26 #include <math.h>
27 
28 #include <selforg/matrix.h>
29 #include <selforg/noisegenerator.h>
30 #include <selforg/multilayerffnn.h>
31 #include <selforg/qlearning.h>
32 
33 typedef struct MultiReinforceConf { // configuration of MultiReinforce; default values are produced by getDefaultConf()
34  unsigned short buffersize; ///< size of the ring buffers for sensors, motors, ...
35  int numContext; ///< number of context sensors (ignored)
36  std::list<std::string> satFiles; ///< filenames for sat networks
37  int numSats; ///< number of satellite networks (derived from the length of the file list)
38  bool useDerive; ///< input to sat network includes derivatives
39  bool useY; ///< input to sat network includes y (motor values)
40 // double tauE1; ///< time horizon for short averaging error
41 // double tauH; ///< hysteresis time (time a state is kept even if another one seems right)
42 // double tauI; ///< maximal waiting time for state change if action was changed
43  int reinforce_interval; ///< time between consecutive reinforcement selections
44 
45  QLearning* qlearning; ///< QLearning instance
46  matrix::Matrix* actioncorrel; ///< correlation matrix of actions
48 
49 /// Satellite network struct: one MultiLayerFFNN together with its per-network values eps and lifetime
50 typedef struct Sat {
51  Sat(MultiLayerFFNN* _net, double _eps);
53  double eps;
54  double lifetime;
55 } Sat;
56 
57 /**
58  * class for robot controller
59  * using several feedforward networks (satellite) and one selforg controller
60  */
62 
63 public:
65  virtual void init(int sensornumber, int motornumber, RandGen* randGen = 0); ///< initialises the controller with the given sensor/motor numbers; must be called before use
66 
67  virtual ~MultiReinforce(); // NOTE(review): which raw pointers (buffers, sat networks, conf.qlearning) are freed here is not visible in this header - confirm in multireinforce.cpp
68 
69  /// returns number_sensors: the number of sensors the controller was initialised with (0 if not initialised)
70  virtual int getSensorNumber() const { return number_sensors; }
71  /// returns number_motors: the number of motors the controller was initialised with (0 if not initialised)
72  virtual int getMotorNumber() const { return number_motors; }
73 
74  /// performs one step (includes learning).
75  /// Calculates motor commands from sensor inputs.
76  virtual void step(const sensor* , int number_sensors, motor* , int number_motors);
77 
78  /// performs one step without learning. Calculates motor commands from sensor inputs.
79  virtual void stepNoLearning(const sensor* , int number_sensors,
80  motor* , int number_motors);
81 
82  // !!!!!!!!!!!!!!!!!!! MISC STUFF !!!!!!!!
83 
84  /// builds a list of file names from filestem and n (presumably n names, one per sat network; naming scheme is in multireinforce.cpp - TODO confirm)
85  static std::list<std::string> createFileList(const char* filestem, int n);
86  /// restores the sat networks from separate files
87  void restoreSats(const std::list<std::string>& files);
88  /// stores the sat networks into the given files
89  void storeSats(const std::list<std::string>& files);
90 
91 
92  /** enables/disables manual control; action_ is the number of the sat network to be used.
93  If mControl is false, action_ is ignored.
94  */
95  void setManualControl(bool mControl, int action_ = 0);
96 
97 
98  /************** CONFIGURABLE ********************************/
99  virtual void notifyOnChange(const paramkey& key); ///< called when a parameter was changed via setParam()
100 
101  /**** STOREABLE ****/
102  /** stores the controller values to the given file (return value presumably signals success - TODO confirm). */
103  virtual bool store(FILE* f) const;
104  /** loads the controller values from the given file (return value presumably signals success - TODO confirm). */
105  virtual bool restore(FILE* f);
106 
107  /**** INSPECTABLE ****/
108  virtual std::list<iparamkey> getInternalParamNames() const; ///< names of all internal parameters given by getInternalParams()
109  virtual std::list<iparamval> getInternalParams() const; ///< values of the internal parameters named by getInternalParamNames()
110  virtual std::list<ILayer> getStructuralLayers() const; ///< specifies which parameter vector forms a structural layer (in terms of a neural network)
111  virtual std::list<IConnection> getStructuralConnections() const; ///< specifies which parameter matrix forms a connection between layers
112 
115  c.buffersize=10; // default configuration values, returned by value below
116  c.numContext=0;
117  c.numSats=0; // has to be changed by user!
118  c.useDerive=false;
119  c.useY=true;
120  c.qlearning=0; // null pointer: no QLearning instance by default
121 // c.tauE1=25;
122 // c.tauH=10;
123 // c.tauI=50;
124  c.reinforce_interval=10;
125  c.actioncorrel=0; // null pointer: no action correlation matrix by default
126  c.qlearning=0; // NOTE(review): redundant, qlearning was already set to 0 above
127  return c;
128  }
129 
130 
131 protected:
132  unsigned short number_sensors; ///< number of sensors, returned by getSensorNumber()
133  unsigned short number_motors; ///< number of motors, returned by getMotorNumber()
134 
135  // storage for sensor, sensor-derivative and motor values
136  unsigned short buffersize; ///< presumably the length of the ring buffers (cf. MultiReinforceConf::buffersize) - TODO confirm
141 
142  std::vector <Sat> sats; ///< satellite networks
143  bool manualControl; ///< True if actions (sats) are selected manually
144  matrix::Matrix nomSatOutput; ///< nominal output of satellite networks (x_t,y_t)^T
145  matrix::Matrix satInput; ///< input to satellite networks (x_{t-1}, xp_{t-1}, y_{t-1})^T
146  int action; ///< index of controlling network
147  int newaction; ///< index of new controlling network
148  int oldaction; ///< index of old controlling network
149  int state; ///< current state
150  double reward; ///< current reward
151  double oldreward; ///< old reward (nicer for plotting)
152  int phase; ///< current phase of the controller: 0: action just selected 1: state changed first time 2: state changed second time
153  int phasecnt; ///< counts the number of steps in one phase.
154 
155  matrix::Matrix satErrors; ///< actual errors of the sats
156  matrix::Matrix satAvgErrors; ///< averaged errors of the sats
157  matrix::Matrix statesbins; ///< bins with counts for each state
158 
161  int t; ///< presumably the time step counter - TODO confirm
162  int managementInterval; ///< interval between subsequent management calls
163 
164  /// returns the number of states (presumably the size of the state space - TODO confirm); pure virtual, to be overridden by subclasses
165  virtual int getStateNumber() = 0;
166 
167  /// returns the state; pure virtual, to be overridden by subclasses
168  virtual int calcState() = 0;
169 
170  /// returns the reinforcement (reward); pure virtual, to be overridden by subclasses
171  virtual double calcReinforcement() = 0;
172 
173 
174  /// puts the vector vec into the given ring buffer (delay presumably selects a slot further in the past - TODO confirm)
175  void putInBuffer(matrix::Matrix* buffer, const matrix::Matrix& vec, int delay = 0);
176 
177  /// puts the sensor values x_ (number_sensors entries) into the ring buffer
178  virtual void fillSensorBuffer(const sensor* x_, int number_sensors);
179  /// puts the motor values y_ (number_motors entries) into the ring buffer
180  virtual void fillMotorBuffer(const motor* y_, int number_motors);
181 
182 
183  /// handles inhibition, damping etc. (presumably invoked every managementInterval steps - TODO confirm)
184  virtual void management();
185 
186  /** Calculates first and second derivative and returns both in one matrix (above).
187  We use simple discrete approximations:
188  \f[ f'(x) = (f(x) - f(x-1)) / 2 \f]
189  \f[ f''(x) = f(x) - 2f(x-1) + f(x-2) \f]
190  where we have to go into the past because we do not have f(x+1). The scaling can be neglected.
191  */
192  matrix::Matrix calcDerivatives(const matrix::Matrix* buffer, int delay);
193 
194 };
195 
196 #endif
matrix::Matrix satInput
input to satellite networks (x_{t-1}, xp_{t-1}, y_{t-1})^T
Definition: multireinforce.h:145
int managementInterval
interval between subsequent management calls
Definition: multireinforce.h:162
virtual void fillSensorBuffer(const sensor *x_, int number_sensors)
puts the sensors in the ringbuffer
Definition: multireinforce.cpp:284
Matrix type.
Definition: matrix.h:65
virtual std::list< IConnection > getStructuralConnections() const
Specifies which parameter matrix forms a connection between layers (in terms of a neural network) The...
Definition: multireinforce.cpp:480
int newaction
index of new controlling network
Definition: multireinforce.h:147
unsigned short buffersize
Definition: multireinforce.h:136
bool useDerive
input to sat network includes derivatives
Definition: multireinforce.h:38
void storeSats(const std::list< std::string > &files)
stores the sats into the given files
Definition: multireinforce.cpp:414
MultiReinforce(const MultiReinforceConf &conf=getDefaultConf())
Definition: multireinforce.cpp:33
int phase
current phase of the controller: 0: action just selected 1:state changed first time 2:state changed s...
Definition: multireinforce.h:152
unsigned short number_sensors
Definition: multireinforce.h:132
Abstract class for robot controller (with some basic functionality).
Definition: abstractcontroller.h:46
matrix::Matrix satAvgErrors
averaged errors of the sats
Definition: multireinforce.h:156
matrix::Matrix * y_buffer
Definition: multireinforce.h:139
unsigned short buffersize
size of the ringbuffers for sensors, motors,...
Definition: multireinforce.h:34
static MultiReinforceConf getDefaultConf()
Definition: multireinforce.h:113
implements QLearning
Definition: qlearning.h:33
charArray paramkey
Definition: avrtypes.h:36
class for robot controller using several feedforward networks (satellite) and one selforg controller ...
Definition: multireinforce.h:61
virtual int getMotorNumber() const
returns the number of motors the controller was initialised with or 0 if not initialised ...
Definition: multireinforce.h:72
virtual double calcReinforcement()=0
returns the reinforcement (reward), to be overwritten
matrix::Matrix * xp_buffer
Definition: multireinforce.h:138
bool initialised
Definition: multireinforce.h:160
virtual std::list< iparamval > getInternalParams() const
Definition: multireinforce.cpp:458
double sensor
Definition: types.h:29
random generator with 48bit integer arithmetic
Definition: randomgenerator.h:34
int t
Definition: multireinforce.h:161
bool manualControl
True if actions (sats) are selected manually.
Definition: multireinforce.h:143
Satellite network struct.
Definition: multireinforce.h:50
virtual void step(const sensor *, int number_sensors, motor *, int number_motors)
performs one step (includes learning).
Definition: multireinforce.cpp:115
matrix::Matrix satErrors
actual errors of the sats
Definition: multireinforce.h:155
virtual bool store(FILE *f) const
stores the controller values to a given file.
Definition: multireinforce.cpp:340
virtual void fillMotorBuffer(const motor *y_, int number_motors)
puts the motors in the ringbuffer
Definition: multireinforce.cpp:298
struct MultiReinforceConf MultiReinforceConf
QLearning * qlearning
QLearning instance.
Definition: multireinforce.h:45
virtual void stepNoLearning(const sensor *, int number_sensors, motor *, int number_motors)
performs one step without learning. Calculates motor commands from sensor inputs.
Definition: multireinforce.cpp:274
virtual int getSensorNumber() const
returns the number of sensors the controller was initialised with or 0 if not initialised ...
Definition: multireinforce.h:70
double sensor
Definition: abstractcontroller.h:48
static std::list< std::string > createFileList(const char *filestem, int n)
builds a list of file names for the sat networks from filestem and n
Definition: multireinforce.cpp:433
virtual int getStateNumber()=0
returns number of state, to be overwritten
virtual void notifyOnChange(const paramkey &key)
Is called when a parameter was changed via setParam().
Definition: multireinforce.cpp:327
virtual void management()
handles inhibition damping etc.
Definition: multireinforce.cpp:323
int oldaction
index of old controlling network
Definition: multireinforce.h:148
double eps
Definition: multireinforce.h:53
void putInBuffer(matrix::Matrix *buffer, const matrix::Matrix &vec, int delay=0)
Definition: multireinforce.cpp:109
int reinforce_interval
time between consecutive reinforcement selections
Definition: multireinforce.h:43
virtual void init(int sensornumber, int motornumber, RandGen *randGen=0)
initialisation of the controller with the given sensor/ motornumber Must be called before use...
Definition: multireinforce.cpp:65
matrix::Matrix statesbins
bins with counts for each state
Definition: multireinforce.h:157
matrix::Matrix nomSatOutput
nominal output of satellite networks (x_t,y_t)^T
Definition: multireinforce.h:144
virtual bool restore(FILE *f)
loads the controller values from a given file.
Definition: multireinforce.cpp:359
void setManualControl(bool mControl, int action_=0)
enables/disables manual control, action_ is the sat network number to be used if mControl is false...
Definition: multireinforce.cpp:306
struct Sat Sat
Satellite network struct.
double lifetime
Definition: multireinforce.h:54
virtual std::list< iparamkey > getInternalParamNames() const
The list of the names of all internal parameters given by getInternalParams().
Definition: multireinforce.cpp:443
double reward
current reward
Definition: multireinforce.h:150
virtual int calcState()=0
returns state, to be overwritten
std::vector< Sat > sats
satellite networks
Definition: multireinforce.h:142
double motor
Definition: types.h:30
double oldreward
old reward (nicer for plotting)
Definition: multireinforce.h:151
matrix::Matrix * x_buffer
Definition: multireinforce.h:137
Sat(MultiLayerFFNN *_net, double _eps)
Definition: multireinforce.cpp:26
int numSats
number of satellite networks (derived from the length of the file list)
Definition: multireinforce.h:37
int numContext
number of context sensors (ignored)
Definition: multireinforce.h:35
virtual ~MultiReinforce()
Definition: multireinforce.cpp:50
multi layer neural network with configurable activation functions
Definition: multilayerffnn.h:35
int state
current state
Definition: multireinforce.h:149
double motor
Definition: abstractcontroller.h:49
int action
index of controlling network
Definition: multireinforce.h:146
matrix::Matrix * x_context_buffer
Definition: multireinforce.h:140
matrix::Matrix * actioncorrel
Definition: multireinforce.h:46
std::list< std::string > satFiles
Definition: multireinforce.h:36
matrix::Matrix calcDerivatives(const matrix::Matrix *buffer, int delay)
Calculates first and second derivative and returns both in one matrix (above).
Definition: multireinforce.cpp:315
MultiLayerFFNN * net
Definition: multireinforce.h:52
int phasecnt
counts number of steps in one phase.
Definition: multireinforce.h:153
void restoreSats(const std::list< std::string > &files)
restores the sat networks from separate files
Definition: multireinforce.cpp:394
int c
Definition: hexapod.cpp:56
virtual std::list< ILayer > getStructuralLayers() const
Specifies which parameter vector forms a structural layer (in terms of a neural network) The ordering...
Definition: multireinforce.cpp:472
unsigned short number_motors
Definition: multireinforce.h:133
MultiReinforceConf conf
Definition: multireinforce.h:159
Definition: multireinforce.h:33
bool useY
input to sat network includes y (motor values)
Definition: multireinforce.h:39