Robotsystem of the Robot Group Leipzig: classicreinforce.h Source File

00001 /***************************************************************************
00002  *   Copyright (C) 2008 by Robot Group Leipzig                             *
00003  *    martius@informatik.uni-leipzig.de                                    *
00004  *    der@informatik.uni-leipzig.de                                        *
00005  *                                                                         *
00006  *   ANY COMMERCIAL USE FORBIDDEN!                                         *
00007  *   LICENSE:                                                              *
00008  *   This work is licensed under the Creative Commons                      *
00009  *   Attribution-NonCommercial-ShareAlike 2.5 License. To view a copy of   *
00010  *   this license, visit http://creativecommons.org/licenses/by-nc-sa/2.5/ *
00011  *   or send a letter to Creative Commons, 543 Howard Street, 5th Floor,   *
00012  *   San Francisco, California, 94105, USA.                                *
00013  *                                                                         *
00014  *   This program is distributed in the hope that it will be useful,       *
00015  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
00016  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.                  *
00017  *                                                                         *
00018  *   This version of classicreinforce has the following features           *
00019  *    robot is controlled with discrete actions                            *
00020  *    external (e.g. keyboard) control possible                            *
00021  *    Q-learning used for action selection                                 *
00022  *     states: x, defined by subclass                                      *
00023  *     action: y, defined by subclass                                      *
00024  *                                                                         *
00025  *   $Log: classicreinforce.h,v $
00026  *   Revision 1.4  2008/07/01 12:56:21  martius
00027  *   *** empty log message ***
00028  *
00029  *   Revision 1.3  2008/05/30 11:58:27  martius
00030  *   use cmath instead of math.h
00031  *
00032  *   Revision 1.2  2008/04/17 14:54:44  martius
00033  *   randomGen added, which is a random generator with long period and an
00034  *    internal state. Each Agent has an instance and passed it to the controller
00035  *    and the wiring. This is good for
00036  *   a) repeatability on agent basis,
00037  *   b) parallel execution as done in ode_robots
00038  *
00039  *   Revision 1.1  2008/03/01 01:49:25  martius
00040  *   reinforcement learning with qlerning
00041  *
00042  *
00043  *
00044  ***************************************************************************/
00045 #ifndef __CLASSICREINFORCE_H
00046 #define __CLASSICREINFORCE_H
00047 
00048 #include <selforg/abstractcontroller.h>
00049 
00050 #include <assert.h>
00051 #include <cmath>
00052 
00053 #include <selforg/matrix.h>
00054 #include <selforg/noisegenerator.h>
00055 #include <selforg/qlearning.h>
00056 
00057 typedef struct ClassicReinforceConf { // configuration for ClassicReinforce; defaults are set in ClassicReinforce::getDefaultConf()
00058   unsigned short buffersize; ///< size of the ringbuffers for sensors, motors,... (default: 10)
00059   int    numContext;    ///< number of context sensors (ignored) (default: 0)
00060   int reinforce_interval; ///< time between consecutive reinforcement selections (default: 10; presumably counted in controller steps - TODO confirm)
00061   // NOTE(review): the default for qlearning is 0, so presumably the user must supply an instance; ownership is not visible in this header - confirm
00062   QLearning* qlearning;      ///< QLearning instance (default: 0, i.e. none)
00063 } ClassicReinforceConf;
00064 
00065 /**
00066  * Abstract base class for robot controllers that select discrete actions by Q-learning.
00067  * Subclasses supply the state, action, motor-command and reward functions (pure virtuals below).
00068  */
00069 class ClassicReinforce : public AbstractController {
00070 
00071 public:
00072   ClassicReinforce(const ClassicReinforceConf& conf = getDefaultConf()); ///< uses getDefaultConf() if no configuration is given
00073   virtual void init(int sensornumber, int motornumber, RandGen* randGen = 0); ///< initialises the controller; randGen is optional (defaults to 0)
00074 
00075   virtual ~ClassicReinforce();
00076 
00077   /// returns the number of sensors the controller was initialised with or 0 if not initialised
00078   virtual int getSensorNumber() const { return number_sensors; }
00079   /// returns the number of motors the controller was initialised with or 0 if not initialised
00080   virtual int getMotorNumber() const  { return number_motors; }
00081 
00082   /// performs one step (includes learning).
00083   /// Calculates motor commands from sensor inputs.
00084   virtual void step(const sensor* , int number_sensors, motor* , int number_motors);
00085 
00086   /// performs one step without learning. Calculates motor commands from sensor inputs.
00087   virtual void stepNoLearning(const sensor* , int number_sensors, 
00088                               motor* , int number_motors);
00089 
00090   // !!!!!!!!!!!!!!!!!!! MISC STUFF !!!!!!!!
00091   
00092   /** enables/disables manual control; action_ is the action to be used
00093       (presumably an index below getActionNumber() - TODO confirm). If mControl is false, action_ is ignored
00094    */
00095   void setManualControl(bool mControl, int action_ = 0);
00096 
00097 
00098   /************** CONFIGURABLE ********************************/
00099   virtual paramval getParam(const paramkey& key) const;
00100   virtual bool setParam(const paramkey& key, paramval val);
00101   virtual paramlist getParamList() const;
00102 
00103 
00104   /**** STOREABLE ****/
00105   /** stores the controller values to a given file. */
00106   virtual bool store(FILE* f) const;
00107   /** loads the controller values from a given file. */
00108   virtual bool restore(FILE* f);  
00109 
00110   /**** INSPECTABLE ****/
00111   virtual std::list<iparamkey> getInternalParamNames() const;
00112   virtual std::list<iparamval> getInternalParams() const;  
00113   virtual std::list<ILayer> getStructuralLayers() const;
00114   virtual std::list<IConnection> getStructuralConnections() const;
00115   /// returns the default configuration: buffersize=10, numContext=0, reinforce_interval=10, qlearning=0
00116   static ClassicReinforceConf getDefaultConf(){
00117     ClassicReinforceConf c;
00118     c.buffersize=10;
00119     c.numContext=0;
00120     c.reinforce_interval=10;
00121     c.qlearning=0; // no QLearning instance by default; presumably the caller has to provide one - TODO confirm
00122     return c;
00123   }
00124 
00125 
00126 protected:
00127   unsigned short number_sensors; ///< value returned by getSensorNumber()
00128   unsigned short number_motors;  ///< value returned by getMotorNumber()
00129   
00130   // storage for sensor (x) and motor (y) values; x_context_buffer presumably holds context sensor values - TODO confirm
00131   unsigned short buffersize;
00132   matrix::Matrix* x_buffer;
00133   matrix::Matrix* y_buffer;
00134   matrix::Matrix* x_context_buffer;
00135 
00136   bool manualControl;          ///< True if actions are selected manually (see setManualControl)
00137 
00138   int action;                  ///< action
00139   int oldaction;               ///< old action
00140   int state;                   ///< current state
00141   double reward;               ///< current reward
00142   double oldreward;            ///< old reward (nicer for plotting)
00143 
00144   ClassicReinforceConf conf;    ///< configuration (presumably a copy of the constructor argument - TODO confirm)
00145   bool initialised;             ///< presumably set once init() has run - TODO confirm
00146   int t;                        ///< presumably the step counter - TODO confirm
00147   int managementInterval;       ///< interval between subsequent management calls
00148 
00149   /// returns the number of states, to be overridden by subclasses
00150   virtual int getStateNumber() = 0;
00151   
00152   /// returns the current state, to be overridden by subclasses
00153   virtual int calcState() = 0;
00154 
00155   /// returns the number of actions, to be overridden by subclasses
00156   virtual int getActionNumber() = 0;
00157   /// returns the motor Matrix for the given discrete action, to be overridden by subclasses
00158   virtual matrix::Matrix calcMotor(int action) = 0;
00159 
00160   /// returns the reinforcement (reward), to be overridden by subclasses
00161   virtual double calcReinforcement() = 0;
00162 
00163   /// puts a new value into the given ring buffer; delay is optional (defaults to 0)
00164   void putInBuffer(matrix::Matrix* buffer, const matrix::Matrix& vec, int delay = 0);
00165 
00166   /// puts the sensors in the ringbuffer
00167   virtual void fillSensorBuffer(const sensor* x_, int number_sensors);
00168   /// puts the motors in the ringbuffer
00169   virtual void fillMotorBuffer(const motor* y_, int number_motors);
00170 
00171   /// periodic housekeeping (see managementInterval); original note: handles inhibition damping etc.
00172   virtual void management();
00173  
00174 };
00175 
00176 #endif