classicreinforce.h

Go to the documentation of this file.
00001 /***************************************************************************
00002  *   Copyright (C) 2005-2011 by                                            *
00003  *    Georg Martius  <georg dot martius at web dot de>                     *
00004  *    Ralf Der       <ralfder at mis dot mpg dot de>                       *
00005  *                                                                         *
00006  *   ANY COMMERCIAL USE FORBIDDEN!                                         *
00007  *   LICENSE:                                                              *
00008  *   This work is licensed under the Creative Commons                      *
00009  *   Attribution-NonCommercial-ShareAlike 2.5 License. To view a copy of   *
00010  *   this license, visit http://creativecommons.org/licenses/by-nc-sa/2.5/ *
00011  *   or send a letter to Creative Commons, 543 Howard Street, 5th Floor,   *
00012  *   San Francisco, California, 94105, USA.                                *
00013  *                                                                         *
00014  *   This program is distributed in the hope that it will be useful,       *
00015  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
00016  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.                  *
00017  *                                                                         *
00018  ***************************************************************************/
00019 #ifndef __CLASSICREINFORCE_H
00020 #define __CLASSICREINFORCE_H
00021 
00022 #include <selforg/abstractcontroller.h>
00023 
00024 #include <assert.h>
00025 #include <cmath>
00026 
00027 #include <selforg/matrix.h>
00028 #include <selforg/noisegenerator.h>
00029 #include <selforg/qlearning.h>
00030 /// Configuration record for ClassicReinforce; ClassicReinforce::getDefaultConf() returns an instance with default values.
00031 typedef struct ClassicReinforceConf {
00032   unsigned short buffersize; ///< size of the ring buffers for sensors, motors, ...
00033   int    numContext;    ///< number of context sensors (documented as ignored)
00034   int reinforce_interval; ///< time between consecutive reinforcement selections (presumably counted in controller steps -- TODO confirm)
00035   
00036   QLearning* qlearning;      ///< QLearning instance; getDefaultConf() sets it to 0, so presumably the caller has to supply one -- TODO confirm (ownership is not visible in this header)
00037 } ClassicReinforceConf;
00038 
00039 /**
00040  * Abstract base class for a robot controller working on discrete states, discrete actions and a reward signal; subclasses define these through the pure virtual methods in the protected section, and the configuration carries a QLearning instance.
00041  * NOTE(review): the previous description spoke of "several feedforward networks (satellite) and one selforg controller"; nothing in this header declares those -- confirm against the implementation.
00042  */
00043 class ClassicReinforce : public AbstractController {
00044   // NOTE(review): the constructor's default argument is getDefaultConf(), which leaves conf.qlearning at 0 -- presumably callers must pass a configuration with a QLearning instance; confirm in the implementation.
00045 public:
00046   ClassicReinforce(const ClassicReinforceConf& conf = getDefaultConf());
00047   virtual void init(int sensornumber, int motornumber, RandGen* randGen = 0);
00048 
00049   virtual ~ClassicReinforce();
00050 
00051   /// returns the number of sensors the controller was initialised with or 0 if not initialised
00052   virtual int getSensorNumber() const { return number_sensors; }
00053   /// returns the number of motors the controller was initialised with or 0 if not initialised
00054   virtual int getMotorNumber() const  { return number_motors; }
00055 
00056   /// performs one step (includes learning). 
00057   /// Calculates motor commands from sensor inputs.
00058   virtual void step(const sensor* , int number_sensors, motor* , int number_motors);
00059 
00060   /// performs one step without learning. Calculates motor commands from sensor inputs.
00061   virtual void stepNoLearning(const sensor* , int number_sensors, 
00062                               motor* , int number_motors);
00063 
00064   // !!!!!!!!!!!!!!!!!!! MISC STUFF !!!!!!!!
00065   
00066   /** enables/disables manual control; action_ is the action (described as the "sat network number") to be used.
00067       If mControl is false, action_ is ignored.
00068    */
00069   void setManualControl(bool mControl, int action_ = 0);
00070 
00071 
00072   /************** CONFIGURABLE ********************************/
00073   virtual void notifyOnChange(const paramkey& key);
00074 
00075   /**** STOREABLE ****/
00076   /** stores the controller values to a given file. */
00077   virtual bool store(FILE* f) const;
00078   /** loads the controller values from a given file. */
00079   virtual bool restore(FILE* f);  
00080 
00081   /**** INSPECTABLE ****/
00082   virtual std::list<iparamkey> getInternalParamNames() const;
00083   virtual std::list<iparamval> getInternalParams() const;  
00084   virtual std::list<ILayer> getStructuralLayers() const;
00085   virtual std::list<IConnection> getStructuralConnections() const;
00086   /// returns the default configuration: buffersize=10, numContext=0, reinforce_interval=10, qlearning=0 (no QLearning instance set)
00087   static ClassicReinforceConf getDefaultConf(){
00088     ClassicReinforceConf c;
00089     c.buffersize=10;
00090     c.numContext=0;
00091     c.reinforce_interval=10;
00092     c.qlearning=0; // null pointer: no QLearning instance is created here
00093     return c;
00094   }
00095 
00096 
00097 protected:
00098   unsigned short number_sensors; ///< value returned by getSensorNumber()
00099   unsigned short number_motors;  ///< value returned by getMotorNumber()
00100   
00101   // storage for sensor, motor and (judging by the name) context-sensor values; used as ring buffers, see putInBuffer()
00102   unsigned short buffersize;
00103   matrix::Matrix* x_buffer;
00104   matrix::Matrix* y_buffer;
00105   matrix::Matrix* x_context_buffer;
00106 
00107   bool manualControl;          ///< True if actions (sats) are selected manually
00108 
00109   int action;                  ///< current action (presumably an index below getActionNumber() -- TODO confirm)
00110   int oldaction;               ///< previous action
00111   int state;                   ///< current state
00112   double reward;               ///< current reward
00113   double oldreward;            ///< old reward (nicer for plotting)
00114 
00115   ClassicReinforceConf conf;    ///< configuration of this controller
00116   bool initialised;             ///< presumably set once init() has run -- TODO confirm
00117   int t;                        ///< presumably the step counter -- TODO confirm
00118   int managementInterval;       ///< interval between subsequent management calls
00119 
00120   /// returns the number of states; pure virtual, to be overridden by subclasses
00121   virtual int getStateNumber() = 0;
00122   
00123   /// returns the current state; pure virtual, to be overridden by subclasses
00124   virtual int calcState() = 0;
00125 
00126   /// returns the number of actions; pure virtual, to be overridden by subclasses
00127   virtual int getActionNumber() = 0;
00128   /// returns the action Matrix for a discrete action; pure virtual, to be overridden by subclasses
00129   virtual matrix::Matrix calcMotor(int action) = 0;
00130 
00131   /// returns the reinforcement (reward); pure virtual, to be overridden by subclasses
00132   virtual double calcReinforcement() = 0;
00133 
00134   // puts a new value (vec) into the given ring buffer; the meaning of delay is defined in the implementation (default 0)
00135   void putInBuffer(matrix::Matrix* buffer, const matrix::Matrix& vec, int delay = 0);
00136 
00137   /// puts the sensors in the ringbuffer
00138   virtual void fillSensorBuffer(const sensor* x_, int number_sensors);
00139   /// puts the motors in the ringbuffer
00140   virtual void fillMotorBuffer(const motor* y_, int number_motors);
00141 
00142   /// handles inhibition damping etc. (presumably invoked every managementInterval steps -- TODO confirm)
00143   virtual void management();
00144  
00145 };
00146 
00147 #endif