Robot Simulator of the Robotics Group for Self-Organization of Control 0.8.0

qlearning.h
/***************************************************************************
 *   Copyright (C) 2005-2011 LpzRobots development team                    *
 *    Georg Martius  <georg dot martius at web dot de>                     *
 *    Frank Guettler <guettler at informatik dot uni-leipzig dot de>       *
 *    Frank Hesse    <frank at nld dot ds dot mpg dot de>                  *
 *    Ralf Der       <ralfder at mis dot mpg dot de>                       *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 *                                                                         *
 ***************************************************************************/
#ifndef __QLEARNING_H
#define __QLEARNING_H

#include "matrix.h"
#include "configurable.h"
#include "storeable.h"
#include "randomgenerator.h"

/// implements Q-learning, with optional SARSA updates
class QLearning : public Configurable, public Storeable {
public:
  /**
     \param eps learning rate (typically 0.1)
     \param discount discount factor for Q-values (typically 0.9)
     \param exploration exploration rate (typically 0.02)
     \param eligibility number of steps to update backwards in time
     \param random_initQ if true, the Q table is filled with small random numbers at the start (default: false)
     \param useSARSA if true, use the SARSA strategy, otherwise Q-learning (default: false)
     \param tau number of time steps to average the reward over (for the collected reward)
  */
  QLearning(double eps, double discount, double exploration, int eligibility,
            bool random_initQ = false, bool useSARSA = false, int tau = 1000);

  virtual ~QLearning();

  /** initialisation with the given numbers of states and actions
      @param stateDim number of states
      @param actionDim number of actions
      @param randGen random number generator (if 0, a default one is used)
  */
  virtual void init(unsigned int stateDim, unsigned int actionDim, RandGen* randGen = 0);

  /** selection of an action given the current state.
      The policy is to take the action with the highest value,
      or a random action at the rate of exploration.
  */
  virtual unsigned int select(unsigned int state);
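
  /* Selection sketch for select() (standard epsilon-greedy, stated here for
     reference; the exact tie-breaking is defined in qlearning.cpp):
       if (uniform(0,1) < exploration)  a = uniformly random action;
       else                             a = argmax_a Q(state, a);
  */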

  /** selection of an action given the current state.
      The policy is to sample from the above-average actions, with a bias
      towards the old action (exploration included as well).
  */
  virtual unsigned int select_sample(unsigned int state);

  /// select with preference for the old action (90% if good) and 30% for the second best
  virtual unsigned int select_keepold(unsigned int state);

  /** performs learning and returns the current expected reward.
      \param state current state
      \param action the action selected in the current state
      \param reward reinforcement obtained in this state
      \param learnRateFactor can be given to modify eps for this
             learning step
  */
  virtual double learn(unsigned int state,
                       unsigned int action,
                       double reward,
                       double learnRateFactor = 1);

  /** returns the vector of values for all actions given the current state
  */
  matrix::Matrix getActionValues(unsigned int state);

  /** tells the Q-learning instance that the agent was reset, so that it
      forgets its memory. Note that the Q-table update lags one step behind,
      so if a reward was just received, call learn() one more time before
      calling reset().
  */
  virtual void reset();


  /// returns the number of states
  virtual unsigned int getStateDim() const;
  /// returns the number of actions
  virtual unsigned int getActionDim() const;

  /// returns the collected reward
  virtual double getCollectedReward() const;

  /// expects a list of (value, range) pairs and returns the associated state
  static int valInCrossProd(const std::list<std::pair<int,int> >& vals);

  /// expects a list of ranges and a state/action index and returns the configuration
  static std::list<int> ConfInCrossProd(const std::list<int>& ranges, int val);
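
  /* Example (illustrative values; the exact index ordering is defined in
     qlearning.cpp): two discretized sensors with 3 and 4 bins give
     3*4 = 12 states. A configuration is encoded and decoded like this:
       std::list<std::pair<int,int> > vals;
       vals.push_back(std::make_pair(2, 3)); // value 2 within a range of 3
       vals.push_back(std::make_pair(1, 4)); // value 1 within a range of 4
       int s = valInCrossProd(vals);         // single state index in [0, 12)
       std::list<int> ranges;
       ranges.push_back(3);
       ranges.push_back(4);
       std::list<int> conf = ConfInCrossProd(ranges, s); // recovers (2, 1)
  */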

  /// returns the Q table (m x n) == (states x actions)
  virtual const matrix::Matrix& getQ() const { return Q; }

  virtual bool store(FILE* f) const;

  virtual bool restore(FILE* f);


protected:
  double eps;
  double discount;
  double exploration;
  double eligibility; // used as an integer (stored as double only for configuration)
  bool random_initQ;
public:
  bool useSARSA; ///< if true, use the SARSA strategy, otherwise Q-learning
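  /* Update-rule sketch (standard definitions, stated here for reference;
     the actual implementation is in qlearning.cpp):
       Q-learning: Q(s,a) += eps * (r + discount * max_a' Q(s',a') - Q(s,a))
       SARSA:      Q(s,a) += eps * (r + discount * Q(s',a')        - Q(s,a))
     where a' is the greedy action in s' (Q-learning) or the action actually
     taken in s' (SARSA). */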
protected:
  int tau;          ///< time horizon for averaging the reward
  matrix::Matrix Q; ///< Q table (m x n) == (states x actions)

  int* actions;        // ring buffer for actions
  int* states;         // ring buffer for states
  double* rewards;     // ring buffer for rewards
  int ringbuffersize;  // size of the ring buffers (eligibility + 1)
  double* longrewards; // long ring buffer of rewards, used for collectedReward
  int t;               // time index into the ring buffers
  bool initialised;
  double collectedReward; // sum over the collected reward

  RandGen* randGen;
};


#endif
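
A minimal usage sketch (not part of the header above): the toy grid-world
environment below (GRID, ACTIONS, nextState, rewardOf) is an illustrative
assumption; only the QLearning calls follow the interface declared in this file.

#include "qlearning.h"
#include <cstdio>

static const unsigned int GRID = 8;    // states 0..7; the goal is state 7 (assumed)
static const unsigned int ACTIONS = 2; // action 0 = move left, 1 = move right (assumed)

// toy transition and reward functions, purely for illustration
static unsigned int nextState(unsigned int s, unsigned int a) {
  if (a == 1) return (s + 1 < GRID) ? s + 1 : s;
  else        return (s > 0)        ? s - 1 : s;
}
static double rewardOf(unsigned int s) { return (s == GRID - 1) ? 1.0 : 0.0; }

int main() {
  // typical parameter values as suggested in the constructor documentation
  QLearning ql(0.1, 0.9, 0.02, /*eligibility*/ 3);
  ql.init(GRID, ACTIONS);

  for (int episode = 0; episode < 100; ++episode) {
    unsigned int s = 0;
    for (int step = 0; step < 50; ++step) {
      unsigned int a = ql.select(s);  // epsilon-greedy action selection
      double r = rewardOf(s);         // reinforcement obtained in this state
      ql.learn(s, a, r);              // Q-table update (lags one step internally)
      s = nextState(s, a);
      if (s == GRID - 1) {
        ql.learn(s, ql.select(s), rewardOf(s)); // deliver the terminal reward
        break;                                  // (see the note at reset())
      }
    }
    ql.reset(); // clear the eligibility history between episodes
  }

  // QLearning is Storeable, so the learned Q table can be written to a file
  FILE* f = fopen("qtable.dat", "w");
  if (f) { ql.store(f); fclose(f); }

  printf("collected reward: %f\n", ql.getCollectedReward());
  return 0;
}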