00001 /*************************************************************************** 00002 * Copyright (C) 2005 by Robot Group Leipzig * 00003 * martius@informatik.uni-leipzig.de * 00004 * fhesse@informatik.uni-leipzig.de * 00005 * der@informatik.uni-leipzig.de * 00006 * * 00007 * This program is free software; you can redistribute it and/or modify * 00008 * it under the terms of the GNU General Public License as published by * 00009 * the Free Software Foundation; either version 2 of the License, or * 00010 * (at your option) any later version. * 00011 * * 00012 * This program is distributed in the hope that it will be useful, * 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 00015 * GNU General Public License for more details. * 00016 * * 00017 * You should have received a copy of the GNU General Public License * 00018 * along with this program; if not, write to the * 00019 * Free Software Foundation, Inc., * 00020 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * 00021 *************************************************************************** 00022 * * 00023 * DESCRIPTION * 00024 * * 00025 * This file contains activation function and there derivatives in * 00026 * different regularised versions * 00027 * * 00028 * * 00029 * $Log: regularisation.h,v $ 00030 * Revision 1.10 2008/12/22 14:36:48 martius 00031 * undo of ralf's changes on g_s and g_ss_div_s 00032 * created g_s_soft and g_ss_div_s_soft with this implementations 00033 * 00034 * Revision 1.9 2008/11/21 13:52:59 martius 00035 * changed g_s back to less regularization since there is the g_derivative version 00036 * 00037 * Revision 1.8 2008/05/02 17:20:04 martius 00038 * *** empty log message *** 00039 * 00040 * Revision 1.7 2008/02/05 10:08:47 der 00041 * new function g_derivative 00042 * 00043 * Revision 1.6 2008/01/29 09:38:02 der 00044 * function g_s changed 00045 * 00046 * Revision 1.5 2007/04/03 16:37:57 der 00047 * *** empty log message *** 00048 * 00049 * Revision 1.4 2007/02/23 09:25:52 der 00050 * regularisation with taylor expansion 00051 * 00052 * Revision 1.3 2006/11/29 16:22:43 martius 00053 * name is a variable of configurable and is used as such 00054 * 00055 * Revision 1.2 2006/10/23 10:47:59 martius 00056 * g and derivatives and inverses 00057 * 00058 * Revision 1.1 2006/10/20 15:22:15 martius 00059 * regularisation terms for g 00060 * 00061 * Revision 1.2 2006/07/14 12:23:56 martius 00062 * selforg becomes HEAD 00063 * 00064 * Revision 1.1.2.1 2005/12/06 17:38:21 martius 00065 * *** empty log message *** 00066 * 00067 * * 00068 ***************************************************************************/ 00069 #ifndef __REGULARISATION_H 00070 #define __REGULARISATION_H 00071 00072 #include <cmath> 00073 #include <selforg/controller_misc.h> 00074 00075 double inline sqr(double x) { 00076 return x*x; 00077 } 00078 00079 /// neuron transfer function 00080 double inline g(double z) 00081 { 00082 return tanh(z); 00083 }; 00084 00085 /// first dervative 00086 double inline g_s(double z) 00087 { 00088 double k=tanh(z); 00089 return 1.025 - k*k; 00090 // return 1/((1+0.5 * z*z)*(1+0.5 * z*z)); // softer 00091 //return 1/(1+log(1+z*z)); // even softer 00092 }; 00093 00094 00095 /// first dervative with smoothing for large z 00096 double inline g_derivative(double z) 00097 { 00098 return 1/((1+0.5 * z*z)*(1+0.5 * z*z)); 00099 }; 00100 00101 /// inverse of the first derivative 00102 double inline g_s_inv(double z) 00103 { 00104 double k=tanh(z); 00105 return 1/(1.025 - k*k); 00106 // return 1+z*z; // softer 00107 //return 1+log(1+z*z); // even softer 00108 }; 00109 00110 /** \f[ g'(z+xsi) = 1-(tanh(z+xsi))^2 \f] with additional clipping */ 00111 double inline g_s(double z, double xsi) { 00112 double Z = clip(z, -3.0, 3.0) + clip(xsi, -1.0, 1.0); 00113 double k=tanh(Z); // approximation with Mittelwertsatz 00114 return 1 - k*k; 00115 }; 00116 00117 00118 /** soft version: \f[ g'(z+xsi) = 1/(1+(z+xsi)^2 \f] with additional clipping */ 00119 double inline g_s_soft(double z, double xsi) { 00120 double Z = clip(z, -3.0, 3.0) + clip(xsi, -1.0, 1.0); 00121 return 1/(1 + Z*Z);//TEST 00122 }; 00123 00124 00125 /// an exact formula for g''/g'= -2g(Z), with clipped Z = z+xsi 00126 double inline g_ss_div_s(double z, double xsi) { 00127 // for consistency reasons we use the same clipped z as for g'. 00128 double Z = clip(z, -3.0, 3.0) + clip(xsi, -1.0, 1.0); 00129 // approximation with Mittelwertsatz (z is clipped) 00130 return -2*g(Z); 00131 }; 00132 00133 /// an soft formula for g''/g' = -2Z, with clipped Z = z+xsi 00134 double inline g_ss_div_s_soft(double z, double xsi) { 00135 // for consistency reasons we use the same clipped z as for g'. 00136 double Z = clip(z, -3.0, 3.0) + clip(xsi, -1.0, 1.0); 00137 return -2*Z;//TEST 00138 }; 00139 00140 /** with \f[ g'(z) = 1-(g(z+\xi))^2 \f] we get 00141 \f[\frac{\partial}{\partial z} \frac{1}{g'(Z)} = \frac{g''}{g'^2} \f] 00142 again with clipped Z 00143 */ 00144 double inline derive_g_s_inv_exact_clip(double z, double xsi){ 00145 double Z = clip(z, -3.0, 3.0) + clip(xsi, -1.0, 1.0); 00146 double k=tanh(Z); // approximation with Mittelwertsatz 00147 return -2*k/(1-k*k); 00148 } 00149 00150 /** \f[ g'(z) = 1-(z+\xi)^2 \f] which is the series expansion to the second order 00151 */ 00152 double inline g_s_expand2(double z, double xsi){ 00153 double Z = z + clip(xsi, -fabs(z), fabs(z)); 00154 return 1/(1+sqr(Z)); 00155 } 00156 00157 /** \f[ \frac{1}{g'(z)} \approx 1+(z+\xi)^2 \f] with geometric series approximation 00158 */ 00159 double inline g_s_inv_expand2(double z, double xsi){ 00160 double Z = z + clip(xsi, -fabs(z)/2.0, fabs(z)/2.0); 00161 return 1+sqr(Z); 00162 } 00163 00164 /** \f[ \frac{g''(z)}{g'(z)} \approx 2(z+\xi)(1+(z+\xi)^2) \f] with geometric series approximation 00165 */ 00166 double inline g_ss_div_s_expand2(double z, double xsi){ 00167 double Z = z + clip(xsi, -fabs(z)/2.0, fabs(z)/2.0); 00168 // double Z = z + clip(xsi, -fabs(z), fabs(z)); 00169 return -2*tanh(Z); 00170 } 00171 00172 00173 /// squashing function (-0.1 to 0.1), to protect against to large weight updates 00174 double inline squash(double z) 00175 { 00176 return clip(z, -0.1, 0.1); 00177 //return 0.1 * tanh(10.0 * z); 00178 }; 00179 00180 /// squashing function with adjustable clipping size, to protect against too large weight updates 00181 double inline squash(void* d, double z) { 00182 double size = *((double*)d); 00183 return clip(z, -size, size); 00184 }; 00185 00186 00187 00188 #endif 00189