MethodMLP.h

// @(#)root/tmva $Id: MethodMLP.h 36966 2010-11-26 09:50:13Z evt $
// Author: Andreas Hoecker, Peter Speckmayer, Matt Jachowski

/**********************************************************************************
 * Project: TMVA - a ROOT-integrated toolkit for multivariate data analysis       *
 * Package: TMVA                                                                  *
 * Class  : MethodMLP                                                             *
 * Web    : http://tmva.sourceforge.net                                           *
 *                                                                                *
 * Description:                                                                   *
 *      ANN Multilayer Perceptron class for the discrimination of signal          *
 *      from background. BFGS implementation based on the TMultiLayerPerceptron   *
 *      class from ROOT (http://root.cern.ch).                                    *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Krzysztof Danielowski <danielow@cern.ch>       - IFJ & AGH, Poland        *
 *      Andreas Hoecker       <Andreas.Hocker@cern.ch> - CERN, Switzerland        *
 *      Peter Speckmayer      <peter.speckmayer@cern.ch> - CERN, Switzerland      *
 *      Matt Jachowski        <jachowski@stanford.edu> - Stanford University, USA *
 *      Kamil Kraszewski      <kalq@cern.ch>           - IFJ & UJ, Poland         *
 *      Maciej Kruk           <mkruk@cern.ch>          - IFJ & AGH, Poland        *
 *                                                                                *
 * Copyright (c) 2005:                                                            *
 *      CERN, Switzerland                                                         *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without             *
 * modification, are permitted according to the terms listed in LICENSE           *
 * (http://tmva.sourceforge.net/LICENSE)                                          *
 **********************************************************************************/

#ifndef ROOT_TMVA_MethodMLP
#define ROOT_TMVA_MethodMLP

//////////////////////////////////////////////////////////////////////////
//                                                                      //
// MethodMLP                                                            //
//                                                                      //
// Multilayer Perceptron built on top of MethodANNBase                  //
//                                                                      //
//////////////////////////////////////////////////////////////////////////
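
// Example usage (a minimal sketch -- "outputFile" and the option values are
// illustrative only; see the TMVA Users Guide for the full list of MLP options):
//
//    TMVA::Factory factory( "TMVAnalysis", outputFile );
//    // ... register input trees and variables with the factory ...
//    factory.BookMethod( TMVA::Types::kMLP, "MLP",
//                        "NCycles=600:HiddenLayers=N+5:TrainingMethod=BP" );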

#include <vector>
#ifndef ROOT_TString
#include "TString.h"
#endif
#ifndef ROOT_TTree
#include "TTree.h"
#endif
#ifndef ROOT_TObjArray
#include "TObjArray.h"
#endif
#ifndef ROOT_TRandom3
#include "TRandom3.h"
#endif
#ifndef ROOT_TH1F
#include "TH1F.h"
#endif
#ifndef ROOT_TMatrixDfwd
#include "TMatrixDfwd.h"
#endif

#ifndef ROOT_TMVA_IFitterTarget
#include "TMVA/IFitterTarget.h"
#endif
#ifndef ROOT_TMVA_MethodBase
#include "TMVA/MethodBase.h"
#endif
#ifndef ROOT_TMVA_MethodANNBase
#include "TMVA/MethodANNBase.h"
#endif
#ifndef ROOT_TMVA_TNeuron
#include "TMVA/TNeuron.h"
#endif
#ifndef ROOT_TMVA_TActivation
#include "TMVA/TActivation.h"
#endif
#ifndef ROOT_TMVA_ConvergenceTest
#include "TMVA/ConvergenceTest.h"
#endif

// The Minuit-based minimizer below is disabled by default: the #define is
// immediately cancelled by the #undef. Remove the #undef to enable it.
#define MethodMLP_UseMinuit__
#undef  MethodMLP_UseMinuit__

namespace TMVA {

   class MethodMLP : public MethodANNBase, public IFitterTarget, public ConvergenceTest {

   public:

      // standard constructors
      MethodMLP( const TString& jobName,
                 const TString& methodTitle,
                 DataSetInfo& theData,
                 const TString& theOption,
                 TDirectory* theTargetDir = 0 );

      MethodMLP( DataSetInfo& theData,
                 const TString& theWeightFile,
                 TDirectory* theTargetDir = 0 );

      virtual ~MethodMLP();

      virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets );

      void Train() { Train(NumCycles()); }

      // estimator interface for the genetic algorithm (via IFitterTarget)
      Double_t ComputeEstimator ( std::vector<Double_t>& parameters );
      Double_t EstimatorFunction( std::vector<Double_t>& parameters );

      enum ETrainingMethod { kBP=0, kBFGS, kGA };     // back-propagation, BFGS, genetic algorithm
      enum EBPTrainingMode { kSequential=0, kBatch }; // back-propagation learning modes

      bool     HasInverseHessian() { return fCalculateErrors; }
      Double_t GetMvaValueAsymError( Double_t* errUpper, Double_t* errLower );
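
      // Note (sketch): GetMvaValueAsymError is only meaningful if fCalculateErrors
      // is set. It propagates the approximate inverse Hessian (GetApproxInvHessian
      // below) to an uncertainty on the MVA output, roughly sigma^2 ~ g^T H^-1 g,
      // with g the gradient of the output with respect to the weights. Usage,
      // "mlp" being a pointer to a trained MethodMLP:
      //
      //    Double_t errUp = 0, errDown = 0;   // filled by the call
      //    Double_t mva = mlp->GetMvaValueAsymError( &errUp, &errDown );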

   protected:

      // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
      void MakeClassSpecific( std::ostream&, const TString& ) const;

      // get help message text
      void GetHelpMessage() const;

   private:

      // the option handling methods
      void DeclareOptions();
      void ProcessOptions();

      // general helper functions
      void     Train( Int_t nEpochs );
      void     Init();
      void     InitializeLearningRates(); // needed only by back-propagation

      // used as a measure of success in all minimization techniques
      Double_t CalculateEstimator( Types::ETreeType treeType = Types::kTraining, Int_t iEpoch = -1 );

      // BFGS functions
      void     BFGSMinimize( Int_t nEpochs );
      void     SetGammaDelta( TMatrixD &Gamma, TMatrixD &Delta, std::vector<Double_t> &Buffer );
      void     SteepestDir( TMatrixD &Dir );
      Bool_t   GetHessian( TMatrixD &Hessian, TMatrixD &Gamma, TMatrixD &Delta );
      void     SetDir( TMatrixD &Hessian, TMatrixD &Dir );
      Double_t DerivDir( TMatrixD &Dir );
      Bool_t   LineSearch( TMatrixD &Dir, std::vector<Double_t> &Buffer, Double_t* dError=0 ); //zjh
      void     ComputeDEDw();
      void     SimulateEvent( const Event* ev );
      void     SetDirWeights( std::vector<Double_t> &Origin, TMatrixD &Dir, Double_t alpha );
      Double_t GetError();
      Double_t GetMSEErr( const Event* ev, UInt_t index = 0 );  //zjh
      Double_t GetCEErr( const Event* ev, UInt_t index = 0 );   //zjh
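
      // For reference, a sketch of the standard BFGS machinery these methods
      // implement (the exact bookkeeping lives in the implementation file):
      // with gamma = grad E(w_new) - grad E(w_old) and delta = w_new - w_old
      // (SetGammaDelta), GetHessian accumulates the textbook rank-two update
      //
      //    H <- H + (1 + g^T H g / d^T g) (d d^T)/(d^T g) - (d g^T H + H g d^T)/(d^T g)
      //
      // (d = delta, g = gamma); SetDir turns H into a search direction and
      // LineSearch minimizes the error along it, falling back to SteepestDir
      // when the update is unreliable.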

      // backpropagation functions
      void     BackPropagationMinimize( Int_t nEpochs );
      void     TrainOneEpoch();
      void     Shuffle( Int_t* index, Int_t n );
      void     DecaySynapseWeights( Bool_t lateEpoch );
      void     TrainOneEvent( Int_t ievt );
      Double_t GetDesiredOutput( const Event* ev );
      void     UpdateNetwork( Double_t desired, Double_t eventWeight=1.0 );
      void     UpdateNetwork( std::vector<Float_t>& desired, Double_t eventWeight=1.0 );
      void     CalculateNeuronDeltas();
      void     UpdateSynapses();
      void     AdjustSynapseWeights();
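
      // A sketch of the back-propagation update these methods implement: once
      // CalculateNeuronDeltas has propagated the error backwards, each synapse
      // weight is adjusted by
      //
      //    w_ij <- w_ij - fLearnRate * delta_j * y_i
      //
      // with delta_j the error term of the downstream neuron and y_i the output
      // of the upstream neuron; the effective learning rate is decayed via
      // fDecayRate (DecaySynapseWeights) as training progresses.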

      // faster backpropagation
      void     TrainOneEventFast( Int_t ievt, Float_t*& branchVar, Int_t& type );

      // genetic algorithm functions
      void GeneticMinimize();
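
      // GeneticMinimize drives TMVA's genetic algorithm through the IFitterTarget
      // base class: candidate weight vectors proposed by the GA are scored with
      // EstimatorFunction( parameters ) declared above, and the best-scoring
      // weights are kept.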
#ifdef MethodMLP_UseMinuit__
      // Minuit functions -- disabled by default (see MethodMLP_UseMinuit__ above)
      // because they rely on a static this-pointer
      void MinuitMinimize();
      static MethodMLP* GetThisPtr();
      static void IFCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t ifl );
      void FCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t ifl );
#endif

      // general
      bool               fUseRegulator;         // use regulators to avoid over-training (zjh)
      bool               fCalculateErrors;      // compute inverse hessian matrix at the end of the training
      Double_t           fPrior;                // regulator (prior) term of the error function (zjh)
      std::vector<Double_t> fPriorDev;          // derivatives of the prior w.r.t. the weights (zjh)
      void               GetApproxInvHessian ( TMatrixD& InvHessian, bool regulate=true );   // rank-1 approximation, neglecting 2nd derivatives (zjh)
      void               UpdateRegulators();    // update the regulator strengths (zjh)
      void               UpdatePriors();        // recompute the prior term (zjh)
      Int_t              fUpdateLimit;          // maximum number of regulator updates (zjh)

      ETrainingMethod fTrainingMethod; // training method: back-propagation (BP), BFGS, or genetic algorithm (GA)
      TString         fTrainMethodS;   // training method option param

      Float_t         fSamplingFraction;  // fraction of events which is sampled for training
      Float_t         fSamplingEpoch;     // fraction of epochs where sampling is used
      Float_t         fSamplingWeight;    // changing factor for event weights when sampling is turned on
      Bool_t          fSamplingTraining;  // the training sample is sampled
      Bool_t          fSamplingTesting;   // the testing sample is sampled

      // BFGS variables
      Double_t        fLastAlpha;      // line search variable
      Double_t        fTau;            // line search variable
      Int_t           fResetStep;      // reset period (how often the approximate Hessian matrix is cleared)

      // backpropagation variables
      Double_t        fLearnRate;      // learning rate for synapse weight adjustments
      Double_t        fDecayRate;      // decay rate for the learning rate above
      EBPTrainingMode fBPMode;         // backprop learning mode (sequential or batch)
      TString         fBpModeS;        // backprop learning mode option string (sequential or batch)
      Int_t           fBatchSize;      // batch size; only relevant in batch learning mode
      Int_t           fTestRate;       // overtraining test performed every fTestRate-th epoch
      Bool_t          fEpochMon;       // create and fill epoch-wise monitoring histograms (makes the output file big!)

      // genetic algorithm variables
      Int_t           fGA_nsteps;      // GA settings: number of steps
      Int_t           fGA_preCalc;     // GA settings: number of pre-calc steps
      Int_t           fGA_SC_steps;    // GA settings: SC_steps
      Int_t           fGA_SC_rate;     // GA settings: SC_rate
      Double_t        fGA_SC_factor;   // GA settings: SC_factor

#ifdef MethodMLP_UseMinuit__
      // Minuit variables -- used only when MethodMLP_UseMinuit__ is defined
      Int_t          fNumberOfWeights; // Minuit: number of weights
      static MethodMLP* fgThis;        // Minuit: this pointer
#endif

      // debugging flags
      static const Int_t  fgPRINT_ESTIMATOR_INC = 10;     // debug flags
      static const Bool_t fgPRINT_SEQ           = kFALSE; // debug flags
      static const Bool_t fgPRINT_BATCH         = kFALSE; // debug flags

      ClassDef(MethodMLP,0) // Multi-layer perceptron implemented specifically for TMVA
   };

} // namespace TMVA

#endif
