MethodBoost.h

Go to the documentation of this file.
00001 // @(#)root/tmva $Id: MethodBoost.h 36966 2010-11-26 09:50:13Z evt $
00002 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss,Or Cohen
00003 
00004 /**********************************************************************************
00005  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
00006  * Package: TMVA                                                                  *
00007  * Class  : MethodBoost                                                           *
00008  * Web    : http://tmva.sourceforge.net                                           *
00009  *                                                                                *
00010  * Description:                                                                   *
00011  *      Class for boosting a TMVA method                                          *
00012  *                                                                                *
00013  * Authors (alphabetical):                                                        *
00014  *      Andreas Hoecker    <Andreas.Hocker@cern.ch> - CERN, Switzerland           *
00015  *      Joerg Stelzer      <Joerg.Stelzer@cern.ch>  - CERN, Switzerland           *
00016  *      Helge Voss         <Helge.Voss@cern.ch>     - MPI-K Heidelberg, Germany   *
00017  *      Kai Voss           <Kai.Voss@cern.ch>       - U. of Victoria, Canada      *
00018  *      Or Cohen           <orcohenor@gmail.com>    - Weizmann Inst., Israel      *
00019  *      Eckhard v. Toerne  <evt@uni-bonn.de>        - U of Bonn, Germany          *
00020  *                                                                                *
00021  * Copyright (c) 2005:                                                            *
00022  *      CERN, Switzerland                                                         *
00023  *      U. of Victoria, Canada                                                    *
00024  *      MPI-K Heidelberg, Germany                                                 *
00025  *      LAPP, Annecy, France                                                      *
00026  *                                                                                *
00027  * Redistribution and use in source and binary forms, with or without             *
00028  * modification, are permitted according to the terms listed in LICENSE           *
00029  * (http://tmva.sourceforge.net/LICENSE)                                          *
00030  **********************************************************************************/
00031 
00032 #ifndef ROOT_TMVA_MethodBoost
00033 #define ROOT_TMVA_MethodBoost
00034 
00035 //////////////////////////////////////////////////////////////////////////
00036 //                                                                      //
00037 // MethodBoost                                                          //
00038 //                                                                      //
00039 // Class for boosting a TMVA method                                     //
00040 //                                                                      //
00041 //////////////////////////////////////////////////////////////////////////
00042 
00043 #include <iosfwd>
00044 #include <vector>
00045 
00046 #ifndef ROOT_TMVA_MethodBase
00047 #include "TMVA/MethodBase.h"
00048 #endif
00049 
00050 #ifndef ROOT_TMVA_MethodCompositeBase
00051 #include "TMVA/MethodCompositeBase.h"
00052 #endif
00053 
00054 namespace TMVA {
00055 
00056    class Factory;  // DSMTEST - forward declaration; Factory is declared a friend of MethodBoost
00057    class Reader;   // DSMTEST - forward declaration; Reader is declared a friend of MethodBoost
00058    class DataSetManager;  // DSMTEST - forward declaration; MethodBoost only holds a DataSetManager* member
00059 
00060    class MethodBoost : public MethodCompositeBase {
00061       // Class for boosting a TMVA method; fBoostNum holds the user-set number of boosts
00062    public :
00063 
00064       // constructors: one taking job name, method title and option string, one taking a weight file
00065       MethodBoost( const TString& jobName,
00066                    const TString& methodTitle,
00067                    DataSetInfo& theData,
00068                    const TString& theOption = "",
00069                    TDirectory* theTargetDir = NULL );
00070 
00071       MethodBoost( DataSetInfo& dsi,
00072                    const TString& theWeightFile,
00073                    TDirectory* theTargetDir = NULL );
00074 
00075       virtual ~MethodBoost( void );
00076       // NOTE(review): presumably reports whether the given analysis type is supported; third argument is unnamed (unused) - confirm in the implementation
00077       virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ );
00078 
00079       // training and boosting all the classifiers
00080       void Train( void );
00081 
00082       // ranking of input variables
00083       const Ranking* CreateRanking();
00084 
00085       // saves the name and options string of the boosted classifier
00086       Bool_t BookMethod( Types::EMVA theMethod, TString methodTitle, TString theOption );
00087       void SetBoostedMethodName ( TString methodName )     { fBoostedMethodName  = methodName; }
00088       // returns fBoostNum, the number of times the classifier is boosted
00089       Int_t          GetBoostNum() { return fBoostNum; }
00090 
00091       // returns the monitoring histogram stored at index fDefaultHistNum+histInd of fMonitorHist, where histInd is
00092       // the index of the histogram as added in the MonitorBoost function (operator[] access: no bounds or null check)
00093       TH1*           GetMonitoringHist( Int_t histInd ) { return (*fMonitorHist)[fDefaultHistNum+histInd]; }
00094       // appends hist to the fMonitorHist vector (fMonitorHist is dereferenced without a null check)
00095       void           AddMonitoringHist( TH1* hist )     { return fMonitorHist->push_back(hist); }
00096       // returns the current stage of the boosting (fBoostStage)
00097       Types::EBoostStage    GetBoostStage() { return fBoostStage; }
00098 
00099       void CleanBoostOptions();
00100       // NOTE(review): presumably the MVA response of the boosted method; err and errUpper default to null pointers - confirm their meaning in the implementation
00101       Double_t GetMvaValue( Double_t* err=0, Double_t* errUpper = 0 );
00102 
00103    private :
00104       // clean up
00105       void ClearAll();
00106 
00107       // print fit results
00108       void PrintResults( const TString&, std::vector<Double_t>&, const Double_t ) const;
00109 
00110       // initializing mostly monitoring tools of the boost process
00111       void Init();
00112       void InitHistos();
00113       void CheckSetup();
00114 
00115       // the option handling methods
00116       void DeclareOptions();
00117       void ProcessOptions();
00118       // sets fBoostStage to the given stage and returns this, so the call can be chained
00119       MethodBoost* SetStage( Types::EBoostStage stage ) { fBoostStage = stage; return this; }
00120 
00121       //training a single classifier
00122       void SingleTrain();
00123 
00124       //calculating a boosting weight from the classifier, storing it in the next one
00125       void SingleBoost();
00126 
00127       // calculate weight of single method
00128       void CalcMethodWeight();
00129 
00130       // return ROC integral on training/testing sample
00131       Double_t GetBoostROCIntegral(Bool_t, Types::ETreeType, Bool_t CalcOverlapIntergral=kFALSE);
00132 
00133       //writing the monitoring histograms and tree to a file
00134       void WriteMonitoringHistosToFile( void ) const;
00135 
00136       // write evaluation histograms into target file
00137       virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype);
00138 
00139       // performs the MethodBase testing + testing of each boosted classifier
00140       virtual void TestClassification();
00141 
00142       //finding the MVA to cut between sig and bgd according to fMVACutPerc,fMVACutType
00143       void FindMVACut();
00144 
00145       //setting all the boost weights to 1
00146       void ResetBoostWeights();
00147 
00148       //creating the vectors of histograms for monitoring MVA response of each classifier
00149       void CreateMVAHistorgrams();
00150 
00151       // calculate MVA values of current trained method on training
00152       // sample
00153       void CalcMVAValues();
00154 
00155       //Number of times the classifier is boosted (set by the user)
00156       Int_t             fBoostNum;
00157       // string specifying the boost type (AdaBoost / Bagging )
00158       TString           fBoostType;
00159 
00160       // string specifying the method weight type ( ByError,Average,LastMethod )
00161       TString           fMethodWeightType;
00162 
00163       //estimation of the level error of the classifier analysing the train dataset
00164       Double_t          fMethodError;
00165       //estimation of the level error of the classifier analysing the train dataset (with unboosted weights)
00166       Double_t          fOrigMethodError;
00167 
00168       //the weight used to boost the next classifier
00169       Double_t          fBoostWeight;
00170 
00171       // NOTE(review): the previous comment (min and max values for the classifier response) does not match this TString member; presumably a transformation specification - confirm
00172       TString fTransformString;
00173 
00174       //ADA boost parameter, default is 1
00175       Double_t          fADABoostBeta;
00176 
00177       // seed for random number generator used for bagging
00178       UInt_t            fRandomSeed;
00179 
00180       // details of the boosted classifier
00181       TString           fBoostedMethodName;
00182       TString           fBoostedMethodTitle;
00183       TString           fBoostedMethodOptions;
00184 
00185       // histograms to monitor values during the boosting
00186       std::vector<TH1*>* fMonitorHist;
00187 
00188       //whether to monitor the MVA response of every classifier (NOTE(review): original comment was truncated; presumably using the MVA histogram vectors below - confirm)
00189       Bool_t                fMonitorBoostedMethod;
00190 
00191       //MVA output from each classifier over the training hist, using original events weights
00192       std::vector< TH1* >   fTrainSigMVAHist;
00193       std::vector< TH1* >   fTrainBgdMVAHist;
00194       //MVA output from each classifier over the training hist, using boosted events weights
00195       std::vector< TH1* >   fBTrainSigMVAHist;
00196       std::vector< TH1* >   fBTrainBgdMVAHist;
00197       //MVA output from each classifier over the testing hist
00198       std::vector< TH1* >   fTestSigMVAHist;
00199       std::vector< TH1* >   fTestBgdMVAHist;
00200 
00201       // tree to monitor values during the boosting
00202       TTree*            fMonitorTree;
00203 
00204       // the stage of the boosting
00205       Types::EBoostStage fBoostStage;
00206 
00207       //the number of histograms filled for every type of boosted classifier (used as index offset in GetMonitoringHist)
00208       Int_t             fDefaultHistNum;
00209 
00210       //whether to recalculate the MVA cut at every boosting step
00211       Bool_t            fRecalculateMVACut;
00212 
00213       // roc integral of last trained method (on training sample)
00214       Double_t          fROC_training;
00215 
00216       // overlap integral of mva distributions for signal and
00217       // background (training sample)
00218       Double_t          fOverlap_integral;
00219 
00220       // mva values for the last trained method (on training sample)
00221       std::vector<Float_t> *fMVAvalues;
00222 
00223       DataSetManager* fDataSetManager; // DSMTEST
00224       friend class Factory; // DSMTEST
00225       friend class Reader;  // DSMTEST
00226 
00227 
00228 
00229 
00230 
00231    protected:
00232 
00233       // get help message text
00234       void GetHelpMessage() const;
00235 
00236       ClassDef(MethodBoost,0) // ROOT dictionary macro, class version 0
00237    };
00238 }
00239 
00240 #endif

Generated on Tue Jul 5 14:27:31 2011 for ROOT_528-00b_version by  doxygen 1.5.1