MethodCommittee.h

Go to the documentation of this file.
00001 // @(#)root/tmva $Id: MethodCommittee.h 36966 2010-11-26 09:50:13Z evt $ 
00002 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss
00003 
00004 /**********************************************************************************
00005  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
00006  * Package: TMVA                                                                  *
00007  * Class  : MethodCommittee                                                       *
00008  * Web    : http://tmva.sourceforge.net                                           *
00009  *                                                                                *
00010  * Description:                                                                   *
00011  *      Boosting                                                                  *
00012  *                                                                                *
00013  * Authors (alphabetical):                                                        *
00014  *      Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland              *
00015  *      Joerg Stelzer   <Joerg.Stelzer@cern.ch>  - CERN, Switzerland              *
00016  *      Helge Voss      <Helge.Voss@cern.ch>     - MPI-K Heidelberg, Germany      *
00017  *                                                                                *
00018  * Copyright (c) 2005:                                                            *
00019  *      CERN, Switzerland                                                         * 
00020  *      U. of Victoria, Canada                                                    * 
00021  *      MPI-K Heidelberg, Germany                                                 * 
00022  *      LAPP, Annecy, France                                                      *
00023  *                                                                                *
00024  * Redistribution and use in source and binary forms, with or without             *
00025  * modification, are permitted according to the terms listed in LICENSE           *
00026  * (http://tmva.sourceforge.net/LICENSE)                                          *
00027  **********************************************************************************/
00028 
00029 #ifndef ROOT_TMVA_MethodCommittee
00030 #define ROOT_TMVA_MethodCommittee
00031 
00032 //////////////////////////////////////////////////////////////////////////
00033 //                                                                      //
00034 // MethodCommittee                                                      //
00035 //                                                                      //
00036 // Committee method                                                     //
00037 //                                                                      //
00038 //////////////////////////////////////////////////////////////////////////
00039 
00040 #include <vector>
00041 #include <iosfwd>
00042 #ifndef ROOT_TH2
00043 #include "TH2.h"
00044 #endif
00045 #ifndef ROOT_TTree
00046 #include "TTree.h"
00047 #endif
00048 
00049 #ifndef ROOT_TMVA_MethodBase
00050 #include "TMVA/MethodBase.h"
00051 #endif
00052 
00053 namespace TMVA {
00054 
00055    class MethodCommittee : public MethodBase {
00056       // committee of MVA members (method type fMemberType) that are boosted (see fBoostType)
00057    public:
00058 
00059       // constructor for training and reading
00060       MethodCommittee( const TString& jobName,
00061                        const TString& methodTitle,
00062                        DataSetInfo& dsi, 
00063                        const TString& theOption,
00064                        TDirectory* theTargetDir = 0 );
00065 
00066       // constructor for calculating Committee-MVA using previously generated members
00067       MethodCommittee( DataSetInfo& theData, 
00068                        const TString& theWeightFile,  
00069                        TDirectory* theTargetDir = 0 );
00070   
00071       virtual ~MethodCommittee( void );
00072       // presumably reports whether this method supports the given analysis type -- confirm in the .cxx
00073       virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets );
00074 
00075       // overloaded members from MethodBase
00076       void WriteStateToFile() const;
00077 
00078       // the training
00079       void Train();
00080       // keep the MethodBase overloads of ReadWeightsFromStream visible next to the one declared below
00081       using MethodBase::ReadWeightsFromStream;
00082 
00083       // write weights to file
00084       void AddWeightsXMLTo( void* parent ) const;
00085 
00086       // read weights from file
00087       void ReadWeightsFromStream( istream& istr ); // NOTE(review): istream is unqualified here (cf. std::ostream below) -- presumably resolved via an included header; confirm
00088       void ReadWeightsFromXML   ( void* /*wghtnode*/ ) {} // no-op: the argument is ignored and nothing is read
00089 
00090       // write method specific histos to target file
00091       void WriteMonitoringHistosToFile( void ) const;
00092 
00093       // calculate the MVA value (both error pointers are optional and default to 0)
00094       Double_t GetMvaValue( Double_t* err = 0, Double_t* errUpper = 0 );
00095 
00096       // apply the boost algorithm to a member in the committee
00097       Double_t Boost(  TMVA::MethodBase*, UInt_t imember );
00098 
00099       // ranking of input variables
00100       const Ranking* CreateRanking();
00101 
00102       // the option handling methods
00103       void DeclareOptions();
00104       void ProcessOptions();
00105 
00106       // read-only accessors to the committee members and their boost weights
00107       const std::vector<TMVA::IMethod*>& GetCommittee()    const { return fCommittee; }
00108       const std::vector<Double_t>&       GetBoostWeights() const { return fBoostWeights; }
00109 
00110       // return the relative variable importance: for all variables, or for variable ivar only
00111       std::vector<Double_t> GetVariableImportance();
00112       Double_t GetVariableImportance( UInt_t ivar );
00113 
00114    protected:
00115 
00116       // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
00117       void MakeClassSpecific( std::ostream&, const TString& ) const;
00118 
00119       // get help message text
00120       void GetHelpMessage() const;
00121 
00122    private:
00123 
00124       // mutable accessors (private; the const overloads above are the public interface)
00125       std::vector<IMethod*>& GetCommittee()    { return fCommittee; }
00126       std::vector<Double_t>& GetBoostWeights() { return fBoostWeights; }
00127 
00128       // boosting algorithm (adaptive boosting)
00129       Double_t AdaBoost( MethodBase* );
00130  
00131       // boosting as a random re-weighting
00132       Double_t Bagging( UInt_t imember);
00133   
00134       UInt_t                          fNMembers;        // number of members requested
00135       std::vector<IMethod*>           fCommittee;       // the collection of members
00136       std::vector<Double_t>           fBoostWeights;    // the weights applied in the individual boosts
00137       TString                         fBoostType;       // string specifying the boost type
00138 
00139       // options for the MVA method
00140       Types::EMVA                     fMemberType;      // the MVA method to be boosted
00141       TString                         fMemberOption;    // the options for that method
00142 
00143       Bool_t                          fUseMemberDecision;  // use binary information from IsSignal
00144       // the next flag chooses between using the average classification from the members
00145       // and having the individual members in the committee weighted from AdaBoost
00146       Bool_t                          fUseWeightedMembers; // weight the members in the committee (weights from AdaBoost)
00147     
00148 
00149       // Init used in the various constructors
00150       void Init( void );
00151 
00152       //some histograms for monitoring
00153       TH1F*                           fBoostFactorHist; // weights applied in boosting
00154       TH2F*                           fErrFractHist;    // error fraction vs member number
00155       TTree*                          fMonitorNtuple;   // monitoring ntuple
00156       Int_t                           fITree      ;     // ntuple var: ith member
00157       Double_t                        fBoostFactor;     // ntuple var: boost weight
00158       Double_t                        fErrorFraction;   // ntuple var: misclassification error fraction 
00159       Int_t                           fNnodes;          // ntuple var: nNodes
00160 
00161       std::vector< Double_t >         fVariableImportance; // the relative importance of the different variables 
00162 
00163       ClassDef(MethodCommittee,0)  // Analysis of Boosted MVA methods
00164    };
00165 
00166 } // namespace TMVA
00167 
00168 #endif

Generated on Tue Jul 5 14:27:31 2011 for ROOT_528-00b_version by  doxygen 1.5.1