MethodDT.h

Go to the documentation of this file.
00001 // @(#)root/tmva $Id: MethodDT.h 36966 2010-11-26 09:50:13Z evt $ 
00002 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss 
00003 
00004 /**********************************************************************************
00005  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
00006  * Package: TMVA                                                                  *
00007  * Class  : MethodDT  (Single Decision Tree)                                      *
00008  * Web    : http://tmva.sourceforge.net                                           *
00009  *                                                                                *
00010  * Description:                                                                   *
00011  *      Analysis of a Single Decision Tree                                        *
00012  *                                                                                *
00013  * Authors (alphabetical):                                                        *
00014  *      Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland              *
00015  *      Helge Voss      <Helge.Voss@cern.ch>     - MPI-K Heidelberg, Germany      *
00016  *      Or Cohen        <orcohenor@gmail.com>    - Weizmann Inst., Israel         *
00017  *                                                                                *
00018  * Copyright (c) 2005:                                                            *
00019  *      CERN, Switzerland                                                         * 
00020  *      MPI-K Heidelberg, Germany                                                 * 
00021  *                                                                                *
00022  * Redistribution and use in source and binary forms, with or without             *
00023  * modification, are permitted according to the terms listed in LICENSE           *
00024  * (http://tmva.sourceforge.net/LICENSE)                                          *
00025  **********************************************************************************/
00026 
00027 #ifndef ROOT_TMVA_MethodDT
00028 #define ROOT_TMVA_MethodDT
00029 
00030 //////////////////////////////////////////////////////////////////////////
00031 //                                                                      //
00032 // MethodDT                                                             //
00033 //                                                                      //
00034 // Analysis of Single Decision Tree                                     //
00035 //                                                                      //
00036 //////////////////////////////////////////////////////////////////////////
00037 
00038 #include <vector>
00039 #ifndef ROOT_TH1
00040 #include "TH1.h"
00041 #endif
00042 #ifndef ROOT_TH2
00043 #include "TH2.h"
00044 #endif
00045 #ifndef ROOT_TTree
00046 #include "TTree.h"
00047 #endif
00048 #ifndef ROOT_TMVA_MethodBase
00049 #include "TMVA/MethodBase.h"
00050 #endif
00051 #ifndef ROOT_TMVA_DecisionTree
00052 #include "TMVA/DecisionTree.h"
00053 #endif
00054 #ifndef ROOT_TMVA_Event
00055 #include "TMVA/Event.h"
00056 #endif
00057 
00058 namespace TMVA {
00059    class MethodBoost;   // forward declaration; only used as a pointer parameter of MonitorBoost() below
00060    // MethodDT: MVA method derived from MethodBase that holds one DecisionTree (fTree) plus its training/pruning options
00061    class MethodDT : public MethodBase {
00062    public:
00063       MethodDT( const TString& jobName,   // constructor from job name, method title, dataset info and option string
00064                 const TString& methodTitle, 
00065                 DataSetInfo& theData,
00066                 const TString& theOption = "",
00067                 TDirectory* theTargetDir = 0 );
00068       // constructor from dataset info and the name of a weight file
00069       MethodDT( DataSetInfo& dsi, 
00070                 const TString& theWeightFile,  
00071                 TDirectory* theTargetDir = NULL );
00072 
00073       virtual ~MethodDT( void );
00074       // presumably tells whether this method supports the given analysis type (implementation not in this header)
00075       virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets );
00076       // train the method (defined outside this header)
00077       void Train( void );
00078       // keep the MethodBase overloads of ReadWeightsFromStream visible alongside the overload declared below
00079       using MethodBase::ReadWeightsFromStream;
00080 
00081       // write weights to file
00082       void AddWeightsXMLTo( void* parent ) const;
00083 
00084       // read weights from file
00085       void ReadWeightsFromStream( istream& istr );   // NOTE(review): 'istream' is unqualified; presumably made visible by an included header - confirm, or spell std::istream
00086       void ReadWeightsFromXML   ( void* wghtnode );
00087 
00088       // calculate the MVA value; err and errUpper are optional pointers that both default to 0
00089       Double_t GetMvaValue( Double_t* err = 0, Double_t* errUpper = 0 );
00090 
00091       // the option handling methods
00092       void DeclareOptions();
00093       void ProcessOptions();
00094 
00095       void GetHelpMessage() const;
00096 
00097       // ranking of input variables
00098       const Ranking* CreateRanking();
00099       // prune the decision tree (meaning of methodIndex and of the returned value is defined in the implementation)
00100       Double_t PruneTree(const Int_t methodIndex);
00101       // evaluate the quality of the given tree (the metric is defined in the implementation)
00102       Double_t TestTreeQuality( DecisionTree *dt );
00103       // inline accessor returning the current value of fPruneStrength
00104       Double_t GetPruneStrength () { return fPruneStrength; }
00105       // hook receiving the MethodBoost instance; presumably called while boosting - TODO confirm against the implementation
00106       Bool_t MonitorBoost( MethodBoost* booster);
00107 
00108    private:
00109       // Init used in the various constructors
00110       void Init( void );
00111 
00112    private:
00113 
00114       std::vector<Event*>             fEventSample;     // the training events
00115 
00116       DecisionTree*                   fTree;            // the decision tree
00117       // options for the decision tree
00118       SeparationBase                 *fSepType;         // the separation used in node splitting
00119       TString                         fSepTypeS;        // the separation (option string) used in node splitting
00120       Int_t                           fNodeMinEvents;   // min number of events in node 
00121   
00122       Int_t                           fNCuts;           // grid used in cut applied in node splitting
00123       Bool_t                          fUseYesNoLeaf;    // use sig or bkg classification in leaf nodes or sig/bkg
00124       Double_t                        fNodePurityLimit; // purity limit for sig/bkg nodes
00125       UInt_t                          fNNodesMax;       // max # of nodes
00126       UInt_t                          fMaxDepth;        // max depth
00127 
00128 
00129       Double_t                         fErrorFraction;   // ntuple var: misclassification error fraction 
00130       Double_t                         fPruneStrength;   // a parameter to set the "amount" of pruning; needs to be adjusted
00131       DecisionTree::EPruneMethod       fPruneMethod;     // method used for pruning 
00132       TString                          fPruneMethodS;    // prune method option string
00133       Bool_t                           fAutomatic;       // use user given prune strength or automatically determined one using a validation sample 
00134       Bool_t                           fRandomisedTrees; // choose a random subset of possible cut variables at each node during training
00135       Int_t                            fUseNvars;        // the number of variables used in the randomised tree splitting
00136       Bool_t                           fPruneBeforeBoost; // whether to prune right after the training (before the boosting)
00137 
00138       std::vector<Double_t>           fVariableImportance; // the relative importance of the different variables 
00139 
00140       Double_t                        fDeltaPruneStrength; // step size in pruning, is adjusted according to experience of previous trees        
00141       // debugging flags
00142       static const Int_t  fgDebugLevel = 0;     // debug level determining some printout/control plots etc.
00143 
00144       ClassDef(MethodDT,0)  // Analysis of Decision Trees 
00145 
00146          };
00147 }   // namespace TMVA
00148 
00149 #endif

Generated on Tue Jul 5 14:27:31 2011 for ROOT_528-00b_version by  doxygen 1.5.1