MethodCuts.h

Go to the documentation of this file.
00001 // @(#)root/tmva $Id: MethodCuts.h 36966 2010-11-26 09:50:13Z evt $
00002 // Author: Andreas Hoecker, Matt Jachowski, Peter Speckmayer, Helge Voss, Kai Voss
00003 
00004 /**********************************************************************************
00005  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
00006  * Package: TMVA                                                                  *
00007  * Class  : MethodCuts                                                            *
00008  * Web    : http://tmva.sourceforge.net                                           *
00009  *                                                                                *
00010  * Description:                                                                   *
00011  *      Multivariate optimisation of signal efficiency for given background       *
00012  *      efficiency, using rectangular minimum and maximum requirements on         *
00013  *      input variables                                                           *
00014  *                                                                                *
00015  * Authors (alphabetical):                                                        *
00016  *      Andreas Hoecker  <Andreas.Hocker@cern.ch> - CERN, Switzerland             *
00017  *      Matt Jachowski   <jachowski@stanford.edu> - Stanford University, USA      *
00018  *      Peter Speckmayer <speckmay@mail.cern.ch>  - CERN, Switzerland             *
00019  *      Helge Voss       <Helge.Voss@cern.ch>     - MPI-K Heidelberg, Germany     *
00020  *      Kai Voss         <Kai.Voss@cern.ch>       - U. of Victoria, Canada        *
00021  *                                                                                *
00022  * Copyright (c) 2005:                                                            *
00023  *      CERN, Switzerland                                                         *
00024  *      U. of Victoria, Canada                                                    *
00025  *      MPI-K Heidelberg, Germany                                                 *
00026  *      LAPP, Annecy, France                                                      *
00027  *                                                                                *
00028  * Redistribution and use in source and binary forms, with or without             *
00029  * modification, are permitted according to the terms listed in LICENSE           *
00030  * (http://tmva.sourceforge.net/LICENSE)                                          *
00031  **********************************************************************************/
00032 
00033 #ifndef ROOT_TMVA_MethodCuts
00034 #define ROOT_TMVA_MethodCuts
00035 
00036 //////////////////////////////////////////////////////////////////////////
00037 //                                                                      //
00038 // MethodCuts                                                           //
00039 //                                                                      //
00040 // Multivariate optimisation of signal efficiency for given background  //
00041 // efficiency, using rectangular minimum and maximum requirements on    //
00042 // input variables                                                      //
00043 //                                                                      //
00044 //////////////////////////////////////////////////////////////////////////
00045 
00046 #include <vector>
00047 #include <map>
00048 
00049 #ifndef ROOT_TMVA_MethodBase
00050 #include "TMVA/MethodBase.h"
00051 #endif
00052 #ifndef ROOT_TMVA_BinarySearchTree
00053 #include "TMVA/BinarySearchTree.h"
00054 #endif
00055 #ifndef ROOT_TMVA_PDF
00056 #include "TMVA/PDF.h"
00057 #endif
00058 #ifndef ROOT_TMVA_TMatrixDfwd
00059 #ifndef ROOT_TMatrixDfwd
00060 #include "TMatrixDfwd.h"
00061 #endif
00062 #endif
00063 #ifndef ROOT_TMVA_IFitterTarget
00064 #ifndef ROOT_IFitterTarget
00065 #include "IFitterTarget.h"
00066 #endif
00067 #endif
00068 
00069 class TRandom;
00070 
00071 namespace TMVA {
00072 
00073    class Interval;
00074 
00075    class MethodCuts : public MethodBase, public IFitterTarget {
00076       // optimises rectangular minimum/maximum cut requirements on the input variables
00077    public:
00078       // constructor taking job name, method title, data set info, option string and optional target file
00079       MethodCuts( const TString& jobName,
00080                   const TString& methodTitle, 
00081                   DataSetInfo& theData,
00082                   const TString& theOption = "MC:150:10000:",
00083                   TDirectory* theTargetFile = 0 );
00084       // constructor taking a weight file name (presumably to rebuild the method from stored weights -- TODO confirm)
00085       MethodCuts( DataSetInfo& theData,
00086                   const TString& theWeightFile,
00087                   TDirectory* theTargetDir = NULL );
00088 
00089       // this is a workaround which is necessary since CINT is not capable of handling dynamic casts
00090       static MethodCuts* DynamicCast( IMethod* method ) { return dynamic_cast<MethodCuts*>(method); }
00091 
00092       virtual ~MethodCuts( void );
00093       // analysis-type support query; the decision logic is defined in the implementation file
00094       virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets );
00095 
00096       // training method
00097       void Train( void );
00098       // keep the MethodBase overloads of ReadWeightsFromStream visible next to the one declared below
00099       using MethodBase::ReadWeightsFromStream;
00100       // weight I/O: XML writer, then stream and XML readers
00101       void AddWeightsXMLTo      ( void* parent ) const;
00102 
00103       void ReadWeightsFromStream( std::istream & i );
00104       void ReadWeightsFromXML   ( void* wghtnode );
00105 
00106       // calculate the MVA value (for CUTs this is just a dummy)
00107       Double_t GetMvaValue( Double_t* err = 0, Double_t* errUpper = 0 );
00108 
00109       // write method specific histos to target file
00110       void WriteMonitoringHistosToFile( void ) const;
00111 
00112       // test the method
00113       void TestClassification();
00114      
00115       // also overridden --> not computed for cuts (the inline stubs below return -1)
00116       Double_t GetSeparation  ( TH1*, TH1* ) const { return -1; }
00117       Double_t GetSeparation  ( PDF* = 0, PDF* = 0 ) const { return -1; }
00118       Double_t GetSignificance( void )       const { return -1; }
00119       Double_t GetmuTransform ( TTree *)           { return -1; }
00120       Double_t GetEfficiency  ( const TString&, Types::ETreeType, Double_t& );
00121       Double_t GetTrainingEfficiency(const TString& );
00122 
00123       // rarity distributions (signal or background (default) is uniform in [0,1]); always returns 0 here
00124       Double_t GetRarity( Double_t, Types::ESBType ) const { return 0; }
00125 
00126       // accessors for Minuit
00127       Double_t ComputeEstimator( std::vector<Double_t> & );
00128       // estimator overloads: one takes a vector of fit parameters, the other a pair of event indices
00129       Double_t EstimatorFunction( std::vector<Double_t> & );
00130       Double_t EstimatorFunction( Int_t ievt1, Int_t ievt2 );
00131       // sets the signal efficiency used to test the optimised cuts
00132       void     SetTestSignalEfficiency( Double_t effS ) { fTestSignalEff = effS; }
00133 
00134       // retrieve cut values for given signal efficiency (std::vector and raw-array variants)
00135       void     PrintCuts( Double_t effS ) const;
00136       Double_t GetCuts  ( Double_t effS, std::vector<Double_t>& cutMin, std::vector<Double_t>& cutMax ) const;
00137       Double_t GetCuts  ( Double_t effS, Double_t* cutMin, Double_t* cutMax ) const;
00138 
00139       // ranking of input variables (not available for cuts, hence a null pointer is returned)
00140       const Ranking* CreateRanking() { return 0; }
00141       // declaration and processing of the configuration options
00142       void DeclareOptions();
00143       void ProcessOptions();
00144 
00145       // maximum |cut| value (static constant, defined outside the class body)
00146       static const Double_t fgMaxAbsCutVal;
00147 
00148       // no check of options at this place (intentionally empty)
00149       void CheckSetup() {}
00150 
00151    protected:
00152 
00153       // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
00154       void MakeClassSpecific( std::ostream&, const TString& ) const;
00155 
00156       // get help message text
00157       void GetHelpMessage() const;
00158 
00159    private:
00160 
00161       // optimisation method
00162       enum EFitMethodType { kUseMonteCarlo = 0,
00163                             kUseGeneticAlgorithm,
00164                             kUseSimulatedAnnealing,
00165                             kUseMinuit,
00166                             kUseEventScan,
00167                             kUseMonteCarloEvents };
00168 
00169       // efficiency calculation method
00170       // - kUseEventSelection: computes efficiencies from given data sample
00171       // - kUsePDFs          : creates smoothed PDFs from data samples, and 
00172       //                       uses this to compute efficiencies
00173       enum EEffMethod     { kUseEventSelection = 0,
00174                             kUsePDFs };
00175 
00176       // improve the Monte Carlo by providing some additional information
00177       enum EFitParameters { kNotEnforced = 0,
00178                             kForceMin,
00179                             kForceMax,
00180                             kForceSmart };
00181 
00182       // general
00183       TString                 fFitMethodS;         // chosen fit method (string)
00184       EFitMethodType          fFitMethod;          // chosen fit method
00185       TString                 fEffMethodS;         // chosen efficiency calculation method (string)
00186       EEffMethod              fEffMethod;          // chosen efficiency calculation method
00187       std::vector<EFitParameters>* fFitParams;     // vector for series of fit methods
00188       Double_t                fTestSignalEff;      // used to test optimized signal efficiency
00189       Double_t                fEffSMin;            // NOTE(review): presumably lower bound on the signal efficiency -- confirm in implementation
00190       Double_t                fEffSMax;            // NOTE(review): presumably upper bound on the signal efficiency -- confirm in implementation
00191       Double_t*               fCutRangeMin;        // minimum of allowed cut range
00192       Double_t*               fCutRangeMax;        // maximum of allowed cut range
00193       std::vector<Interval*>  fCutRange;           // allowed ranges for cut optimisation
00194 
00195       // for the use of the binary tree method
00196       BinarySearchTree*       fBinaryTreeS;        // search tree, presumably holding signal events (TODO confirm)
00197       BinarySearchTree*       fBinaryTreeB;        // search tree, presumably holding background events (TODO confirm)
00198 
00199       // MC method
00200       Double_t**              fCutMin;             // minimum requirement
00201       Double_t**              fCutMax;             // maximum requirement
00202       Double_t*               fTmpCutMin;          // temporary minimum requirement
00203       Double_t*               fTmpCutMax;          // temporary maximum requirement
00204       TString*                fAllVarsI;           // what to do with variables
00205 
00206       // relevant for all methods
00207       Int_t                   fNpar;               // number of parameters in fit (default: 2*Nvar)
00208       Double_t                fEffRef;             // reference efficiency
00209       std::vector<Int_t>*     fRangeSign;          // used to match cuts to fit parameters (and vice versa)
00210       TRandom*                fRandom;             // random generator for MC optimisation method
00211 
00212       // basic statistics
00213       std::vector<Double_t>*  fMeanS;              // means of variables (signal)
00214       std::vector<Double_t>*  fMeanB;              // means of variables (background)
00215       std::vector<Double_t>*  fRmsS;               // RMSs of variables (signal)
00216       std::vector<Double_t>*  fRmsB;               // RMSs of variables (background)
00217 
00218       TH1*                    fEffBvsSLocal;       // intermediate eff. background versus eff signal histo
00219 
00220       // PDF section
00221       std::vector<TH1*>*      fVarHistS;           // reference histograms (signal)
00222       std::vector<TH1*>*      fVarHistB;           // reference histograms (background)
00223       std::vector<TH1*>*      fVarHistS_smooth;    // smoothed reference histograms (signal)        
00224       std::vector<TH1*>*      fVarHistB_smooth;    // smoothed reference histograms (background)
00225       std::vector<PDF*>*      fVarPdfS;            // reference PDFs (signal)
00226       std::vector<PDF*>*      fVarPdfB;            // reference PDFs (background)
00227 
00228       // negative efficiencies
00229       Bool_t                  fNegEffWarning;      // flag raised in case of negative efficiency warning
00230 
00231 
00232       // the definition of fit parameters can be different from the actual 
00233       // cut requirements; these functions provide the matching
00234       void     MatchParsToCuts( const std::vector<Double_t>&, Double_t*, Double_t* );
00235       void     MatchParsToCuts( Double_t*, Double_t*, Double_t* );
00236 
00237       void     MatchCutsToPars( std::vector<Double_t>&, Double_t*, Double_t* );
00238       void     MatchCutsToPars( std::vector<Double_t>&, Double_t**, Double_t**, Int_t ibin );
00239 
00240       // creates PDFs in case these are used to compute efficiencies 
00241       // (corresponds to: EffMethod == kUsePDFs)
00242       void     CreateVariablePDFs( void );
00243 
00244       // returns signal and background efficiencies for given cuts - using event counting
00245       void     GetEffsfromSelection( Double_t* cutMin, Double_t* cutMax,
00246                                      Double_t& effS, Double_t& effB );
00247       // returns signal and background efficiencies for given cuts - using PDFs
00248       void     GetEffsfromPDFs( Double_t* cutMin, Double_t* cutMax,
00249                                 Double_t& effS, Double_t& effB );
00250 
00251       // default initialisation method called by all constructors
00252       void     Init( void );
00253 
00254       ClassDef(MethodCuts,0)  // Multivariate optimisation of signal efficiency
00255    };
00256 
00257 } // namespace TMVA
00258 
00259 #endif

Generated on Tue Jul 5 14:27:31 2011 for ROOT_528-00b_version by  doxygen 1.5.1