MethodBoost.cxx

Go to the documentation of this file.
00001 // @(#)root/tmva $Id: MethodBoost.cxx 37986 2011-02-04 21:42:15Z pcanal $
00002 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss,Or Cohen, Eckhard von Toerne
00003 
00004 /**********************************************************************************
00005  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
00006  * Package: TMVA                                                                  *
00007  * Class  : MethodBoost                                                           *
00008  * Web    : http://tmva.sourceforge.net                                           *
00009  *                                                                                *
00010  * Description:                                                                   *
00011  *      Virtual base class for all MVA method                                     *
00012  *                                                                                *
00013  * Authors (alphabetical):                                                        *
00014  *      Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland              *
00015  *      Joerg Stelzer   <Joerg.Stelzer@cern.ch>  - CERN, Switzerland              *
00016  *      Helge Voss      <Helge.Voss@cern.ch>     - MPI-K Heidelberg, Germany      *
00017  *      Kai Voss        <Kai.Voss@cern.ch>       - U. of Victoria, Canada         *
00018  *      Or Cohen        <orcohenor@gmail.com>    - Weizmann Inst., Israel         *
00019  *      Eckhard v. Toerne  <evt@uni-bonn.de>        - U of Bonn, Germany          *
00020  *                                                                                *
00021  * Copyright (c) 2005:                                                            *
00022  *      CERN, Switzerland                                                         *
00023  *      U. of Victoria, Canada                                                    *
00024  *      MPI-K Heidelberg, Germany                                                 *
00025  *      U. of Bonn, Germany                                                       *
00026  *                                                                                *
00027  * Redistribution and use in source and binary forms, with or without             *
00028  * modification, are permitted according to the terms listed in LICENSE           *
00029  * (http://tmva.sourceforge.net/LICENSE)                                          *
00030  **********************************************************************************/
00031 
00032 //_______________________________________________________________________
00033 //
00034 // This class is meant to boost a single classifier. Boosting means    //
00035 // training the classifier a few times. Every time the weights of the  //
00036 // events are modified according to how well the classifier performed  //
00037 // on the test sample.                                                 //
00038 //_______________________________________________________________________
00039 #include <algorithm>
00040 #include <iomanip>
00041 #include <vector>
00042 #include <cmath>
00043 
00044 #include "Riostream.h"
00045 #include "TRandom3.h"
00046 #include "TMath.h"
00047 #include "TObjString.h"
00048 #include "TH1F.h"
00049 #include "TGraph.h"
00050 #include "TSpline.h"
00051 #include "TDirectory.h"
00052 
00053 #include "TMVA/MethodCompositeBase.h"
00054 #include "TMVA/MethodBase.h"
00055 #include "TMVA/MethodBoost.h"
00056 #include "TMVA/MethodCategory.h"
00057 #include "TMVA/Tools.h"
00058 #include "TMVA/ClassifierFactory.h"
00059 #include "TMVA/Timer.h"
00060 #include "TMVA/Types.h"
00061 #include "TMVA/PDF.h"
00062 #include "TMVA/Results.h"
00063 #include "TMVA/Config.h"
00064 
00065 #include "TMVA/SeparationBase.h"
00066 #include "TMVA/GiniIndex.h"
00067 #include "TMVA/RegressionVariance.h"
00068 
00069 REGISTER_METHOD(Boost)
00070 
00071 ClassImp(TMVA::MethodBoost)
00072 
00073 //_______________________________________________________________________
00074 TMVA::MethodBoost::MethodBoost( const TString& jobName,
00075                                 const TString& methodTitle,
00076                                 DataSetInfo& theData,
00077                                 const TString& theOption,
00078                                 TDirectory* theTargetDir ) :
00079    TMVA::MethodCompositeBase( jobName, Types::kBoost, methodTitle, theData, theOption, theTargetDir )
00080    , fBoostNum(0)
00081    , fMethodError(0)
00082    , fOrigMethodError(0)
00083    , fBoostWeight(0)
00084    , fADABoostBeta(0)
00085    , fRandomSeed(0)
00086    , fBoostedMethodTitle(methodTitle)
00087    , fBoostedMethodOptions(theOption)
00088    , fMonitorHist(0)
00089    , fMonitorBoostedMethod(kFALSE)
00090    , fMonitorTree(0)
00091    , fBoostStage(Types::kBoostProcBegin)
00092    , fDefaultHistNum(0)
00093    , fRecalculateMVACut(kFALSE)
00094    , fROC_training(0.0)
00095    , fOverlap_integral(0.0)
00096    , fMVAvalues(0)
00097 {
00098    fMVAvalues = new std::vector<Float_t>;
00099 }
00100 
00101 //_______________________________________________________________________
00102 TMVA::MethodBoost::MethodBoost( DataSetInfo& dsi,
00103                                 const TString& theWeightFile,
00104                                 TDirectory* theTargetDir )
00105    : TMVA::MethodCompositeBase( Types::kBoost, dsi, theWeightFile, theTargetDir )
00106    , fBoostNum(0)
00107    , fMethodError(0)
00108    , fOrigMethodError(0)
00109    , fBoostWeight(0)
00110    , fADABoostBeta(0)
00111    , fRandomSeed(0)
00112    , fBoostedMethodTitle("")
00113    , fBoostedMethodOptions("")
00114    , fMonitorHist(0)
00115    , fMonitorBoostedMethod(kFALSE)
00116    , fMonitorTree(0)
00117    , fBoostStage(Types::kBoostProcBegin)
00118    , fDefaultHistNum(0)
00119    , fRecalculateMVACut(kFALSE)
00120    , fROC_training(0.0)
00121    , fOverlap_integral(0.0)
00122    , fMVAvalues(0)
00123 {
00124    fMVAvalues = new std::vector<Float_t>;
00125 }
00126 
00127 //_______________________________________________________________________
00128 TMVA::MethodBoost::~MethodBoost( void )
00129 {
00130    // destructor
00131    fMethodWeight.clear();
00132 
00133    // the histogram themselves are deleted when the file is closed
00134 
00135    if(fMonitorHist) {
00136       for ( std::vector<TH1*>::iterator it = fMonitorHist->begin(); it != fMonitorHist->end(); ++it) delete *it;
00137       delete fMonitorHist;
00138    }
00139    fTrainSigMVAHist.clear();
00140    fTrainBgdMVAHist.clear();
00141    fBTrainSigMVAHist.clear();
00142    fBTrainBgdMVAHist.clear();
00143    fTestSigMVAHist.clear();
00144    fTestBgdMVAHist.clear();
00145 
00146    if (fMVAvalues) {
00147       delete fMVAvalues;
00148       fMVAvalues = 0;
00149    }
00150 }
00151 
00152 
00153 //_______________________________________________________________________
00154 Bool_t TMVA::MethodBoost::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ )
00155 {
00156    // Boost can handle classification with 2 classes and regression with one regression-target
00157    if( type == Types::kClassification && numberClasses == 2 ) return kTRUE;
00158    //   if( type == Types::kRegression && numberTargets == 1 ) return kTRUE;
00159    return kFALSE;
00160 }
00161 
00162 
00163 //_______________________________________________________________________
00164 void TMVA::MethodBoost::DeclareOptions()
00165 {
00166    DeclareOptionRef( fBoostNum = 1, "Boost_Num",
00167                      "Number of times the classifier is boosted");
00168 
00169    DeclareOptionRef( fMonitorBoostedMethod = kTRUE, "Boost_MonitorMethod",
00170                      "Whether to write monitoring histogram for each boosted classifier");
00171    
00172    DeclareOptionRef(fBoostType  = "AdaBoost", "Boost_Type", "Boosting type for the classifiers");
00173    AddPreDefVal(TString("AdaBoost"));
00174    AddPreDefVal(TString("Bagging"));
00175    AddPreDefVal(TString("HighEdgeGauss"));
00176    AddPreDefVal(TString("HighEdgeCoPara"));
00177 
00178    DeclareOptionRef(fMethodWeightType = "ByError", "Boost_MethodWeightType",
00179                     "How to set the final weight of the boosted classifiers");
00180    AddPreDefVal(TString("ByError"));
00181    AddPreDefVal(TString("Average"));
00182    AddPreDefVal(TString("ByROC"));
00183    AddPreDefVal(TString("ByOverlap"));
00184    AddPreDefVal(TString("LastMethod"));
00185 
00186    DeclareOptionRef(fRecalculateMVACut = kTRUE, "Boost_RecalculateMVACut",
00187                     "Whether to recalculate the classifier MVA Signallike cut at every boost iteration");
00188 
00189    DeclareOptionRef(fADABoostBeta = 1.0, "Boost_AdaBoostBeta",
00190                     "The ADA boost parameter that sets the effect of every boost step on the events' weights");
00191    
00192    DeclareOptionRef(fTransformString = "step", "Boost_Transform",
00193                     "Type of transform applied to every boosted method linear, log, step");
00194    AddPreDefVal(TString("step"));
00195    AddPreDefVal(TString("linear"));
00196    AddPreDefVal(TString("log"));
00197 
00198    DeclareOptionRef(fRandomSeed = 0, "Boost_RandomSeed",
00199                     "Seed for random number generator used for bagging");
00200 
00201    TMVA::MethodCompositeBase::fMethods.reserve(fBoostNum);;
00202 }
00203 
00204 //_______________________________________________________________________
00205 Bool_t TMVA::MethodBoost::BookMethod( Types::EMVA theMethod, TString methodTitle, TString theOption )
00206 {
00207    // just registering the string from which the boosted classifier will be created
00208    fBoostedMethodName = Types::Instance().GetMethodName( theMethod );
00209    fBoostedMethodTitle = methodTitle;
00210    fBoostedMethodOptions = theOption;
00211    return kTRUE;
00212 }
00213 
00214 //_______________________________________________________________________
void TMVA::MethodBoost::Init()
{
   // default initialisation — intentionally empty; the monitoring
   // histograms are created separately in InitHistos()
}
00217 
00218 //_______________________________________________________________________
00219 void TMVA::MethodBoost::InitHistos()
00220 {
00221    // initialisation routine
00222    if(fMonitorHist) {
00223       for ( std::vector<TH1*>::iterator it = fMonitorHist->begin(); it != fMonitorHist->end(); ++it) delete *it;
00224       delete fMonitorHist;
00225    }
00226    fMonitorHist = new std::vector<TH1*>();
00227    fMonitorHist->push_back(new TH1F("MethodWeight","Normalized Classifier Weight",fBoostNum,0,fBoostNum));
00228    fMonitorHist->push_back(new TH1F("BoostWeight","Boost Weight",fBoostNum,0,fBoostNum));
00229    fMonitorHist->push_back(new TH1F("ErrFraction","Error Fraction (by boosted event weights)",fBoostNum,0,fBoostNum));
00230    fMonitorHist->push_back(new TH1F("OrigErrFraction","Error Fraction (by original event weights)",fBoostNum,0,fBoostNum));
00231    fMonitorHist->push_back(new TH1F("ROCIntegral_test","ROC integral of single classifier (testing sample)",fBoostNum,0,fBoostNum));
00232    fMonitorHist->push_back(new TH1F("ROCIntegralBoosted_test","ROC integral of boosted method (testing sample)",fBoostNum,0,fBoostNum));
00233    fMonitorHist->push_back(new TH1F("ROCIntegral_train","ROC integral of single classifier (training sample)",fBoostNum,0,fBoostNum));
00234    fMonitorHist->push_back(new TH1F("ROCIntegralBoosted_train","ROC integral of boosted method (training sample)",fBoostNum,0,fBoostNum));
00235    fMonitorHist->push_back(new TH1F("OverlapIntegal_train","Overlap integral (training sample)",fBoostNum,0,fBoostNum));
00236    for ( std::vector<TH1*>::iterator it = fMonitorHist->begin(); it != fMonitorHist->end(); ++it ) (*it)->SetDirectory(0);
00237    fDefaultHistNum = fMonitorHist->size();
00238    (*fMonitorHist)[0]->GetXaxis()->SetTitle("Index of boosted classifier");
00239    (*fMonitorHist)[0]->GetYaxis()->SetTitle("Classifier Weight");
00240    (*fMonitorHist)[1]->GetXaxis()->SetTitle("Index of boosted classifier");
00241    (*fMonitorHist)[1]->GetYaxis()->SetTitle("Boost Weight");
00242    (*fMonitorHist)[2]->GetXaxis()->SetTitle("Index of boosted classifier");
00243    (*fMonitorHist)[2]->GetYaxis()->SetTitle("Error Fraction");
00244    (*fMonitorHist)[3]->GetXaxis()->SetTitle("Index of boosted classifier");
00245    (*fMonitorHist)[3]->GetYaxis()->SetTitle("Error Fraction");
00246    (*fMonitorHist)[4]->GetXaxis()->SetTitle("Index of boosted classifier");
00247    (*fMonitorHist)[4]->GetYaxis()->SetTitle("ROC integral of single classifier");
00248    (*fMonitorHist)[5]->GetXaxis()->SetTitle("Number of boosts");
00249    (*fMonitorHist)[5]->GetYaxis()->SetTitle("ROC integral boosted");
00250    (*fMonitorHist)[6]->GetXaxis()->SetTitle("Index of boosted classifier");
00251    (*fMonitorHist)[6]->GetYaxis()->SetTitle("ROC integral of single classifier");
00252    (*fMonitorHist)[7]->GetXaxis()->SetTitle("Number of boosts");
00253    (*fMonitorHist)[7]->GetYaxis()->SetTitle("ROC integral boosted");
00254    (*fMonitorHist)[8]->GetXaxis()->SetTitle("Index of boosted classifier");
00255    (*fMonitorHist)[8]->GetYaxis()->SetTitle("Overlap integral");
00256 
00257    fMonitorTree= new TTree("MonitorBoost","Boost variables");
00258    fMonitorTree->Branch("iMethod",&fMethodIndex,"iMethod/I");
00259    fMonitorTree->Branch("boostWeight",&fBoostWeight,"boostWeight/D");
00260    fMonitorTree->Branch("errorFraction",&fMethodError,"errorFraction/D");
00261    fMonitorBoostedMethod = kTRUE;
00262 }
00263 
00264 
00265 //_______________________________________________________________________
void TMVA::MethodBoost::CheckSetup()
{
   // Dump the complete booster configuration at debug level and make sure
   // the monitoring histograms exist before training starts.
   Log() << kDEBUG << "CheckSetup: fBoostType="<<fBoostType<<" fMethodWeightType=" << fMethodWeightType << Endl;
   Log() << kDEBUG << "CheckSetup: fADABoostBeta="<<fADABoostBeta<<Endl;
   Log() << kDEBUG << "CheckSetup: fBoostWeight="<<fBoostWeight<<Endl;
   Log() << kDEBUG << "CheckSetup: fMethodError="<<fMethodError<<Endl;
   Log() << kDEBUG << "CheckSetup: fOrigMethodError="<<fOrigMethodError<<Endl;
   Log() << kDEBUG << "CheckSetup: fBoostNum="<<fBoostNum<< " fMonitorHist="<< fMonitorHist<< Endl;
   Log() << kDEBUG << "CheckSetup: fRandomSeed=" << fRandomSeed<< Endl;
   Log() << kDEBUG << "CheckSetup: fDefaultHistNum=" << fDefaultHistNum << " fRecalculateMVACut=" << (fRecalculateMVACut? "true" : "false") << Endl;
   Log() << kDEBUG << "CheckSetup: fTrainSigMVAHist.size()="<<fTrainSigMVAHist.size()<<Endl;
   Log() << kDEBUG << "CheckSetup: fTestSigMVAHist.size()="<<fTestSigMVAHist.size()<<Endl;
   Log() << kDEBUG << "CheckSetup: fMonitorBoostedMethod=" << (fMonitorBoostedMethod? "true" : "false") << Endl;
   Log() << kDEBUG << "CheckSetup: MName=" << fBoostedMethodName << " Title="<< fBoostedMethodTitle<< Endl;
   Log() << kDEBUG << "CheckSetup: MOptions="<< fBoostedMethodOptions << Endl;
   Log() << kDEBUG << "CheckSetup: fBoostStage=" << fBoostStage<<Endl;
   Log() << kDEBUG << "CheckSetup: fMonitorTree=" << fMonitorTree <<Endl;
   Log() << kDEBUG << "CheckSetup: fMethodIndex=" <<fMethodIndex << Endl;
   if (fMethods.size()>0) Log() << kDEBUG << "CheckSetup: fMethods[0]" <<fMethods[0]<<Endl;
   Log() << kDEBUG << "CheckSetup: fMethodWeight.size()" << fMethodWeight.size() << Endl;
   if (fMethodWeight.size()>0) Log() << kDEBUG << "CheckSetup: fMethodWeight[0]="<<fMethodWeight[0]<<Endl;
   Log() << kDEBUG << "CheckSetup: trying to repair things" << Endl;

   //TMVA::MethodBase::CheckSetup();
   // if the monitoring histograms are missing, create them and re-run the
   // dump once; the recursion terminates because InitHistos() sets fMonitorHist
   if (fMonitorHist == 0){
      InitHistos();
      CheckSetup();
   }
}
00295 //_______________________________________________________________________
00296 void TMVA::MethodBoost::Train()
00297 {
00298    Double_t    AllMethodsWeight=0;
00299    TDirectory* methodDir( 0 );
00300    TString     dirName,dirTitle;
00301    Int_t       StopCounter=0;
00302 
00303    if (Data()->GetNTrainingEvents()==0) Log() << kFATAL << "<Train> Data() has zero events" << Endl;
00304    Data()->SetCurrentType(Types::kTraining);
00305 
00306    if (fMethods.size() > 0) fMethods.clear();
00307    fMVAvalues->resize(Data()->GetNTrainingEvents(), 0.0);
00308 
00309    Log() << kINFO << "Training "<< fBoostNum << " " << fBoostedMethodName << " Classifiers ... patience please" << Endl;
00310    Timer timer( fBoostNum, GetName() );
00311 
00312    ResetBoostWeights();
00313 
00314    // clean boosted method options
00315    CleanBoostOptions();
00316    //
00317    // training and boosting the classifiers
00318    for (fMethodIndex=0;fMethodIndex<fBoostNum;fMethodIndex++) {
00319       // the first classifier shows the option string output, the rest not
00320       if (fMethodIndex>0) TMVA::MsgLogger::InhibitOutput();
00321       IMethod* method = ClassifierFactory::Instance().Create(std::string(fBoostedMethodName),
00322                                                              GetJobName(),
00323                                                              Form("%s_B%04i", fBoostedMethodName.Data(),fMethodIndex),
00324                                                              DataInfo(),
00325                                                              fBoostedMethodOptions);
00326       TMVA::MsgLogger::EnableOutput();
00327 
00328       // supressing the rest of the classifier output the right way
00329       MethodBase *meth = (dynamic_cast<MethodBase*>(method));
00330 
00331       if(meth==0) continue;
00332 
00333       // set fDataSetManager if MethodCategory (to enable Category to create datasetinfo objects) // DSMTEST
00334       if( meth->GetMethodType() == Types::kCategory ){ // DSMTEST
00335          MethodCategory *methCat = (dynamic_cast<MethodCategory*>(meth)); // DSMTEST
00336          if( !methCat ) // DSMTEST
00337             Log() << kFATAL << "Method with type kCategory cannot be casted to MethodCategory. /MethodBoost" << Endl; // DSMTEST
00338          methCat->fDataSetManager = fDataSetManager; // DSMTEST
00339       } // DSMTEST
00340 
00341 
00342       meth->SetMsgType(kWARNING);
00343       meth->SetupMethod();
00344       meth->ParseOptions();
00345       // put SetAnalysisType here for the needs of MLP
00346       meth->SetAnalysisType( GetAnalysisType() );
00347       meth->ProcessSetup();
00348       meth->CheckSetup();
00349 
00350       // creating the directory of the classifier
00351       if (fMonitorBoostedMethod)
00352          {
00353             methodDir=MethodBaseDir()->GetDirectory(dirName=Form("%s_B%04i",fBoostedMethodName.Data(),fMethodIndex));
00354             if (methodDir==0)
00355                methodDir=BaseDir()->mkdir(dirName,dirTitle=Form("Directory Boosted %s #%04i", fBoostedMethodName.Data(),fMethodIndex));
00356             MethodBase* m = dynamic_cast<MethodBase*>(method);
00357             if(m) {
00358                m->SetMethodDir(methodDir);
00359                m->BaseDir()->cd();
00360             }
00361          }
00362 
00363       // training
00364       TMVA::MethodCompositeBase::fMethods.push_back(method);
00365       timer.DrawProgressBar( fMethodIndex );
00366       if (fMethodIndex==0) method->MonitorBoost(SetStage(Types::kBoostProcBegin));
00367       method->MonitorBoost(SetStage(Types::kBeforeTraining));
00368       TMVA::MsgLogger::InhibitOutput(); //supressing Logger outside the method
00369       SingleTrain();
00370       TMVA::MsgLogger::EnableOutput();
00371       method->WriteMonitoringHistosToFile();
00372       
00373       // calculate MVA values of method on training sample
00374       CalcMVAValues();
00375       
00376       if (fMethodIndex==0 && fMonitorBoostedMethod) CreateMVAHistorgrams();
00377       
00378       // get ROC integral and overlap integral for single method on
00379       // training sample
00380       fROC_training = GetBoostROCIntegral(kTRUE, Types::kTraining, kTRUE);
00381          
00382       // calculate method weight
00383       CalcMethodWeight();
00384       AllMethodsWeight += fMethodWeight.back();
00385 
00386       (*fMonitorHist)[4]->SetBinContent(fMethodIndex+1, GetBoostROCIntegral(kTRUE, Types::kTesting));
00387       (*fMonitorHist)[5]->SetBinContent(fMethodIndex+1, GetBoostROCIntegral(kFALSE, Types::kTesting));
00388       (*fMonitorHist)[6]->SetBinContent(fMethodIndex+1, fROC_training);
00389       (*fMonitorHist)[7]->SetBinContent(fMethodIndex+1, GetBoostROCIntegral(kFALSE, Types::kTraining));
00390       (*fMonitorHist)[8]->SetBinContent(fMethodIndex+1, fOverlap_integral);
00391 
00392       // boosting (reweight training sample)
00393       method->MonitorBoost(SetStage(Types::kBeforeBoosting));
00394       SingleBoost();
00395       method->MonitorBoost(SetStage(Types::kAfterBoosting));
00396       (*fMonitorHist)[1]->SetBinContent(fMethodIndex+1,fBoostWeight);
00397       (*fMonitorHist)[2]->SetBinContent(fMethodIndex+1,fMethodError);
00398       (*fMonitorHist)[3]->SetBinContent(fMethodIndex+1,fOrigMethodError);
00399 
00400       fMonitorTree->Fill();
00401 
00402       // stop boosting if needed when error has reached 0.5
00403       // thought of counting a few steps, but it doesn't seem to be necessary
00404       Log() << kDEBUG << "AdaBoost (methodErr) err = " << fMethodError << Endl;
00405       if (fMethodError > 0.49999) StopCounter++; 
00406       if (StopCounter > 0 && fBoostType != "Bagging")
00407          {
00408             timer.DrawProgressBar( fBoostNum );
00409             fBoostNum = fMethodIndex+1; 
00410             Log() << kINFO << "Error rate has reached 0.5, boosting process stopped at #" << fBoostNum << " classifier" << Endl;
00411             if (fBoostNum < 5)
00412                Log() << kINFO << "The classifier might be too strong to boost with Beta = " << fADABoostBeta << ", try reducing it." <<Endl;
00413             for (Int_t i=0;i<fDefaultHistNum;i++)
00414                (*fMonitorHist)[i]->SetBins(fBoostNum,0,fBoostNum);
00415             break;
00416          }
00417    }
00418    if (fMethodWeightType == "LastMethod") { fMethodWeight.back() = AllMethodsWeight = 1.0; }
00419 
00420    ResetBoostWeights();
00421    Timer* timer1=new Timer();
00422    // normalizing the weights of the classifiers
00423    for (fMethodIndex=0;fMethodIndex<fBoostNum;fMethodIndex++) {
00424       // pefroming post-boosting actions
00425       if (fMethods[fMethodIndex]->MonitorBoost(SetStage(Types::kBoostValidation))) {
00426          if (fMethodIndex==0) timer1 = new Timer( fBoostNum, GetName() );
00427 
00428          timer1->DrawProgressBar( fMethodIndex );
00429 
00430          if (fMethodIndex==fBoostNum) {
00431             Log() << kINFO << "Elapsed time: " << timer1->GetElapsedTime() 
00432                   << "                              " << Endl;
00433          }
00434       }
00435 
00436       if (AllMethodsWeight != 0.0)
00437          fMethodWeight[fMethodIndex] = fMethodWeight[fMethodIndex] / AllMethodsWeight;
00438       (*fMonitorHist)[0]->SetBinContent(fMethodIndex+1,fMethodWeight[fMethodIndex]);
00439    }
00440 
00441    // Ensure that in case of only 1 boost the method weight equals
00442    // 1.0.  This avoids unexpected behaviour in case of very bad
00443    // classifiers which have fBoostWeight=1 or fMethodError=0.5,
00444    // because their weight would be set to zero.  This behaviour is
00445    // not ok if one boosts just one time.
00446    if (fMethods.size()==1)  fMethodWeight[0] = 1.0;
00447 
00448    fMethods.back()->MonitorBoost(SetStage(Types::kBoostProcEnd));
00449 
00450    delete timer1;
00451 }
00452 
00453 //_______________________________________________________________________
00454 void TMVA::MethodBoost::CleanBoostOptions()
00455 {
00456    fBoostedMethodOptions=GetOptions(); 
00457 }
00458 
00459 //_______________________________________________________________________
00460 void TMVA::MethodBoost::CreateMVAHistorgrams()
00461 {
00462    if (fBoostNum <=0) Log() << kFATAL << "CreateHistorgrams called before fBoostNum is initialized" << Endl;
00463    // calculating histograms boundries and creating histograms..
00464    // nrms = number of rms around the average to use for outline (of the 0 classifier)
00465    Double_t meanS, meanB, rmsS, rmsB, xmin, xmax, nrms = 10;
00466    Int_t signalClass = 0;
00467    if (DataInfo().GetClassInfo("Signal") != 0) {
00468       signalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
00469    }
00470    gTools().ComputeStat( Data()->GetEventCollection(), fMVAvalues,
00471                          meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );
00472 
00473    fNbins = gConfig().fVariablePlotting.fNbinsXOfROCCurve;
00474    xmin = TMath::Max( TMath::Min(meanS - nrms*rmsS, meanB - nrms*rmsB ), xmin );
00475    xmax = TMath::Min( TMath::Max(meanS + nrms*rmsS, meanB + nrms*rmsB ), xmax ) + 0.00001;
00476 
00477    // creating all the historgrams
00478    for (Int_t imtd=0; imtd<fBoostNum; imtd++) {
00479       fTrainSigMVAHist .push_back( new TH1F( Form("MVA_Train_S_%04i",imtd), "MVA_Train_S", fNbins, xmin, xmax ) );
00480       fTrainBgdMVAHist .push_back( new TH1F( Form("MVA_Train_B%04i",imtd), "MVA_Train_B", fNbins, xmin, xmax ) );
00481       fBTrainSigMVAHist.push_back( new TH1F( Form("MVA_BTrain_S%04i",imtd), "MVA_BoostedTrain_S", fNbins, xmin, xmax ) );
00482       fBTrainBgdMVAHist.push_back( new TH1F( Form("MVA_BTrain_B%04i",imtd), "MVA_BoostedTrain_B", fNbins, xmin, xmax ) );
00483       fTestSigMVAHist  .push_back( new TH1F( Form("MVA_Test_S%04i",imtd), "MVA_Test_S", fNbins, xmin, xmax ) );
00484       fTestBgdMVAHist  .push_back( new TH1F( Form("MVA_Test_B%04i",imtd), "MVA_Test_B", fNbins, xmin, xmax ) );
00485    }
00486 }
00487 
00488 //_______________________________________________________________________
00489 void TMVA::MethodBoost::ResetBoostWeights()
00490 {
00491    // resetting back the boosted weights of the events to 1
00492    for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
00493       Event *ev = Data()->GetEvent(ievt);
00494       ev->SetBoostWeight( 1.0 );
00495    }
00496 }
00497 
00498 //_______________________________________________________________________
00499 void TMVA::MethodBoost::WriteMonitoringHistosToFile( void ) const
00500 {
00501    TDirectory* dir=0;
00502    if (fMonitorBoostedMethod) {
00503       for (Int_t imtd=0;imtd<fBoostNum;imtd++) {
00504 
00505          //writing the histograms in the specific classifier's directory
00506          MethodBase* m = dynamic_cast<MethodBase*>(fMethods[imtd]);
00507          if(!m) continue;
00508          dir = m->BaseDir();
00509          dir->cd();
00510          fTrainSigMVAHist[imtd]->SetDirectory(dir);
00511          fTrainSigMVAHist[imtd]->Write();
00512          fTrainBgdMVAHist[imtd]->SetDirectory(dir);
00513          fTrainBgdMVAHist[imtd]->Write();
00514          fBTrainSigMVAHist[imtd]->SetDirectory(dir);
00515          fBTrainSigMVAHist[imtd]->Write();
00516          fBTrainBgdMVAHist[imtd]->SetDirectory(dir);
00517          fBTrainBgdMVAHist[imtd]->Write();
00518       }
00519    }
00520 
00521    // going back to the original folder
00522    BaseDir()->cd();
00523    for (UInt_t i=0;i<fMonitorHist->size();i++) {
00524       ((*fMonitorHist)[i])->Write(Form("Booster_%s",((*fMonitorHist)[i])->GetName()));
00525    }
00526 
00527    fMonitorTree->Write();
00528 }
00529 
00530 //_______________________________________________________________________
00531 void TMVA::MethodBoost::TestClassification()
00532 {
00533    MethodBase::TestClassification();
00534    if (fMonitorBoostedMethod) {
00535       UInt_t nloop = fTestSigMVAHist.size();
00536       if (fMethods.size()<nloop) nloop = fMethods.size();
00537       //running over all the events and populating the test MVA histograms
00538       Data()->SetCurrentType(Types::kTesting);
00539       for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
00540          Event* ev = Data()->GetEvent(ievt);
00541          Float_t w = ev->GetWeight();
00542          if (DataInfo().IsSignal(ev)) {
00543             for (UInt_t imtd=0; imtd<nloop; imtd++) {
00544                fTestSigMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
00545             }
00546          }
00547          else {
00548             for (UInt_t imtd=0; imtd<nloop; imtd++) {
00549                fTestBgdMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
00550             }
00551          }
00552       }
00553       Data()->SetCurrentType(Types::kTraining);
00554    }
00555 }
00556 
00557 //_______________________________________________________________________
00558 void TMVA::MethodBoost::WriteEvaluationHistosToFile(Types::ETreeType treetype)
00559 {
00560    MethodBase::WriteEvaluationHistosToFile(treetype);
00561    if(treetype==Types::kTraining) return;
00562    UInt_t nloop = fTestSigMVAHist.size();
00563    if (fMethods.size()<nloop) nloop = fMethods.size();
00564    if (fMonitorBoostedMethod) {
00565       TDirectory* dir=0;
00566       for (UInt_t imtd=0;imtd<nloop;imtd++) {
00567          //writing the histograms in the specific classifier's directory
00568          MethodBase* mva = dynamic_cast<MethodBase*>(fMethods[imtd]);
00569          if(!mva) continue;
00570          dir = mva->BaseDir();
00571          if(dir==0) continue;
00572          dir->cd();
00573          fTestSigMVAHist[imtd]->SetDirectory(dir);
00574          fTestSigMVAHist[imtd]->Write();
00575          fTestBgdMVAHist[imtd]->SetDirectory(dir);
00576          fTestBgdMVAHist[imtd]->Write();
00577       }
00578    }
00579 }
00580 
00581 //_______________________________________________________________________
void TMVA::MethodBoost::ProcessOptions()
{
   // process user options
   // intentionally empty: the option values declared in DeclareOptions()
   // are used directly by the booster, no post-processing is needed
}
00586 
00587 //_______________________________________________________________________
00588 void TMVA::MethodBoost::SingleTrain()
00589 {
00590    // initialization
00591    Data()->SetCurrentType(Types::kTraining);
00592    MethodBase* meth = dynamic_cast<MethodBase*>(GetLastMethod());
00593    if(meth)
00594       meth->TrainMethod();
00595 }
00596 
00597 //_______________________________________________________________________
00598 void TMVA::MethodBoost::FindMVACut()
00599 {
00600    // find the CUT on the individual MVA that defines an event as 
00601    // correct or misclassified (to be used in the boosting process)
00602 
00603    MethodBase* lastMethod=dynamic_cast<MethodBase*>(fMethods.back());
00604    if (!lastMethod || lastMethod->GetMethodType() == Types::kDT ){ return;}
00605 
00606    if (!fRecalculateMVACut && fMethodIndex>0) {
00607       MethodBase* m = dynamic_cast<MethodBase*>(fMethods[0]);
00608       if(m)
00609          lastMethod->SetSignalReferenceCut(m->GetSignalReferenceCut());
00610    } else {
00611 
00612       // creating a fine histograms containing the error rate
00613       const Int_t nValBins=1000;
00614       Double_t* err=new Double_t[nValBins];
00615       const Double_t valmin=-1.5;
00616       const Double_t valmax=1.5;
00617       for (Int_t i=0;i<nValBins;i++) err[i]=0.;
00618       Double_t sum = 0.;
00619       for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
00620          Double_t weight = GetEvent(ievt)->GetWeight();
00621          sum +=weight;
00622          Double_t val=lastMethod->GetMvaValue();
00623          Int_t ibin = (Int_t) (((val-valmin)/(valmax-valmin))*nValBins);
00624          
00625          if (ibin>=nValBins) ibin = nValBins-1;
00626          if (ibin<0) ibin = 0;
00627          if (DataInfo().IsSignal(Data()->GetEvent(ievt))){
00628             for (Int_t i=ibin;i<nValBins;i++) err[i]+=weight;
00629          }
00630          else {
00631             for (Int_t i=0;i<ibin;i++) err[i]+=weight;
00632          }
00633       }
00634       Double_t minerr=1.e6;
00635       Int_t minbin=-1;
00636       for (Int_t i=0;i<nValBins;i++){
00637          if (err[i]<=minerr){
00638             minerr=err[i];
00639             minbin=i;
00640          }
00641       }
00642       delete[] err;
00643       
00644       
00645       Double_t sigCutVal = valmin + ((valmax-valmin)*minbin)/Float_t(nValBins+1);
00646       lastMethod->SetSignalReferenceCut(sigCutVal);
00647       
00648       Log() << kDEBUG << "(old step) Setting method cut to " <<lastMethod->GetSignalReferenceCut()<< Endl;
00649       
00650    }
00651    
00652 }
00653 
00654 //_______________________________________________________________________
00655 void TMVA::MethodBoost::SingleBoost()
00656 {
00657    MethodBase* method =  dynamic_cast<MethodBase*>(fMethods.back());
00658    if(!method) return;
00659    Event * ev; Float_t w,v,wo; Bool_t sig=kTRUE;
00660    Double_t sumAll=0, sumWrong=0, sumAllOrig=0, sumWrongOrig=0, sumAll1=0;
00661    Bool_t* WrongDetection=new Bool_t[Data()->GetNEvents()];
00662    for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) WrongDetection[ievt]=kTRUE;
00663 
00664    // finding the wrong events and calculating their total weights
00665    for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
00666       ev = Data()->GetEvent(ievt);
00667       sig=DataInfo().IsSignal(ev);
00668       v = fMVAvalues->at(ievt);
00669       w = ev->GetWeight();
00670       wo = ev->GetOriginalWeight();
00671       if (sig && fMonitorBoostedMethod) {
00672          fBTrainSigMVAHist[fMethodIndex]->Fill(v,w);
00673          fTrainSigMVAHist[fMethodIndex]->Fill(v,ev->GetOriginalWeight());
00674       }
00675       else if (fMonitorBoostedMethod) {
00676          fBTrainBgdMVAHist[fMethodIndex]->Fill(v,w);
00677          fTrainBgdMVAHist[fMethodIndex]->Fill(v,ev->GetOriginalWeight());
00678       }
00679       sumAll += w;
00680       sumAllOrig += wo;
00681       if ( sig != (fMVAvalues->at(ievt) > method->GetSignalReferenceCut()) ) {
00682          WrongDetection[ievt]=kTRUE; 
00683          sumWrong+=w; 
00684          sumWrongOrig+=wo;
00685       }
00686       else WrongDetection[ievt]=kFALSE;
00687    }
00688    fMethodError=sumWrong/sumAll;
00689    fOrigMethodError = sumWrongOrig/sumAllOrig;
00690    Log() << kDEBUG << "AdaBoost err (MethodErr1)= " << fMethodError<<" = wrong/all: " << sumWrong << "/" << sumAll<< " cut="<<method->GetSignalReferenceCut()<< Endl;
00691 
00692    // calculating the fMethodError and the fBoostWeight out of it uses the formula 
00693    // w = ((1-err)/err)^beta
00694    if (fMethodError>0 && fADABoostBeta == 1.0) {
00695       fBoostWeight = (1.0-fMethodError)/fMethodError;
00696    }
00697    else if (fMethodError>0 && fADABoostBeta != 1.0) {
00698       fBoostWeight =  TMath::Power((1.0 - fMethodError)/fMethodError, fADABoostBeta);
00699    }
00700    else fBoostWeight = 1000;
00701 
00702    Double_t alphaWeight = ( fBoostWeight > 0.0 ? TMath::Log(fBoostWeight) : 0.0);
00703    if (alphaWeight>5.) alphaWeight = 5.;
00704    if (alphaWeight<0.){
00705       //Log()<<kWARNING<<"alphaWeight is too small in AdaBoost alpha=" << alphaWeight<< Endl;
00706       alphaWeight = -alphaWeight;
00707    }
00708    if (fBoostType == "AdaBoost") {
00709       // ADA boosting, rescaling the weight of the wrong events according to the error level
00710       // over the entire test sample rescaling all the weights to have the same sum, but without
00711       // touching the original weights (changing only the boosted weight of all the events)
00712       // first reweight
00713       Double_t newSum=0., oldSum=0.;
00714       for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
00715          ev =  Data()->GetEvent(ievt);
00716          oldSum += ev->GetWeight();
00717          //         ev->ScaleBoostWeight(TMath::Exp(-alphaWeight*((WrongDetection[ievt])? -1.0 : 1.0)));
00718          //ev->ScaleBoostWeight(TMath::Exp(-alphaWeight*((WrongDetection[ievt])? -1.0 : 0)));
00719          if (WrongDetection[ievt]) ev->ScaleBoostWeight(fBoostWeight);
00720          newSum += ev->GetWeight();
00721       }
00722 
00723       Double_t normWeight = oldSum/newSum;
00724       // bla      std::cout << "Normalize weight by (Boost)" << normWeight <<  " = " << oldSum<<"/"<<newSum<< " eventBoostFactor="<<fBoostWeight<<std::endl;
00725       // next normalize the weights
00726       for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
00727          Data()->GetEvent(ievt)->ScaleBoostWeight(normWeight);
00728       }
00729 
00730    }
00731    else if (fBoostType == "Bagging") {
00732       // Bagging or Bootstrap boosting, gives new random weight for every event
00733       TRandom3*trandom   = new TRandom3(fRandomSeed+fMethods.size());
00734       for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
00735          ev = Data()->GetEvent(ievt);
00736          ev->SetBoostWeight(trandom->Rndm());
00737          sumAll1+=ev->GetWeight();
00738       }
00739       // rescaling all the weights to have the same sum, but without touching the original
00740       // weights (changing only the boosted weight of all the events)
00741       Double_t Factor=sumAll/sumAll1;
00742       for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
00743          ev = Data()->GetEvent(ievt);
00744          ev->ScaleBoostWeight(Factor);
00745       }
00746    }
00747    else if (fBoostType == "HighEdgeGauss" || 
00748             fBoostType == "HighEdgeCoPara") {
00749       // Give events high boost weight, which are close of far away
00750       // from the MVA cut value
00751       Double_t MVACutValue = method->GetSignalReferenceCut();
00752       sumAll1 = 0;
00753       for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
00754          ev = Data()->GetEvent(ievt);
00755          if (fBoostType == "HighEdgeGauss")
00756             ev->SetBoostWeight( TMath::Exp( -std::pow(fMVAvalues->at(ievt)-MVACutValue,2)/(0.1*fADABoostBeta) ) );
00757          else if (fBoostType == "HighEdgeCoPara")
00758             ev->SetBoostWeight( DataInfo().IsSignal(ev) ? TMath::Power(1.0-fMVAvalues->at(ievt),fADABoostBeta) : TMath::Power(fMVAvalues->at(ievt),fADABoostBeta) );
00759          else
00760             Log() << kFATAL << "Unknown event weight type!" << Endl;
00761 
00762          sumAll1 += ev->GetWeight();
00763       }
00764       // rescaling all the weights to have the same sum, but without
00765       // touching the original weights (changing only the boosted
00766       // weight of all the events)
00767       Double_t Factor=sumAll/sumAll1;
00768       for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++)
00769          Data()->GetEvent(ievt)->ScaleBoostWeight(Factor);
00770    }
00771    delete[] WrongDetection;
00772 }
00773 
00774 //_______________________________________________________________________
00775 void TMVA::MethodBoost::CalcMethodWeight()
00776 {
00777    // Calculate weight of single method.
00778    // This is no longer done in SingleBoost();
00779 
00780    MethodBase* method =  dynamic_cast<MethodBase*>(fMethods.back());
00781    if (!method) {
00782       Log() << kFATAL << "Dynamic cast to MethodBase* failed" <<Endl;
00783       return;
00784    }
00785 
00786    Event * ev; Float_t w;
00787    Double_t sumAll=0, sumWrong=0;
00788 
00789    // finding the MVA cut value for IsSignalLike, stored in the method
00790    FindMVACut();
00791 
00792    // finding the wrong events and calculating their total weights
00793    for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
00794       ev      = Data()->GetEvent(ievt);
00795       w       = ev->GetWeight();
00796       sumAll += w;
00797       if ( DataInfo().IsSignal(ev) != 
00798            (fMVAvalues->at(ievt) > method->GetSignalReferenceCut()) )
00799          sumWrong += w;
00800    }
00801    fMethodError=sumWrong/sumAll;
00802 
00803    // calculating the fMethodError and the fBoostWeight out of it uses
00804    // the formula
00805    // w = ((1-err)/err)^beta
00806    if (fMethodError>0 && fADABoostBeta == 1.0) {
00807       fBoostWeight = (1.0-fMethodError)/fMethodError;
00808    }
00809    else if (fMethodError>0 && fADABoostBeta != 1.0) {
00810       fBoostWeight =  TMath::Power((1.0 - fMethodError)/fMethodError, fADABoostBeta);
00811    }
00812    else fBoostWeight = 1000;
00813 
00814    // sanity check to avoid log() with negative argument
00815    if (fBoostWeight <= 0.0)  fBoostWeight = 1.0;
00816    
00817    // calculate method weight
00818    if      (fMethodWeightType == "ByError") fMethodWeight.push_back(TMath::Log(fBoostWeight));
00819    else if (fMethodWeightType == "Average") fMethodWeight.push_back(1.0);
00820    else if (fMethodWeightType == "ByROC")   fMethodWeight.push_back(fROC_training);
00821    else if (fMethodWeightType == "ByOverlap") fMethodWeight.push_back((fOverlap_integral > 0.0 ? 1.0/fOverlap_integral : 1000.0));
00822    else                                     fMethodWeight.push_back(0);
00823 }
00824 
00825 //_______________________________________________________________________
00826 void TMVA::MethodBoost::GetHelpMessage() const
00827 {
00828    // Get help message text
00829    //
00830    // typical length of text line:
00831    //         "|--------------------------------------------------------------|"
00832    Log() << Endl;
00833    Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
00834    Log() << Endl;
00835    Log() << "This method combines several classifier of one species in a "<<Endl;
00836    Log() << "single multivariate quantity via the boost algorithm." << Endl;
00837    Log() << "the output is a weighted sum over all individual classifiers" <<Endl;
00838    Log() << "By default, the AdaBoost method is employed, which gives " << Endl;
00839    Log() << "events that were misclassified in the previous tree a larger " << Endl;
00840    Log() << "weight in the training of the following classifier."<<Endl;
00841    Log() << "Optionally, Bagged boosting can also be applied." << Endl;
00842    Log() << Endl;
00843    Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
00844    Log() << Endl;
00845    Log() << "The most important parameter in the configuration is the "<<Endl;
00846    Log() << "number of boosts applied (Boost_Num) and the choice of boosting"<<Endl;
00847    Log() << "(Boost_Type), which can be set to either AdaBoost or Bagging." << Endl;
00848    Log() << "AdaBoosting: The most important parameters in this configuration" <<Endl;
00849    Log() << "is the beta parameter (Boost_AdaBoostBeta)  " << Endl;
00850    Log() << "When boosting a linear classifier, it is sometimes advantageous"<<Endl; 
00851    Log() << "to transform the MVA output non-linearly. The following options" <<Endl;
00852    Log() << "are available: step, log, and minmax, the default is no transform."<<Endl;
00853    Log() <<Endl;
00854    Log() << "Some classifiers are hard to boost and do not improve much in"<<Endl; 
00855    Log() << "their performance by boosting them, some even slightly deteriorate"<< Endl;
00856    Log() << "due to the boosting." <<Endl;
00857    Log() << "The booking of the boost method is special since it requires"<<Endl;
00858    Log() << "the booing of the method to be boosted and the boost itself."<<Endl;
00859    Log() << "This is solved by booking the method to be boosted and to add"<<Endl;
00860    Log() << "all Boost parameters, which all begin with \"Boost_\" to the"<<Endl;
00861    Log() << "options string. The factory separates the options and initiates"<<Endl;
00862    Log() << "the boost process. The TMVA macro directory contains the example"<<Endl;
00863    Log() << "macro \"Boost.C\"" <<Endl;
00864 }
00865 
//_______________________________________________________________________
const TMVA::Ranking* TMVA::MethodBoost::CreateRanking()
{ 
   // no variable ranking is provided for the boosted classifier
   return 0;
}
00871 
00872 //_______________________________________________________________________
00873 Double_t TMVA::MethodBoost::GetMvaValue( Double_t* err, Double_t* errUpper )
00874 {
00875    // return boosted MVA response
00876    Double_t mvaValue = 0;
00877    Double_t epsilon = TMath::Exp(-1.);
00878    //Double_t fact    = TMath::Exp(-1.)+TMath::Exp(1.);
00879    for (UInt_t i=0;i< fMethods.size(); i++){
00880       MethodBase* m = dynamic_cast<MethodBase*>(fMethods[i]);
00881       if(m==0) continue;
00882       Double_t val = fTmpEvent ? m->GetMvaValue(fTmpEvent) : m->GetMvaValue();
00883       Double_t sigcut = m->GetSignalReferenceCut();
00884       // default is no transform
00885       if (fTransformString == "linear"){
00886 
00887       }
00888       else if (fTransformString == "log"){
00889          if (val < sigcut) val = sigcut;
00890 
00891          val = TMath::Log((val-sigcut)+epsilon);
00892       }
00893       else if (fTransformString == "step" ){
00894          if (val < sigcut) val = -1.;
00895          else val = 1.;
00896       }
00897       else {
00898          Log() << kFATAL << "error unknown transformation " << fTransformString<<Endl;
00899       }
00900       mvaValue+=val*fMethodWeight[i];
00901    }
00902    // cannot determine error
00903    NoErrorCalc(err, errUpper);
00904 
00905    return mvaValue;
00906 }
00907 
//_______________________________________________________________________
Double_t TMVA::MethodBoost::GetBoostROCIntegral(Bool_t singleMethod, Types::ETreeType eTT, Bool_t CalcOverlapIntergral)
{
   // Calculate the ROC integral of a single classifier or even the
   // whole boosted classifier.  The tree type (training or testing
   // sample) is specified by 'eTT'.
   //
   // If tree type kTraining is set, the original training sample is
   // used to compute the ROC integral (original weights).
   //
   // - singleMethod - if kTRUE, return ROC integral of single (last
   //                  trained) classifier; if kFALSE, return ROC
   //                  integral of full classifier
   //
   // - eTT - tree type (Types::kTraining / Types::kTesting)
   //
   // - CalcOverlapIntergral - if kTRUE, the overlap integral of the
   //                          signal/background MVA distributions
   //                          is calculated and stored in
   //                          'fOverlap_integral'
   //
   // NOTE: this method temporarily renormalizes fMethodWeight (restored
   // before returning) and resets the current tree type to kTraining.

   // set data sample training / testing
   Data()->SetCurrentType(eTT);

   MethodBase* method = singleMethod ? dynamic_cast<MethodBase*>(fMethods.back()) : 0; // ToDo CoVerity flags this line as there is no prtection against a zero-pointer delivered by dynamic_cast
   // to make CoVerity happy (although, OF COURSE, the last method in the commitee
   // has to be also of type MethodBase as ANY method is... hence the dynamic_cast
   // will never by "zero" ...
   if (singleMethod && !method) {
      Log() << kFATAL << " What do you do? Your method:"
            << fMethods.back()->GetName() 
            << " seems not to be a propper TMVA method" 
            << Endl;
      std::exit(1);
   }
   // output argument for GetMvaValue(); its value is not used here
   Double_t err = 0.0;

   // temporary renormalize the method weights in case of evaluation
   // of full classifier.
   // save the old normalization of the methods
   std::vector<Double_t> OldMethodWeight(fMethodWeight);
   if (!singleMethod) {
      // calculate sum of weights of all methods
      Double_t AllMethodsWeight = 0;
      for (Int_t i=0; i<=fMethodIndex; i++)
         AllMethodsWeight += fMethodWeight.at(i);
      // normalize the weights of the classifiers
      if (fMethodWeightType == "LastMethod")
         fMethodWeight.back() = AllMethodsWeight = 1.0;
      if (AllMethodsWeight != 0.0) {
         for (Int_t i=0; i<=fMethodIndex; i++)
            fMethodWeight[i] /= AllMethodsWeight;
      }
   }

   // calculate MVA values
   Double_t meanS, meanB, rmsS, rmsB, xmin, xmax, nrms = 10;
   // mvaRes either aliases the cached fMVAvalues (not owned) or is
   // freshly allocated here (owned, deleted at the end)
   std::vector <Float_t>* mvaRes;
   if (singleMethod && eTT==Types::kTraining)
      mvaRes = fMVAvalues; // values already calculated
   else {  
      mvaRes = new std::vector <Float_t>(Data()->GetNEvents());
      for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
         // GetEvent() sets the current event used by GetMvaValue()
         Data()->GetEvent(ievt);
         (*mvaRes)[ievt] = singleMethod ? method->GetMvaValue(&err) : GetMvaValue(&err);
      }
   }

   // restore the method weights
   if (!singleMethod)
      fMethodWeight = OldMethodWeight;

   // now create histograms for calculation of the ROC integral
   Int_t signalClass = 0;
   if (DataInfo().GetClassInfo("Signal") != 0) {
      signalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
   }
   gTools().ComputeStat( Data()->GetEventCollection(eTT), mvaRes,
                         meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );

   fNbins = gConfig().fVariablePlotting.fNbinsXOfROCCurve;
   // restrict the histogram range to mean +- nrms*rms, clamped to the
   // observed min/max; small offset on xmax so the maximum falls inside
   xmin = TMath::Max( TMath::Min(meanS - nrms*rmsS, meanB - nrms*rmsB ), xmin );
   xmax = TMath::Min( TMath::Max(meanS + nrms*rmsS, meanB + nrms*rmsB ), xmax ) + 0.0001;

   // calculate ROC integral
   TH1* mva_s = new TH1F( "MVA_S", "MVA_S", fNbins, xmin, xmax );
   TH1* mva_b = new TH1F( "MVA_B", "MVA_B", fNbins, xmin, xmax );
   TH1 *mva_s_overlap=0, *mva_b_overlap=0;
   if (CalcOverlapIntergral) {
      mva_s_overlap = new TH1F( "MVA_S_OVERLAP", "MVA_S_OVERLAP", fNbins, xmin, xmax );
      mva_b_overlap = new TH1F( "MVA_B_OVERLAP", "MVA_B_OVERLAP", fNbins, xmin, xmax );
   }
   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
      const Event* ev = GetEvent(ievt);
      // for training, use the original (un-boosted) weights for the ROC
      Float_t w = (eTT==Types::kTesting ? ev->GetWeight() : ev->GetOriginalWeight());
      if (DataInfo().IsSignal(ev))  mva_s->Fill( (*mvaRes)[ievt], w );
      else                          mva_b->Fill( (*mvaRes)[ievt], w );

      if (CalcOverlapIntergral) {
         // the overlap integral uses the current (boosted) weights
         Float_t w_ov = ev->GetWeight();
         if (DataInfo().IsSignal(ev))  
            mva_s_overlap->Fill( (*mvaRes)[ievt], w_ov );
         else
            mva_b_overlap->Fill( (*mvaRes)[ievt], w_ov );
      }
   }
   gTools().NormHist( mva_s );
   gTools().NormHist( mva_b );
   PDF *fS = new PDF( "PDF Sig", mva_s, PDF::kSpline2 );
   PDF *fB = new PDF( "PDF Bkg", mva_b, PDF::kSpline2 );

   // calculate ROC integral from fS, fB
   Double_t ROC = MethodBase::GetROCIntegral(fS, fB);
   
   // calculate overlap integral: sum of min(signal, background) bin contents
   if (CalcOverlapIntergral) {
      gTools().NormHist( mva_s_overlap );
      gTools().NormHist( mva_b_overlap );

      fOverlap_integral = 0.0;
      for (Int_t bin=1; bin<=mva_s_overlap->GetNbinsX(); bin++){
         Double_t bc_s = mva_s_overlap->GetBinContent(bin);
         Double_t bc_b = mva_b_overlap->GetBinContent(bin);
         if (bc_s > 0.0 && bc_b > 0.0)
            fOverlap_integral += TMath::Min(bc_s, bc_b);
      }

      delete mva_s_overlap;
      delete mva_b_overlap;
   }

   delete mva_s;
   delete mva_b;
   delete fS;
   delete fB;
   // only delete mvaRes when it was allocated above (not the cached vector)
   if (!(singleMethod && eTT==Types::kTraining))  delete mvaRes;

   Data()->SetCurrentType(Types::kTraining);

   return ROC;
}
01049 
01050 void TMVA::MethodBoost::CalcMVAValues()
01051 {
01052    // Calculate MVA values of current method fMethods.back() on
01053    // training sample
01054 
01055    Data()->SetCurrentType(Types::kTraining);
01056    MethodBase* method = dynamic_cast<MethodBase*>(fMethods.back());
01057    if (!method) {
01058       Log() << kFATAL << "dynamic cast to MethodBase* failed" <<Endl;
01059       return;
01060    }
01061    // calculate MVA values
01062    for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
01063       Data()->GetEvent(ievt);
01064       fMVAvalues->at(ievt) = method->GetMvaValue();
01065    }
01066 }
01067 

Generated on Tue Jul 5 15:25:01 2011 for ROOT_528-00b_version by  doxygen 1.5.1