00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039 #include <algorithm>
00040 #include <iomanip>
00041 #include <vector>
00042 #include <cmath>
00043
00044 #include "Riostream.h"
00045 #include "TRandom3.h"
00046 #include "TMath.h"
00047 #include "TObjString.h"
00048 #include "TH1F.h"
00049 #include "TGraph.h"
00050 #include "TSpline.h"
00051 #include "TDirectory.h"
00052
00053 #include "TMVA/MethodCompositeBase.h"
00054 #include "TMVA/MethodBase.h"
00055 #include "TMVA/MethodBoost.h"
00056 #include "TMVA/MethodCategory.h"
00057 #include "TMVA/Tools.h"
00058 #include "TMVA/ClassifierFactory.h"
00059 #include "TMVA/Timer.h"
00060 #include "TMVA/Types.h"
00061 #include "TMVA/PDF.h"
00062 #include "TMVA/Results.h"
00063 #include "TMVA/Config.h"
00064
00065 #include "TMVA/SeparationBase.h"
00066 #include "TMVA/GiniIndex.h"
00067 #include "TMVA/RegressionVariance.h"
00068
00069 REGISTER_METHOD(Boost)
00070
00071 ClassImp(TMVA::MethodBoost)
00072
00073
00074 TMVA::MethodBoost::MethodBoost( const TString& jobName,
00075 const TString& methodTitle,
00076 DataSetInfo& theData,
00077 const TString& theOption,
00078 TDirectory* theTargetDir ) :
00079 TMVA::MethodCompositeBase( jobName, Types::kBoost, methodTitle, theData, theOption, theTargetDir )
00080 , fBoostNum(0)
00081 , fMethodError(0)
00082 , fOrigMethodError(0)
00083 , fBoostWeight(0)
00084 , fADABoostBeta(0)
00085 , fRandomSeed(0)
00086 , fBoostedMethodTitle(methodTitle)
00087 , fBoostedMethodOptions(theOption)
00088 , fMonitorHist(0)
00089 , fMonitorBoostedMethod(kFALSE)
00090 , fMonitorTree(0)
00091 , fBoostStage(Types::kBoostProcBegin)
00092 , fDefaultHistNum(0)
00093 , fRecalculateMVACut(kFALSE)
00094 , fROC_training(0.0)
00095 , fOverlap_integral(0.0)
00096 , fMVAvalues(0)
00097 {
00098 fMVAvalues = new std::vector<Float_t>;
00099 }
00100
00101
00102 TMVA::MethodBoost::MethodBoost( DataSetInfo& dsi,
00103 const TString& theWeightFile,
00104 TDirectory* theTargetDir )
00105 : TMVA::MethodCompositeBase( Types::kBoost, dsi, theWeightFile, theTargetDir )
00106 , fBoostNum(0)
00107 , fMethodError(0)
00108 , fOrigMethodError(0)
00109 , fBoostWeight(0)
00110 , fADABoostBeta(0)
00111 , fRandomSeed(0)
00112 , fBoostedMethodTitle("")
00113 , fBoostedMethodOptions("")
00114 , fMonitorHist(0)
00115 , fMonitorBoostedMethod(kFALSE)
00116 , fMonitorTree(0)
00117 , fBoostStage(Types::kBoostProcBegin)
00118 , fDefaultHistNum(0)
00119 , fRecalculateMVACut(kFALSE)
00120 , fROC_training(0.0)
00121 , fOverlap_integral(0.0)
00122 , fMVAvalues(0)
00123 {
00124 fMVAvalues = new std::vector<Float_t>;
00125 }
00126
00127
00128 TMVA::MethodBoost::~MethodBoost( void )
00129 {
00130
00131 fMethodWeight.clear();
00132
00133
00134
00135 if(fMonitorHist) {
00136 for ( std::vector<TH1*>::iterator it = fMonitorHist->begin(); it != fMonitorHist->end(); ++it) delete *it;
00137 delete fMonitorHist;
00138 }
00139 fTrainSigMVAHist.clear();
00140 fTrainBgdMVAHist.clear();
00141 fBTrainSigMVAHist.clear();
00142 fBTrainBgdMVAHist.clear();
00143 fTestSigMVAHist.clear();
00144 fTestBgdMVAHist.clear();
00145
00146 if (fMVAvalues) {
00147 delete fMVAvalues;
00148 fMVAvalues = 0;
00149 }
00150 }
00151
00152
00153
00154 Bool_t TMVA::MethodBoost::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t )
00155 {
00156
00157 if( type == Types::kClassification && numberClasses == 2 ) return kTRUE;
00158
00159 return kFALSE;
00160 }
00161
00162
00163
00164 void TMVA::MethodBoost::DeclareOptions()
00165 {
00166 DeclareOptionRef( fBoostNum = 1, "Boost_Num",
00167 "Number of times the classifier is boosted");
00168
00169 DeclareOptionRef( fMonitorBoostedMethod = kTRUE, "Boost_MonitorMethod",
00170 "Whether to write monitoring histogram for each boosted classifier");
00171
00172 DeclareOptionRef(fBoostType = "AdaBoost", "Boost_Type", "Boosting type for the classifiers");
00173 AddPreDefVal(TString("AdaBoost"));
00174 AddPreDefVal(TString("Bagging"));
00175 AddPreDefVal(TString("HighEdgeGauss"));
00176 AddPreDefVal(TString("HighEdgeCoPara"));
00177
00178 DeclareOptionRef(fMethodWeightType = "ByError", "Boost_MethodWeightType",
00179 "How to set the final weight of the boosted classifiers");
00180 AddPreDefVal(TString("ByError"));
00181 AddPreDefVal(TString("Average"));
00182 AddPreDefVal(TString("ByROC"));
00183 AddPreDefVal(TString("ByOverlap"));
00184 AddPreDefVal(TString("LastMethod"));
00185
00186 DeclareOptionRef(fRecalculateMVACut = kTRUE, "Boost_RecalculateMVACut",
00187 "Whether to recalculate the classifier MVA Signallike cut at every boost iteration");
00188
00189 DeclareOptionRef(fADABoostBeta = 1.0, "Boost_AdaBoostBeta",
00190 "The ADA boost parameter that sets the effect of every boost step on the events' weights");
00191
00192 DeclareOptionRef(fTransformString = "step", "Boost_Transform",
00193 "Type of transform applied to every boosted method linear, log, step");
00194 AddPreDefVal(TString("step"));
00195 AddPreDefVal(TString("linear"));
00196 AddPreDefVal(TString("log"));
00197
00198 DeclareOptionRef(fRandomSeed = 0, "Boost_RandomSeed",
00199 "Seed for random number generator used for bagging");
00200
00201 TMVA::MethodCompositeBase::fMethods.reserve(fBoostNum);;
00202 }
00203
00204
00205 Bool_t TMVA::MethodBoost::BookMethod( Types::EMVA theMethod, TString methodTitle, TString theOption )
00206 {
00207
00208 fBoostedMethodName = Types::Instance().GetMethodName( theMethod );
00209 fBoostedMethodTitle = methodTitle;
00210 fBoostedMethodOptions = theOption;
00211 return kTRUE;
00212 }
00213
00214
////////////////////////////////////////////////////////////////////////////////
/// Nothing to initialise here; the monitoring setup happens in InitHistos().
void TMVA::MethodBoost::Init()
{}
00217
00218
00219 void TMVA::MethodBoost::InitHistos()
00220 {
00221
00222 if(fMonitorHist) {
00223 for ( std::vector<TH1*>::iterator it = fMonitorHist->begin(); it != fMonitorHist->end(); ++it) delete *it;
00224 delete fMonitorHist;
00225 }
00226 fMonitorHist = new std::vector<TH1*>();
00227 fMonitorHist->push_back(new TH1F("MethodWeight","Normalized Classifier Weight",fBoostNum,0,fBoostNum));
00228 fMonitorHist->push_back(new TH1F("BoostWeight","Boost Weight",fBoostNum,0,fBoostNum));
00229 fMonitorHist->push_back(new TH1F("ErrFraction","Error Fraction (by boosted event weights)",fBoostNum,0,fBoostNum));
00230 fMonitorHist->push_back(new TH1F("OrigErrFraction","Error Fraction (by original event weights)",fBoostNum,0,fBoostNum));
00231 fMonitorHist->push_back(new TH1F("ROCIntegral_test","ROC integral of single classifier (testing sample)",fBoostNum,0,fBoostNum));
00232 fMonitorHist->push_back(new TH1F("ROCIntegralBoosted_test","ROC integral of boosted method (testing sample)",fBoostNum,0,fBoostNum));
00233 fMonitorHist->push_back(new TH1F("ROCIntegral_train","ROC integral of single classifier (training sample)",fBoostNum,0,fBoostNum));
00234 fMonitorHist->push_back(new TH1F("ROCIntegralBoosted_train","ROC integral of boosted method (training sample)",fBoostNum,0,fBoostNum));
00235 fMonitorHist->push_back(new TH1F("OverlapIntegal_train","Overlap integral (training sample)",fBoostNum,0,fBoostNum));
00236 for ( std::vector<TH1*>::iterator it = fMonitorHist->begin(); it != fMonitorHist->end(); ++it ) (*it)->SetDirectory(0);
00237 fDefaultHistNum = fMonitorHist->size();
00238 (*fMonitorHist)[0]->GetXaxis()->SetTitle("Index of boosted classifier");
00239 (*fMonitorHist)[0]->GetYaxis()->SetTitle("Classifier Weight");
00240 (*fMonitorHist)[1]->GetXaxis()->SetTitle("Index of boosted classifier");
00241 (*fMonitorHist)[1]->GetYaxis()->SetTitle("Boost Weight");
00242 (*fMonitorHist)[2]->GetXaxis()->SetTitle("Index of boosted classifier");
00243 (*fMonitorHist)[2]->GetYaxis()->SetTitle("Error Fraction");
00244 (*fMonitorHist)[3]->GetXaxis()->SetTitle("Index of boosted classifier");
00245 (*fMonitorHist)[3]->GetYaxis()->SetTitle("Error Fraction");
00246 (*fMonitorHist)[4]->GetXaxis()->SetTitle("Index of boosted classifier");
00247 (*fMonitorHist)[4]->GetYaxis()->SetTitle("ROC integral of single classifier");
00248 (*fMonitorHist)[5]->GetXaxis()->SetTitle("Number of boosts");
00249 (*fMonitorHist)[5]->GetYaxis()->SetTitle("ROC integral boosted");
00250 (*fMonitorHist)[6]->GetXaxis()->SetTitle("Index of boosted classifier");
00251 (*fMonitorHist)[6]->GetYaxis()->SetTitle("ROC integral of single classifier");
00252 (*fMonitorHist)[7]->GetXaxis()->SetTitle("Number of boosts");
00253 (*fMonitorHist)[7]->GetYaxis()->SetTitle("ROC integral boosted");
00254 (*fMonitorHist)[8]->GetXaxis()->SetTitle("Index of boosted classifier");
00255 (*fMonitorHist)[8]->GetYaxis()->SetTitle("Overlap integral");
00256
00257 fMonitorTree= new TTree("MonitorBoost","Boost variables");
00258 fMonitorTree->Branch("iMethod",&fMethodIndex,"iMethod/I");
00259 fMonitorTree->Branch("boostWeight",&fBoostWeight,"boostWeight/D");
00260 fMonitorTree->Branch("errorFraction",&fMethodError,"errorFraction/D");
00261 fMonitorBoostedMethod = kTRUE;
00262 }
00263
00264
00265
////////////////////////////////////////////////////////////////////////////////
/// Dump the full boost configuration at debug level and repair a missing
/// monitoring setup: if the monitoring histograms were never booked, book
/// them and dump again.
void TMVA::MethodBoost::CheckSetup()
{
   Log() << kDEBUG << "CheckSetup: fBoostType="<<fBoostType<<" fMethodWeightType=" << fMethodWeightType << Endl;
   Log() << kDEBUG << "CheckSetup: fADABoostBeta="<<fADABoostBeta<<Endl;
   Log() << kDEBUG << "CheckSetup: fBoostWeight="<<fBoostWeight<<Endl;
   Log() << kDEBUG << "CheckSetup: fMethodError="<<fMethodError<<Endl;
   Log() << kDEBUG << "CheckSetup: fOrigMethodError="<<fOrigMethodError<<Endl;
   Log() << kDEBUG << "CheckSetup: fBoostNum="<<fBoostNum<< " fMonitorHist="<< fMonitorHist<< Endl;
   Log() << kDEBUG << "CheckSetup: fRandomSeed=" << fRandomSeed<< Endl;
   Log() << kDEBUG << "CheckSetup: fDefaultHistNum=" << fDefaultHistNum << " fRecalculateMVACut=" << (fRecalculateMVACut? "true" : "false") << Endl;
   Log() << kDEBUG << "CheckSetup: fTrainSigMVAHist.size()="<<fTrainSigMVAHist.size()<<Endl;
   Log() << kDEBUG << "CheckSetup: fTestSigMVAHist.size()="<<fTestSigMVAHist.size()<<Endl;
   Log() << kDEBUG << "CheckSetup: fMonitorBoostedMethod=" << (fMonitorBoostedMethod? "true" : "false") << Endl;
   Log() << kDEBUG << "CheckSetup: MName=" << fBoostedMethodName << " Title="<< fBoostedMethodTitle<< Endl;
   Log() << kDEBUG << "CheckSetup: MOptions="<< fBoostedMethodOptions << Endl;
   Log() << kDEBUG << "CheckSetup: fBoostStage=" << fBoostStage<<Endl;
   Log() << kDEBUG << "CheckSetup: fMonitorTree=" << fMonitorTree <<Endl;
   Log() << kDEBUG << "CheckSetup: fMethodIndex=" <<fMethodIndex << Endl;
   if (fMethods.size()>0) Log() << kDEBUG << "CheckSetup: fMethods[0]" <<fMethods[0]<<Endl;
   Log() << kDEBUG << "CheckSetup: fMethodWeight.size()" << fMethodWeight.size() << Endl;
   if (fMethodWeight.size()>0) Log() << kDEBUG << "CheckSetup: fMethodWeight[0]="<<fMethodWeight[0]<<Endl;
   Log() << kDEBUG << "CheckSetup: trying to repair things" << Endl;

   // InitHistos() sets fMonitorHist, so this recurses at most once
   if (fMonitorHist == 0){
      InitHistos();
      CheckSetup();
   }
}
00295
00296 void TMVA::MethodBoost::Train()
00297 {
00298 Double_t AllMethodsWeight=0;
00299 TDirectory* methodDir( 0 );
00300 TString dirName,dirTitle;
00301 Int_t StopCounter=0;
00302
00303 if (Data()->GetNTrainingEvents()==0) Log() << kFATAL << "<Train> Data() has zero events" << Endl;
00304 Data()->SetCurrentType(Types::kTraining);
00305
00306 if (fMethods.size() > 0) fMethods.clear();
00307 fMVAvalues->resize(Data()->GetNTrainingEvents(), 0.0);
00308
00309 Log() << kINFO << "Training "<< fBoostNum << " " << fBoostedMethodName << " Classifiers ... patience please" << Endl;
00310 Timer timer( fBoostNum, GetName() );
00311
00312 ResetBoostWeights();
00313
00314
00315 CleanBoostOptions();
00316
00317
00318 for (fMethodIndex=0;fMethodIndex<fBoostNum;fMethodIndex++) {
00319
00320 if (fMethodIndex>0) TMVA::MsgLogger::InhibitOutput();
00321 IMethod* method = ClassifierFactory::Instance().Create(std::string(fBoostedMethodName),
00322 GetJobName(),
00323 Form("%s_B%04i", fBoostedMethodName.Data(),fMethodIndex),
00324 DataInfo(),
00325 fBoostedMethodOptions);
00326 TMVA::MsgLogger::EnableOutput();
00327
00328
00329 MethodBase *meth = (dynamic_cast<MethodBase*>(method));
00330
00331 if(meth==0) continue;
00332
00333
00334 if( meth->GetMethodType() == Types::kCategory ){
00335 MethodCategory *methCat = (dynamic_cast<MethodCategory*>(meth));
00336 if( !methCat )
00337 Log() << kFATAL << "Method with type kCategory cannot be casted to MethodCategory. /MethodBoost" << Endl;
00338 methCat->fDataSetManager = fDataSetManager;
00339 }
00340
00341
00342 meth->SetMsgType(kWARNING);
00343 meth->SetupMethod();
00344 meth->ParseOptions();
00345
00346 meth->SetAnalysisType( GetAnalysisType() );
00347 meth->ProcessSetup();
00348 meth->CheckSetup();
00349
00350
00351 if (fMonitorBoostedMethod)
00352 {
00353 methodDir=MethodBaseDir()->GetDirectory(dirName=Form("%s_B%04i",fBoostedMethodName.Data(),fMethodIndex));
00354 if (methodDir==0)
00355 methodDir=BaseDir()->mkdir(dirName,dirTitle=Form("Directory Boosted %s #%04i", fBoostedMethodName.Data(),fMethodIndex));
00356 MethodBase* m = dynamic_cast<MethodBase*>(method);
00357 if(m) {
00358 m->SetMethodDir(methodDir);
00359 m->BaseDir()->cd();
00360 }
00361 }
00362
00363
00364 TMVA::MethodCompositeBase::fMethods.push_back(method);
00365 timer.DrawProgressBar( fMethodIndex );
00366 if (fMethodIndex==0) method->MonitorBoost(SetStage(Types::kBoostProcBegin));
00367 method->MonitorBoost(SetStage(Types::kBeforeTraining));
00368 TMVA::MsgLogger::InhibitOutput();
00369 SingleTrain();
00370 TMVA::MsgLogger::EnableOutput();
00371 method->WriteMonitoringHistosToFile();
00372
00373
00374 CalcMVAValues();
00375
00376 if (fMethodIndex==0 && fMonitorBoostedMethod) CreateMVAHistorgrams();
00377
00378
00379
00380 fROC_training = GetBoostROCIntegral(kTRUE, Types::kTraining, kTRUE);
00381
00382
00383 CalcMethodWeight();
00384 AllMethodsWeight += fMethodWeight.back();
00385
00386 (*fMonitorHist)[4]->SetBinContent(fMethodIndex+1, GetBoostROCIntegral(kTRUE, Types::kTesting));
00387 (*fMonitorHist)[5]->SetBinContent(fMethodIndex+1, GetBoostROCIntegral(kFALSE, Types::kTesting));
00388 (*fMonitorHist)[6]->SetBinContent(fMethodIndex+1, fROC_training);
00389 (*fMonitorHist)[7]->SetBinContent(fMethodIndex+1, GetBoostROCIntegral(kFALSE, Types::kTraining));
00390 (*fMonitorHist)[8]->SetBinContent(fMethodIndex+1, fOverlap_integral);
00391
00392
00393 method->MonitorBoost(SetStage(Types::kBeforeBoosting));
00394 SingleBoost();
00395 method->MonitorBoost(SetStage(Types::kAfterBoosting));
00396 (*fMonitorHist)[1]->SetBinContent(fMethodIndex+1,fBoostWeight);
00397 (*fMonitorHist)[2]->SetBinContent(fMethodIndex+1,fMethodError);
00398 (*fMonitorHist)[3]->SetBinContent(fMethodIndex+1,fOrigMethodError);
00399
00400 fMonitorTree->Fill();
00401
00402
00403
00404 Log() << kDEBUG << "AdaBoost (methodErr) err = " << fMethodError << Endl;
00405 if (fMethodError > 0.49999) StopCounter++;
00406 if (StopCounter > 0 && fBoostType != "Bagging")
00407 {
00408 timer.DrawProgressBar( fBoostNum );
00409 fBoostNum = fMethodIndex+1;
00410 Log() << kINFO << "Error rate has reached 0.5, boosting process stopped at #" << fBoostNum << " classifier" << Endl;
00411 if (fBoostNum < 5)
00412 Log() << kINFO << "The classifier might be too strong to boost with Beta = " << fADABoostBeta << ", try reducing it." <<Endl;
00413 for (Int_t i=0;i<fDefaultHistNum;i++)
00414 (*fMonitorHist)[i]->SetBins(fBoostNum,0,fBoostNum);
00415 break;
00416 }
00417 }
00418 if (fMethodWeightType == "LastMethod") { fMethodWeight.back() = AllMethodsWeight = 1.0; }
00419
00420 ResetBoostWeights();
00421 Timer* timer1=new Timer();
00422
00423 for (fMethodIndex=0;fMethodIndex<fBoostNum;fMethodIndex++) {
00424
00425 if (fMethods[fMethodIndex]->MonitorBoost(SetStage(Types::kBoostValidation))) {
00426 if (fMethodIndex==0) timer1 = new Timer( fBoostNum, GetName() );
00427
00428 timer1->DrawProgressBar( fMethodIndex );
00429
00430 if (fMethodIndex==fBoostNum) {
00431 Log() << kINFO << "Elapsed time: " << timer1->GetElapsedTime()
00432 << " " << Endl;
00433 }
00434 }
00435
00436 if (AllMethodsWeight != 0.0)
00437 fMethodWeight[fMethodIndex] = fMethodWeight[fMethodIndex] / AllMethodsWeight;
00438 (*fMonitorHist)[0]->SetBinContent(fMethodIndex+1,fMethodWeight[fMethodIndex]);
00439 }
00440
00441
00442
00443
00444
00445
00446 if (fMethods.size()==1) fMethodWeight[0] = 1.0;
00447
00448 fMethods.back()->MonitorBoost(SetStage(Types::kBoostProcEnd));
00449
00450 delete timer1;
00451 }
00452
00453
////////////////////////////////////////////////////////////////////////////////
/// Reset the option string handed to each boosted classifier to the current
/// (already parsed) option string of this method.
void TMVA::MethodBoost::CleanBoostOptions()
{
   fBoostedMethodOptions=GetOptions();
}
00458
00459
00460 void TMVA::MethodBoost::CreateMVAHistorgrams()
00461 {
00462 if (fBoostNum <=0) Log() << kFATAL << "CreateHistorgrams called before fBoostNum is initialized" << Endl;
00463
00464
00465 Double_t meanS, meanB, rmsS, rmsB, xmin, xmax, nrms = 10;
00466 Int_t signalClass = 0;
00467 if (DataInfo().GetClassInfo("Signal") != 0) {
00468 signalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
00469 }
00470 gTools().ComputeStat( Data()->GetEventCollection(), fMVAvalues,
00471 meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );
00472
00473 fNbins = gConfig().fVariablePlotting.fNbinsXOfROCCurve;
00474 xmin = TMath::Max( TMath::Min(meanS - nrms*rmsS, meanB - nrms*rmsB ), xmin );
00475 xmax = TMath::Min( TMath::Max(meanS + nrms*rmsS, meanB + nrms*rmsB ), xmax ) + 0.00001;
00476
00477
00478 for (Int_t imtd=0; imtd<fBoostNum; imtd++) {
00479 fTrainSigMVAHist .push_back( new TH1F( Form("MVA_Train_S_%04i",imtd), "MVA_Train_S", fNbins, xmin, xmax ) );
00480 fTrainBgdMVAHist .push_back( new TH1F( Form("MVA_Train_B%04i",imtd), "MVA_Train_B", fNbins, xmin, xmax ) );
00481 fBTrainSigMVAHist.push_back( new TH1F( Form("MVA_BTrain_S%04i",imtd), "MVA_BoostedTrain_S", fNbins, xmin, xmax ) );
00482 fBTrainBgdMVAHist.push_back( new TH1F( Form("MVA_BTrain_B%04i",imtd), "MVA_BoostedTrain_B", fNbins, xmin, xmax ) );
00483 fTestSigMVAHist .push_back( new TH1F( Form("MVA_Test_S%04i",imtd), "MVA_Test_S", fNbins, xmin, xmax ) );
00484 fTestBgdMVAHist .push_back( new TH1F( Form("MVA_Test_B%04i",imtd), "MVA_Test_B", fNbins, xmin, xmax ) );
00485 }
00486 }
00487
00488
00489 void TMVA::MethodBoost::ResetBoostWeights()
00490 {
00491
00492 for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
00493 Event *ev = Data()->GetEvent(ievt);
00494 ev->SetBoostWeight( 1.0 );
00495 }
00496 }
00497
00498
00499 void TMVA::MethodBoost::WriteMonitoringHistosToFile( void ) const
00500 {
00501 TDirectory* dir=0;
00502 if (fMonitorBoostedMethod) {
00503 for (Int_t imtd=0;imtd<fBoostNum;imtd++) {
00504
00505
00506 MethodBase* m = dynamic_cast<MethodBase*>(fMethods[imtd]);
00507 if(!m) continue;
00508 dir = m->BaseDir();
00509 dir->cd();
00510 fTrainSigMVAHist[imtd]->SetDirectory(dir);
00511 fTrainSigMVAHist[imtd]->Write();
00512 fTrainBgdMVAHist[imtd]->SetDirectory(dir);
00513 fTrainBgdMVAHist[imtd]->Write();
00514 fBTrainSigMVAHist[imtd]->SetDirectory(dir);
00515 fBTrainSigMVAHist[imtd]->Write();
00516 fBTrainBgdMVAHist[imtd]->SetDirectory(dir);
00517 fBTrainBgdMVAHist[imtd]->Write();
00518 }
00519 }
00520
00521
00522 BaseDir()->cd();
00523 for (UInt_t i=0;i<fMonitorHist->size();i++) {
00524 ((*fMonitorHist)[i])->Write(Form("Booster_%s",((*fMonitorHist)[i])->GetName()));
00525 }
00526
00527 fMonitorTree->Write();
00528 }
00529
00530
00531 void TMVA::MethodBoost::TestClassification()
00532 {
00533 MethodBase::TestClassification();
00534 if (fMonitorBoostedMethod) {
00535 UInt_t nloop = fTestSigMVAHist.size();
00536 if (fMethods.size()<nloop) nloop = fMethods.size();
00537
00538 Data()->SetCurrentType(Types::kTesting);
00539 for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
00540 Event* ev = Data()->GetEvent(ievt);
00541 Float_t w = ev->GetWeight();
00542 if (DataInfo().IsSignal(ev)) {
00543 for (UInt_t imtd=0; imtd<nloop; imtd++) {
00544 fTestSigMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
00545 }
00546 }
00547 else {
00548 for (UInt_t imtd=0; imtd<nloop; imtd++) {
00549 fTestBgdMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
00550 }
00551 }
00552 }
00553 Data()->SetCurrentType(Types::kTraining);
00554 }
00555 }
00556
00557
00558 void TMVA::MethodBoost::WriteEvaluationHistosToFile(Types::ETreeType treetype)
00559 {
00560 MethodBase::WriteEvaluationHistosToFile(treetype);
00561 if(treetype==Types::kTraining) return;
00562 UInt_t nloop = fTestSigMVAHist.size();
00563 if (fMethods.size()<nloop) nloop = fMethods.size();
00564 if (fMonitorBoostedMethod) {
00565 TDirectory* dir=0;
00566 for (UInt_t imtd=0;imtd<nloop;imtd++) {
00567
00568 MethodBase* mva = dynamic_cast<MethodBase*>(fMethods[imtd]);
00569 if(!mva) continue;
00570 dir = mva->BaseDir();
00571 if(dir==0) continue;
00572 dir->cd();
00573 fTestSigMVAHist[imtd]->SetDirectory(dir);
00574 fTestSigMVAHist[imtd]->Write();
00575 fTestBgdMVAHist[imtd]->SetDirectory(dir);
00576 fTestBgdMVAHist[imtd]->Write();
00577 }
00578 }
00579 }
00580
00581
////////////////////////////////////////////////////////////////////////////////
/// Nothing to process here; the declared option values are used directly
/// where they are needed.
void TMVA::MethodBoost::ProcessOptions()
{

}
00586
00587
00588 void TMVA::MethodBoost::SingleTrain()
00589 {
00590
00591 Data()->SetCurrentType(Types::kTraining);
00592 MethodBase* meth = dynamic_cast<MethodBase*>(GetLastMethod());
00593 if(meth)
00594 meth->TrainMethod();
00595 }
00596
00597
00598 void TMVA::MethodBoost::FindMVACut()
00599 {
00600
00601
00602
00603 MethodBase* lastMethod=dynamic_cast<MethodBase*>(fMethods.back());
00604 if (!lastMethod || lastMethod->GetMethodType() == Types::kDT ){ return;}
00605
00606 if (!fRecalculateMVACut && fMethodIndex>0) {
00607 MethodBase* m = dynamic_cast<MethodBase*>(fMethods[0]);
00608 if(m)
00609 lastMethod->SetSignalReferenceCut(m->GetSignalReferenceCut());
00610 } else {
00611
00612
00613 const Int_t nValBins=1000;
00614 Double_t* err=new Double_t[nValBins];
00615 const Double_t valmin=-1.5;
00616 const Double_t valmax=1.5;
00617 for (Int_t i=0;i<nValBins;i++) err[i]=0.;
00618 Double_t sum = 0.;
00619 for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
00620 Double_t weight = GetEvent(ievt)->GetWeight();
00621 sum +=weight;
00622 Double_t val=lastMethod->GetMvaValue();
00623 Int_t ibin = (Int_t) (((val-valmin)/(valmax-valmin))*nValBins);
00624
00625 if (ibin>=nValBins) ibin = nValBins-1;
00626 if (ibin<0) ibin = 0;
00627 if (DataInfo().IsSignal(Data()->GetEvent(ievt))){
00628 for (Int_t i=ibin;i<nValBins;i++) err[i]+=weight;
00629 }
00630 else {
00631 for (Int_t i=0;i<ibin;i++) err[i]+=weight;
00632 }
00633 }
00634 Double_t minerr=1.e6;
00635 Int_t minbin=-1;
00636 for (Int_t i=0;i<nValBins;i++){
00637 if (err[i]<=minerr){
00638 minerr=err[i];
00639 minbin=i;
00640 }
00641 }
00642 delete[] err;
00643
00644
00645 Double_t sigCutVal = valmin + ((valmax-valmin)*minbin)/Float_t(nValBins+1);
00646 lastMethod->SetSignalReferenceCut(sigCutVal);
00647
00648 Log() << kDEBUG << "(old step) Setting method cut to " <<lastMethod->GetSignalReferenceCut()<< Endl;
00649
00650 }
00651
00652 }
00653
00654
00655 void TMVA::MethodBoost::SingleBoost()
00656 {
00657 MethodBase* method = dynamic_cast<MethodBase*>(fMethods.back());
00658 if(!method) return;
00659 Event * ev; Float_t w,v,wo; Bool_t sig=kTRUE;
00660 Double_t sumAll=0, sumWrong=0, sumAllOrig=0, sumWrongOrig=0, sumAll1=0;
00661 Bool_t* WrongDetection=new Bool_t[Data()->GetNEvents()];
00662 for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) WrongDetection[ievt]=kTRUE;
00663
00664
00665 for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
00666 ev = Data()->GetEvent(ievt);
00667 sig=DataInfo().IsSignal(ev);
00668 v = fMVAvalues->at(ievt);
00669 w = ev->GetWeight();
00670 wo = ev->GetOriginalWeight();
00671 if (sig && fMonitorBoostedMethod) {
00672 fBTrainSigMVAHist[fMethodIndex]->Fill(v,w);
00673 fTrainSigMVAHist[fMethodIndex]->Fill(v,ev->GetOriginalWeight());
00674 }
00675 else if (fMonitorBoostedMethod) {
00676 fBTrainBgdMVAHist[fMethodIndex]->Fill(v,w);
00677 fTrainBgdMVAHist[fMethodIndex]->Fill(v,ev->GetOriginalWeight());
00678 }
00679 sumAll += w;
00680 sumAllOrig += wo;
00681 if ( sig != (fMVAvalues->at(ievt) > method->GetSignalReferenceCut()) ) {
00682 WrongDetection[ievt]=kTRUE;
00683 sumWrong+=w;
00684 sumWrongOrig+=wo;
00685 }
00686 else WrongDetection[ievt]=kFALSE;
00687 }
00688 fMethodError=sumWrong/sumAll;
00689 fOrigMethodError = sumWrongOrig/sumAllOrig;
00690 Log() << kDEBUG << "AdaBoost err (MethodErr1)= " << fMethodError<<" = wrong/all: " << sumWrong << "/" << sumAll<< " cut="<<method->GetSignalReferenceCut()<< Endl;
00691
00692
00693
00694 if (fMethodError>0 && fADABoostBeta == 1.0) {
00695 fBoostWeight = (1.0-fMethodError)/fMethodError;
00696 }
00697 else if (fMethodError>0 && fADABoostBeta != 1.0) {
00698 fBoostWeight = TMath::Power((1.0 - fMethodError)/fMethodError, fADABoostBeta);
00699 }
00700 else fBoostWeight = 1000;
00701
00702 Double_t alphaWeight = ( fBoostWeight > 0.0 ? TMath::Log(fBoostWeight) : 0.0);
00703 if (alphaWeight>5.) alphaWeight = 5.;
00704 if (alphaWeight<0.){
00705
00706 alphaWeight = -alphaWeight;
00707 }
00708 if (fBoostType == "AdaBoost") {
00709
00710
00711
00712
00713 Double_t newSum=0., oldSum=0.;
00714 for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
00715 ev = Data()->GetEvent(ievt);
00716 oldSum += ev->GetWeight();
00717
00718
00719 if (WrongDetection[ievt]) ev->ScaleBoostWeight(fBoostWeight);
00720 newSum += ev->GetWeight();
00721 }
00722
00723 Double_t normWeight = oldSum/newSum;
00724
00725
00726 for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
00727 Data()->GetEvent(ievt)->ScaleBoostWeight(normWeight);
00728 }
00729
00730 }
00731 else if (fBoostType == "Bagging") {
00732
00733 TRandom3*trandom = new TRandom3(fRandomSeed+fMethods.size());
00734 for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
00735 ev = Data()->GetEvent(ievt);
00736 ev->SetBoostWeight(trandom->Rndm());
00737 sumAll1+=ev->GetWeight();
00738 }
00739
00740
00741 Double_t Factor=sumAll/sumAll1;
00742 for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
00743 ev = Data()->GetEvent(ievt);
00744 ev->ScaleBoostWeight(Factor);
00745 }
00746 }
00747 else if (fBoostType == "HighEdgeGauss" ||
00748 fBoostType == "HighEdgeCoPara") {
00749
00750
00751 Double_t MVACutValue = method->GetSignalReferenceCut();
00752 sumAll1 = 0;
00753 for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
00754 ev = Data()->GetEvent(ievt);
00755 if (fBoostType == "HighEdgeGauss")
00756 ev->SetBoostWeight( TMath::Exp( -std::pow(fMVAvalues->at(ievt)-MVACutValue,2)/(0.1*fADABoostBeta) ) );
00757 else if (fBoostType == "HighEdgeCoPara")
00758 ev->SetBoostWeight( DataInfo().IsSignal(ev) ? TMath::Power(1.0-fMVAvalues->at(ievt),fADABoostBeta) : TMath::Power(fMVAvalues->at(ievt),fADABoostBeta) );
00759 else
00760 Log() << kFATAL << "Unknown event weight type!" << Endl;
00761
00762 sumAll1 += ev->GetWeight();
00763 }
00764
00765
00766
00767 Double_t Factor=sumAll/sumAll1;
00768 for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++)
00769 Data()->GetEvent(ievt)->ScaleBoostWeight(Factor);
00770 }
00771 delete[] WrongDetection;
00772 }
00773
00774
00775 void TMVA::MethodBoost::CalcMethodWeight()
00776 {
00777
00778
00779
00780 MethodBase* method = dynamic_cast<MethodBase*>(fMethods.back());
00781 if (!method) {
00782 Log() << kFATAL << "Dynamic cast to MethodBase* failed" <<Endl;
00783 return;
00784 }
00785
00786 Event * ev; Float_t w;
00787 Double_t sumAll=0, sumWrong=0;
00788
00789
00790 FindMVACut();
00791
00792
00793 for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
00794 ev = Data()->GetEvent(ievt);
00795 w = ev->GetWeight();
00796 sumAll += w;
00797 if ( DataInfo().IsSignal(ev) !=
00798 (fMVAvalues->at(ievt) > method->GetSignalReferenceCut()) )
00799 sumWrong += w;
00800 }
00801 fMethodError=sumWrong/sumAll;
00802
00803
00804
00805
00806 if (fMethodError>0 && fADABoostBeta == 1.0) {
00807 fBoostWeight = (1.0-fMethodError)/fMethodError;
00808 }
00809 else if (fMethodError>0 && fADABoostBeta != 1.0) {
00810 fBoostWeight = TMath::Power((1.0 - fMethodError)/fMethodError, fADABoostBeta);
00811 }
00812 else fBoostWeight = 1000;
00813
00814
00815 if (fBoostWeight <= 0.0) fBoostWeight = 1.0;
00816
00817
00818 if (fMethodWeightType == "ByError") fMethodWeight.push_back(TMath::Log(fBoostWeight));
00819 else if (fMethodWeightType == "Average") fMethodWeight.push_back(1.0);
00820 else if (fMethodWeightType == "ByROC") fMethodWeight.push_back(fROC_training);
00821 else if (fMethodWeightType == "ByOverlap") fMethodWeight.push_back((fOverlap_integral > 0.0 ? 1.0/fOverlap_integral : 1000.0));
00822 else fMethodWeight.push_back(0);
00823 }
00824
00825
00826 void TMVA::MethodBoost::GetHelpMessage() const
00827 {
00828
00829
00830
00831
00832 Log() << Endl;
00833 Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
00834 Log() << Endl;
00835 Log() << "This method combines several classifier of one species in a "<<Endl;
00836 Log() << "single multivariate quantity via the boost algorithm." << Endl;
00837 Log() << "the output is a weighted sum over all individual classifiers" <<Endl;
00838 Log() << "By default, the AdaBoost method is employed, which gives " << Endl;
00839 Log() << "events that were misclassified in the previous tree a larger " << Endl;
00840 Log() << "weight in the training of the following classifier."<<Endl;
00841 Log() << "Optionally, Bagged boosting can also be applied." << Endl;
00842 Log() << Endl;
00843 Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
00844 Log() << Endl;
00845 Log() << "The most important parameter in the configuration is the "<<Endl;
00846 Log() << "number of boosts applied (Boost_Num) and the choice of boosting"<<Endl;
00847 Log() << "(Boost_Type), which can be set to either AdaBoost or Bagging." << Endl;
00848 Log() << "AdaBoosting: The most important parameters in this configuration" <<Endl;
00849 Log() << "is the beta parameter (Boost_AdaBoostBeta) " << Endl;
00850 Log() << "When boosting a linear classifier, it is sometimes advantageous"<<Endl;
00851 Log() << "to transform the MVA output non-linearly. The following options" <<Endl;
00852 Log() << "are available: step, log, and minmax, the default is no transform."<<Endl;
00853 Log() <<Endl;
00854 Log() << "Some classifiers are hard to boost and do not improve much in"<<Endl;
00855 Log() << "their performance by boosting them, some even slightly deteriorate"<< Endl;
00856 Log() << "due to the boosting." <<Endl;
00857 Log() << "The booking of the boost method is special since it requires"<<Endl;
00858 Log() << "the booing of the method to be boosted and the boost itself."<<Endl;
00859 Log() << "This is solved by booking the method to be boosted and to add"<<Endl;
00860 Log() << "all Boost parameters, which all begin with \"Boost_\" to the"<<Endl;
00861 Log() << "options string. The factory separates the options and initiates"<<Endl;
00862 Log() << "the boost process. The TMVA macro directory contains the example"<<Endl;
00863 Log() << "macro \"Boost.C\"" <<Endl;
00864 }
00865
00866
////////////////////////////////////////////////////////////////////////////////
/// Boosting does not provide a variable ranking; always returns 0.
const TMVA::Ranking* TMVA::MethodBoost::CreateRanking()
{
   return 0;
}
00871
00872
00873 Double_t TMVA::MethodBoost::GetMvaValue( Double_t* err, Double_t* errUpper )
00874 {
00875
00876 Double_t mvaValue = 0;
00877 Double_t epsilon = TMath::Exp(-1.);
00878
00879 for (UInt_t i=0;i< fMethods.size(); i++){
00880 MethodBase* m = dynamic_cast<MethodBase*>(fMethods[i]);
00881 if(m==0) continue;
00882 Double_t val = fTmpEvent ? m->GetMvaValue(fTmpEvent) : m->GetMvaValue();
00883 Double_t sigcut = m->GetSignalReferenceCut();
00884
00885 if (fTransformString == "linear"){
00886
00887 }
00888 else if (fTransformString == "log"){
00889 if (val < sigcut) val = sigcut;
00890
00891 val = TMath::Log((val-sigcut)+epsilon);
00892 }
00893 else if (fTransformString == "step" ){
00894 if (val < sigcut) val = -1.;
00895 else val = 1.;
00896 }
00897 else {
00898 Log() << kFATAL << "error unknown transformation " << fTransformString<<Endl;
00899 }
00900 mvaValue+=val*fMethodWeight[i];
00901 }
00902
00903 NoErrorCalc(err, errUpper);
00904
00905 return mvaValue;
00906 }
00907
00908
Double_t TMVA::MethodBoost::GetBoostROCIntegral(Bool_t singleMethod, Types::ETreeType eTT, Bool_t CalcOverlapIntergral)
{
   // Calculate the ROC integral either of the last boosted classifier
   // alone (singleMethod == kTRUE) or of the whole weighted ensemble
   // (singleMethod == kFALSE), evaluated on the tree type 'eTT'
   // (training or testing sample).
   //
   // If CalcOverlapIntergral is set, additionally compute the overlap
   // integral of the signal and background MVA distributions (filled
   // with the current boosting-step event weights) and store it in the
   // data member fOverlap_integral.
   //
   // NOTE(review): on exit the dataset is always reset to kTraining,
   // independent of the 'eTT' that was passed in.

   Data()->SetCurrentType(eTT);

   // in single-method mode only the most recently boosted method is used
   MethodBase* method = singleMethod ? dynamic_cast<MethodBase*>(fMethods.back()) : 0;

   if (singleMethod && !method) {
      Log() << kFATAL << " What do you do? Your method:"
            << fMethods.back()->GetName()
            << " seems not to be a propper TMVA method"
            << Endl;
      std::exit(1);
   }
   Double_t err = 0.0;

   // Temporarily normalize the method weights so the ensemble output is
   // well-defined; the original weights are restored further below.
   std::vector<Double_t> OldMethodWeight(fMethodWeight);
   if (!singleMethod) {
      Double_t AllMethodsWeight = 0;
      for (Int_t i=0; i<=fMethodIndex; i++)
         AllMethodsWeight += fMethodWeight.at(i);

      // "LastMethod": evaluate with the last method's weight forced to 1
      if (fMethodWeightType == "LastMethod")
         fMethodWeight.back() = AllMethodsWeight = 1.0;
      if (AllMethodsWeight != 0.0) {
         for (Int_t i=0; i<=fMethodIndex; i++)
            fMethodWeight[i] /= AllMethodsWeight;
      }
   }

   // Collect the MVA response for every event.  For the single method on
   // the training sample the cached values in fMVAvalues are reused;
   // otherwise a fresh vector is filled (and deleted at the end).
   Double_t meanS, meanB, rmsS, rmsB, xmin, xmax, nrms = 10;
   std::vector <Float_t>* mvaRes;
   if (singleMethod && eTT==Types::kTraining)
      mvaRes = fMVAvalues;
   else {
      mvaRes = new std::vector <Float_t>(Data()->GetNEvents());
      for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
         Data()->GetEvent(ievt);
         (*mvaRes)[ievt] = singleMethod ? method->GetMvaValue(&err) : GetMvaValue(&err);
      }
   }

   // restore the original (un-normalized) method weights
   if (!singleMethod)
      fMethodWeight = OldMethodWeight;

   // determine signal class index and basic statistics of the response
   Int_t signalClass = 0;
   if (DataInfo().GetClassInfo("Signal") != 0) {
      signalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
   }
   gTools().ComputeStat( Data()->GetEventCollection(eTT), mvaRes,
                         meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );

   // histogram range: mean +- nrms*rms, clipped to the observed min/max
   fNbins = gConfig().fVariablePlotting.fNbinsXOfROCCurve;
   xmin = TMath::Max( TMath::Min(meanS - nrms*rmsS, meanB - nrms*rmsB ), xmin );
   xmax = TMath::Min( TMath::Max(meanS + nrms*rmsS, meanB + nrms*rmsB ), xmax ) + 0.0001;

   // fill signal/background response histograms (plus the overlap pair,
   // which uses the boosting weights instead of the original ones)
   TH1* mva_s = new TH1F( "MVA_S", "MVA_S", fNbins, xmin, xmax );
   TH1* mva_b = new TH1F( "MVA_B", "MVA_B", fNbins, xmin, xmax );
   TH1 *mva_s_overlap=0, *mva_b_overlap=0;
   if (CalcOverlapIntergral) {
      mva_s_overlap = new TH1F( "MVA_S_OVERLAP", "MVA_S_OVERLAP", fNbins, xmin, xmax );
      mva_b_overlap = new TH1F( "MVA_B_OVERLAP", "MVA_B_OVERLAP", fNbins, xmin, xmax );
   }
   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
      const Event* ev = GetEvent(ievt);
      // training sample: use original weights so boosting does not bias the ROC
      Float_t w = (eTT==Types::kTesting ? ev->GetWeight() : ev->GetOriginalWeight());
      if (DataInfo().IsSignal(ev)) mva_s->Fill( (*mvaRes)[ievt], w );
      else mva_b->Fill( (*mvaRes)[ievt], w );

      if (CalcOverlapIntergral) {
         Float_t w_ov = ev->GetWeight();
         if (DataInfo().IsSignal(ev))
            mva_s_overlap->Fill( (*mvaRes)[ievt], w_ov );
         else
            mva_b_overlap->Fill( (*mvaRes)[ievt], w_ov );
      }
   }
   gTools().NormHist( mva_s );
   gTools().NormHist( mva_b );
   PDF *fS = new PDF( "PDF Sig", mva_s, PDF::kSpline2 );
   PDF *fB = new PDF( "PDF Bkg", mva_b, PDF::kSpline2 );

   // ROC integral from the smoothed signal/background PDFs
   Double_t ROC = MethodBase::GetROCIntegral(fS, fB);

   // overlap integral: sum over bins of min(signal, background) content
   if (CalcOverlapIntergral) {
      gTools().NormHist( mva_s_overlap );
      gTools().NormHist( mva_b_overlap );

      fOverlap_integral = 0.0;
      for (Int_t bin=1; bin<=mva_s_overlap->GetNbinsX(); bin++){
         Double_t bc_s = mva_s_overlap->GetBinContent(bin);
         Double_t bc_b = mva_b_overlap->GetBinContent(bin);
         if (bc_s > 0.0 && bc_b > 0.0)
            fOverlap_integral += TMath::Min(bc_s, bc_b);
      }

      delete mva_s_overlap;
      delete mva_b_overlap;
   }

   delete mva_s;
   delete mva_b;
   delete fS;
   delete fB;
   // mvaRes was only allocated here when the cache was not reused
   if (!(singleMethod && eTT==Types::kTraining)) delete mvaRes;

   Data()->SetCurrentType(Types::kTraining);

   return ROC;
}
01049
01050 void TMVA::MethodBoost::CalcMVAValues()
01051 {
01052
01053
01054
01055 Data()->SetCurrentType(Types::kTraining);
01056 MethodBase* method = dynamic_cast<MethodBase*>(fMethods.back());
01057 if (!method) {
01058 Log() << kFATAL << "dynamic cast to MethodBase* failed" <<Endl;
01059 return;
01060 }
01061
01062 for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
01063 Data()->GetEvent(ievt);
01064 fMVAvalues->at(ievt) = method->GetMvaValue();
01065 }
01066 }
01067