00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include <iostream>
00013 #include <vector>
00014 #include <limits>
00015
00016 #include "TChain.h"
00017 #include "TCut.h"
00018 #include "TDirectory.h"
00019 #include "TH1F.h"
00020 #include "TH1.h"
00021 #include "TMath.h"
00022 #include "TFile.h"
00023 #include "TStopwatch.h"
00024 #include "TROOT.h"
00025
00026 #include "TMVA/GeneticAlgorithm.h"
00027 #include "TMVA/GeneticFitter.h"
00028 #include "TMVA/IFitterTarget.h"
00029 #include "TMVA/Factory.h"
00030 #include "TMVA/Reader.h"
00031
00032 using namespace std;
00033
00034 namespace TMVA {
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044 void Training(){
00045 std::string factoryOptions( "!V:!Silent:Transformations=I;D;P;G,D:AnalysisType=Classification" );
00046 TString fname = "./tmva_example_multiple_background.root";
00047
00048 TFile *input(0);
00049 input = TFile::Open( fname );
00050
00051 TTree *signal = (TTree*)input->Get("TreeS");
00052 TTree *background0 = (TTree*)input->Get("TreeB0");
00053 TTree *background1 = (TTree*)input->Get("TreeB1");
00054 TTree *background2 = (TTree*)input->Get("TreeB2");
00055
00056
00057 Double_t signalWeight = 1.0;
00058 Double_t background0Weight = 1.0;
00059 Double_t background1Weight = 1.0;
00060 Double_t background2Weight = 1.0;
00061
00062
00063 TString outfileName( "TMVASignalBackground0.root" );
00064 TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
00065
00066
00067
00068
00069 TMVA::Factory *factory = new TMVA::Factory( "TMVAMultiBkg0", outputFile, factoryOptions );
00070 factory->AddVariable( "var1", "Variable 1", "", 'F' );
00071 factory->AddVariable( "var2", "Variable 2", "", 'F' );
00072 factory->AddVariable( "var3", "Variable 3", "units", 'F' );
00073 factory->AddVariable( "var4", "Variable 4", "units", 'F' );
00074
00075 factory->AddSignalTree ( signal, signalWeight );
00076 factory->AddBackgroundTree( background0, background0Weight );
00077
00078
00079 TCut mycuts = "";
00080 TCut mycutb = "";
00081
00082
00083 factory->PrepareTrainingAndTestTree( mycuts, mycutb,
00084 "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
00085
00086
00087 factory->BookMethod( TMVA::Types::kBDT, "BDTG",
00088 "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" );
00089 factory->TrainAllMethods();
00090 factory->TestAllMethods();
00091 factory->EvaluateAllMethods();
00092
00093 outputFile->Close();
00094
00095 delete factory;
00096
00097
00098
00099
00100
00101 outfileName = "TMVASignalBackground1.root";
00102 outputFile = TFile::Open( outfileName, "RECREATE" );
00103
00104 factory = new TMVA::Factory( "TMVAMultiBkg1", outputFile, factoryOptions );
00105 factory->AddVariable( "var1", "Variable 1", "", 'F' );
00106 factory->AddVariable( "var2", "Variable 2", "", 'F' );
00107 factory->AddVariable( "var3", "Variable 3", "units", 'F' );
00108 factory->AddVariable( "var4", "Variable 4", "units", 'F' );
00109
00110 factory->AddSignalTree ( signal, signalWeight );
00111 factory->AddBackgroundTree( background1, background1Weight );
00112
00113
00114
00115
00116 factory->PrepareTrainingAndTestTree( mycuts, mycutb,
00117 "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
00118
00119
00120 factory->BookMethod( TMVA::Types::kBDT, "BDTG",
00121 "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" );
00122 factory->TrainAllMethods();
00123 factory->TestAllMethods();
00124 factory->EvaluateAllMethods();
00125
00126 outputFile->Close();
00127
00128 delete factory;
00129
00130
00131
00132
00133
00134 outfileName = "TMVASignalBackground2.root";
00135 outputFile = TFile::Open( outfileName, "RECREATE" );
00136
00137 factory = new TMVA::Factory( "TMVAMultiBkg2", outputFile, factoryOptions );
00138 factory->AddVariable( "var1", "Variable 1", "", 'F' );
00139 factory->AddVariable( "var2", "Variable 2", "", 'F' );
00140 factory->AddVariable( "var3", "Variable 3", "units", 'F' );
00141 factory->AddVariable( "var4", "Variable 4", "units", 'F' );
00142
00143 factory->AddSignalTree ( signal, signalWeight );
00144 factory->AddBackgroundTree( background2, background2Weight );
00145
00146
00147
00148
00149 factory->PrepareTrainingAndTestTree( mycuts, mycutb,
00150 "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
00151
00152
00153 factory->BookMethod( TMVA::Types::kBDT, "BDTG",
00154 "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.5:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" );
00155 factory->TrainAllMethods();
00156 factory->TestAllMethods();
00157 factory->EvaluateAllMethods();
00158
00159 outputFile->Close();
00160
00161 delete factory;
00162
00163 }
00164
00165
00166
00167
00168
00169
00170
00171 void ApplicationCreateCombinedTree(){
00172
00173
00174 TString outfileName( "tmva_example_multiple_backgrounds__applied.root" );
00175 TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
00176 TTree* outputTree = new TTree("multiBkg","multiple backgrounds tree");
00177
00178 Float_t var1, var2;
00179 Float_t var3, var4;
00180 Int_t classID = 0;
00181 Float_t weight = 1.f;
00182
00183 Float_t classifier0, classifier1, classifier2;
00184
00185 outputTree->Branch("classID", &classID, "classID/I");
00186 outputTree->Branch("var1", &var1, "var1/F");
00187 outputTree->Branch("var2", &var2, "var2/F");
00188 outputTree->Branch("var3", &var3, "var3/F");
00189 outputTree->Branch("var4", &var4, "var4/F");
00190 outputTree->Branch("weight", &weight, "weight/F");
00191 outputTree->Branch("cls0", &classifier0, "cls0/F");
00192 outputTree->Branch("cls1", &classifier1, "cls1/F");
00193 outputTree->Branch("cls2", &classifier2, "cls2/F");
00194
00195
00196
00197 TMVA::Reader *reader0 = new TMVA::Reader( "!Color:!Silent" );
00198 TMVA::Reader *reader1 = new TMVA::Reader( "!Color:!Silent" );
00199 TMVA::Reader *reader2 = new TMVA::Reader( "!Color:!Silent" );
00200
00201 reader0->AddVariable( "var1", &var1 );
00202 reader0->AddVariable( "var2", &var2 );
00203 reader0->AddVariable( "var3", &var3 );
00204 reader0->AddVariable( "var4", &var4 );
00205
00206 reader1->AddVariable( "var1", &var1 );
00207 reader1->AddVariable( "var2", &var2 );
00208 reader1->AddVariable( "var3", &var3 );
00209 reader1->AddVariable( "var4", &var4 );
00210
00211 reader2->AddVariable( "var1", &var1 );
00212 reader2->AddVariable( "var2", &var2 );
00213 reader2->AddVariable( "var3", &var3 );
00214 reader2->AddVariable( "var4", &var4 );
00215
00216
00217 TString method = "BDT method";
00218 reader0->BookMVA( "BDT method", "weights/TMVAMultiBkg0_BDTG.weights.xml" );
00219 reader1->BookMVA( "BDT method", "weights/TMVAMultiBkg1_BDTG.weights.xml" );
00220 reader2->BookMVA( "BDT method", "weights/TMVAMultiBkg2_BDTG.weights.xml" );
00221
00222
00223 TFile *input(0);
00224 TString fname = "./tmva_example_multiple_background.root";
00225 input = TFile::Open( fname );
00226
00227 TTree* theTree = NULL;
00228
00229
00230 for( int treeNumber = 0; treeNumber < 4; ++treeNumber ) {
00231 if( treeNumber == 0 ){
00232 theTree = (TTree*)input->Get("TreeS");
00233 std::cout << "--- Select signal sample" << std::endl;
00234
00235 weight = 1;
00236 classID = 0;
00237 }else if( treeNumber == 1 ){
00238 theTree = (TTree*)input->Get("TreeB0");
00239 std::cout << "--- Select background 0 sample" << std::endl;
00240
00241 weight = 1;
00242 classID = 1;
00243 }else if( treeNumber == 2 ){
00244 theTree = (TTree*)input->Get("TreeB1");
00245 std::cout << "--- Select background 1 sample" << std::endl;
00246
00247 weight = 1;
00248 classID = 2;
00249 }else if( treeNumber == 3 ){
00250 theTree = (TTree*)input->Get("TreeB2");
00251 std::cout << "--- Select background 2 sample" << std::endl;
00252
00253 weight = 1;
00254 classID = 3;
00255 }
00256
00257
00258 theTree->SetBranchAddress( "var1", &var1 );
00259 theTree->SetBranchAddress( "var2", &var2 );
00260 theTree->SetBranchAddress( "var3", &var3 );
00261 theTree->SetBranchAddress( "var4", &var4 );
00262
00263
00264 std::cout << "--- Processing: " << theTree->GetEntries() << " events" << std::endl;
00265 TStopwatch sw;
00266 sw.Start();
00267 Int_t nEvent = theTree->GetEntries();
00268
00269 for (Long64_t ievt=0; ievt<nEvent; ievt++) {
00270
00271 if (ievt%1000 == 0){
00272 std::cout << "--- ... Processing event: " << ievt << std::endl;
00273 }
00274
00275 theTree->GetEntry(ievt);
00276
00277
00278 classifier0 = reader0->EvaluateMVA( method );
00279 classifier1 = reader1->EvaluateMVA( method );
00280 classifier2 = reader2->EvaluateMVA( method );
00281
00282 outputTree->Fill();
00283 }
00284
00285
00286
00287 sw.Stop();
00288 std::cout << "--- End of event loop: "; sw.Print();
00289 }
00290 input->Close();
00291
00292
00293
00294
00295
00296 outputFile->Write();
00297
00298 outputFile->Close();
00299
00300 std::cout << "--- Created root file: \"" << outfileName.Data() << "\" containing the MVA output histograms" << std::endl;
00301
00302 delete reader0;
00303 delete reader1;
00304 delete reader2;
00305
00306 std::cout << "==> Application of readers is done! combined tree created" << std::endl << std::endl;
00307
00308 }
00309
00310
00311
00312
00313
00314 class MyFitness : public IFitterTarget {
00315 public:
00316
00317 MyFitness( TChain* _chain ) : IFitterTarget() {
00318 chain = _chain;
00319
00320 hSignal = new TH1F("hsignal","hsignal",100,-1,1);
00321 hFP = new TH1F("hfp","hfp",100,-1,1);
00322 hTP = new TH1F("htp","htp",100,-1,1);
00323
00324 TString cutsAndWeightSignal = "weight*(classID==0)";
00325 nSignal = chain->Draw("Entry$/Entries$>>hsignal",cutsAndWeightSignal,"goff");
00326 weightsSignal = hSignal->Integral();
00327
00328 }
00329
00330
00331 Double_t EstimatorFunction( std::vector<Double_t> & factors ){
00332
00333 TString cutsAndWeightTruePositive = Form("weight*((classID==0) && cls0>%f && cls1>%f && cls2>%f )",factors.at(0), factors.at(1), factors.at(2));
00334 TString cutsAndWeightFalsePositive = Form("weight*((classID >0) && cls0>%f && cls1>%f && cls2>%f )",factors.at(0), factors.at(1), factors.at(2));
00335
00336
00337 Float_t nTP = chain->Draw("Entry$/Entries$>>htp",cutsAndWeightTruePositive,"goff");
00338 Float_t nFP = chain->Draw("Entry$/Entries$>>hfp",cutsAndWeightFalsePositive,"goff");
00339
00340 weightsTruePositive = hTP->Integral();
00341 weightsFalsePositive = hFP->Integral();
00342
00343 efficiency = 0;
00344 if( weightsSignal > 0 )
00345 efficiency = weightsTruePositive/weightsSignal;
00346
00347 purity = 0;
00348 if( weightsTruePositive+weightsFalsePositive > 0 )
00349 purity = weightsTruePositive/(weightsTruePositive+weightsFalsePositive);
00350
00351 Float_t effTimesPur = efficiency*purity;
00352
00353 Float_t toMinimize = std::numeric_limits<float>::max();
00354 if( effTimesPur > 0 )
00355 toMinimize = 1./(effTimesPur);
00356
00357
00358
00359 return toMinimize;
00360 }
00361
00362
00363 void Print(){
00364 std::cout << std::endl;
00365 std::cout << "======================" << std::endl
00366 << "Efficiency : " << efficiency << std::endl
00367 << "Purity : " << purity << std::endl << std::endl
00368 << "True positive weights : " << weightsTruePositive << std::endl
00369 << "False positive weights: " << weightsFalsePositive << std::endl
00370 << "Signal weights : " << weightsSignal << std::endl;
00371 }
00372
00373 Float_t nSignal;
00374
00375 Float_t efficiency;
00376 Float_t purity;
00377 Float_t weightsTruePositive;
00378 Float_t weightsFalsePositive;
00379 Float_t weightsSignal;
00380
00381
00382 private:
00383 TChain* chain;
00384 TH1F* hSignal;
00385 TH1F* hFP;
00386 TH1F* hTP;
00387
00388 };
00389
00390
00391
00392
00393
00394
00395
00396
00397
00398 void MaximizeSignificance(){
00399
00400
00401
00402 vector<Interval*> ranges;
00403 ranges.push_back( new Interval(-1,1) );
00404 ranges.push_back( new Interval(-1,1) );
00405 ranges.push_back( new Interval(-1,1) );
00406
00407 std::cout << "Classifier ranges (defined by the user)" << std::endl;
00408 for( std::vector<Interval*>::iterator it = ranges.begin(); it != ranges.end(); it++ ){
00409 std::cout << " range: " << (*it)->GetMin() << " " << (*it)->GetMax() << std::endl;
00410 }
00411
00412 TChain* chain = new TChain("multiBkg");
00413 chain->Add("tmva_example_multiple_backgrounds__applied.root");
00414
00415 IFitterTarget* myFitness = new MyFitness( chain );
00416
00417
00418
00419
00420
00421
00422
00423 const TString name( "multipleBackgroundGA" );
00424 const TString opts( "PopSize=100:Steps=30" );
00425
00426 GeneticFitter mg( *myFitness, name, ranges, opts);
00427
00428
00429 std::vector<Double_t> result;
00430 Double_t estimator = mg.Run(result);
00431
00432 dynamic_cast<MyFitness*>(myFitness)->Print();
00433 std::cout << std::endl;
00434
00435 int n = 0;
00436 for( std::vector<Double_t>::iterator it = result.begin(); it<result.end(); it++ ){
00437 std::cout << " cutValue[" << n << "] = " << (*it) << ";"<< std::endl;
00438 n++;
00439 }
00440
00441
00442 }
00443
00444
00445 }
00446
00447
00448
00449 int main( int argc, char** argv )
00450 {
00451 cout << "Start Test TMVAGAexample" << endl
00452 << "========================" << endl
00453 << endl;
00454
00455 gROOT->ProcessLine(".L createData.C+");
00456 gROOT->ProcessLine("create_MultipleBackground(2000)");
00457
00458
00459 cout << endl;
00460 cout << "========================" << endl;
00461 cout << "--- Training" << endl;
00462 TMVA::Training();
00463
00464 cout << endl;
00465 cout << "========================" << endl;
00466 cout << "--- Application & create combined tree" << endl;
00467 TMVA::ApplicationCreateCombinedTree();
00468
00469 cout << endl;
00470 cout << "========================" << endl;
00471 cout << "--- maximize significance" << endl;
00472 TMVA::MaximizeSignificance();
00473 }