00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090 #include <iostream>
00091 #include <cstdlib>
00092
00093 #include "Riostream.h"
00094 #include "TH1F.h"
00095 #include "TObjString.h"
00096 #include "TDirectory.h"
00097 #include "TMath.h"
00098 #include "TGraph.h"
00099 #include "TSpline.h"
00100 #include "TRandom3.h"
00101
00102 #include "TMVA/ClassifierFactory.h"
00103 #include "TMVA/MethodCuts.h"
00104 #include "TMVA/GeneticFitter.h"
00105 #include "TMVA/MinuitFitter.h"
00106 #include "TMVA/MCFitter.h"
00107 #include "TMVA/SimulatedAnnealingFitter.h"
00108 #include "TMVA/PDF.h"
00109 #include "TMVA/Tools.h"
00110 #include "TMVA/Timer.h"
00111 #include "TMVA/Interval.h"
00112 #include "TMVA/TSpline1.h"
00113 #include "TMVA/Config.h"
00114 #include "TMVA/VariableTransformBase.h"
00115 #include "TMVA/Results.h"
00116
// register the method with TMVA's classifier factory so it can be booked
// by name ("Cuts") through the Factory interface
REGISTER_METHOD(Cuts)

// ROOT dictionary/streamer boilerplate for TMVA::MethodCuts
ClassImp(TMVA::MethodCuts)

// sentinel representing a fully open cut boundary (effectively +-infinity)
const Double_t TMVA::MethodCuts::fgMaxAbsCutVal = 1.0e30;
00122
00123
////////////////////////////////////////////////////////////////////////////////
/// standard constructor: books the cut-optimisation classifier from a job
/// name, a method title, the dataset description and an option string.
/// All members are reset to safe defaults; real initialisation happens in
/// Init() and ProcessOptions().
TMVA::MethodCuts::MethodCuts( const TString& jobName,
                              const TString& methodTitle,
                              DataSetInfo& theData,
                              const TString& theOption,
                              TDirectory* theTargetDir ) :
   MethodBase( jobName, Types::kCuts, methodTitle, theData, theOption, theTargetDir ),
   fFitMethod ( kUseGeneticAlgorithm ),    // default minimiser (overridden via "FitMethod" option)
   fEffMethod ( kUseEventSelection ),      // default efficiency computation (overridden via "EffMethod")
   fTestSignalEff(0.7),                    // default working point used by GetMvaValue
   fEffSMin ( 0 ),
   fEffSMax ( 0 ),
   fCutRangeMin( 0 ),                      // per-variable option arrays, allocated in DeclareOptions
   fCutRangeMax( 0 ),
   fBinaryTreeS( 0 ),                      // search trees, built during Train
   fBinaryTreeB( 0 ),
   fCutMin ( 0 ),                          // [nvar][fNbins] cut windows, allocated in Init
   fCutMax ( 0 ),
   fTmpCutMin ( 0 ),                       // scratch buffers for ComputeEstimator
   fTmpCutMax ( 0 ),
   fAllVarsI ( 0 ),                        // per-variable "VarProp" option strings
   fNpar ( 0 ),
   fEffRef ( 0 ),
   fRangeSign ( 0 ),
   fRandom ( 0 ),
   fMeanS ( 0 ),                           // per-variable sample statistics (filled in Train)
   fMeanB ( 0 ),
   fRmsS ( 0 ),
   fRmsB ( 0 ),
   fEffBvsSLocal( 0 ),                     // background-vs-signal efficiency histogram
   fVarHistS ( 0 ),                        // PDF machinery (only used with EffMethod=EffPDF)
   fVarHistB ( 0 ),
   fVarHistS_smooth( 0 ),
   fVarHistB_smooth( 0 ),
   fVarPdfS ( 0 ),
   fVarPdfB ( 0 ),
   fNegEffWarning( kFALSE )                // emit negative-efficiency warning only once
{
   // nothing further to do here; see Init()
}
00163
00164
////////////////////////////////////////////////////////////////////////////////
/// constructor used when reading the classifier back from a weight file;
/// identical member defaults to the standard constructor
TMVA::MethodCuts::MethodCuts( DataSetInfo& theData,
                              const TString& theWeightFile,
                              TDirectory* theTargetDir ) :
   MethodBase( Types::kCuts, theData, theWeightFile, theTargetDir ),
   fFitMethod ( kUseGeneticAlgorithm ),    // default minimiser
   fEffMethod ( kUseEventSelection ),      // default efficiency computation
   fTestSignalEff(0.7),                    // default working point used by GetMvaValue
   fEffSMin ( 0 ),
   fEffSMax ( 0 ),
   fCutRangeMin( 0 ),
   fCutRangeMax( 0 ),
   fBinaryTreeS( 0 ),
   fBinaryTreeB( 0 ),
   fCutMin ( 0 ),
   fCutMax ( 0 ),
   fTmpCutMin ( 0 ),
   fTmpCutMax ( 0 ),
   fAllVarsI ( 0 ),
   fNpar ( 0 ),
   fEffRef ( 0 ),
   fRangeSign ( 0 ),
   fRandom ( 0 ),
   fMeanS ( 0 ),
   fMeanB ( 0 ),
   fRmsS ( 0 ),
   fRmsB ( 0 ),
   fEffBvsSLocal( 0 ),
   fVarHistS ( 0 ),
   fVarHistB ( 0 ),
   fVarHistS_smooth( 0 ),
   fVarHistB_smooth( 0 ),
   fVarPdfS ( 0 ),
   fVarPdfB ( 0 ),
   fNegEffWarning( kFALSE )                // emit negative-efficiency warning only once
{
   // nothing further to do here; see Init()
}
00202
00203
00204 Bool_t TMVA::MethodCuts::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses,
00205 UInt_t )
00206 {
00207
00208 return (type == Types::kClassification && numberClasses == 2);
00209 }
00210
00211
////////////////////////////////////////////////////////////////////////////////
/// default initialisation called by all constructors: resets pointers and
/// allocates the per-variable working arrays
void TMVA::MethodCuts::Init( void )
{
   // reset all owned pointers to a safe state
   fVarHistS = fVarHistB = 0;
   fVarHistS_smooth = fVarHistB_smooth = 0;
   fVarPdfS = fVarPdfB = 0;
   fFitParams = 0;
   fBinaryTreeS = fBinaryTreeB = 0;
   fEffSMin = 0;
   fEffSMax = 0;

   // two fit parameters per variable: window lower edge and window width;
   // the range sign (+1) means the window is parameterised from the lower edge
   fNpar = 2*GetNvar();
   fRangeSign = new vector<Int_t> ( GetNvar() );
   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*fRangeSign)[ivar] = +1;

   // per-variable sample statistics, filled in Train()
   fMeanS = new vector<Double_t>( GetNvar() );
   fMeanB = new vector<Double_t>( GetNvar() );
   fRmsS = new vector<Double_t>( GetNvar() );
   fRmsB = new vector<Double_t>( GetNvar() );

   // by default no cut direction is enforced for any variable
   fFitParams = new vector<EFitParameters>( GetNvar() );
   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*fFitParams)[ivar] = kNotEnforced;

   fFitMethod = kUseMonteCarlo;
   fTestSignalEff = -1;

   // cut windows: one (min,max) pair per variable and per signal-efficiency bin
   // NOTE(review): fNbins is read here but not set in this method — it is
   // presumably initialised before Init() runs; confirm against the header/base
   fCutMin = new Double_t*[GetNvar()];
   fCutMax = new Double_t*[GetNvar()];
   for (UInt_t i=0; i<GetNvar(); i++) {
      fCutMin[i] = new Double_t[fNbins];
      fCutMax[i] = new Double_t[fNbins];
   }

   // initialise all cut windows to zero
   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
      for (Int_t ibin=0; ibin<fNbins; ibin++) {
         fCutMin[ivar][ibin] = 0;
         fCutMax[ivar][ibin] = 0;
      }
   }

   // scratch buffers reused on every ComputeEstimator call
   fTmpCutMin = new Double_t[GetNvar()];
   fTmpCutMax = new Double_t[GetNvar()];
}
00259
00260
00261 TMVA::MethodCuts::~MethodCuts( void )
00262 {
00263
00264 delete fRangeSign;
00265 delete fMeanS;
00266 delete fMeanB;
00267 delete fRmsS;
00268 delete fRmsB;
00269 delete fFitParams;
00270 delete fEffBvsSLocal;
00271
00272 if (NULL != fCutRangeMin) delete [] fCutRangeMin;
00273 if (NULL != fCutRangeMax) delete [] fCutRangeMax;
00274 if (NULL != fAllVarsI) delete [] fAllVarsI;
00275
00276 for (UInt_t i=0;i<GetNvar();i++) {
00277 if (NULL != fCutMin[i] ) delete [] fCutMin[i];
00278 if (NULL != fCutMax[i] ) delete [] fCutMax[i];
00279 if (NULL != fCutRange[i]) delete fCutRange[i];
00280 }
00281
00282 if (NULL != fCutMin) delete [] fCutMin;
00283 if (NULL != fCutMax) delete [] fCutMax;
00284
00285 if (NULL != fTmpCutMin) delete [] fTmpCutMin;
00286 if (NULL != fTmpCutMax) delete [] fTmpCutMax;
00287
00288 if (NULL != fBinaryTreeS) delete fBinaryTreeS;
00289 if (NULL != fBinaryTreeB) delete fBinaryTreeB;
00290 }
00291
00292
////////////////////////////////////////////////////////////////////////////////
/// define the options (key words) that can be set in the booking string:
///
///   FitMethod   <string> minimisation method
///      GA        genetic algorithm (default, recommended)
///      SA        simulated annealing
///      MC        Monte Carlo sampling
///      MCEvents  Monte-Carlo event sampling
///      MINUIT    MINUIT fit (testing only, deprecated)
///      EventScan full scan over signal event pairs
///
///   EffMethod   <string> efficiency computation: event counting ("EffSel",
///               default) or PDF integration ("EffPDF")
///
///   CutRangeMin/CutRangeMax  <float, per variable> user-defined cut range
///   VarProp     <string, per variable> enforced cut direction
void TMVA::MethodCuts::DeclareOptions()
{
   DeclareOptionRef(fFitMethodS = "GA", "FitMethod", "Minimisation Method (GA, SA, and MC are the primary methods to be used; the others have been introduced for testing purposes and are depreciated)");
   AddPreDefVal(TString("GA"));
   AddPreDefVal(TString("SA"));
   AddPreDefVal(TString("MC"));
   AddPreDefVal(TString("MCEvents"));
   AddPreDefVal(TString("MINUIT"));
   AddPreDefVal(TString("EventScan"));

   DeclareOptionRef(fEffMethodS = "EffSel", "EffMethod", "Selection Method");
   AddPreDefVal(TString("EffSel"));
   AddPreDefVal(TString("EffPDF"));

   // allocate per-variable range options; -1/-1 marks "not set by the user"
   fCutRange.resize(GetNvar());
   fCutRangeMin = new Double_t[GetNvar()];
   fCutRangeMax = new Double_t[GetNvar()];
   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
      fCutRange[ivar] = 0;
      fCutRangeMin[ivar] = fCutRangeMax[ivar] = -1;
   }

   DeclareOptionRef( fCutRangeMin, GetNvar(), "CutRangeMin", "Minimum of allowed cut range (set per variable)" );
   DeclareOptionRef( fCutRangeMax, GetNvar(), "CutRangeMax", "Maximum of allowed cut range (set per variable)" );

   // per-variable cut-direction enforcement, parsed in ProcessOptions
   fAllVarsI = new TString[GetNvar()];

   for (UInt_t i=0; i<GetNvar(); i++) fAllVarsI[i] = "NotEnforced";

   DeclareOptionRef(fAllVarsI, GetNvar(), "VarProp", "Categorisation of cuts");
   AddPreDefVal(TString("NotEnforced"));
   AddPreDefVal(TString("FMax"));
   AddPreDefVal(TString("FMin"));
   AddPreDefVal(TString("FSmart"));
}
00347
00348
00349 void TMVA::MethodCuts::ProcessOptions()
00350 {
00351
00352
00353
00354 if (IsNormalised()) {
00355 Log() << kWARNING << "Normalisation of the input variables for cut optimisation is not" << Endl;
00356 Log() << kWARNING << "supported because this provides intransparent cut values, and no" << Endl;
00357 Log() << kWARNING << "improvement in the performance of the algorithm." << Endl;
00358 Log() << kWARNING << "Please remove \"Normalise\" option from booking option string" << Endl;
00359 Log() << kWARNING << "==> Will reset normalisation flag to \"False\"" << Endl;
00360 SetNormalised( kFALSE );
00361 }
00362
00363 if (IgnoreEventsWithNegWeightsInTraining()) {
00364 Log() << kFATAL << "Mechanism to ignore events with negative weights in training not yet available for method: "
00365 << GetMethodTypeName()
00366 << " --> Please remove \"IgnoreNegWeightsInTraining\" option from booking string."
00367 << Endl;
00368 }
00369
00370 if (fFitMethodS == "MC" ) fFitMethod = kUseMonteCarlo;
00371 else if (fFitMethodS == "MCEvents") fFitMethod = kUseMonteCarloEvents;
00372 else if (fFitMethodS == "GA" ) fFitMethod = kUseGeneticAlgorithm;
00373 else if (fFitMethodS == "SA" ) fFitMethod = kUseSimulatedAnnealing;
00374 else if (fFitMethodS == "MINUIT" ) {
00375 fFitMethod = kUseMinuit;
00376 Log() << kWARNING << "poor performance of MINUIT in MethodCuts; preferred fit method: GA" << Endl;
00377 }
00378 else if (fFitMethodS == "EventScan" ) fFitMethod = kUseEventScan;
00379 else Log() << kFATAL << "unknown minimisation method: " << fFitMethodS << Endl;
00380
00381 if (fEffMethodS == "EFFSEL" ) fEffMethod = kUseEventSelection;
00382 else if (fEffMethodS == "EFFPDF" ) fEffMethod = kUsePDFs;
00383 else fEffMethod = kUseEventSelection;
00384
00385
00386 Log() << kINFO << Form("Use optimization method: \"%s\"",
00387 (fFitMethod == kUseMonteCarlo) ? "Monte Carlo" :
00388 (fFitMethod == kUseMonteCarlo) ? "Monte-Carlo-Event sampling" :
00389 (fFitMethod == kUseEventScan) ? "Full Event Scan (slow)" :
00390 (fFitMethod == kUseMinuit) ? "MINUIT" : "Genetic Algorithm" ) << Endl;
00391 Log() << kINFO << Form("Use efficiency computation method: \"%s\"",
00392 (fEffMethod == kUseEventSelection) ? "Event Selection" : "PDF" ) << Endl;
00393
00394
00395 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00396 fCutRange[ivar] = new Interval( fCutRangeMin[ivar], fCutRangeMax[ivar] );
00397 }
00398
00399
00400 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00401 EFitParameters theFitP = kNotEnforced;
00402 if (fAllVarsI[ivar] == "" || fAllVarsI[ivar] == "NotEnforced") theFitP = kNotEnforced;
00403 else if (fAllVarsI[ivar] == "FMax" ) theFitP = kForceMax;
00404 else if (fAllVarsI[ivar] == "FMin" ) theFitP = kForceMin;
00405 else if (fAllVarsI[ivar] == "FSmart" ) theFitP = kForceSmart;
00406 else {
00407 Log() << kFATAL << "unknown value \'" << fAllVarsI[ivar]
00408 << "\' for fit parameter option " << Form("VarProp[%i]",ivar) << Endl;
00409 }
00410 (*fFitParams)[ivar] = theFitP;
00411
00412 if (theFitP != kNotEnforced)
00413 Log() << kINFO << "Use \"" << fAllVarsI[ivar]
00414 << "\" cuts for variable: " << "'" << (*fInputVars)[ivar] << "'" << Endl;
00415 }
00416 }
00417
00418
////////////////////////////////////////////////////////////////////////////////
/// cut evaluation: returns 1.0 if the event passes the cut window chosen for
/// the requested signal efficiency (fTestSignalEff), 0.0 otherwise
Double_t TMVA::MethodCuts::GetMvaValue( Double_t* err, Double_t* errUpper )
{
   // no error estimate is possible for a pass/fail decision
   NoErrorCalc(err, errUpper);

   // sanity check: training must have allocated and filled the cut arrays
   if (fCutMin == NULL || fCutMax == NULL || fNbins == 0) {
      Log() << kFATAL << "<Eval_Cuts> fCutMin/Max have zero pointer. "
            << "Did you book Cuts ?" << Endl;
   }

   const Event* ev = GetEvent();

   // evaluate only if a valid working point was requested
   if (fTestSignalEff > 0) {
      // locate the signal-efficiency bin of the working point
      // NOTE(review): TH1::FindBin returns a 1-based bin number (0 = underflow),
      // yet ibin is used directly as a 0-based index into fCutMin/fCutMax;
      // GetCuts() decrements the bin first — this looks like an off-by-one
      // inconsistency; confirm the intended bin convention before changing
      Int_t ibin = fEffBvsSLocal->FindBin( fTestSignalEff );
      if (ibin < 0 ) ibin = 0;
      else if (ibin >= fNbins) ibin = fNbins - 1;

      // the event passes only if it lies inside the window of every variable
      Bool_t passed = kTRUE;
      for (UInt_t ivar=0; ivar<GetNvar(); ivar++)
         passed &= ( (ev->GetValue(ivar) > fCutMin[ivar][ibin]) &&
                     (ev->GetValue(ivar) <= fCutMax[ivar][ibin]) );

      return passed ? 1. : 0. ;
   }
   else return 0;
}
00450
00451
////////////////////////////////////////////////////////////////////////////////
/// print the optimised cut window for the requested signal efficiency,
/// one line per input variable, together with the corresponding background
/// efficiency and the applied input-variable transformation (if any)
void TMVA::MethodCuts::PrintCuts( Double_t effS ) const
{
   std::vector<Double_t> cutsMin;
   std::vector<Double_t> cutsMax;
   Int_t ibin = fEffBvsSLocal->FindBin( effS );

   // retrieve the cut window; trueEffS is the efficiency actually achievable
   Double_t trueEffS = GetCuts( effS, cutsMin, cutsMax );

   // build the list of variable labels, depending on how many input
   // transformations are applied
   std::vector<TString>* varVec = 0;
   if (GetTransformationHandler().GetNumOfTransformations() == 0) {
      // no transformation: plain variable labels
      varVec = new std::vector<TString>;
      for (UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
         varVec->push_back( DataInfo().GetVariableInfo(ivar).GetLabel() );
      }
   }
   else if (GetTransformationHandler().GetNumOfTransformations() == 1) {
      // single transformation: use its string representation
      // NOTE(review): the returned vector is deleted below, i.e. ownership is
      // assumed to pass to us — confirm against the transformation handler API
      varVec = GetTransformationHandler().GetTransformationStringsOfLastTransform();
   }
   else {
      // several chained transformations: only tag the labels as transformed
      varVec = new std::vector<TString>;
      for (UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
         varVec->push_back( DataInfo().GetVariableInfo(ivar).GetLabel() + " [transformed]" );
      }
   }

   // width of the widest label, for aligned output
   UInt_t maxL = 0;
   for (UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
      if ((UInt_t)(*varVec)[ivar].Length() > maxL) maxL = (*varVec)[ivar].Length();
   }
   UInt_t maxLine = 20+maxL+16;

   for (UInt_t i=0; i<maxLine; i++) Log() << "-";
   Log() << Endl;
   Log() << kINFO << "Cut values for requested signal efficiency: " << trueEffS << Endl;
   Log() << kINFO << "Corresponding background efficiency       : " << fEffBvsSLocal->GetBinContent( ibin ) << Endl;
   if (GetTransformationHandler().GetNumOfTransformations() == 1) {
      Log() << kINFO << "Transformation applied to input variables : \""
            << GetTransformationHandler().GetNameOfLastTransform() << "\"" << Endl;
   }
   else if (GetTransformationHandler().GetNumOfTransformations() > 1) {
      Log() << kINFO << "[ More than one (=" << GetTransformationHandler().GetNumOfTransformations() << ") "
            << " transformations applied in transformation chain; cuts applied on transformed quantities ] " << Endl;
   }
   else {
      Log() << kINFO << "Transformation applied to input variables : None" << Endl;
   }
   for (UInt_t i=0; i<maxLine; i++) Log() << "-";
   Log() << Endl;
   // one line per variable: cutMin < variable <= cutMax
   for (UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
      Log() << kINFO
            << "Cut[" << setw(2) << ivar << "]: "
            << setw(10) << cutsMin[ivar]
            << " < "
            << setw(maxL) << (*varVec)[ivar]
            << " <= "
            << setw(10) << cutsMax[ivar] << Endl;
   }
   for (UInt_t i=0; i<maxLine; i++) Log() << "-";
   Log() << Endl;

   delete varVec;
}
00520
00521
00522 Double_t TMVA::MethodCuts::GetCuts( Double_t effS, Double_t* cutMin, Double_t* cutMax ) const
00523 {
00524
00525
00526
00527 std::vector<Double_t> cMin( GetNvar() );
00528 std::vector<Double_t> cMax( GetNvar() );
00529 Double_t trueEffS = GetCuts( effS, cMin, cMax );
00530 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00531 cutMin[ivar] = cMin[ivar];
00532 cutMax[ivar] = cMax[ivar];
00533 }
00534 return trueEffS;
00535 }
00536
00537
00538 Double_t TMVA::MethodCuts::GetCuts( Double_t effS,
00539 std::vector<Double_t>& cutMin,
00540 std::vector<Double_t>& cutMax ) const
00541 {
00542
00543
00544
00545 Int_t ibin = fEffBvsSLocal->FindBin( effS );
00546
00547
00548 Double_t trueEffS = fEffBvsSLocal->GetBinLowEdge( ibin );
00549
00550 ibin--;
00551 if (ibin < 0 ) ibin = 0;
00552 else if (ibin >= fNbins) ibin = fNbins - 1;
00553
00554 cutMin.clear();
00555 cutMax.clear();
00556 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00557 cutMin.push_back( fCutMin[ivar][ibin] );
00558 cutMax.push_back( fCutMax[ivar][ibin] );
00559 }
00560
00561 return trueEffS;
00562 }
00563
00564
////////////////////////////////////////////////////////////////////////////////
/// training: optimise one cut window per signal-efficiency bin using the
/// configured minimisation method; results are stored in fCutMin/fCutMax and
/// fEffBvsSLocal via the estimator callbacks
void TMVA::MethodCuts::Train( void )
{
   // PDF-based efficiency needs the per-variable reference PDFs first
   if (fEffMethod == kUsePDFs) CreateVariablePDFs();

   // rebuild the search trees from scratch
   if (fBinaryTreeS != 0) { delete fBinaryTreeS; fBinaryTreeS = 0; }
   if (fBinaryTreeB != 0) { delete fBinaryTreeB; fBinaryTreeB = 0; }

   // fill binary search trees with signal and background training events
   fBinaryTreeS = new BinarySearchTree();
   fBinaryTreeS->Fill( GetEventCollection(Types::kTraining), fSignalClass );
   fBinaryTreeB = new BinarySearchTree();
   fBinaryTreeB->Fill( GetEventCollection(Types::kTraining), fBackgroundClass );

   for (UInt_t ivar =0; ivar < Data()->GetNVariables(); ivar++) {
      // cache per-variable statistics (used e.g. for the "FSmart" decision)
      (*fMeanS)[ivar] = fBinaryTreeS->Mean(Types::kSignal, ivar);
      (*fRmsS)[ivar] = fBinaryTreeS->RMS (Types::kSignal, ivar);
      (*fMeanB)[ivar] = fBinaryTreeB->Mean(Types::kBackground, ivar);
      (*fRmsB)[ivar] = fBinaryTreeB->RMS (Types::kBackground, ivar);

      // overall value range of this variable (signal and background combined)
      Double_t xmin = TMath::Min(fBinaryTreeS->Min(Types::kSignal, ivar),
                                 fBinaryTreeB->Min(Types::kBackground, ivar));
      Double_t xmax = TMath::Max(fBinaryTreeS->Max(Types::kSignal, ivar),
                                 fBinaryTreeB->Max(Types::kBackground, ivar));

      // add a 1% safety margin on both sides
      Double_t eps = 0.01*(xmax - xmin);
      xmin -= eps;
      xmax += eps;

      // if the user did not set a cut range, use the data range; otherwise
      // shrink the user range to the data range where the data is narrower
      if (TMath::Abs(fCutRange[ivar]->GetMin() - fCutRange[ivar]->GetMax()) < 1.0e-300 ) {
         fCutRange[ivar]->SetMin( xmin );
         fCutRange[ivar]->SetMax( xmax );
      }
      else if (xmin > fCutRange[ivar]->GetMin()) fCutRange[ivar]->SetMin( xmin );
      else if (xmax < fCutRange[ivar]->GetMax()) fCutRange[ivar]->SetMax( xmax );
   }

   vector<TH1F*> signalDist, bkgDist;

   // (re)create the histogram of background efficiency vs signal efficiency
   delete fEffBvsSLocal;
   fEffBvsSLocal = new TH1F( GetTestvarName() + "_effBvsSLocal",
                             TString(GetName()) + " efficiency of B vs S", fNbins, 0.0, 1.0 );
   fEffBvsSLocal->SetDirectory(0); // it's local, not managed by the current file/directory

   // mark all bins as "not yet filled" (estimator recognises -0.1 as unset)
   for (Int_t ibin=1; ibin<=fNbins; ibin++) fEffBvsSLocal->SetBinContent( ibin, -0.1 );

   // ---- fitter-based optimisation (GA, MC, MINUIT, SA)
   if (fFitMethod == kUseGeneticAlgorithm ||
       fFitMethod == kUseMonteCarlo ||
       fFitMethod == kUseMinuit ||
       fFitMethod == kUseSimulatedAnnealing) {

      // fit parameter ranges: two intervals per variable (window start, width)
      vector<Interval*> ranges;

      for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {

         // integer variables get one parameter point per integer value
         Int_t nbins = 0;
         if (DataInfo().GetVariableInfo(ivar).GetVarType() == 'I') {
            nbins = Int_t(fCutRange[ivar]->GetMax() - fCutRange[ivar]->GetMin()) + 1;
         }

         // "FSmart": pick the forced direction from the sample means
         if ((*fFitParams)[ivar] == kForceSmart) {
            if ((*fMeanS)[ivar] > (*fMeanB)[ivar]) (*fFitParams)[ivar] = kForceMax;
            else (*fFitParams)[ivar] = kForceMin;
         }

         if ((*fFitParams)[ivar] == kForceMin) {
            // window pinned to the lower boundary; only the width varies
            ranges.push_back( new Interval( fCutRange[ivar]->GetMin(), fCutRange[ivar]->GetMin(), nbins ) );
            ranges.push_back( new Interval( 0, fCutRange[ivar]->GetMax() - fCutRange[ivar]->GetMin(), nbins ) );
         }
         else if ((*fFitParams)[ivar] == kForceMax) {
            // window pinned to the upper boundary; width fixed to full range
            ranges.push_back( new Interval( fCutRange[ivar]->GetMin(), fCutRange[ivar]->GetMax(), nbins ) );
            ranges.push_back( new Interval( fCutRange[ivar]->GetMax() - fCutRange[ivar]->GetMin(),
                                            fCutRange[ivar]->GetMax() - fCutRange[ivar]->GetMin(), nbins ) );
         }
         else {
            // free window: start and width both vary
            ranges.push_back( new Interval( fCutRange[ivar]->GetMin(), fCutRange[ivar]->GetMax(), nbins ) );
            ranges.push_back( new Interval( 0, fCutRange[ivar]->GetMax() - fCutRange[ivar]->GetMin(), nbins ) );
         }
      }

      // create the chosen fitter; it calls back into EstimatorFunction(pars)
      FitterBase* fitter = NULL;

      switch (fFitMethod) {
      case kUseGeneticAlgorithm:
         fitter = new GeneticFitter( *this, Form("%sFitter_GA", GetName()), ranges, GetOptions() );
         break;
      case kUseMonteCarlo:
         fitter = new MCFitter ( *this, Form("%sFitter_MC", GetName()), ranges, GetOptions() );
         break;
      case kUseMinuit:
         fitter = new MinuitFitter ( *this, Form("%sFitter_MINUIT", GetName()), ranges, GetOptions() );
         break;
      case kUseSimulatedAnnealing:
         fitter = new SimulatedAnnealingFitter( *this, Form("%sFitter_SA", GetName()), ranges, GetOptions() );
         break;
      default:
         Log() << kFATAL << "Wrong fit method: " << fFitMethod << Endl;
      }

      fitter->CheckForUnusedOptions();

      // run the optimisation
      fitter->Run();

      // clean up
      for (UInt_t ivar=0; ivar<ranges.size(); ivar++) delete ranges[ivar];
      delete fitter;

   }

   // ---- full scan over all signal event pairs
   else if (fFitMethod == kUseEventScan) {

      Int_t nevents = Data()->GetNEvents();
      Int_t ic = 0;

      // number of unordered event pairs
      Int_t nsamples = Int_t(0.5*nevents*(nevents - 1));
      Timer timer( nsamples, GetName() );

      Log() << kINFO << "Running full event scan: " << Endl;
      for (Int_t ievt1=0; ievt1<nevents; ievt1++) {
         for (Int_t ievt2=ievt1+1; ievt2<nevents; ievt2++) {

            EstimatorFunction( ievt1, ievt2 );

            // progress display
            ic++;
            if ((nsamples<10000) || ic%10000 == 0) timer.DrawProgressBar( ic );
         }
      }
   }

   // ---- Monte-Carlo sampling of random signal event pairs
   else if (fFitMethod == kUseMonteCarloEvents) {

      // this mode has two extra options, parsed on the fly
      Int_t nsamples = 200000;
      UInt_t seed = 100;
      DeclareOptionRef( nsamples, "SampleSize", "Number of Monte-Carlo-Event samples" );
      DeclareOptionRef( seed, "Seed", "Seed for the random generator (0 takes random seeds)" );
      ParseOptions();

      Int_t nevents = Data()->GetNEvents();
      Int_t ic = 0;

      // timer for the progress bar
      Timer timer( nsamples, GetName() );

      // random generator for event picking
      TRandom3*rnd = new TRandom3( seed );

      Log() << kINFO << "Running Monte-Carlo-Event sampling over " << nsamples << " events" << Endl;
      std::vector<Double_t> pars( 2*GetNvar() );

      for (Int_t itoy=0; itoy<nsamples; itoy++) {

         for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {

            // draw a pair of events; both are supposed to be signal
            Bool_t isSignal = kFALSE;
            Int_t ievt1, ievt2;
            Double_t evt1, evt2;
            Int_t nbreak = 0;
            while (!isSignal) {
               ievt1 = Int_t(rnd->Uniform(0.,1.)*nevents);
               ievt2 = Int_t(rnd->Uniform(0.,1.)*nevents);

               const Event *ev1 = GetEvent(ievt1);
               isSignal = DataInfo().IsSignal(ev1);
               evt1 = ev1->GetValue( ivar );

               const Event *ev2 = GetEvent(ievt2);
               isSignal &= DataInfo().IsSignal(ev2);
               evt2 = ev2->GetValue( ivar );

               if (nbreak++ > 10000) Log() << kFATAL << "<MCEvents>: could not find signal events"
                                           << " after 10000 trials - do you have signal events in your sample ?"
                                           << Endl;
               // NOTE(review): this forces the loop to terminate after a single
               // pass regardless of the signal check above, which makes the
               // whole "both events must be signal" requirement ineffective.
               // Looks like leftover debug code — confirm intent before removing.
               isSignal = 1;
            }

            // parameters are (window start, window width), sorted
            if (evt1 > evt2) { Double_t z = evt1; evt1 = evt2; evt2 = z; }
            pars[2*ivar] = evt1;
            pars[2*ivar+1] = evt2 - evt1;
         }

         // evaluate the cut window defined by this event pair
         EstimatorFunction( pars );

         // progress display
         ic++;
         if ((nsamples<1000) || ic%1000 == 0) timer.DrawProgressBar( ic );
      }

      delete rnd;
   }

   else Log() << kFATAL << "Unknown minimisation method: " << fFitMethod << Endl;

   // the search trees are only needed during training
   if (fBinaryTreeS != 0) { delete fBinaryTreeS; fBinaryTreeS = 0; }
   if (fBinaryTreeB != 0) { delete fBinaryTreeB; fBinaryTreeB = 0; }

   // open up enforced one-sided cuts to +-fgMaxAbsCutVal ("infinity")
   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
      for (Int_t ibin=0; ibin<fNbins; ibin++) {

         if ((*fFitParams)[ivar] == kForceMin && fCutMin[ivar][ibin] > -fgMaxAbsCutVal) {
            fCutMin[ivar][ibin] = -fgMaxAbsCutVal;
         }
         if ((*fFitParams)[ivar] == kForceMax && fCutMax[ivar][ibin] < fgMaxAbsCutVal) {
            fCutMax[ivar][ibin] = fgMaxAbsCutVal;
         }
      }
   }

   // print the resulting cuts for a few working points; the epsilon nudges
   // the value off a bin edge so FindBin picks the intended bin
   Double_t epsilon = 0.0001;
   for (Double_t eff=0.1; eff<0.95; eff += 0.1) PrintCuts( eff+epsilon );
}
00799
00800
////////////////////////////////////////////////////////////////////////////////
/// nothing to test for the cut method — intentionally left empty
void TMVA::MethodCuts::TestClassification()
{
   // intentionally empty: no additional classification tests are performed
}
00805
00806
00807 Double_t TMVA::MethodCuts::EstimatorFunction( Int_t ievt1, Int_t ievt2 )
00808 {
00809
00810 const Event *ev1 = GetEvent(ievt1);
00811 if (!DataInfo().IsSignal(ev1)) return -1;
00812
00813 const Event *ev2 = GetEvent(ievt2);
00814 if (!DataInfo().IsSignal(ev2)) return -1;
00815
00816 const Int_t nvar = GetNvar();
00817 Double_t* evt1 = new Double_t[nvar];
00818 Double_t* evt2 = new Double_t[nvar];
00819
00820 for (Int_t ivar=0; ivar<nvar; ivar++) {
00821 evt1[ivar] = ev1->GetValue( ivar );
00822 evt2[ivar] = ev2->GetValue( ivar );
00823 }
00824
00825
00826 std::vector<Double_t> pars;
00827 for (Int_t ivar=0; ivar<nvar; ivar++) {
00828 Double_t cutMin;
00829 Double_t cutMax;
00830 if (evt1[ivar] < evt2[ivar]) {
00831 cutMin = evt1[ivar];
00832 cutMax = evt2[ivar];
00833 }
00834 else {
00835 cutMin = evt2[ivar];
00836 cutMax = evt1[ivar];
00837 }
00838
00839 pars.push_back( cutMin );
00840 pars.push_back( cutMax - cutMin );
00841 }
00842
00843 delete [] evt1;
00844 delete [] evt2;
00845
00846 return ComputeEstimator( pars );
00847 }
00848
00849
////////////////////////////////////////////////////////////////////////////////
/// fitter callback: evaluate the estimator for a given fit-parameter set;
/// simply forwards to ComputeEstimator
Double_t TMVA::MethodCuts::EstimatorFunction( std::vector<Double_t>& pars )
{
   return ComputeEstimator( pars );
}
00855
00856
////////////////////////////////////////////////////////////////////////////////
/// returns the estimator for the cut-optimisation fit: for the cut window
/// encoded in "pars" the signal and background efficiencies are computed,
/// and the window is stored in fCutMin/fCutMax whenever it improves the
/// background efficiency recorded so far for its signal-efficiency bin.
/// Smaller background efficiency at given signal efficiency ==> larger eta.
Double_t TMVA::MethodCuts::ComputeEstimator( std::vector<Double_t>& pars )
{
   // decode the fit parameters into cut windows (into the scratch buffers)
   Double_t effS = 0, effB = 0;
   this->MatchParsToCuts( pars, &fTmpCutMin[0], &fTmpCutMax[0] );

   // compute signal and background efficiencies with the configured method
   switch (fEffMethod) {
   case kUsePDFs:
      this->GetEffsfromPDFs( &fTmpCutMin[0], &fTmpCutMax[0], effS, effB );
      break;
   case kUseEventSelection:
      this->GetEffsfromSelection( &fTmpCutMin[0], &fTmpCutMax[0], effS, effB);
      break;
   default:
      this->GetEffsfromSelection( &fTmpCutMin[0], &fTmpCutMax[0], effS, effB);
   }

   Double_t eta = 0;

   // signal-efficiency bin this window belongs to (1-based histogram bin)
   Int_t ibinS = fEffBvsSLocal->FindBin( effS );

   // best background efficiency recorded so far in this bin and its neighbours
   // (unfilled bins carry the init value -0.1)
   Double_t effBH = fEffBvsSLocal->GetBinContent( ibinS );
   Double_t effBH_left = (ibinS > 1 ) ? fEffBvsSLocal->GetBinContent( ibinS-1 ) : effBH;
   Double_t effBH_right = (ibinS < fNbins) ? fEffBvsSLocal->GetBinContent( ibinS+1 ) : effBH;

   // local average used to smooth the estimator across neighbouring bins
   Double_t average = 0.5*(effBH_left + effBH_right);
   if (effBH < effB) average = effBH;

   // the estimator: rewards low background efficiency relative to the stored
   // value, with a smoothness term and a mild preference for larger effS
   eta = ( -TMath::Abs(effBH-average) + (1.0 - (effBH - effB))) / (1.0 + effS);

   // update the stored cut window if this one is better (or the bin is unset,
   // effBH < 0)
   // NOTE(review): ibinS-1 is used as a 0-based array index; if effS ever lands
   // in the overflow bin (FindBin returning fNbins+1) this would index one past
   // the end of fCutMin/fCutMax — confirm effS is always strictly below the
   // histogram's upper edge
   if (effBH < 0 || effBH > effB) {
      fEffBvsSLocal->SetBinContent( ibinS, effB );
      for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
         fCutMin[ivar][ibinS-1] = fTmpCutMin[ivar];
         fCutMax[ivar][ibinS-1] = fTmpCutMax[ivar];
      }
   }

   // windows falling into the lowest signal-efficiency bin are nearly useless:
   // return a penalty that pushes the fitter towards wider windows
   if (ibinS<=1) {
      // penalty grows as the window shrinks away from the allowed cut range
      Double_t penalty=0.,diff=0.;
      for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
         diff=(fCutRange[ivar]->GetMax()-fTmpCutMax[ivar])/(fCutRange[ivar]->GetMax()-fCutRange[ivar]->GetMin());
         penalty+=diff*diff;
         diff=(fCutRange[ivar]->GetMin()-fTmpCutMin[ivar])/(fCutRange[ivar]->GetMax()-fCutRange[ivar]->GetMin());
         penalty+=4.*diff*diff;
      }

      if (effS<1.e-4) return 10.0+penalty;
      else return 10.*(1.-10.*effS);
   }
   return eta;
}
00941
00942
00943 void TMVA::MethodCuts::MatchParsToCuts( const std::vector<Double_t> & pars,
00944 Double_t* cutMin, Double_t* cutMax )
00945 {
00946
00947 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00948 Int_t ipar = 2*ivar;
00949 cutMin[ivar] = ((*fRangeSign)[ivar] > 0) ? pars[ipar] : pars[ipar] - pars[ipar+1];
00950 cutMax[ivar] = ((*fRangeSign)[ivar] > 0) ? pars[ipar] + pars[ipar+1] : pars[ipar];
00951 }
00952 }
00953
00954
00955 void TMVA::MethodCuts::MatchCutsToPars( std::vector<Double_t>& pars,
00956 Double_t** cutMinAll, Double_t** cutMaxAll, Int_t ibin )
00957 {
00958
00959 if (ibin < 1 || ibin > fNbins) Log() << kFATAL << "::MatchCutsToPars: bin error: "
00960 << ibin << Endl;
00961
00962 const UInt_t nvar = GetNvar();
00963 Double_t *cutMin = new Double_t[nvar];
00964 Double_t *cutMax = new Double_t[nvar];
00965 for (UInt_t ivar=0; ivar<nvar; ivar++) {
00966 cutMin[ivar] = cutMinAll[ivar][ibin-1];
00967 cutMax[ivar] = cutMaxAll[ivar][ibin-1];
00968 }
00969
00970 MatchCutsToPars( pars, cutMin, cutMax );
00971 delete [] cutMin;
00972 delete [] cutMax;
00973 }
00974
00975
00976 void TMVA::MethodCuts::MatchCutsToPars( std::vector<Double_t>& pars,
00977 Double_t* cutMin, Double_t* cutMax )
00978 {
00979
00980 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00981 Int_t ipar = 2*ivar;
00982 pars[ipar] = ((*fRangeSign)[ivar] > 0) ? cutMin[ivar] : cutMax[ivar];
00983 pars[ipar+1] = cutMax[ivar] - cutMin[ivar];
00984 }
00985 }
00986
00987
00988 void TMVA::MethodCuts::GetEffsfromPDFs( Double_t* cutMin, Double_t* cutMax,
00989 Double_t& effS, Double_t& effB )
00990 {
00991
00992
00993 effS = 1.0;
00994 effB = 1.0;
00995 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00996 effS *= (*fVarPdfS)[ivar]->GetIntegral( cutMin[ivar], cutMax[ivar] );
00997 effB *= (*fVarPdfB)[ivar]->GetIntegral( cutMin[ivar], cutMax[ivar] );
00998 }
00999
01000
01001 if( effS < 0.0 ) {
01002 effS = 0.0;
01003 if( !fNegEffWarning ) Log() << kWARNING << "Negative signal efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." << Endl;
01004 fNegEffWarning = kTRUE;
01005 }
01006 if( effB < 0.0 ) {
01007 effB = 0.0;
01008 if( !fNegEffWarning ) Log() << kWARNING << "Negative background efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." << Endl;
01009 fNegEffWarning = kTRUE;
01010 }
01011 }
01012
01013
01014 void TMVA::MethodCuts::GetEffsfromSelection( Double_t* cutMin, Double_t* cutMax,
01015 Double_t& effS, Double_t& effB)
01016 {
01017
01018
01019 Float_t nTotS = 0, nTotB = 0;
01020 Float_t nSelS = 0, nSelB = 0;
01021
01022 Volume* volume = new Volume( cutMin, cutMax, GetNvar() );
01023
01024
01025 nSelS = fBinaryTreeS->SearchVolume( volume );
01026 nSelB = fBinaryTreeB->SearchVolume( volume );
01027
01028 delete volume;
01029
01030
01031 nTotS = fBinaryTreeS->GetSumOfWeights();
01032 nTotB = fBinaryTreeB->GetSumOfWeights();
01033
01034
01035 if (nTotS == 0 && nTotB == 0) {
01036 Log() << kFATAL << "<GetEffsfromSelection> fatal error in zero total number of events:"
01037 << " nTotS, nTotB: " << nTotS << " " << nTotB << " ***" << Endl;
01038 }
01039
01040
01041 if (nTotS == 0 ) {
01042 effS = 0;
01043 effB = nSelB/nTotB;
01044 Log() << kWARNING << "<ComputeEstimator> zero number of signal events" << Endl;
01045 }
01046 else if (nTotB == 0) {
01047 effB = 0;
01048 effS = nSelS/nTotS;
01049 Log() << kWARNING << "<ComputeEstimator> zero number of background events" << Endl;
01050 }
01051 else {
01052 effS = nSelS/nTotS;
01053 effB = nSelB/nTotB;
01054 }
01055
01056
01057 if( effS < 0.0 ) {
01058 effS = 0.0;
01059 if( !fNegEffWarning ) Log() << kWARNING << "Negative signal efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." << Endl;
01060 fNegEffWarning = kTRUE;
01061 }
01062 if( effB < 0.0 ) {
01063 effB = 0.0;
01064 if( !fNegEffWarning ) Log() << kWARNING << "Negative background efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." << Endl;
01065 fNegEffWarning = kTRUE;
01066 }
01067 }
01068
01069
01070 void TMVA::MethodCuts::CreateVariablePDFs( void )
01071 {
01072
01073
01074
01075 fVarHistS = new vector<TH1*>( GetNvar() );
01076 fVarHistB = new vector<TH1*>( GetNvar() );
01077 fVarHistS_smooth = new vector<TH1*>( GetNvar() );
01078 fVarHistB_smooth = new vector<TH1*>( GetNvar() );
01079 fVarPdfS = new vector<PDF*>( GetNvar() );
01080 fVarPdfB = new vector<PDF*>( GetNvar() );
01081
01082 Int_t nsmooth = 0;
01083
01084
01085 Double_t minVal = DBL_MAX;
01086 Double_t maxVal = -DBL_MAX;
01087 for( UInt_t ievt=0; ievt<Data()->GetNEvents(); ievt++ ){
01088 const Event *ev = GetEvent(ievt);
01089 Float_t val = ev->GetValue(ievt);
01090 if( val > minVal ) minVal = val;
01091 if( val < maxVal ) maxVal = val;
01092 }
01093
01094 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
01095
01096
01097 TString histTitle = (*fInputVars)[ivar] + " signal training";
01098 TString histName = (*fInputVars)[ivar] + "_sig";
01099
01100
01101
01102
01103
01104
01105
01106
01107
01108
01109 (*fVarHistS)[ivar] = new TH1F(histName.Data(), histTitle.Data(), fNbins, minVal, maxVal );
01110
01111
01112 histTitle = (*fInputVars)[ivar] + " background training";
01113 histName = (*fInputVars)[ivar] + "_bgd";
01114
01115
01116
01117
01118
01119
01120
01121
01122
01123
01124 (*fVarHistB)[ivar] = new TH1F(histName.Data(), histTitle.Data(), fNbins, minVal, maxVal );
01125
01126 for( UInt_t ievt=0; ievt<Data()->GetNEvents(); ievt++ ){
01127 const Event *ev = GetEvent(ievt);
01128 Float_t val = ev->GetValue(ievt);
01129 if( DataInfo().IsSignal(ev) ){
01130 (*fVarHistS)[ivar]->Fill( val );
01131 }else{
01132 (*fVarHistB)[ivar]->Fill( val );
01133 }
01134 }
01135
01136
01137
01138
01139 (*fVarHistS_smooth)[ivar] = (TH1F*)(*fVarHistS)[ivar]->Clone();
01140 histTitle = (*fInputVars)[ivar] + " signal training smoothed ";
01141 histTitle += nsmooth;
01142 histTitle +=" times";
01143 histName = (*fInputVars)[ivar] + "_sig_smooth";
01144 (*fVarHistS_smooth)[ivar]->SetName(histName);
01145 (*fVarHistS_smooth)[ivar]->SetTitle(histTitle);
01146
01147
01148 (*fVarHistS_smooth)[ivar]->Smooth(nsmooth);
01149
01150
01151
01152
01153
01154
01155
01156
01157
01158
01159
01160
01161
01162
01163 (*fVarHistB_smooth)[ivar] = (TH1F*)(*fVarHistB)[ivar]->Clone();
01164 histTitle = (*fInputVars)[ivar]+" background training smoothed ";
01165 histTitle += nsmooth;
01166 histTitle +=" times";
01167 histName = (*fInputVars)[ivar]+"_bgd_smooth";
01168 (*fVarHistB_smooth)[ivar]->SetName(histName);
01169 (*fVarHistB_smooth)[ivar]->SetTitle(histTitle);
01170
01171
01172 (*fVarHistB_smooth)[ivar]->Smooth(nsmooth);
01173
01174
01175 (*fVarPdfS)[ivar] = new PDF( TString(GetName()) + " PDF Var Sig " + GetInputVar( ivar ), (*fVarHistS_smooth)[ivar], PDF::kSpline2 );
01176 (*fVarPdfB)[ivar] = new PDF( TString(GetName()) + " PDF Var Bkg " + GetInputVar( ivar ), (*fVarHistB_smooth)[ivar], PDF::kSpline2 );
01177 }
01178 }
01179
01180
01181 void TMVA::MethodCuts::ReadWeightsFromStream( istream& istr )
01182 {
01183
01184 TString dummy;
01185 UInt_t dummyInt;
01186
01187
01188 istr >> dummy >> dummy;
01189
01190 istr >> dummy >> fNbins;
01191
01192
01193 istr >> dummy >> dummy >> dummy >> dummy >> dummy >> dummy >> dummyInt >> dummy ;
01194
01195
01196 if (dummyInt != Data()->GetNVariables()) {
01197 Log() << kFATAL << "<ReadWeightsFromStream> fatal error: mismatch "
01198 << "in number of variables: " << dummyInt << " != " << Data()->GetNVariables() << Endl;
01199 }
01200
01201
01202
01203 if (fFitMethod == kUseMonteCarlo) {
01204 Log() << kINFO << "Read cuts optimised using sample of MC events" << Endl;
01205 }
01206 else if (fFitMethod == kUseMonteCarloEvents) {
01207 Log() << kINFO << "Read cuts optimised using sample of MC events" << Endl;
01208 }
01209 else if (fFitMethod == kUseGeneticAlgorithm) {
01210 Log() << kINFO << "Read cuts optimised using Genetic Algorithm" << Endl;
01211 }
01212 else if (fFitMethod == kUseSimulatedAnnealing) {
01213 Log() << kINFO << "Read cuts optimised using Simulated Annealing algorithm" << Endl;
01214 }
01215 else if (fFitMethod == kUseEventScan) {
01216 Log() << kINFO << "Read cuts optimised using Full Event Scan" << Endl;
01217 }
01218 else {
01219 Log() << kWARNING << "unknown method: " << fFitMethod << Endl;
01220 }
01221 Log() << kINFO << "in " << fNbins << " signal efficiency bins and for " << GetNvar() << " variables" << Endl;
01222
01223
01224 char buffer[200];
01225 istr.getline(buffer,200);
01226 istr.getline(buffer,200);
01227
01228 Int_t tmpbin;
01229 Float_t tmpeffS, tmpeffB;
01230 if (fEffBvsSLocal != 0) delete fEffBvsSLocal;
01231 fEffBvsSLocal = new TH1F( GetTestvarName() + "_effBvsSLocal",
01232 TString(GetName()) + " efficiency of B vs S", fNbins, 0.0, 1.0 );
01233 fEffBvsSLocal->SetDirectory(0);
01234
01235 for (Int_t ibin=0; ibin<fNbins; ibin++) {
01236 istr >> tmpbin >> tmpeffS >> tmpeffB;
01237 fEffBvsSLocal->SetBinContent( ibin+1, tmpeffB );
01238
01239 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
01240 istr >> fCutMin[ivar][ibin] >> fCutMax[ivar][ibin];
01241 }
01242 }
01243
01244 fEffSMin = fEffBvsSLocal->GetBinCenter(1);
01245 fEffSMax = fEffBvsSLocal->GetBinCenter(fNbins);
01246 }
01247
01248
01249 void TMVA::MethodCuts::AddWeightsXMLTo( void* parent ) const
01250 {
01251
01252
01253
01254
01255 std::vector<Double_t> cutsMin;
01256 std::vector<Double_t> cutsMax;
01257
01258 void* wght = gTools().AddChild(parent, "Weights");
01259 gTools().AddAttr( wght, "OptimisationMethod", (Int_t)fEffMethod);
01260 gTools().AddAttr( wght, "FitMethod", (Int_t)fFitMethod );
01261 gTools().AddAttr( wght, "nbins", fNbins );
01262 gTools().AddComment( wght, Form( "Below are the optimised cuts for %i variables: Format: ibin(hist) effS effB cutMin[ivar=0] cutMax[ivar=0] ... cutMin[ivar=n-1] cutMax[ivar=n-1]", GetNvar() ) );
01263
01264
01265
01266
01267
01268
01269
01270
01271
01272 for (Int_t ibin=0; ibin<fNbins; ibin++) {
01273 Double_t effS = fEffBvsSLocal->GetBinCenter ( ibin + 1 );
01274 Double_t trueEffS = GetCuts( effS, cutsMin, cutsMax );
01275 if (TMath::Abs(trueEffS) < 1e-10) trueEffS = 0;
01276
01277 void* binxml = gTools().AddChild( wght, "Bin" );
01278 gTools().AddAttr( binxml, "ibin", ibin+1 );
01279 gTools().AddAttr( binxml, "effS", trueEffS );
01280 gTools().AddAttr( binxml, "effB", fEffBvsSLocal->GetBinContent( ibin + 1 ) );
01281 void* cutsxml = gTools().AddChild( binxml, "Cuts" );
01282 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
01283 gTools().AddAttr( cutsxml, Form( "cutMin_%i", ivar ), cutsMin[ivar] );
01284 gTools().AddAttr( cutsxml, Form( "cutMax_%i", ivar ), cutsMax[ivar] );
01285 }
01286 }
01287 }
01288
01289
01290 void TMVA::MethodCuts::ReadWeightsFromXML( void* wghtnode )
01291 {
01292
01293
01294
01295 for (UInt_t i=0; i<GetNvar(); i++) {
01296 if (fCutMin[i] != 0) delete [] fCutMin[i];
01297 if (fCutMax[i] != 0) delete [] fCutMax[i];
01298 }
01299 if (fCutMin != 0) delete [] fCutMin;
01300 if (fCutMax != 0) delete [] fCutMax;
01301
01302 Int_t tmpEffMethod, tmpFitMethod;
01303 gTools().ReadAttr( wghtnode, "OptimisationMethod", tmpEffMethod );
01304 gTools().ReadAttr( wghtnode, "FitMethod", tmpFitMethod );
01305 gTools().ReadAttr( wghtnode, "nbins", fNbins );
01306
01307 fEffMethod = (EEffMethod)tmpEffMethod;
01308 fFitMethod = (EFitMethodType)tmpFitMethod;
01309
01310
01311 if (fFitMethod == kUseMonteCarlo) {
01312 Log() << kINFO << "Read cuts optimised using sample of MC events" << Endl;
01313 }
01314 else if (fFitMethod == kUseMonteCarloEvents) {
01315 Log() << kINFO << "Read cuts optimised using sample of MC-Event events" << Endl;
01316 }
01317 else if (fFitMethod == kUseGeneticAlgorithm) {
01318 Log() << kINFO << "Read cuts optimised using Genetic Algorithm" << Endl;
01319 }
01320 else if (fFitMethod == kUseSimulatedAnnealing) {
01321 Log() << kINFO << "Read cuts optimised using Simulated Annealing algorithm" << Endl;
01322 }
01323 else if (fFitMethod == kUseEventScan) {
01324 Log() << kINFO << "Read cuts optimised using Full Event Scan" << Endl;
01325 }
01326 else {
01327 Log() << kWARNING << "unknown method: " << fFitMethod << Endl;
01328 }
01329 Log() << kINFO << "Reading " << fNbins << " signal efficiency bins for " << GetNvar() << " variables" << Endl;
01330
01331 delete fEffBvsSLocal;
01332 fEffBvsSLocal = new TH1F( GetTestvarName() + "_effBvsSLocal",
01333 TString(GetName()) + " efficiency of B vs S", fNbins, 0.0, 1.0 );
01334 fEffBvsSLocal->SetDirectory(0);
01335 for (Int_t ibin=1; ibin<=fNbins; ibin++) fEffBvsSLocal->SetBinContent( ibin, -0.1 );
01336
01337 fCutMin = new Double_t*[GetNvar()];
01338 fCutMax = new Double_t*[GetNvar()];
01339 for (UInt_t i=0;i<GetNvar();i++) {
01340 fCutMin[i] = new Double_t[fNbins];
01341 fCutMax[i] = new Double_t[fNbins];
01342 }
01343
01344
01345 Int_t tmpbin;
01346 Float_t tmpeffS, tmpeffB;
01347 void* ch = gTools().GetChild(wghtnode,"Bin");
01348 while (ch) {
01349
01350
01351
01352
01353
01354 gTools().ReadAttr( ch, "ibin", tmpbin );
01355 gTools().ReadAttr( ch, "effS", tmpeffS );
01356 gTools().ReadAttr( ch, "effB", tmpeffB );
01357
01358
01359 if (tmpbin-1 >= fNbins || tmpbin-1 < 0) {
01360 Log() << kFATAL << "Mismatch in bins: " << tmpbin-1 << " >= " << fNbins << Endl;
01361 }
01362
01363 fEffBvsSLocal->SetBinContent( tmpbin, tmpeffB );
01364 void* ct = gTools().GetChild(ch);
01365 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
01366 gTools().ReadAttr( ct, Form( "cutMin_%i", ivar ), fCutMin[ivar][tmpbin-1] );
01367 gTools().ReadAttr( ct, Form( "cutMax_%i", ivar ), fCutMax[ivar][tmpbin-1] );
01368 }
01369 ch = gTools().GetNextChild(ch, "Bin");
01370 }
01371 }
01372
01373
01374 void TMVA::MethodCuts::WriteMonitoringHistosToFile( void ) const
01375 {
01376
01377
01378 Log() << kINFO << "Write monitoring histograms to file: " << BaseDir()->GetPath() << Endl;
01379
01380 fEffBvsSLocal->Write();
01381
01382
01383 if (fEffMethod == kUsePDFs) {
01384 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
01385 (*fVarHistS)[ivar]->Write();
01386 (*fVarHistB)[ivar]->Write();
01387 (*fVarHistS_smooth)[ivar]->Write();
01388 (*fVarHistB_smooth)[ivar]->Write();
01389 (*fVarPdfS)[ivar]->GetPDFHist()->Write();
01390 (*fVarPdfB)[ivar]->GetPDFHist()->Write();
01391 }
01392 }
01393 }
01394
01395
01396 Double_t TMVA::MethodCuts::GetTrainingEfficiency(const TString& theString)
01397 {
01398
01399
01400
01401
01402
01403
01404
01405
01406
01407
01408
01409 TList* list = gTools().ParseFormatLine( theString );
01410
01411 if (list->GetSize() != 2) {
01412 Log() << kFATAL << "<GetTrainingEfficiency> wrong number of arguments"
01413 << " in string: " << theString
01414 << " | required format, e.g., Efficiency:0.05" << Endl;
01415 return -1;
01416 }
01417
01418 Results* results = Data()->GetResults(GetMethodName(), Types::kTesting, GetAnalysisType());
01419
01420
01421
01422 Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
01423
01424 delete list;
01425
01426
01427 if (results->GetHist("EFF_BVSS_TR")==0) {
01428
01429 if (fBinaryTreeS != 0) { delete fBinaryTreeS; fBinaryTreeS = 0; }
01430 if (fBinaryTreeB != 0) { delete fBinaryTreeB; fBinaryTreeB = 0; }
01431
01432 fBinaryTreeS = new BinarySearchTree();
01433 fBinaryTreeS->Fill( GetEventCollection(Types::kTraining), fSignalClass );
01434 fBinaryTreeB = new BinarySearchTree();
01435 fBinaryTreeB->Fill( GetEventCollection(Types::kTraining), fBackgroundClass );
01436
01437
01438
01439
01440
01441
01442
01443 TH1* eff_bvss_tr = new TH1F( GetTestvarName() + "_trainingEffBvsS", GetTestvarName() + "", fNbins, 0, 1 );
01444 for (Int_t ibin=1; ibin<=fNbins; ibin++) eff_bvss_tr->SetBinContent( ibin, -0.1 );
01445 TH1* rej_bvss_tr = new TH1F( GetTestvarName() + "_trainingRejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
01446 for (Int_t ibin=1; ibin<=fNbins; ibin++) rej_bvss_tr->SetBinContent( ibin, 0. );
01447 results->Store(eff_bvss_tr, "EFF_BVSS_TR");
01448 results->Store(rej_bvss_tr, "REJ_BVSS_TR");
01449
01450
01451
01452
01453 Double_t* tmpCutMin = new Double_t[GetNvar()];
01454 Double_t* tmpCutMax = new Double_t[GetNvar()];
01455 Int_t nFailedBins=0;
01456 for (Int_t bini=1; bini<=fNbins; bini++) {
01457 for (UInt_t ivar=0; ivar <GetNvar(); ivar++){
01458 tmpCutMin[ivar] = fCutMin[ivar][bini-1];
01459 tmpCutMax[ivar] = fCutMax[ivar][bini-1];
01460 }
01461
01462 Double_t effS, effB;
01463 this->GetEffsfromSelection( &tmpCutMin[0], &tmpCutMax[0], effS, effB);
01464
01465 Int_t effBin = eff_bvss_tr->GetXaxis()->FindBin(effS);
01466 if (effBin != bini){
01467 Log()<< kVERBOSE << "unable to fill efficiency bin " << bini<< " " << effBin <<Endl;
01468 nFailedBins++;
01469 }
01470 else{
01471
01472 eff_bvss_tr->SetBinContent( bini, effB );
01473 rej_bvss_tr->SetBinContent( bini, 1.0-effB );
01474 }
01475 }
01476 if (nFailedBins>0) Log()<< kWARNING << " unable to fill "<< nFailedBins <<" efficiency bins " <<Endl;
01477
01478 delete [] tmpCutMin;
01479 delete [] tmpCutMax;
01480
01481
01482 fSplTrainEffBvsS = new TSpline1( "trainEffBvsS", new TGraph( eff_bvss_tr ) );
01483 }
01484
01485
01486 if (NULL == fSplTrainEffBvsS) return 0.0;
01487
01488
01489 Double_t effS, effB, effS_ = 0, effB_ = 0;
01490 Int_t nbins_ = 1000;
01491
01492
01493 for (Int_t bini=1; bini<=nbins_; bini++) {
01494
01495 effS = (bini - 0.5)/Float_t(nbins_);
01496 effB = fSplTrainEffBvsS->Eval( effS );
01497
01498
01499 if ((effB - effBref)*(effB_ - effBref) < 0) break;
01500 effS_ = effS;
01501 effB_ = effB;
01502 }
01503
01504 return 0.5*(effS + effS_);
01505 }
01506
01507
Double_t TMVA::MethodCuts::GetEfficiency( const TString& theString, Types::ETreeType type, Double_t& effSerr )
{
   // create the background-efficiency-vs-signal-efficiency plot on the
   // given sample type (first call only), then either
   //  - "theString" empty / single token : return the area under the
   //    background-REJECTION-vs-signal-efficiency curve (ROC-like integral)
   //  - "theString" == "Efficiency:<x>"  : return the signal efficiency at
   //    background efficiency <x>, with its binomial error in effSerr

   Data()->SetCurrentType(type);

   Results* results = Data()->GetResults( GetMethodName(), Types::kTesting, GetAnalysisType() );

   // parse the format string
   TList* list = gTools().ParseFormatLine( theString, ":" );

   if (list->GetSize() > 2) {
      delete list;
      Log() << kFATAL << "<GetEfficiency> wrong number of arguments"
            << " in string: " << theString
            << " | required format, e.g., Efficiency:0.05, or empty string" << Endl;
      return -1;
   }

   // an empty string (or single token) requests the area computation
   Bool_t computeArea = (list->GetSize() < 2);

   // otherwise the second token is the background-efficiency reference
   Float_t effBref = (computeArea?1.:atof( ((TObjString*)list->At(1))->GetString() ));

   delete list;

   // first round: build the ROC curve (done only once; the stored
   // histogram marks that the curve already exists)
   if (results->GetHist("MVA_EFF_BvsS")==0) {

      if (fBinaryTreeS!=0) { delete fBinaryTreeS; fBinaryTreeS = 0; }
      if (fBinaryTreeB!=0) { delete fBinaryTreeB; fBinaryTreeB = 0; }

      // build binary search trees over the test sample for fast counting
      // of signal/background events inside a cut window
      fBinaryTreeS = new BinarySearchTree();
      fBinaryTreeS->Fill( GetEventCollection(Types::kTesting), fSignalClass );
      fBinaryTreeB = new BinarySearchTree();
      fBinaryTreeB->Fill( GetEventCollection(Types::kTesting), fBackgroundClass );

      // bins are preset to unphysical values so unfilled bins stay visible
      TH1* eff_BvsS = new TH1F( GetTestvarName() + "_effBvsS", GetTestvarName() + "", fNbins, 0, 1 );
      for (Int_t ibin=1; ibin<=fNbins; ibin++) eff_BvsS->SetBinContent( ibin, -0.1 );
      TH1* rej_BvsS = new TH1F( GetTestvarName() + "_rejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
      for (Int_t ibin=1; ibin<=fNbins; ibin++) rej_BvsS->SetBinContent( ibin, 0.0 );
      results->Store(eff_BvsS, "MVA_EFF_BvsS");
      results->Store(rej_BvsS);

      Double_t xmin = 0.;
      Double_t xmax = 1.000001; // upper edge slightly above 1 so effS == 1 falls into the last bin

      TH1* eff_s = new TH1F( GetTestvarName() + "_effS", GetTestvarName() + " (signal)", fNbins, xmin, xmax);
      for (Int_t ibin=1; ibin<=fNbins; ibin++) eff_s->SetBinContent( ibin, -0.1 );
      TH1* eff_b = new TH1F( GetTestvarName() + "_effB", GetTestvarName() + " (background)", fNbins, xmin, xmax);
      for (Int_t ibin=1; ibin<=fNbins; ibin++) eff_b->SetBinContent( ibin, -0.1 );
      results->Store(eff_s, "MVA_S");
      results->Store(eff_b, "MVA_B");

      // evaluate the stored cuts of every signal-efficiency bin
      Double_t* tmpCutMin = new Double_t[GetNvar()];
      Double_t* tmpCutMax = new Double_t[GetNvar()];
      TGraph* tmpBvsS = new TGraph(fNbins+1);
      tmpBvsS->SetPoint(0, 0., 0.); // anchor the ROC curve at the origin

      for (Int_t bini=1; bini<=fNbins; bini++) {
         for (UInt_t ivar=0; ivar <GetNvar(); ivar++) {
            tmpCutMin[ivar] = fCutMin[ivar][bini-1];
            tmpCutMax[ivar] = fCutMax[ivar][bini-1];
         }
         // find the efficiencies reached by this cut window
         Double_t effS, effB;
         this->GetEffsfromSelection( &tmpCutMin[0], &tmpCutMax[0], effS, effB);
         tmpBvsS->SetPoint(bini, effS, effB);

         eff_s->SetBinContent(bini, effS);
         eff_b->SetBinContent(bini, effB);
      }
      tmpBvsS->SetPoint(fNbins+1, 1., 1.); // anchor the ROC curve at (1,1)

      delete [] tmpCutMin;
      delete [] tmpCutMax;

      // spline through the ROC points; fill the stored histograms from it
      fSpleffBvsS = new TSpline1( "effBvsS", tmpBvsS );
      for (Int_t bini=1; bini<=fNbins; bini++) {
         Double_t effS = (bini - 0.5)/Float_t(fNbins);
         Double_t effB = fSpleffBvsS->Eval( effS );
         eff_BvsS->SetBinContent( bini, effB );
         rej_BvsS->SetBinContent( bini, 1.0-effB );
      }
   }

   // the spline must exist at this point
   if (NULL == fSpleffBvsS) return 0.0;

   // scan the spline in 1000 fine steps
   Double_t effS = 0, effB = 0, effS_ = 0, effB_ = 0;
   Int_t nbins_ = 1000;

   if (computeArea) {

      // compute the area of the rejection-vs-efficiency plot by summing
      // (1 - effB) over the fine scan
      Double_t integral = 0;
      for (Int_t bini=1; bini<=nbins_; bini++) {

         effS = (bini - 0.5)/Float_t(nbins_);
         effB = fSpleffBvsS->Eval( effS );
         integral += (1.0 - effB);
      }
      integral /= nbins_;

      return integral;
   }
   else {

      // scan until effB crosses the reference value
      for (Int_t bini=1; bini<=nbins_; bini++) {

         effS = (bini - 0.5)/Float_t(nbins_);
         effB = fSpleffBvsS->Eval( effS );

         // a sign change means the reference value was crossed between
         // the previous and the current scan point
         if ((effB - effBref)*(effB_ - effBref) < 0) break;
         effS_ = effS;
         effB_ = effB;
      }

      // take the mean of the two scan points around the crossing and
      // compute the binomial error on effS from the signal test statistics
      effS = 0.5*(effS + effS_);
      effSerr = 0;
      if (Data()->GetNEvtSigTest() > 0)
         effSerr = TMath::Sqrt( effS*(1.0 - effS)/Double_t(Data()->GetNEvtSigTest()) );

      return effS;

   }

   return -1; // not reached: both branches above return
}
01666
01667
01668 void TMVA::MethodCuts::MakeClassSpecific( std::ostream& fout, const TString& className ) const
01669 {
01670
01671 fout << " // not implemented for class: \"" << className << "\"" << endl;
01672 fout << "};" << endl;
01673 }
01674
01675
void TMVA::MethodCuts::GetHelpMessage() const
{
   // print a help message for this method: a short description of the
   // rectangular-cut optimisation, followed by performance and tuning
   // advice for each optimisation algorithm (MC, GA, SA)

   // when writing an options reference, emit HTML markup instead of
   // plain-text emphasis
   TString bold    = gConfig().WriteOptionsReference() ? "<b>" : "";
   TString resbold = gConfig().WriteOptionsReference() ? "</b>" : "";
   TString brk     = gConfig().WriteOptionsReference() ? "<br>" : "";

   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "The optimisation of rectangular cuts performed by TMVA maximises " << Endl;
   Log() << "the background rejection at given signal efficiency, and scans " << Endl;
   Log() << "over the full range of the latter quantity. Three optimisation" << Endl;
   Log() << "methods are optional: Monte Carlo sampling (MC), a Genetics" << Endl;
   Log() << "Algorithm (GA), and Simulated Annealing (SA). GA and SA are" << Endl;
   Log() << "expected to perform best." << Endl;
   Log() << Endl;
   Log() << "The difficulty to find the optimal cuts strongly increases with" << Endl;
   Log() << "the dimensionality (number of input variables) of the problem." << Endl;
   Log() << "This behavior is due to the non-uniqueness of the solution space."<< Endl;
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "If the dimensionality exceeds, say, 4 input variables, it is " << Endl;
   Log() << "advisable to scrutinize the separation power of the variables," << Endl;
   Log() << "and to remove the weakest ones. If some among the input variables" << Endl;
   Log() << "can be described by a single cut (e.g., because signal tends to be" << Endl;
   Log() << "larger than background), this can be indicated to MethodCuts via" << Endl;
   Log() << "the \"Fsmart\" options (see option string). Choosing this option" << Endl;
   Log() << "reduces the number of requirements for the variable from 2 (min/max)" << Endl;
   Log() << "to a single one (TMVA finds out whether it is to be interpreted as" << Endl;
   Log() << "min or max)." << Endl;
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
   Log() << "" << Endl;
   Log() << bold << "Monte Carlo sampling:" << resbold << Endl;
   Log() << "" << Endl;
   Log() << "Apart form the \"Fsmart\" option for the variables, the only way" << Endl;
   Log() << "to improve the MC sampling is to increase the sampling rate. This" << Endl;
   Log() << "is done via the configuration option \"MC_NRandCuts\". The execution" << Endl;
   Log() << "time scales linearly with the sampling rate." << Endl;
   Log() << "" << Endl;
   Log() << bold << "Genetic Algorithm:" << resbold << Endl;
   Log() << "" << Endl;
   Log() << "The algorithm terminates if no significant fitness increase has" << Endl;
   Log() << "been achieved within the last \"nsteps\" steps of the calculation." << Endl;
   Log() << "Wiggles in the ROC curve or constant background rejection of 1" << Endl;
   Log() << "indicate that the GA failed to always converge at the true maximum" << Endl;
   Log() << "fitness. In such a case, it is recommended to broaden the search " << Endl;
   Log() << "by increasing the population size (\"popSize\") and to give the GA " << Endl;
   Log() << "more time to find improvements by increasing the number of steps" << Endl;
   Log() << "(\"nsteps\")" << Endl;
   Log() << " -> increase \"popSize\" (at least >10 * number of variables)" << Endl;
   Log() << " -> increase \"nsteps\"" << Endl;
   Log() << "" << Endl;
   Log() << bold << "Simulated Annealing (SA) algorithm:" << resbold << Endl;
   Log() << "" << Endl;
   Log() << "\"Increasing Adaptive\" approach:" << Endl;
   Log() << "" << Endl;
   Log() << "The algorithm seeks local minima and explores their neighborhood, while" << Endl;
   Log() << "changing the ambient temperature depending on the number of failures" << Endl;
   Log() << "in the previous steps. The performance can be improved by increasing" << Endl;
   Log() << "the number of iteration steps (\"MaxCalls\"), or by adjusting the" << Endl;
   Log() << "minimal temperature (\"MinTemperature\"). Manual adjustments of the" << Endl;
   Log() << "speed of the temperature increase (\"TemperatureScale\" and \"AdaptiveSpeed\")" << Endl;
   Log() << "to individual data sets should also help. Summary:" << brk << Endl;
   Log() << " -> increase \"MaxCalls\"" << brk << Endl;
   Log() << " -> adjust \"MinTemperature\"" << brk << Endl;
   Log() << " -> adjust \"TemperatureScale\"" << brk << Endl;
   Log() << " -> adjust \"AdaptiveSpeed\"" << Endl;
   Log() << "" << Endl;
   Log() << "\"Decreasing Adaptive\" approach:" << Endl;
   Log() << "" << Endl;
   Log() << "The algorithm calculates the initial temperature (based on the effect-" << Endl;
   Log() << "iveness of large steps) and the multiplier that ensures to reach the" << Endl;
   Log() << "minimal temperature with the requested number of iteration steps." << Endl;
   Log() << "The performance can be improved by adjusting the minimal temperature" << Endl;
   Log() << " (\"MinTemperature\") and by increasing number of steps (\"MaxCalls\"):" << brk << Endl;
   Log() << " -> increase \"MaxCalls\"" << brk << Endl;
   Log() << " -> adjust \"MinTemperature\"" << Endl;
   Log() << " " << Endl;
   Log() << "Other kernels:" << Endl;
   Log() << "" << Endl;
   Log() << "Alternative ways of counting the temperature change are implemented. " << Endl;
   Log() << "Each of them starts with the maximum temperature (\"MaxTemperature\")" << Endl;
   Log() << "and descreases while changing the temperature according to a given" << Endl;
   Log() << "prescription:" << brk << Endl;
   Log() << "CurrentTemperature =" << brk << Endl;
   Log() << " - Sqrt: InitialTemperature / Sqrt(StepNumber+2) * TemperatureScale" << brk << Endl;
   Log() << " - Log: InitialTemperature / Log(StepNumber+2) * TemperatureScale" << brk << Endl;
   Log() << " - Homo: InitialTemperature / (StepNumber+2) * TemperatureScale" << brk << Endl;
   Log() << " - Sin: ( Sin( StepNumber / TemperatureScale ) + 1 ) / (StepNumber + 1) * InitialTemperature + Eps" << brk << Endl;
   Log() << " - Geo: CurrentTemperature * TemperatureScale" << Endl;
   Log() << "" << Endl;
   Log() << "Their performance can be improved by adjusting initial temperature" << Endl;
   Log() << "(\"InitialTemperature\"), the number of iteration steps (\"MaxCalls\")," << Endl;
   Log() << "and the multiplier that scales the termperature descrease" << Endl;
   Log() << "(\"TemperatureScale\")" << brk << Endl;
   Log() << " -> increase \"MaxCalls\"" << brk << Endl;
   Log() << " -> adjust \"InitialTemperature\"" << brk << Endl;
   Log() << " -> adjust \"TemperatureScale\"" << brk << Endl;
   Log() << " -> adjust \"KernelTemperature\"" << Endl;
}