MethodCuts.cxx

Go to the documentation of this file.
00001 // @(#)root/tmva $Id: MethodCuts.cxx 37097 2010-11-30 12:28:05Z evt $
00002 // Author: Andreas Hoecker, Matt Jachowski, Peter Speckmayer, Eckhard von Toerne, Helge Voss, Kai Voss
00003 
00004 /**********************************************************************************
00005  * Project: TMVA - a Root-integrated toolkit for multivariate Data analysis       *
00006  * Package: TMVA                                                                  *
00007  * Class  : TMVA::MethodCuts                                                      *
00008  * Web    : http://tmva.sourceforge.net                                           *
00009  *                                                                                *
00010  * Description:                                                                   *
00011  *      Implementation (see header for description)                               *
00012  *                                                                                *
00013  * Authors (alphabetical):                                                        *
00014  *      Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland              *
00015  *      Matt Jachowski  <jachowski@stanford.edu> - Stanford University, USA       *
00016  *      Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland              *
00017  *      Eckhard von Toerne <evt@physik.uni-bonn.de> - U. of Bonn, Germany         *
00018  *      Helge Voss      <Helge.Voss@cern.ch>     - MPI-K Heidelberg, Germany      *
00019  *      Kai Voss        <Kai.Voss@cern.ch>       - U. of Victoria, Canada         *
00020  *                                                                                *
00021  * Copyright (c) 2005:                                                            *
00022  *      CERN, Switzerland                                                         *
00023  *      U. of Victoria, Canada                                                    *
00024  *      MPI-K Heidelberg, Germany                                                 *
00025  *                                                                                *
00026  * Redistribution and use in source and binary forms, with or without             *
00027  * modification, are permitted according to the terms listed in LICENSE           *
00028  * (http://tmva.sourceforge.net/LICENSE)                                          *
00029  **********************************************************************************/
00030 
00031 //_______________________________________________________________________
00032 /* Begin_Html
00033   Multivariate optimisation of signal efficiency for given background
00034   efficiency, applying rectangular minimum and maximum requirements.
00035 
00036   <p>
00037   Also implemented is a "decorrelate/diagonlized cuts approach",
00038   which improves over the uncorrelated cuts ansatz by
00039   transforming linearly the input variables into a diagonal space,
00040   using the square-root of the covariance matrix.
00041 
00042   <p>
00043   <font size="-1">
00044   Other optimisation criteria, such as maximising the signal significance-
00045   squared, S^2/(S+B), with S and B being the signal and background yields,
00046   correspond to a particular point in the optimised background rejection
00047   versus signal efficiency curve. This working point requires the knowledge
00048   of the expected yields, which is not the case in general. Note also that
00049   for rare signals, Poissonian statistics should be used, which modifies
00050   the significance criterion.
00051   </font>
00052 
00053   <p>
00054   The rectangular cut of a volume in the variable space is performed using
00055   a binary tree to sort the training events. This provides a significant
00056   reduction in computing time (up to several orders of magnitudes, depending
00057   on the complexity of the problem at hand).
00058 
00059   <p>
00060   Technically, optimisation is achieved in TMVA by two methods:
00061 
00062   <ol>
00063   <li>Monte Carlo generation using uniform priors for the lower cut value,
00064   and the cut width, thrown within the variable ranges.
00065 
00066   <li>A Genetic Algorithm (GA) searches for the optimal ("fittest") cut sample.
00067   The GA is configurable by many external settings through the option
00068   string. For difficult cases (such as many variables), some tuning
00069   may be necessary to achieve satisfying results
00070   </ol>
00071 
00072   <p>
00073   <font size="-1">
00074   Attempts to use Minuit fits (Simplex ot Migrad) instead have not shown
00075   superior results, and often failed due to convergence at local minima.
00076   </font>
00077 
00078   <p>
00079   The tests we have performed so far showed that in generic applications,
00080   the GA is superior to MC sampling, and hence GA is the default method.
00081   It is worthwhile trying both anyway.
00082 
00083   <b>Decorrelated (or "diagonalized") Cuts</b>
00084 
00085   <p>
00086   See class description for Method Likelihood for a detailed explanation.
00087 End_Html */
00088 //
00089 
00090 #include <iostream>
00091 #include <cstdlib>
00092 
00093 #include "Riostream.h"
00094 #include "TH1F.h"
00095 #include "TObjString.h"
00096 #include "TDirectory.h"
00097 #include "TMath.h"
00098 #include "TGraph.h"
00099 #include "TSpline.h"
00100 #include "TRandom3.h"
00101 
00102 #include "TMVA/ClassifierFactory.h"
00103 #include "TMVA/MethodCuts.h"
00104 #include "TMVA/GeneticFitter.h"
00105 #include "TMVA/MinuitFitter.h"
00106 #include "TMVA/MCFitter.h"
00107 #include "TMVA/SimulatedAnnealingFitter.h"
00108 #include "TMVA/PDF.h"
00109 #include "TMVA/Tools.h"
00110 #include "TMVA/Timer.h"
00111 #include "TMVA/Interval.h"
00112 #include "TMVA/TSpline1.h"
00113 #include "TMVA/Config.h"
00114 #include "TMVA/VariableTransformBase.h"
00115 #include "TMVA/Results.h"
00116 
00117 REGISTER_METHOD(Cuts)
00118 
00119 ClassImp(TMVA::MethodCuts)
00120 
00121 const Double_t TMVA::MethodCuts::fgMaxAbsCutVal = 1.0e30;
00122 
00123 //_______________________________________________________________________
00124 TMVA::MethodCuts::MethodCuts( const TString& jobName,
00125                               const TString& methodTitle,
00126                               DataSetInfo& theData,
00127                               const TString& theOption,
00128                               TDirectory* theTargetDir ) :
00129    MethodBase( jobName, Types::kCuts, methodTitle, theData, theOption, theTargetDir ),
00130    fFitMethod  ( kUseGeneticAlgorithm ),
00131    fEffMethod  ( kUseEventSelection ),
00132    fTestSignalEff(0.7),
00133    fEffSMin    ( 0 ),
00134    fEffSMax    ( 0 ),
00135    fCutRangeMin( 0 ),
00136    fCutRangeMax( 0 ),
00137    fBinaryTreeS( 0 ),
00138    fBinaryTreeB( 0 ),
00139    fCutMin     ( 0 ),
00140    fCutMax     ( 0 ),
00141    fTmpCutMin  ( 0 ),
00142    fTmpCutMax  ( 0 ),
00143    fAllVarsI   ( 0 ),
00144    fNpar       ( 0 ),
00145    fEffRef     ( 0 ),
00146    fRangeSign  ( 0 ),
00147    fRandom     ( 0 ),
00148    fMeanS      ( 0 ),
00149    fMeanB      ( 0 ),
00150    fRmsS       ( 0 ),
00151    fRmsB       ( 0 ),
00152    fEffBvsSLocal( 0 ),
00153    fVarHistS   ( 0 ),
00154    fVarHistB   ( 0 ),
00155    fVarHistS_smooth( 0 ),
00156    fVarHistB_smooth( 0 ),
00157    fVarPdfS    ( 0 ),
00158    fVarPdfB    ( 0 ),
00159    fNegEffWarning( kFALSE )
00160 { 
00161    // standard constructor
00162 }
00163 
00164 //_______________________________________________________________________
00165 TMVA::MethodCuts::MethodCuts( DataSetInfo& theData, 
00166                               const TString& theWeightFile,  
00167                               TDirectory* theTargetDir ) :
00168    MethodBase( Types::kCuts, theData, theWeightFile, theTargetDir ), 
00169    fFitMethod  ( kUseGeneticAlgorithm ),
00170    fEffMethod  ( kUseEventSelection ),
00171    fTestSignalEff(0.7),
00172    fEffSMin    ( 0 ),
00173    fEffSMax    ( 0 ),
00174    fCutRangeMin( 0 ),
00175    fCutRangeMax( 0 ),
00176    fBinaryTreeS( 0 ),
00177    fBinaryTreeB( 0 ),
00178    fCutMin     ( 0 ),
00179    fCutMax     ( 0 ),
00180    fTmpCutMin  ( 0 ),
00181    fTmpCutMax  ( 0 ),
00182    fAllVarsI   ( 0 ),
00183    fNpar       ( 0 ),
00184    fEffRef     ( 0 ),
00185    fRangeSign  ( 0 ),
00186    fRandom     ( 0 ),
00187    fMeanS      ( 0 ),
00188    fMeanB      ( 0 ),
00189    fRmsS       ( 0 ),
00190    fRmsB       ( 0 ),
00191    fEffBvsSLocal( 0 ),
00192    fVarHistS   ( 0 ),
00193    fVarHistB   ( 0 ),
00194    fVarHistS_smooth( 0 ),
00195    fVarHistB_smooth( 0 ),
00196    fVarPdfS    ( 0 ),
00197    fVarPdfB    ( 0 ),
00198    fNegEffWarning( kFALSE )
00199 {
00200    // construction from weight file
00201 }
00202 
00203 //_______________________________________________________________________
00204 Bool_t TMVA::MethodCuts::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, 
00205                                           UInt_t /*numberTargets*/ )
00206 {
00207    // Cuts can only handle classification with 2 classes
00208    return (type == Types::kClassification && numberClasses == 2);
00209 }
00210 
00211 //_______________________________________________________________________
00212 void TMVA::MethodCuts::Init( void ) 
00213 {
00214    // default initialisation called by all constructors
00215    fVarHistS          = fVarHistB = 0;                 
00216    fVarHistS_smooth   = fVarHistB_smooth = 0;
00217    fVarPdfS           = fVarPdfB = 0; 
00218    fFitParams         = 0;
00219    fBinaryTreeS       = fBinaryTreeB = 0;
00220    fEffSMin           = 0;
00221    fEffSMax           = 0; 
00222 
00223    // vector with fit results
00224    fNpar      = 2*GetNvar();
00225    fRangeSign = new vector<Int_t>   ( GetNvar() );
00226    for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*fRangeSign)[ivar] = +1;
00227 
00228    fMeanS     = new vector<Double_t>( GetNvar() ); 
00229    fMeanB     = new vector<Double_t>( GetNvar() ); 
00230    fRmsS      = new vector<Double_t>( GetNvar() );  
00231    fRmsB      = new vector<Double_t>( GetNvar() );  
00232 
00233    // get the variable specific options, first initialize default
00234    fFitParams = new vector<EFitParameters>( GetNvar() );
00235    for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*fFitParams)[ivar] = kNotEnforced;
00236 
00237    fFitMethod = kUseMonteCarlo;
00238    fTestSignalEff = -1;
00239 
00240    // create LUT for cuts
00241    fCutMin = new Double_t*[GetNvar()];
00242    fCutMax = new Double_t*[GetNvar()];
00243    for (UInt_t i=0; i<GetNvar(); i++) {
00244       fCutMin[i] = new Double_t[fNbins];
00245       fCutMax[i] = new Double_t[fNbins];
00246    }
00247   
00248    // init
00249    for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00250       for (Int_t ibin=0; ibin<fNbins; ibin++) {
00251          fCutMin[ivar][ibin] = 0;
00252          fCutMax[ivar][ibin] = 0;
00253       }
00254    }
00255 
00256    fTmpCutMin = new Double_t[GetNvar()];
00257    fTmpCutMax = new Double_t[GetNvar()];
00258 }
00259 
00260 //_______________________________________________________________________
00261 TMVA::MethodCuts::~MethodCuts( void )
00262 {
00263    // destructor
00264    delete fRangeSign;
00265    delete fMeanS;
00266    delete fMeanB;
00267    delete fRmsS;
00268    delete fRmsB;
00269    delete fFitParams;
00270    delete fEffBvsSLocal;
00271 
00272    if (NULL != fCutRangeMin) delete [] fCutRangeMin;
00273    if (NULL != fCutRangeMax) delete [] fCutRangeMax;
00274    if (NULL != fAllVarsI)    delete [] fAllVarsI;
00275 
00276    for (UInt_t i=0;i<GetNvar();i++) {
00277       if (NULL != fCutMin[i]  ) delete [] fCutMin[i];
00278       if (NULL != fCutMax[i]  ) delete [] fCutMax[i];
00279       if (NULL != fCutRange[i]) delete fCutRange[i];
00280    }
00281 
00282    if (NULL != fCutMin) delete [] fCutMin;
00283    if (NULL != fCutMax) delete [] fCutMax;
00284 
00285    if (NULL != fTmpCutMin) delete [] fTmpCutMin;
00286    if (NULL != fTmpCutMax) delete [] fTmpCutMax;
00287 
00288    if (NULL != fBinaryTreeS) delete fBinaryTreeS;
00289    if (NULL != fBinaryTreeB) delete fBinaryTreeB;
00290 }
00291 
00292 //_______________________________________________________________________
00293 void TMVA::MethodCuts::DeclareOptions() 
00294 {
00295    // define the options (their key words) that can be set in the option string 
00296    // know options:
00297    // Method             <string> Minimisation method
00298    //    available values are:        MC Monte Carlo <default>
00299    //                                 GA Genetic Algorithm
00300    //                                 SA Simulated annealing
00301    //
00302    // EffMethod          <string> Efficiency selection method
00303    //    available values are:        EffSel <default>
00304    //                                 EffPDF
00305    //
00306    // VarProp            <string> Property of variable 1 for the MC method (taking precedence over the
00307    //    globale setting. The same values as for the global option are available. Variables 1..10 can be
00308    //    set this way
00309    //
00310    // CutRangeMin/Max    <float>  user-defined ranges in which cuts are varied
00311 
00312    DeclareOptionRef(fFitMethodS = "GA", "FitMethod", "Minimisation Method (GA, SA, and MC are the primary methods to be used; the others have been introduced for testing purposes and are depreciated)");
00313    AddPreDefVal(TString("GA"));
00314    AddPreDefVal(TString("SA"));
00315    AddPreDefVal(TString("MC"));
00316    AddPreDefVal(TString("MCEvents"));
00317    AddPreDefVal(TString("MINUIT"));
00318    AddPreDefVal(TString("EventScan"));
00319 
00320    // selection type
00321    DeclareOptionRef(fEffMethodS = "EffSel", "EffMethod", "Selection Method");
00322    AddPreDefVal(TString("EffSel"));
00323    AddPreDefVal(TString("EffPDF"));
00324 
00325    // cut ranges 
00326    fCutRange.resize(GetNvar());
00327    fCutRangeMin = new Double_t[GetNvar()];
00328    fCutRangeMax = new Double_t[GetNvar()];
00329    for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00330       fCutRange[ivar] = 0;
00331       fCutRangeMin[ivar] = fCutRangeMax[ivar] = -1;
00332    }
00333 
00334    DeclareOptionRef( fCutRangeMin, GetNvar(), "CutRangeMin", "Minimum of allowed cut range (set per variable)" );
00335    DeclareOptionRef( fCutRangeMax, GetNvar(), "CutRangeMax", "Maximum of allowed cut range (set per variable)" );   
00336 
00337    fAllVarsI = new TString[GetNvar()];
00338 
00339    for (UInt_t i=0; i<GetNvar(); i++) fAllVarsI[i] = "NotEnforced";  
00340 
00341    DeclareOptionRef(fAllVarsI, GetNvar(), "VarProp", "Categorisation of cuts");  
00342    AddPreDefVal(TString("NotEnforced"));
00343    AddPreDefVal(TString("FMax"));
00344    AddPreDefVal(TString("FMin"));
00345    AddPreDefVal(TString("FSmart"));
00346 }
00347 
00348 //_______________________________________________________________________
00349 void TMVA::MethodCuts::ProcessOptions() 
00350 {
00351    // process user options
00352    // sanity check, do not allow the input variables to be normalised, because this 
00353    // only creates problems when interpreting the cuts
00354    if (IsNormalised()) {
00355       Log() << kWARNING << "Normalisation of the input variables for cut optimisation is not" << Endl;
00356       Log() << kWARNING << "supported because this provides intransparent cut values, and no" << Endl;
00357       Log() << kWARNING << "improvement in the performance of the algorithm." << Endl;
00358       Log() << kWARNING << "Please remove \"Normalise\" option from booking option string" << Endl;
00359       Log() << kWARNING << "==> Will reset normalisation flag to \"False\"" << Endl;
00360       SetNormalised( kFALSE );
00361    }
00362 
00363    if (IgnoreEventsWithNegWeightsInTraining()) {
00364       Log() << kFATAL << "Mechanism to ignore events with negative weights in training not yet available for method: "
00365             << GetMethodTypeName() 
00366             << " --> Please remove \"IgnoreNegWeightsInTraining\" option from booking string."
00367             << Endl;
00368    }
00369 
00370    if      (fFitMethodS == "MC"      ) fFitMethod = kUseMonteCarlo;
00371    else if (fFitMethodS == "MCEvents") fFitMethod = kUseMonteCarloEvents;
00372    else if (fFitMethodS == "GA"      ) fFitMethod = kUseGeneticAlgorithm;
00373    else if (fFitMethodS == "SA"      ) fFitMethod = kUseSimulatedAnnealing;
00374    else if (fFitMethodS == "MINUIT"  ) {
00375       fFitMethod = kUseMinuit;
00376       Log() << kWARNING << "poor performance of MINUIT in MethodCuts; preferred fit method: GA" << Endl;
00377    }
00378    else if (fFitMethodS == "EventScan" ) fFitMethod = kUseEventScan;
00379    else Log() << kFATAL << "unknown minimisation method: " << fFitMethodS << Endl;
00380 
00381    if      (fEffMethodS == "EFFSEL" ) fEffMethod = kUseEventSelection; // highly recommended
00382    else if (fEffMethodS == "EFFPDF" ) fEffMethod = kUsePDFs;
00383    else                               fEffMethod = kUseEventSelection;
00384 
00385    // options output
00386    Log() << kINFO << Form("Use optimization method: \"%s\"", 
00387                             (fFitMethod == kUseMonteCarlo) ? "Monte Carlo" : 
00388                             (fFitMethod == kUseMonteCarlo) ? "Monte-Carlo-Event sampling" : 
00389                             (fFitMethod == kUseEventScan)  ? "Full Event Scan (slow)" :
00390                             (fFitMethod == kUseMinuit)     ? "MINUIT" : "Genetic Algorithm" ) << Endl;
00391    Log() << kINFO << Form("Use efficiency computation method: \"%s\"", 
00392                             (fEffMethod == kUseEventSelection) ? "Event Selection" : "PDF" ) << Endl;
00393 
00394    // cut ranges
00395    for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00396       fCutRange[ivar] = new Interval( fCutRangeMin[ivar], fCutRangeMax[ivar] );
00397    }
00398 
00399    // individual options
00400    for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00401       EFitParameters theFitP = kNotEnforced;      
00402       if (fAllVarsI[ivar] == "" || fAllVarsI[ivar] == "NotEnforced") theFitP = kNotEnforced;
00403       else if (fAllVarsI[ivar] == "FMax" )                           theFitP = kForceMax;
00404       else if (fAllVarsI[ivar] == "FMin" )                           theFitP = kForceMin;
00405       else if (fAllVarsI[ivar] == "FSmart" )                         theFitP = kForceSmart;
00406       else {
00407          Log() << kFATAL << "unknown value \'" << fAllVarsI[ivar]
00408                  << "\' for fit parameter option " << Form("VarProp[%i]",ivar) << Endl;
00409       }
00410       (*fFitParams)[ivar] = theFitP;
00411       
00412       if (theFitP != kNotEnforced) 
00413          Log() << kINFO << "Use \"" << fAllVarsI[ivar] 
00414                  << "\" cuts for variable: " << "'" << (*fInputVars)[ivar] << "'" << Endl;
00415    }
00416 }
00417 
00418 //_______________________________________________________________________
00419 Double_t TMVA::MethodCuts::GetMvaValue( Double_t* err, Double_t* errUpper )
00420 {
00421    // cut evaluation: returns 1.0 if event passed, 0.0 otherwise
00422 
00423    // cannot determine error
00424    NoErrorCalc(err, errUpper);
00425 
00426    // sanity check
00427    if (fCutMin == NULL || fCutMax == NULL || fNbins == 0) {
00428       Log() << kFATAL << "<Eval_Cuts> fCutMin/Max have zero pointer. "
00429               << "Did you book Cuts ?" << Endl;
00430    }
00431 
00432    const Event* ev = GetEvent();
00433 
00434    // sanity check
00435    if (fTestSignalEff > 0) {
00436       // get efficiency bin
00437       Int_t ibin = fEffBvsSLocal->FindBin( fTestSignalEff );
00438       if (ibin < 0      ) ibin = 0;
00439       else if (ibin >= fNbins) ibin = fNbins - 1;
00440 
00441       Bool_t passed = kTRUE;
00442       for (UInt_t ivar=0; ivar<GetNvar(); ivar++)
00443          passed &= ( (ev->GetValue(ivar) >  fCutMin[ivar][ibin]) &&
00444                      (ev->GetValue(ivar) <= fCutMax[ivar][ibin]) );
00445 
00446       return passed ? 1. : 0. ;
00447    }
00448    else return 0;
00449 }
00450 
00451 //_______________________________________________________________________
00452 void TMVA::MethodCuts::PrintCuts( Double_t effS ) const
00453 {
00454    // print cuts
00455 
00456    std::vector<Double_t> cutsMin;
00457    std::vector<Double_t> cutsMax;
00458    Int_t ibin = fEffBvsSLocal->FindBin( effS );
00459 
00460    Double_t trueEffS = GetCuts( effS, cutsMin, cutsMax );
00461 
00462    // retrieve variable expressions (could be transformations)
00463    std::vector<TString>* varVec = 0;
00464    if (GetTransformationHandler().GetNumOfTransformations() == 0) {
00465       // no transformation applied, replace by current variables
00466       varVec = new std::vector<TString>;
00467       for (UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
00468          varVec->push_back( DataInfo().GetVariableInfo(ivar).GetLabel() );
00469       }
00470    }
00471    else if (GetTransformationHandler().GetNumOfTransformations() == 1) {
00472       // get transformation string
00473       varVec = GetTransformationHandler().GetTransformationStringsOfLastTransform();
00474    }
00475    else {
00476       // replace transformation print by current variables and indicated incompleteness
00477       varVec = new std::vector<TString>;
00478       for (UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
00479          varVec->push_back( DataInfo().GetVariableInfo(ivar).GetLabel() + " [transformed]" );
00480       }
00481    }
00482    
00483    UInt_t maxL = 0;
00484    for (UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
00485       if ((UInt_t)(*varVec)[ivar].Length() > maxL) maxL = (*varVec)[ivar].Length();
00486    }
00487    UInt_t maxLine = 20+maxL+16;
00488 
00489    for (UInt_t i=0; i<maxLine; i++) Log() << "-";
00490    Log() << Endl;
00491    Log() << kINFO << "Cut values for requested signal efficiency: " << trueEffS << Endl;
00492    Log() << kINFO << "Corresponding background efficiency       : " << fEffBvsSLocal->GetBinContent( ibin ) << Endl;
00493    if (GetTransformationHandler().GetNumOfTransformations() == 1) {
00494       Log() << kINFO << "Transformation applied to input variables : \"" 
00495               << GetTransformationHandler().GetNameOfLastTransform() << "\"" << Endl;
00496    }
00497    else if (GetTransformationHandler().GetNumOfTransformations() > 1) {
00498       Log() << kINFO << "[ More than one (=" << GetTransformationHandler().GetNumOfTransformations() << ") "
00499               << " transformations applied in transformation chain; cuts applied on transformed quantities ] " << Endl;
00500    }
00501    else {
00502       Log() << kINFO << "Transformation applied to input variables : None"  << Endl;
00503    }
00504    for (UInt_t i=0; i<maxLine; i++) Log() << "-";
00505    Log() << Endl;
00506    for (UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
00507       Log() << kINFO 
00508               << "Cut[" << setw(2) << ivar << "]: " 
00509               << setw(10) << cutsMin[ivar] 
00510               << " < " 
00511               << setw(maxL) << (*varVec)[ivar]
00512               << " <= " 
00513               << setw(10) << cutsMax[ivar] << Endl;
00514    }
00515    for (UInt_t i=0; i<maxLine; i++) Log() << "-";
00516    Log() << Endl;
00517 
00518    delete varVec; // yes, ownership has been given to us 
00519 }
00520 
00521 //_______________________________________________________________________
00522 Double_t TMVA::MethodCuts::GetCuts( Double_t effS, Double_t* cutMin, Double_t* cutMax ) const
00523 {
00524    // retrieve cut values for given signal efficiency
00525    // assume vector of correct size !!
00526 
00527    std::vector<Double_t> cMin( GetNvar() );
00528    std::vector<Double_t> cMax( GetNvar() );
00529    Double_t trueEffS = GetCuts( effS, cMin, cMax );
00530    for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00531       cutMin[ivar] = cMin[ivar];
00532       cutMax[ivar] = cMax[ivar];
00533    }   
00534    return trueEffS;
00535 }
00536 
00537 //_______________________________________________________________________
00538 Double_t TMVA::MethodCuts::GetCuts( Double_t effS, 
00539                                     std::vector<Double_t>& cutMin, 
00540                                     std::vector<Double_t>& cutMax ) const
00541 {
00542    // retrieve cut values for given signal efficiency
00543 
00544    // find corresponding bin
00545    Int_t ibin = fEffBvsSLocal->FindBin( effS );
00546 
00547    // get the true efficiency which is the one on the "left hand" side of the bin
00548    Double_t trueEffS = fEffBvsSLocal->GetBinLowEdge( ibin );
00549 
00550    ibin--; // the 'cut' vector has 0...fNbins indices
00551    if      (ibin < 0      ) ibin = 0;
00552    else if (ibin >= fNbins) ibin = fNbins - 1;
00553 
00554    cutMin.clear();
00555    cutMax.clear();
00556    for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00557       cutMin.push_back( fCutMin[ivar][ibin]  );
00558       cutMax.push_back( fCutMax[ivar][ibin] );
00559    }   
00560 
00561    return trueEffS;
00562 }
00563 
00564 //_______________________________________________________________________
00565 void  TMVA::MethodCuts::Train( void )
00566 {
00567    // training method: here the cuts are optimised for the training sample
00568    
00569    if (fEffMethod == kUsePDFs) CreateVariablePDFs(); // create PDFs for variables
00570 
00571    // create binary trees (global member variables) for signal and background
00572    if (fBinaryTreeS != 0) { delete fBinaryTreeS; fBinaryTreeS = 0; }
00573    if (fBinaryTreeB != 0) { delete fBinaryTreeB; fBinaryTreeB = 0; }
00574 
00575    // the variables may be transformed by a transformation method: to coherently 
00576    // treat signal and background one must decide which transformation type shall 
00577    // be used: our default is signal-type
00578    
00579    fBinaryTreeS = new BinarySearchTree();
00580    fBinaryTreeS->Fill( GetEventCollection(Types::kTraining), fSignalClass );
00581    fBinaryTreeB = new BinarySearchTree();
00582    fBinaryTreeB->Fill( GetEventCollection(Types::kTraining), fBackgroundClass );
00583 
00584    for (UInt_t ivar =0; ivar < Data()->GetNVariables(); ivar++) {
00585       (*fMeanS)[ivar] = fBinaryTreeS->Mean(Types::kSignal, ivar);
00586       (*fRmsS)[ivar]  = fBinaryTreeS->RMS (Types::kSignal, ivar);
00587       (*fMeanB)[ivar] = fBinaryTreeB->Mean(Types::kBackground, ivar);
00588       (*fRmsB)[ivar]  = fBinaryTreeB->RMS (Types::kBackground, ivar);
00589 
00590       // update interval ?
00591       Double_t xmin = TMath::Min(fBinaryTreeS->Min(Types::kSignal,     ivar), 
00592                                  fBinaryTreeB->Min(Types::kBackground, ivar));
00593       Double_t xmax = TMath::Max(fBinaryTreeS->Max(Types::kSignal,     ivar), 
00594                                  fBinaryTreeB->Max(Types::kBackground, ivar));
00595 
00596       // redefine ranges to be slightly smaller and larger than xmin and xmax, respectively
00597       Double_t eps = 0.01*(xmax - xmin);
00598       xmin -= eps;
00599       xmax += eps;
00600 
00601       if (TMath::Abs(fCutRange[ivar]->GetMin() - fCutRange[ivar]->GetMax()) < 1.0e-300 ) {
00602          fCutRange[ivar]->SetMin( xmin );
00603          fCutRange[ivar]->SetMax( xmax );
00604       }         
00605       else if (xmin > fCutRange[ivar]->GetMin()) fCutRange[ivar]->SetMin( xmin );
00606       else if (xmax < fCutRange[ivar]->GetMax()) fCutRange[ivar]->SetMax( xmax );
00607    }   
00608 
00609    vector<TH1F*> signalDist, bkgDist;
00610 
00611    // this is important: reset the branch addresses of the training tree to the current event
00612    delete fEffBvsSLocal;
00613    fEffBvsSLocal = new TH1F( GetTestvarName() + "_effBvsSLocal", 
00614                              TString(GetName()) + " efficiency of B vs S", fNbins, 0.0, 1.0 );
00615    fEffBvsSLocal->SetDirectory(0); // it's local
00616 
00617    // init
00618    for (Int_t ibin=1; ibin<=fNbins; ibin++) fEffBvsSLocal->SetBinContent( ibin, -0.1 ); 
00619 
00620    // --------------------------------------------------------------------------
00621    if (fFitMethod == kUseGeneticAlgorithm || 
00622        fFitMethod == kUseMonteCarlo       || 
00623        fFitMethod == kUseMinuit           || 
00624        fFitMethod == kUseSimulatedAnnealing) {
00625 
00626       // ranges
00627       vector<Interval*> ranges;
00628 
00629       for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00630 
00631          Int_t nbins = 0;
00632          if (DataInfo().GetVariableInfo(ivar).GetVarType() == 'I') {
00633             nbins = Int_t(fCutRange[ivar]->GetMax() - fCutRange[ivar]->GetMin()) + 1;
00634          }
00635 
00636          if ((*fFitParams)[ivar] == kForceSmart) {
00637             if ((*fMeanS)[ivar] > (*fMeanB)[ivar]) (*fFitParams)[ivar] = kForceMax;
00638             else                                   (*fFitParams)[ivar] = kForceMin;          
00639          }         
00640 
00641          if ((*fFitParams)[ivar] == kForceMin) {
00642             ranges.push_back( new Interval( fCutRange[ivar]->GetMin(), fCutRange[ivar]->GetMin(), nbins ) );
00643             ranges.push_back( new Interval( 0, fCutRange[ivar]->GetMax() - fCutRange[ivar]->GetMin(), nbins ) );
00644          }
00645          else if ((*fFitParams)[ivar] == kForceMax) {
00646             ranges.push_back( new Interval( fCutRange[ivar]->GetMin(), fCutRange[ivar]->GetMax(), nbins ) );
00647             ranges.push_back( new Interval( fCutRange[ivar]->GetMax() - fCutRange[ivar]->GetMin(), 
00648                                             fCutRange[ivar]->GetMax() - fCutRange[ivar]->GetMin(), nbins ) );
00649          }
00650          else {
00651             ranges.push_back( new Interval( fCutRange[ivar]->GetMin(), fCutRange[ivar]->GetMax(), nbins ) );
00652             ranges.push_back( new Interval( 0, fCutRange[ivar]->GetMax() - fCutRange[ivar]->GetMin(), nbins ) );
00653          }
00654       }
00655 
00656       // create the fitter
00657       FitterBase* fitter = NULL;
00658       
00659       switch (fFitMethod) {
00660       case kUseGeneticAlgorithm:
00661          fitter = new GeneticFitter( *this, Form("%sFitter_GA",     GetName()), ranges, GetOptions() );
00662          break;
00663       case kUseMonteCarlo:
00664          fitter = new MCFitter     ( *this, Form("%sFitter_MC",     GetName()), ranges, GetOptions() );
00665          break;
00666       case kUseMinuit:
00667          fitter = new MinuitFitter ( *this, Form("%sFitter_MINUIT", GetName()), ranges, GetOptions() );
00668          break;
00669       case kUseSimulatedAnnealing:
00670          fitter = new SimulatedAnnealingFitter( *this, Form("%sFitter_SA", GetName()), ranges, GetOptions() );
00671          break;
00672       default:
00673          Log() << kFATAL << "Wrong fit method: " << fFitMethod << Endl;
00674       }
00675 
00676       fitter->CheckForUnusedOptions();
00677 
00678       // perform the fit
00679       fitter->Run();      
00680 
00681       // clean up
00682       for (UInt_t ivar=0; ivar<ranges.size(); ivar++) delete ranges[ivar];
00683       delete fitter;
00684       
00685    }
00686    // --------------------------------------------------------------------------
00687    else if (fFitMethod == kUseEventScan) {
00688 
00689       Int_t nevents = Data()->GetNEvents();
00690       Int_t ic = 0;
00691 
00692       // timing of MC
00693       Int_t nsamples = Int_t(0.5*nevents*(nevents - 1));
00694       Timer timer( nsamples, GetName() ); 
00695 
00696       Log() << kINFO << "Running full event scan: " << Endl;
00697       for (Int_t ievt1=0; ievt1<nevents; ievt1++) {
00698          for (Int_t ievt2=ievt1+1; ievt2<nevents; ievt2++) {
00699 
00700             EstimatorFunction( ievt1, ievt2 );
00701 
00702             // what's the time please?
00703             ic++;
00704             if ((nsamples<10000) || ic%10000 == 0) timer.DrawProgressBar( ic );
00705          }
00706       }
00707    }
00708    // --------------------------------------------------------------------------
00709    else if (fFitMethod == kUseMonteCarloEvents) {
00710 
00711       Int_t  nsamples = 200000;
00712       UInt_t seed     = 100;
00713       DeclareOptionRef( nsamples, "SampleSize", "Number of Monte-Carlo-Event samples" );  
00714       DeclareOptionRef( seed,     "Seed",       "Seed for the random generator (0 takes random seeds)" );  
00715       ParseOptions();
00716 
00717       Int_t nevents = Data()->GetNEvents();
00718       Int_t ic = 0;
00719 
00720       // timing of MC
00721       Timer timer( nsamples, GetName() ); 
00722 
00723       // random generator
00724       TRandom3*rnd = new TRandom3( seed );
00725 
00726       Log() << kINFO << "Running Monte-Carlo-Event sampling over " << nsamples << " events" << Endl;
00727       std::vector<Double_t> pars( 2*GetNvar() );
00728       
00729       for (Int_t itoy=0; itoy<nsamples; itoy++) {
00730 
00731          for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00732             
00733             // generate minimum and delta cuts for this variable
00734 
00735             // retrieve signal events
00736             Bool_t isSignal = kFALSE;
00737             Int_t    ievt1, ievt2;
00738             Double_t evt1, evt2;
00739             Int_t nbreak = 0;
00740             while (!isSignal) {
00741                ievt1 = Int_t(rnd->Uniform(0.,1.)*nevents);
00742                ievt2 = Int_t(rnd->Uniform(0.,1.)*nevents);
00743 
00744                const Event *ev1 = GetEvent(ievt1);
00745                isSignal = DataInfo().IsSignal(ev1);
00746                evt1 = ev1->GetValue( ivar );
00747 
00748                const Event *ev2 = GetEvent(ievt2);
00749                isSignal &= DataInfo().IsSignal(ev2);
00750                evt2 = ev2->GetValue( ivar );
00751                
00752                if (nbreak++ > 10000) Log() << kFATAL << "<MCEvents>: could not find signal events" 
00753                                              << " after 10000 trials - do you have signal events in your sample ?" 
00754                                              << Endl;
00755                isSignal = 1;
00756             }
00757 
00758             // sort
00759             if (evt1 > evt2) { Double_t z = evt1; evt1 = evt2; evt2 = z; }
00760             pars[2*ivar]   = evt1;
00761             pars[2*ivar+1] = evt2 - evt1;
00762          }
00763 
00764          // compute estimator
00765          EstimatorFunction( pars );
00766          
00767          // what's the time please?
00768          ic++;
00769          if ((nsamples<1000) || ic%1000 == 0) timer.DrawProgressBar( ic );
00770       }
00771 
00772       delete rnd;
00773    }
00774    // --------------------------------------------------------------------------
00775    else Log() << kFATAL << "Unknown minimisation method: " << fFitMethod << Endl;
00776 
00777    if (fBinaryTreeS != 0) { delete fBinaryTreeS; fBinaryTreeS = 0; }
00778    if (fBinaryTreeB != 0) { delete fBinaryTreeB; fBinaryTreeB = 0; }
00779 
00780    // force cut ranges within limits
00781    for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00782       for (Int_t ibin=0; ibin<fNbins; ibin++) {
00783 
00784          if ((*fFitParams)[ivar] == kForceMin && fCutMin[ivar][ibin] > -fgMaxAbsCutVal) {
00785             fCutMin[ivar][ibin] = -fgMaxAbsCutVal;
00786          }
00787          if ((*fFitParams)[ivar] == kForceMax && fCutMax[ivar][ibin] < fgMaxAbsCutVal) {
00788             fCutMax[ivar][ibin] = fgMaxAbsCutVal;
00789          }
00790       }
00791    }
00792 
00793    // some output
00794    // the efficiency which is asked for has to be slightly higher than the bin-borders.
00795    // if not, then the wrong bin is taken in some cases.
00796    Double_t epsilon = 0.0001;
00797    for (Double_t eff=0.1; eff<0.95; eff += 0.1) PrintCuts( eff+epsilon );
00798 }
00799 
00800 //_______________________________________________________________________
00801 void TMVA::MethodCuts::TestClassification()
00802 {
00803    // nothing to test
00804 }
00805 
00806 //_______________________________________________________________________
00807 Double_t TMVA::MethodCuts::EstimatorFunction( Int_t ievt1, Int_t ievt2 )
00808 {
00809    // for full event scan
00810    const Event *ev1 = GetEvent(ievt1);
00811    if (!DataInfo().IsSignal(ev1)) return -1;
00812 
00813    const Event *ev2 = GetEvent(ievt2);
00814    if (!DataInfo().IsSignal(ev2)) return -1;
00815 
00816    const Int_t nvar = GetNvar();
00817    Double_t* evt1 = new Double_t[nvar];
00818    Double_t* evt2 = new Double_t[nvar];
00819 
00820    for (Int_t ivar=0; ivar<nvar; ivar++) {
00821       evt1[ivar] = ev1->GetValue( ivar );
00822       evt2[ivar] = ev2->GetValue( ivar );
00823    }
00824 
00825    // determine cuts
00826    std::vector<Double_t> pars;
00827    for (Int_t ivar=0; ivar<nvar; ivar++) {
00828       Double_t cutMin;
00829       Double_t cutMax;
00830       if (evt1[ivar] < evt2[ivar]) {
00831          cutMin = evt1[ivar];
00832          cutMax = evt2[ivar];
00833       }
00834       else {
00835          cutMin = evt2[ivar];
00836          cutMax = evt1[ivar];
00837       }
00838 
00839       pars.push_back( cutMin );
00840       pars.push_back( cutMax - cutMin );
00841    }
00842 
00843    delete [] evt1;
00844    delete [] evt2;
00845 
00846    return ComputeEstimator( pars );
00847 }
00848 
00849 //_______________________________________________________________________
00850 Double_t TMVA::MethodCuts::EstimatorFunction( std::vector<Double_t>& pars )
00851 {
00852    // returns estimator for "cut fitness" used by GA
00853    return ComputeEstimator( pars );
00854 }
00855 
00856 //_______________________________________________________________________
00857 Double_t TMVA::MethodCuts::ComputeEstimator( std::vector<Double_t>& pars )
00858 {
00859    // returns estimator for "cut fitness" used by GA
00860    // there are two requirements:
00861    // 1) the signal efficiency must be equal to the required one in the 
00862    //    efficiency scan
00863    // 2) the background efficiency must be as small as possible
00864    // the requirement 1) has priority over 2)
00865 
00866    // caution: the npar gives the _free_ parameters
00867    // however: the "pars" array contains all parameters
00868 
00869    // determine cuts
00870    Double_t effS = 0, effB = 0;
00871    this->MatchParsToCuts( pars, &fTmpCutMin[0], &fTmpCutMax[0] );
00872 
00873    // retrieve signal and background efficiencies for given cut
00874    switch (fEffMethod) {
00875    case kUsePDFs:
00876       this->GetEffsfromPDFs( &fTmpCutMin[0], &fTmpCutMax[0], effS, effB );
00877       break;
00878    case kUseEventSelection:
00879       this->GetEffsfromSelection( &fTmpCutMin[0], &fTmpCutMax[0], effS, effB);
00880       break;
00881    default:
00882       this->GetEffsfromSelection( &fTmpCutMin[0], &fTmpCutMax[0], effS, effB);
00883    }
00884 
00885    Double_t eta = 0;      
00886    
00887    // test for a estimator function which optimizes on the whole background-rejection signal-efficiency plot
00888    
00889    // get the backg-reject. and sig-eff for the parameters given to this function
00890    // effS, effB
00891       
00892    // get best background rejection for given signal efficiency
00893    Int_t ibinS = fEffBvsSLocal->FindBin( effS );      
00894       
00895    Double_t effBH       = fEffBvsSLocal->GetBinContent( ibinS );
00896    Double_t effBH_left  = (ibinS > 1     ) ? fEffBvsSLocal->GetBinContent( ibinS-1 ) : effBH;
00897    Double_t effBH_right = (ibinS < fNbins) ? fEffBvsSLocal->GetBinContent( ibinS+1 ) : effBH;
00898 
00899    Double_t average = 0.5*(effBH_left + effBH_right);
00900    if (effBH < effB) average = effBH;
00901 
00902    // if the average of the bin right and left is larger than this one, add the difference to 
00903    // the current value of the estimator (because you can do at least so much better)
00904    eta = ( -TMath::Abs(effBH-average) + (1.0 - (effBH - effB))) / (1.0 + effS); 
00905    // alternative idea
00906    //if (effBH<0) eta = (1.e-6+effB)/(1.0 + effS);
00907    //else eta =  (effB - effBH) * (1.0 + 10.* effS);
00908 
00909    // if a point is found which is better than an existing one, ... replace it. 
00910    // preliminary best event -> backup
00911    if (effBH < 0 || effBH > effB) {
00912       fEffBvsSLocal->SetBinContent( ibinS, effB );
00913       for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00914          fCutMin[ivar][ibinS-1] = fTmpCutMin[ivar]; // bin 1 stored in index 0
00915          fCutMax[ivar][ibinS-1] = fTmpCutMax[ivar];
00916       }
00917    }
00918    
00919    // caution (!) this value is not good for a decision for MC, .. it is designed for GA
00920    // but .. it doesn't matter, as MC samplings are independent from the former ones
00921    // and the replacement of the best variables by better ones is done about 10 lines above. 
00922    // ( if (effBH < 0 || effBH > effB) { .... )
00923 
00924    if (ibinS<=1) {
00925       // add penalty for effS=0 bin 
00926       // to avoid that the minimizer gets stuck in the zero-bin
00927       // force it towards higher efficiency
00928       Double_t penalty=0.,diff=0.;
00929       for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00930          diff=(fCutRange[ivar]->GetMax()-fTmpCutMax[ivar])/(fCutRange[ivar]->GetMax()-fCutRange[ivar]->GetMin());
00931          penalty+=diff*diff;
00932          diff=(fCutRange[ivar]->GetMin()-fTmpCutMin[ivar])/(fCutRange[ivar]->GetMax()-fCutRange[ivar]->GetMin());
00933          penalty+=4.*diff*diff;
00934       }
00935       //Log() << kINFO<<"special treatment of "<<ibinS<<" bin penalty="<< penalty<<" effS="<<effS<<Endl;
00936       if (effS<1.e-4) return 10.0+penalty;
00937       else return 10.*(1.-10.*effS);
00938    }
00939    return eta;
00940 }
00941 
00942 //_______________________________________________________________________
00943 void TMVA::MethodCuts::MatchParsToCuts( const std::vector<Double_t> & pars, 
00944                                         Double_t* cutMin, Double_t* cutMax )
00945 {
00946    // translates parameters into cuts
00947    for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00948       Int_t ipar = 2*ivar;
00949       cutMin[ivar] = ((*fRangeSign)[ivar] > 0) ? pars[ipar] : pars[ipar] - pars[ipar+1];
00950       cutMax[ivar] = ((*fRangeSign)[ivar] > 0) ? pars[ipar] + pars[ipar+1] : pars[ipar]; 
00951    }
00952 }
00953 
00954 //_______________________________________________________________________
00955 void TMVA::MethodCuts::MatchCutsToPars( std::vector<Double_t>& pars, 
00956                                         Double_t** cutMinAll, Double_t** cutMaxAll, Int_t ibin )
00957 {
00958    // translate the cuts into parameters (obsolete function)
00959    if (ibin < 1 || ibin > fNbins) Log() << kFATAL << "::MatchCutsToPars: bin error: "
00960                                           << ibin << Endl;
00961    
00962    const UInt_t nvar = GetNvar();
00963    Double_t *cutMin = new Double_t[nvar];
00964    Double_t *cutMax = new Double_t[nvar];
00965    for (UInt_t ivar=0; ivar<nvar; ivar++) {
00966       cutMin[ivar] = cutMinAll[ivar][ibin-1];
00967       cutMax[ivar] = cutMaxAll[ivar][ibin-1];
00968    }
00969    
00970    MatchCutsToPars( pars, cutMin, cutMax );
00971    delete [] cutMin;
00972    delete [] cutMax;
00973 }
00974 
00975 //_______________________________________________________________________
00976 void TMVA::MethodCuts::MatchCutsToPars( std::vector<Double_t>& pars, 
00977                                         Double_t* cutMin, Double_t* cutMax )
00978 {
00979    // translates cuts into parameters
00980    for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00981       Int_t ipar = 2*ivar;
00982       pars[ipar]   = ((*fRangeSign)[ivar] > 0) ? cutMin[ivar] : cutMax[ivar];
00983       pars[ipar+1] = cutMax[ivar] - cutMin[ivar];
00984    }
00985 }
00986 
00987 //_______________________________________________________________________
00988 void TMVA::MethodCuts::GetEffsfromPDFs( Double_t* cutMin, Double_t* cutMax,
00989                                         Double_t& effS, Double_t& effB )
00990 {
00991    // compute signal and background efficiencies from PDFs 
00992    // for given cut sample
00993    effS = 1.0;
00994    effB = 1.0;
00995    for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00996       effS *= (*fVarPdfS)[ivar]->GetIntegral( cutMin[ivar], cutMax[ivar] );
00997       effB *= (*fVarPdfB)[ivar]->GetIntegral( cutMin[ivar], cutMax[ivar] );
00998    }
00999 
01000    // quick fix to prevent from efficiencies < 0
01001    if( effS < 0.0 ) {
01002       effS = 0.0;
01003       if( !fNegEffWarning ) Log() << kWARNING << "Negative signal efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." << Endl;
01004       fNegEffWarning = kTRUE;
01005    }
01006    if( effB < 0.0 ) {
01007       effB = 0.0;
01008       if( !fNegEffWarning ) Log() << kWARNING << "Negative background efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." << Endl;
01009       fNegEffWarning = kTRUE;
01010    }
01011 }
01012 
01013 //_______________________________________________________________________
01014 void TMVA::MethodCuts::GetEffsfromSelection( Double_t* cutMin, Double_t* cutMax,
01015                                              Double_t& effS, Double_t& effB)
01016 {
01017    // compute signal and background efficiencies from event counting 
01018    // for given cut sample
01019    Float_t nTotS = 0, nTotB = 0;
01020    Float_t nSelS = 0, nSelB = 0;  
01021    
01022    Volume* volume = new Volume( cutMin, cutMax, GetNvar() );
01023   
01024    // search for all events lying in the volume, and add up their weights
01025    nSelS = fBinaryTreeS->SearchVolume( volume );
01026    nSelB = fBinaryTreeB->SearchVolume( volume );
01027 
01028    delete volume;
01029 
01030    // total number of "events" (sum of weights) as reference to compute efficiency
01031    nTotS = fBinaryTreeS->GetSumOfWeights();
01032    nTotB = fBinaryTreeB->GetSumOfWeights();
01033    
01034    // sanity check
01035    if (nTotS == 0 && nTotB == 0) {
01036       Log() << kFATAL << "<GetEffsfromSelection> fatal error in zero total number of events:"
01037               << " nTotS, nTotB: " << nTotS << " " << nTotB << " ***" << Endl;
01038    }
01039 
01040    // efficiencies
01041    if (nTotS == 0 ) {
01042       effS = 0;
01043       effB = nSelB/nTotB;
01044       Log() << kWARNING << "<ComputeEstimator> zero number of signal events" << Endl;
01045    }
01046    else if (nTotB == 0) {
01047       effB = 0;
01048       effS = nSelS/nTotS;
01049       Log() << kWARNING << "<ComputeEstimator> zero number of background events" << Endl;
01050    }
01051    else {
01052       effS = nSelS/nTotS;
01053       effB = nSelB/nTotB;
01054    }  
01055 
01056    // quick fix to prevent from efficiencies < 0
01057    if( effS < 0.0 ) {
01058       effS = 0.0;
01059       if( !fNegEffWarning ) Log() << kWARNING << "Negative signal efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." << Endl;
01060       fNegEffWarning = kTRUE;
01061    }
01062    if( effB < 0.0 ) {
01063       effB = 0.0;
01064       if( !fNegEffWarning ) Log() << kWARNING << "Negative background efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." << Endl;
01065       fNegEffWarning = kTRUE;
01066    }
01067 }
01068 
01069 //_______________________________________________________________________
01070 void TMVA::MethodCuts::CreateVariablePDFs( void )
01071 {
01072    // for PDF method: create efficiency reference histograms and PDFs
01073 
01074    // create list of histograms and PDFs
01075    fVarHistS        = new vector<TH1*>( GetNvar() );
01076    fVarHistB        = new vector<TH1*>( GetNvar() );
01077    fVarHistS_smooth = new vector<TH1*>( GetNvar() );
01078    fVarHistB_smooth = new vector<TH1*>( GetNvar() );
01079    fVarPdfS         = new vector<PDF*>( GetNvar() );
01080    fVarPdfB         = new vector<PDF*>( GetNvar() );
01081 
01082    Int_t nsmooth = 0;
01083 
01084    // get min and max values of all events
01085    Double_t minVal = DBL_MAX;
01086    Double_t maxVal = -DBL_MAX;
01087    for( UInt_t ievt=0; ievt<Data()->GetNEvents(); ievt++ ){
01088       const Event *ev = GetEvent(ievt);
01089       Float_t val = ev->GetValue(ievt);
01090       if( val > minVal ) minVal = val;
01091       if( val < maxVal ) maxVal = val;
01092    }
01093 
01094    for (UInt_t ivar=0; ivar<GetNvar(); ivar++) { 
01095 
01096       // ---- signal
01097       TString histTitle = (*fInputVars)[ivar] + " signal training";
01098       TString histName  = (*fInputVars)[ivar] + "_sig";
01099       //      TString drawOpt   = (*fInputVars)[ivar] + ">>h(";
01100       //      drawOpt += fNbins;
01101       //      drawOpt += ")";
01102 
01103       // selection
01104       //      Data().GetTrainingTree()->Draw( drawOpt, "type==1", "goff" );
01105       //      (*fVarHistS)[ivar] = (TH1F*)gDirectory->Get("h");
01106       //      (*fVarHistS)[ivar]->SetName(histName);
01107       //      (*fVarHistS)[ivar]->SetTitle(histTitle);
01108 
01109       (*fVarHistS)[ivar] = new TH1F(histName.Data(), histTitle.Data(), fNbins, minVal, maxVal );
01110 
01111       // ---- background
01112       histTitle = (*fInputVars)[ivar] + " background training";
01113       histName  = (*fInputVars)[ivar] + "_bgd";
01114       //      drawOpt   = (*fInputVars)[ivar] + ">>h(";
01115       //      drawOpt += fNbins;
01116       //      drawOpt += ")";
01117 
01118       //      Data().GetTrainingTree()->Draw( drawOpt, "type==0", "goff" );
01119       //      (*fVarHistB)[ivar] = (TH1F*)gDirectory->Get("h");
01120       //      (*fVarHistB)[ivar]->SetName(histName);
01121       //      (*fVarHistB)[ivar]->SetTitle(histTitle);
01122 
01123 
01124       (*fVarHistB)[ivar] = new TH1F(histName.Data(), histTitle.Data(), fNbins, minVal, maxVal );
01125       
01126       for( UInt_t ievt=0; ievt<Data()->GetNEvents(); ievt++ ){
01127          const Event *ev = GetEvent(ievt);
01128          Float_t val = ev->GetValue(ievt);
01129          if( DataInfo().IsSignal(ev) ){
01130             (*fVarHistS)[ivar]->Fill( val );
01131          }else{
01132             (*fVarHistB)[ivar]->Fill( val );
01133          }
01134       }
01135 
01136 
01137 
01138       // make copy for smoothed histos
01139       (*fVarHistS_smooth)[ivar] = (TH1F*)(*fVarHistS)[ivar]->Clone();
01140       histTitle =  (*fInputVars)[ivar] + " signal training  smoothed ";
01141       histTitle += nsmooth;
01142       histTitle +=" times";
01143       histName =  (*fInputVars)[ivar] + "_sig_smooth";
01144       (*fVarHistS_smooth)[ivar]->SetName(histName);
01145       (*fVarHistS_smooth)[ivar]->SetTitle(histTitle);
01146 
01147       // smooth
01148       (*fVarHistS_smooth)[ivar]->Smooth(nsmooth);
01149 
01150       // ---- background
01151       //      histTitle = (*fInputVars)[ivar] + " background training";
01152       //      histName  = (*fInputVars)[ivar] + "_bgd";
01153       //      drawOpt   = (*fInputVars)[ivar] + ">>h(";
01154       //      drawOpt += fNbins;
01155       //      drawOpt += ")";
01156 
01157       //      Data().GetTrainingTree()->Draw( drawOpt, "type==0", "goff" );
01158       //      (*fVarHistB)[ivar] = (TH1F*)gDirectory->Get("h");
01159       //      (*fVarHistB)[ivar]->SetName(histName);
01160       //      (*fVarHistB)[ivar]->SetTitle(histTitle);
01161 
01162       // make copy for smoothed histos
01163       (*fVarHistB_smooth)[ivar] = (TH1F*)(*fVarHistB)[ivar]->Clone();
01164       histTitle  = (*fInputVars)[ivar]+" background training  smoothed ";
01165       histTitle += nsmooth;
01166       histTitle +=" times";
01167       histName   = (*fInputVars)[ivar]+"_bgd_smooth";
01168       (*fVarHistB_smooth)[ivar]->SetName(histName);
01169       (*fVarHistB_smooth)[ivar]->SetTitle(histTitle);
01170 
01171       // smooth
01172       (*fVarHistB_smooth)[ivar]->Smooth(nsmooth);
01173 
01174       // create PDFs
01175       (*fVarPdfS)[ivar] = new PDF( TString(GetName()) + " PDF Var Sig " + GetInputVar( ivar ), (*fVarHistS_smooth)[ivar], PDF::kSpline2 );
01176       (*fVarPdfB)[ivar] = new PDF( TString(GetName()) + " PDF Var Bkg " + GetInputVar( ivar ), (*fVarHistB_smooth)[ivar], PDF::kSpline2 );
01177    }
01178 }
01179 
01180 //_______________________________________________________________________
01181 void  TMVA::MethodCuts::ReadWeightsFromStream( istream& istr )
01182 {
01183    // read the cuts from stream
01184    TString dummy;
01185    UInt_t  dummyInt;
01186 
01187    // first the dimensions
01188    istr >> dummy >> dummy;
01189    // coverity[tainted_data_argument]
01190    istr >> dummy >> fNbins;
01191 
01192    // get rid of one read-in here because we read in once all ready to check for decorrelation
01193    istr >> dummy >> dummy >> dummy >> dummy >> dummy >> dummy >> dummyInt >> dummy ;
01194 
01195    // sanity check
01196    if (dummyInt != Data()->GetNVariables()) {
01197       Log() << kFATAL << "<ReadWeightsFromStream> fatal error: mismatch "
01198               << "in number of variables: " << dummyInt << " != " << Data()->GetNVariables() << Endl;
01199    }
01200    //SetNvar(dummyInt);
01201 
01202    // print some information
01203    if (fFitMethod == kUseMonteCarlo) {
01204       Log() << kINFO << "Read cuts optimised using sample of MC events" << Endl;
01205    }
01206    else if (fFitMethod == kUseMonteCarloEvents) {
01207       Log() << kINFO << "Read cuts optimised using sample of MC events" << Endl;
01208    }
01209    else if (fFitMethod == kUseGeneticAlgorithm) {
01210       Log() << kINFO << "Read cuts optimised using Genetic Algorithm" << Endl;
01211    }
01212    else if (fFitMethod == kUseSimulatedAnnealing) {
01213       Log() << kINFO << "Read cuts optimised using Simulated Annealing algorithm" << Endl;
01214    }
01215    else if (fFitMethod == kUseEventScan) {
01216       Log() << kINFO << "Read cuts optimised using Full Event Scan" << Endl;
01217    }
01218    else {
01219       Log() << kWARNING << "unknown method: " << fFitMethod << Endl;
01220    }
01221    Log() << kINFO << "in " << fNbins << " signal efficiency bins and for " << GetNvar() << " variables" << Endl;
01222    
01223    // now read the cuts
01224    char buffer[200];
01225    istr.getline(buffer,200);
01226    istr.getline(buffer,200);
01227 
01228    Int_t   tmpbin;
01229    Float_t tmpeffS, tmpeffB;
01230    if (fEffBvsSLocal != 0) delete fEffBvsSLocal;
01231    fEffBvsSLocal = new TH1F( GetTestvarName() + "_effBvsSLocal", 
01232                              TString(GetName()) + " efficiency of B vs S", fNbins, 0.0, 1.0 );
01233    fEffBvsSLocal->SetDirectory(0); // it's local
01234 
01235    for (Int_t ibin=0; ibin<fNbins; ibin++) {
01236       istr >> tmpbin >> tmpeffS >> tmpeffB;
01237       fEffBvsSLocal->SetBinContent( ibin+1, tmpeffB );
01238 
01239       for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
01240          istr >> fCutMin[ivar][ibin] >> fCutMax[ivar][ibin];
01241       }
01242    }
01243 
01244    fEffSMin = fEffBvsSLocal->GetBinCenter(1);
01245    fEffSMax = fEffBvsSLocal->GetBinCenter(fNbins);
01246 }
01247 
01248 //_______________________________________________________________________
01249 void TMVA::MethodCuts::AddWeightsXMLTo( void* parent ) const 
01250 {
01251    // create XML description for LD classification and regression 
01252    // (for arbitrary number of output classes/targets)
01253 
01254    // write all necessary information to the stream
01255    std::vector<Double_t> cutsMin;
01256    std::vector<Double_t> cutsMax;
01257 
01258    void* wght = gTools().AddChild(parent, "Weights");
01259    gTools().AddAttr( wght, "OptimisationMethod", (Int_t)fEffMethod);
01260    gTools().AddAttr( wght, "FitMethod",          (Int_t)fFitMethod );
01261    gTools().AddAttr( wght, "nbins",              fNbins );
01262    gTools().AddComment( wght, Form( "Below are the optimised cuts for %i variables: Format: ibin(hist) effS effB cutMin[ivar=0] cutMax[ivar=0] ... cutMin[ivar=n-1] cutMax[ivar=n-1]", GetNvar() ) );
01263 
01264    // NOTE: The signal efficiency written out into 
01265    //       the weight file does not correspond to the center of the bin within which the 
01266    //       background rejection is maximised (as before) but to the lower left edge of it. 
01267    //       This is because the cut optimisation algorithm determines the best background 
01268    //       rejection for all signal efficiencies belonging into a bin. Since the best background 
01269    //       rejection is in general obtained for the lowest possible signal efficiency, the 
01270    //       reference signal efficeincy is the lowest value in the bin.
01271 
01272    for (Int_t ibin=0; ibin<fNbins; ibin++) {
01273       Double_t effS     = fEffBvsSLocal->GetBinCenter ( ibin + 1 );
01274       Double_t trueEffS = GetCuts( effS, cutsMin, cutsMax );
01275       if (TMath::Abs(trueEffS) < 1e-10) trueEffS = 0;
01276       
01277       void* binxml = gTools().AddChild( wght, "Bin" );
01278       gTools().AddAttr( binxml, "ibin", ibin+1   );
01279       gTools().AddAttr( binxml, "effS", trueEffS );
01280       gTools().AddAttr( binxml, "effB", fEffBvsSLocal->GetBinContent( ibin + 1 ) );
01281       void* cutsxml = gTools().AddChild( binxml, "Cuts" );
01282       for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
01283          gTools().AddAttr( cutsxml, Form( "cutMin_%i", ivar ), cutsMin[ivar] );
01284          gTools().AddAttr( cutsxml, Form( "cutMax_%i", ivar ), cutsMax[ivar] );
01285       }      
01286    }
01287 }
01288 
01289 //_______________________________________________________________________
01290 void TMVA::MethodCuts::ReadWeightsFromXML( void* wghtnode ) 
01291 {
01292    // read coefficients from xml weight file
01293 
01294    // delete old min and max
01295    for (UInt_t i=0; i<GetNvar(); i++) {
01296       if (fCutMin[i] != 0) delete [] fCutMin[i];
01297       if (fCutMax[i] != 0) delete [] fCutMax[i];
01298    }
01299    if (fCutMin != 0) delete [] fCutMin;
01300    if (fCutMax != 0) delete [] fCutMax;
01301 
01302    Int_t tmpEffMethod, tmpFitMethod;
01303    gTools().ReadAttr( wghtnode, "OptimisationMethod", tmpEffMethod );
01304    gTools().ReadAttr( wghtnode, "FitMethod",          tmpFitMethod );
01305    gTools().ReadAttr( wghtnode, "nbins",              fNbins       );
01306    
01307    fEffMethod = (EEffMethod)tmpEffMethod;
01308    fFitMethod = (EFitMethodType)tmpFitMethod;
01309 
01310    // print some information
01311    if (fFitMethod == kUseMonteCarlo) {
01312       Log() << kINFO << "Read cuts optimised using sample of MC events" << Endl;
01313    }
01314    else if (fFitMethod == kUseMonteCarloEvents) {
01315       Log() << kINFO << "Read cuts optimised using sample of MC-Event events" << Endl;
01316    }
01317    else if (fFitMethod == kUseGeneticAlgorithm) {
01318       Log() << kINFO << "Read cuts optimised using Genetic Algorithm" << Endl;
01319    }
01320    else if (fFitMethod == kUseSimulatedAnnealing) {
01321       Log() << kINFO << "Read cuts optimised using Simulated Annealing algorithm" << Endl;
01322    }
01323    else if (fFitMethod == kUseEventScan) {
01324       Log() << kINFO << "Read cuts optimised using Full Event Scan" << Endl;
01325    }
01326    else {
01327       Log() << kWARNING << "unknown method: " << fFitMethod << Endl;
01328    }
01329    Log() << kINFO << "Reading " << fNbins << " signal efficiency bins for " << GetNvar() << " variables" << Endl;
01330 
01331    delete fEffBvsSLocal;
01332    fEffBvsSLocal = new TH1F( GetTestvarName() + "_effBvsSLocal", 
01333                              TString(GetName()) + " efficiency of B vs S", fNbins, 0.0, 1.0 );
01334    fEffBvsSLocal->SetDirectory(0); // it's local
01335    for (Int_t ibin=1; ibin<=fNbins; ibin++) fEffBvsSLocal->SetBinContent( ibin, -0.1 ); // Init
01336 
01337    fCutMin = new Double_t*[GetNvar()];
01338    fCutMax = new Double_t*[GetNvar()];
01339    for (UInt_t i=0;i<GetNvar();i++) {
01340       fCutMin[i] = new Double_t[fNbins];
01341       fCutMax[i] = new Double_t[fNbins];
01342    }
01343 
01344    // read efficeincies and cuts
01345    Int_t   tmpbin;
01346    Float_t tmpeffS, tmpeffB;
01347    void* ch = gTools().GetChild(wghtnode,"Bin");
01348    while (ch) {
01349 //       if (strcmp(gTools().GetName(ch),"Bin") !=0) {
01350 //          ch = gTools().GetNextChild(ch);
01351 //          continue;
01352 //       }
01353 
01354       gTools().ReadAttr( ch, "ibin", tmpbin  );
01355       gTools().ReadAttr( ch, "effS", tmpeffS );
01356       gTools().ReadAttr( ch, "effB", tmpeffB );
01357 
01358       // sanity check
01359       if (tmpbin-1 >= fNbins || tmpbin-1 < 0) {
01360          Log() << kFATAL << "Mismatch in bins: " << tmpbin-1 << " >= " << fNbins << Endl;
01361       }
01362 
01363       fEffBvsSLocal->SetBinContent( tmpbin, tmpeffB );
01364       void* ct = gTools().GetChild(ch);
01365       for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
01366          gTools().ReadAttr( ct, Form( "cutMin_%i", ivar ), fCutMin[ivar][tmpbin-1] );
01367          gTools().ReadAttr( ct, Form( "cutMax_%i", ivar ), fCutMax[ivar][tmpbin-1] );
01368       }
01369       ch = gTools().GetNextChild(ch, "Bin");
01370    }
01371 }
01372 
01373 //_______________________________________________________________________
01374 void TMVA::MethodCuts::WriteMonitoringHistosToFile( void ) const
01375 {
01376    // write histograms and PDFs to file for monitoring purposes
01377 
01378    Log() << kINFO << "Write monitoring histograms to file: " << BaseDir()->GetPath() << Endl;
01379   
01380    fEffBvsSLocal->Write();
01381 
01382    // save reference histograms to file
01383    if (fEffMethod == kUsePDFs) {
01384       for (UInt_t ivar=0; ivar<GetNvar(); ivar++) { 
01385          (*fVarHistS)[ivar]->Write();    
01386          (*fVarHistB)[ivar]->Write();
01387          (*fVarHistS_smooth)[ivar]->Write();    
01388          (*fVarHistB_smooth)[ivar]->Write();
01389          (*fVarPdfS)[ivar]->GetPDFHist()->Write();
01390          (*fVarPdfB)[ivar]->GetPDFHist()->Write();
01391       }
01392    }  
01393 }
01394 
01395 //_______________________________________________________________________
01396 Double_t TMVA::MethodCuts::GetTrainingEfficiency(const TString& theString)
01397 {
01398    // - overloaded function to create background efficiency (rejection) versus
01399    //   signal efficiency plot (first call of this function)
01400    // - the function returns the signal efficiency at background efficiency
01401    //   indicated in theString
01402    //
01403    // "theString" must have two entries:
01404    // [0]: "Efficiency"
01405    // [1]: the value of background efficiency at which the signal efficiency 
01406    //      is to be returned
01407 
01408    // parse input string for required background efficiency
01409    TList* list  = gTools().ParseFormatLine( theString );
01410    // sanity check
01411    if (list->GetSize() != 2) {
01412       Log() << kFATAL << "<GetTrainingEfficiency> wrong number of arguments"
01413               << " in string: " << theString
01414               << " | required format, e.g., Efficiency:0.05" << Endl;
01415       return -1;
01416    }
01417 
01418    Results* results = Data()->GetResults(GetMethodName(), Types::kTesting, GetAnalysisType());
01419    
01420    // that will be the value of the efficiency retured (does not affect
01421    // the efficiency-vs-bkg plot which is done anyway.
01422    Float_t effBref  = atof( ((TObjString*)list->At(1))->GetString() );
01423 
01424    delete list;
01425 
01426    // first round ? --> create histograms
01427    if (results->GetHist("EFF_BVSS_TR")==0) {
01428 
01429       if (fBinaryTreeS != 0) { delete fBinaryTreeS; fBinaryTreeS = 0; }
01430       if (fBinaryTreeB != 0) { delete fBinaryTreeB; fBinaryTreeB = 0; }
01431 
01432       fBinaryTreeS = new BinarySearchTree();
01433       fBinaryTreeS->Fill( GetEventCollection(Types::kTraining), fSignalClass );
01434       fBinaryTreeB = new BinarySearchTree();
01435       fBinaryTreeB->Fill( GetEventCollection(Types::kTraining), fBackgroundClass );
01436       // there is no really good equivalent to the fEffS; fEffB (efficiency vs cutvalue)
01437       // for the "Cuts" method (unless we had only one cut). Maybe later I might add here
01438       // histograms for each of the cuts...but this would require also a change in the 
01439       // base class, and it is not really necessary, as we get exactly THIS info from the
01440       // "evaluateAllVariables" anyway.
01441 
01442       // now create efficiency curve: background versus signal
01443       TH1* eff_bvss_tr = new TH1F( GetTestvarName() + "_trainingEffBvsS", GetTestvarName() + "", fNbins, 0, 1 );
01444       for (Int_t ibin=1; ibin<=fNbins; ibin++) eff_bvss_tr->SetBinContent( ibin, -0.1 ); // Init
01445       TH1* rej_bvss_tr = new TH1F( GetTestvarName() + "_trainingRejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
01446       for (Int_t ibin=1; ibin<=fNbins; ibin++) rej_bvss_tr->SetBinContent( ibin, 0. ); // Init
01447       results->Store(eff_bvss_tr, "EFF_BVSS_TR");
01448       results->Store(rej_bvss_tr, "REJ_BVSS_TR");
01449 
01450       // use root finder
01451 
01452       // make the background-vs-signal efficiency plot
01453       Double_t* tmpCutMin = new Double_t[GetNvar()];
01454       Double_t* tmpCutMax = new Double_t[GetNvar()];
01455       Int_t nFailedBins=0;
01456       for (Int_t bini=1; bini<=fNbins; bini++) {
01457          for (UInt_t ivar=0; ivar <GetNvar(); ivar++){
01458             tmpCutMin[ivar] = fCutMin[ivar][bini-1];
01459             tmpCutMax[ivar] = fCutMax[ivar][bini-1];
01460          }
01461          // find cut value corresponding to a given signal efficiency
01462          Double_t effS, effB;
01463          this->GetEffsfromSelection( &tmpCutMin[0], &tmpCutMax[0], effS, effB);    
01464          // check that effS matches bini
01465          Int_t effBin = eff_bvss_tr->GetXaxis()->FindBin(effS);
01466          if (effBin != bini){
01467             Log()<< kVERBOSE << "unable to fill efficiency bin " << bini<< " " << effBin <<Endl; 
01468             nFailedBins++;
01469          }
01470          else{
01471             // and fill histograms
01472             eff_bvss_tr->SetBinContent( bini, effB     );    
01473             rej_bvss_tr->SetBinContent( bini, 1.0-effB ); 
01474          }
01475       }
01476       if (nFailedBins>0) Log()<< kWARNING << " unable to fill "<< nFailedBins <<" efficiency bins " <<Endl;  
01477 
01478       delete [] tmpCutMin;
01479       delete [] tmpCutMax;
01480 
01481       // create splines for histogram
01482       fSplTrainEffBvsS = new TSpline1( "trainEffBvsS", new TGraph( eff_bvss_tr ) );
01483    }
01484 
01485    // must exist...
01486    if (NULL == fSplTrainEffBvsS) return 0.0;
01487 
01488    // now find signal efficiency that corresponds to required background efficiency
01489    Double_t effS, effB, effS_ = 0, effB_ = 0;
01490    Int_t    nbins_ = 1000;
01491 
01492    // loop over efficiency bins until the background eff. matches the requirement
01493    for (Int_t bini=1; bini<=nbins_; bini++) {
01494       // get corresponding signal and background efficiencies
01495       effS = (bini - 0.5)/Float_t(nbins_);
01496       effB = fSplTrainEffBvsS->Eval( effS );
01497 
01498       // find signal efficiency that corresponds to required background efficiency
01499       if ((effB - effBref)*(effB_ - effBref) < 0) break;
01500       effS_ = effS;
01501       effB_ = effB;  
01502    }
01503 
01504    return 0.5*(effS + effS_);
01505 }
01506 
01507 //_______________________________________________________________________
01508 Double_t TMVA::MethodCuts::GetEfficiency( const TString& theString, Types::ETreeType type, Double_t& effSerr )
01509 {
01510    // - overloaded function to create background efficiency (rejection) versus
01511    //   signal efficiency plot (first call of this function)
01512    // - the function returns the signal efficiency at background efficiency
01513    //   indicated in theString
01514    //
01515    // "theString" must have two entries:
01516    // [0]: "Efficiency"
01517    // [1]: the value of background efficiency at which the signal efficiency 
01518    //      is to be returned
01519 
01520    Data()->SetCurrentType(type);
01521 
01522    Results* results = Data()->GetResults( GetMethodName(), Types::kTesting, GetAnalysisType() );
01523 
01524    // parse input string for required background efficiency
01525    TList* list  = gTools().ParseFormatLine( theString, ":" );
01526 
01527    if (list->GetSize() > 2) {
01528       delete list;
01529       Log() << kFATAL << "<GetEfficiency> wrong number of arguments"
01530               << " in string: " << theString
01531               << " | required format, e.g., Efficiency:0.05, or empty string" << Endl;
01532       return -1;
01533    }
01534    
01535    // sanity check
01536    Bool_t computeArea = (list->GetSize() < 2); // the area is computed 
01537 
01538    // that will be the value of the efficiency retured (does not affect
01539    // the efficiency-vs-bkg plot which is done anyway.
01540    Float_t effBref = (computeArea?1.:atof( ((TObjString*)list->At(1))->GetString() ));
01541 
01542    delete list;
01543 
01544 
01545    // first round ? --> create histograms
01546    if (results->GetHist("MVA_EFF_BvsS")==0) {
01547 
01548       if (fBinaryTreeS!=0) { delete fBinaryTreeS; fBinaryTreeS = 0; }
01549       if (fBinaryTreeB!=0) { delete fBinaryTreeB; fBinaryTreeB = 0; }
01550 
01551       // the variables may be transformed by a transformation method: to coherently 
01552       // treat signal and background one must decide which transformation type shall 
01553       // be used: our default is signal-type
01554       fBinaryTreeS = new BinarySearchTree();
01555       fBinaryTreeS->Fill( GetEventCollection(Types::kTesting), fSignalClass );
01556       fBinaryTreeB = new BinarySearchTree();
01557       fBinaryTreeB->Fill( GetEventCollection(Types::kTesting), fBackgroundClass );
01558 
01559       // there is no really good equivalent to the fEffS; fEffB (efficiency vs cutvalue)
01560       // for the "Cuts" method (unless we had only one cut). Maybe later I might add here
01561       // histograms for each of the cuts...but this would require also a change in the 
01562       // base class, and it is not really necessary, as we get exactly THIS info from the
01563       // "evaluateAllVariables" anyway.
01564 
01565       // now create efficiency curve: background versus signal
01566       TH1* eff_BvsS = new TH1F( GetTestvarName() + "_effBvsS", GetTestvarName() + "", fNbins, 0, 1 );
01567       for (Int_t ibin=1; ibin<=fNbins; ibin++) eff_BvsS->SetBinContent( ibin, -0.1 ); // Init
01568       TH1* rej_BvsS = new TH1F( GetTestvarName() + "_rejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
01569       for (Int_t ibin=1; ibin<=fNbins; ibin++) rej_BvsS->SetBinContent( ibin, 0.0 ); // Init
01570       results->Store(eff_BvsS, "MVA_EFF_BvsS");
01571       results->Store(rej_BvsS);
01572 
01573       Double_t xmin = 0.;
01574       Double_t xmax = 1.000001;
01575       
01576       TH1* eff_s = new TH1F( GetTestvarName() + "_effS", GetTestvarName() + " (signal)",     fNbins, xmin, xmax);
01577       for (Int_t ibin=1; ibin<=fNbins; ibin++) eff_s->SetBinContent( ibin, -0.1 ); // Init
01578       TH1* eff_b = new TH1F( GetTestvarName() + "_effB", GetTestvarName() + " (background)", fNbins, xmin, xmax);
01579       for (Int_t ibin=1; ibin<=fNbins; ibin++) eff_b->SetBinContent( ibin, -0.1 ); // Init
01580       results->Store(eff_s, "MVA_S");
01581       results->Store(eff_b, "MVA_B");
01582 
01583       // use root finder
01584 
01585       // make the background-vs-signal efficiency plot
01586       Double_t* tmpCutMin = new Double_t[GetNvar()];
01587       Double_t* tmpCutMax = new Double_t[GetNvar()];
01588       TGraph* tmpBvsS = new TGraph(fNbins+1);      
01589       tmpBvsS->SetPoint(0, 0., 0.);
01590 
01591       for (Int_t bini=1; bini<=fNbins; bini++) {
01592          for (UInt_t ivar=0; ivar <GetNvar(); ivar++) {
01593             tmpCutMin[ivar] = fCutMin[ivar][bini-1];
01594             tmpCutMax[ivar] = fCutMax[ivar][bini-1];
01595          }
01596          // find cut value corresponding to a given signal efficiency
01597          Double_t effS, effB;
01598          this->GetEffsfromSelection( &tmpCutMin[0], &tmpCutMax[0], effS, effB);             
01599          tmpBvsS->SetPoint(bini, effS, effB);
01600 
01601          eff_s->SetBinContent(bini, effS);
01602          eff_b->SetBinContent(bini, effB);
01603       }
01604       tmpBvsS->SetPoint(fNbins+1, 1., 1.);
01605 
01606       delete [] tmpCutMin;
01607       delete [] tmpCutMax;
01608 
01609       // create splines for histogram
01610       fSpleffBvsS = new TSpline1( "effBvsS", tmpBvsS );
01611       for (Int_t bini=1; bini<=fNbins; bini++) {
01612          Double_t effS = (bini - 0.5)/Float_t(fNbins);
01613          Double_t effB = fSpleffBvsS->Eval( effS );
01614          eff_BvsS->SetBinContent( bini, effB     );    
01615          rej_BvsS->SetBinContent( bini, 1.0-effB ); 
01616       }
01617    }
01618 
01619    // must exist...
01620    if (NULL == fSpleffBvsS) return 0.0;
01621 
01622    // now find signal efficiency that corresponds to required background efficiency
01623    Double_t effS = 0, effB = 0, effS_ = 0, effB_ = 0;
01624    Int_t    nbins_ = 1000;
01625 
01626    if (computeArea) {
01627 
01628       // compute area of rej-vs-eff plot
01629       Double_t integral = 0;
01630       for (Int_t bini=1; bini<=nbins_; bini++) {
01631          
01632          // get corresponding signal and background efficiencies
01633          effS = (bini - 0.5)/Float_t(nbins_);
01634          effB = fSpleffBvsS->Eval( effS );
01635          integral += (1.0 - effB);
01636       }   
01637       integral /= nbins_;      
01638       
01639       return integral;
01640    }
01641    else {
01642 
01643       // loop over efficiency bins until the background eff. matches the requirement
01644       for (Int_t bini=1; bini<=nbins_; bini++) {
01645          // get corresponding signal and background efficiencies
01646          effS = (bini - 0.5)/Float_t(nbins_);
01647          effB = fSpleffBvsS->Eval( effS );
01648          
01649          // find signal efficiency that corresponds to required background efficiency
01650          if ((effB - effBref)*(effB_ - effBref) < 0) break;
01651          effS_ = effS;
01652          effB_ = effB;  
01653       }
01654 
01655       effS    = 0.5*(effS + effS_);
01656       effSerr = 0;
01657       if (Data()->GetNEvtSigTest() > 0) 
01658          effSerr = TMath::Sqrt( effS*(1.0 - effS)/Double_t(Data()->GetNEvtSigTest()) );
01659    
01660       return effS;
01661 
01662    }
01663 
01664    return -1;
01665 }
01666  
01667 //_______________________________________________________________________
01668 void TMVA::MethodCuts::MakeClassSpecific( std::ostream& fout, const TString& className ) const
01669 {
01670    // write specific classifier response
01671    fout << "   // not implemented for class: \"" << className << "\"" << endl;
01672    fout << "};" << endl;
01673 }
01674 
01675 //_______________________________________________________________________
01676 void TMVA::MethodCuts::GetHelpMessage() const
01677 {
01678    // get help message text
01679    //
01680    // typical length of text line: 
01681    //         "|--------------------------------------------------------------|"
01682    TString bold    = gConfig().WriteOptionsReference() ? "<b>" : "";
01683    TString resbold = gConfig().WriteOptionsReference() ? "</b>" : "";
01684    TString brk     = gConfig().WriteOptionsReference() ? "<br>" : "";
01685 
01686    Log() << Endl;
01687    Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
01688    Log() << Endl;
01689    Log() << "The optimisation of rectangular cuts performed by TMVA maximises " << Endl;
01690    Log() << "the background rejection at given signal efficiency, and scans " << Endl;
01691    Log() << "over the full range of the latter quantity. Three optimisation" << Endl;
01692    Log() << "methods are optional: Monte Carlo sampling (MC), a Genetics" << Endl;
01693    Log() << "Algorithm (GA), and Simulated Annealing (SA). GA and SA are"  << Endl;
01694    Log() << "expected to perform best." << Endl;
01695    Log() << Endl;
01696    Log() << "The difficulty to find the optimal cuts strongly increases with" << Endl;
01697    Log() << "the dimensionality (number of input variables) of the problem." << Endl;
01698    Log() << "This behavior is due to the non-uniqueness of the solution space."<<  Endl;
01699    Log() << Endl;
01700    Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
01701    Log() << Endl;
01702    Log() << "If the dimensionality exceeds, say, 4 input variables, it is " << Endl;
01703    Log() << "advisable to scrutinize the separation power of the variables," << Endl;
01704    Log() << "and to remove the weakest ones. If some among the input variables" << Endl;
01705    Log() << "can be described by a single cut (e.g., because signal tends to be" << Endl;
01706    Log() << "larger than background), this can be indicated to MethodCuts via" << Endl;
01707    Log() << "the \"Fsmart\" options (see option string). Choosing this option" << Endl;
01708    Log() << "reduces the number of requirements for the variable from 2 (min/max)" << Endl;
01709    Log() << "to a single one (TMVA finds out whether it is to be interpreted as" << Endl;
01710    Log() << "min or max)." << Endl;
01711    Log() << Endl;
01712    Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
01713    Log() << "" << Endl;
01714    Log() << bold << "Monte Carlo sampling:" << resbold << Endl;
01715    Log() << "" << Endl;
01716    Log() << "Apart form the \"Fsmart\" option for the variables, the only way" << Endl;
01717    Log() << "to improve the MC sampling is to increase the sampling rate. This" << Endl;
01718    Log() << "is done via the configuration option \"MC_NRandCuts\". The execution" << Endl;
01719    Log() << "time scales linearly with the sampling rate." << Endl;
01720    Log() << "" << Endl;
01721    Log() << bold << "Genetic Algorithm:" << resbold << Endl;
01722    Log() << "" << Endl;
01723    Log() << "The algorithm terminates if no significant fitness increase has" << Endl;
01724    Log() << "been achieved within the last \"nsteps\" steps of the calculation." << Endl;
01725    Log() << "Wiggles in the ROC curve or constant background rejection of 1" << Endl;
01726    Log() << "indicate that the GA failed to always converge at the true maximum" << Endl;
01727    Log() << "fitness. In such a case, it is recommended to broaden the search " << Endl;
01728    Log() << "by increasing the population size (\"popSize\") and to give the GA " << Endl;
01729    Log() << "more time to find improvements by increasing the number of steps" << Endl;
01730    Log() << "(\"nsteps\")" << Endl;
01731    Log() << "  -> increase \"popSize\" (at least >10 * number of variables)" << Endl;
01732    Log() << "  -> increase \"nsteps\"" << Endl;
01733    Log() << "" << Endl;
01734    Log() << bold << "Simulated Annealing (SA) algorithm:" << resbold << Endl;
01735    Log() << "" << Endl;
01736    Log() << "\"Increasing Adaptive\" approach:" << Endl;
01737    Log() << "" << Endl;
01738    Log() << "The algorithm seeks local minima and explores their neighborhood, while" << Endl;
01739    Log() << "changing the ambient temperature depending on the number of failures" << Endl;
01740    Log() << "in the previous steps. The performance can be improved by increasing" << Endl;
01741    Log() << "the number of iteration steps (\"MaxCalls\"), or by adjusting the" << Endl;
01742    Log() << "minimal temperature (\"MinTemperature\"). Manual adjustments of the" << Endl;
01743    Log() << "speed of the temperature increase (\"TemperatureScale\" and \"AdaptiveSpeed\")" << Endl;
01744    Log() << "to individual data sets should also help. Summary:" << brk << Endl;
01745    Log() << "  -> increase \"MaxCalls\"" << brk << Endl;
01746    Log() << "  -> adjust   \"MinTemperature\"" << brk << Endl;
01747    Log() << "  -> adjust   \"TemperatureScale\"" << brk << Endl;
01748    Log() << "  -> adjust   \"AdaptiveSpeed\"" << Endl;
01749    Log() << "" << Endl;   
01750    Log() << "\"Decreasing Adaptive\" approach:" << Endl;
01751    Log() << "" << Endl;
01752    Log() << "The algorithm calculates the initial temperature (based on the effect-" << Endl;
01753    Log() << "iveness of large steps) and the multiplier that ensures to reach the" << Endl;
01754    Log() << "minimal temperature with the requested number of iteration steps." << Endl;
01755    Log() << "The performance can be improved by adjusting the minimal temperature" << Endl;
01756    Log() << " (\"MinTemperature\") and by increasing number of steps (\"MaxCalls\"):" << brk << Endl;
01757    Log() << "  -> increase \"MaxCalls\"" << brk << Endl;
01758    Log() << "  -> adjust   \"MinTemperature\"" << Endl;
01759    Log() << " " << Endl;
01760    Log() << "Other kernels:" << Endl;
01761    Log() << "" << Endl;
01762    Log() << "Alternative ways of counting the temperature change are implemented. " << Endl;
01763    Log() << "Each of them starts with the maximum temperature (\"MaxTemperature\")" << Endl;
01764    Log() << "and descreases while changing the temperature according to a given" << Endl;
01765    Log() << "prescription:" << brk << Endl;
01766    Log() << "CurrentTemperature =" << brk << Endl;
01767    Log() << "  - Sqrt: InitialTemperature / Sqrt(StepNumber+2) * TemperatureScale" << brk << Endl;
01768    Log() << "  - Log:  InitialTemperature / Log(StepNumber+2) * TemperatureScale" << brk << Endl;
01769    Log() << "  - Homo: InitialTemperature / (StepNumber+2) * TemperatureScale" << brk << Endl;
01770    Log() << "  - Sin:  ( Sin( StepNumber / TemperatureScale ) + 1 ) / (StepNumber + 1) * InitialTemperature + Eps" << brk << Endl;
01771    Log() << "  - Geo:  CurrentTemperature * TemperatureScale" << Endl;
01772    Log() << "" << Endl;
01773    Log() << "Their performance can be improved by adjusting initial temperature" << Endl;
01774    Log() << "(\"InitialTemperature\"), the number of iteration steps (\"MaxCalls\")," << Endl;
01775    Log() << "and the multiplier that scales the termperature descrease" << Endl;
01776    Log() << "(\"TemperatureScale\")" << brk << Endl;
01777    Log() << "  -> increase \"MaxCalls\"" << brk << Endl;
01778    Log() << "  -> adjust   \"InitialTemperature\"" << brk << Endl;
01779    Log() << "  -> adjust   \"TemperatureScale\"" << brk << Endl;
01780    Log() << "  -> adjust   \"KernelTemperature\"" << Endl;
01781 }

Generated on Tue Jul 5 15:25:02 2011 for ROOT_528-00b_version by  doxygen 1.5.1