MethodPDEFoam.cxx

Go to the documentation of this file.
00001 // @(#)root/tmva $Id: MethodPDEFoam.cxx 37986 2011-02-04 21:42:15Z pcanal $
00002 // Author: Tancredi Carli, Dominik Dannheim, Alexander Voigt
00003 
00004 /**********************************************************************************
00005  * Project: TMVA - a Root-integrated toolkit for multivariate Data analysis       *
00006  * Package: TMVA                                                                  *
00007  * Class  : MethodPDEFoam                                                         *
00008  * Web    : http://tmva.sourceforge.net                                           *
00009  *                                                                                *
00010  * Description:                                                                   *
00011  *      Implementation (see header for description)                               *
00012  *                                                                                *
00013  * Authors (alphabetical):                                                        *
00014  *      Tancredi Carli   - CERN, Switzerland                                      *
00015  *      Dominik Dannheim - CERN, Switzerland                                      *
00016  *      Alexander Voigt  - CERN, Switzerland                                      *
00017  *      Peter Speckmayer - CERN, Switzerland                                      *
00018  *                                                                                *
00019  * Original author of the TFoam implementation:                                   *
00020  *      S. Jadach - Institute of Nuclear Physics, Cracow, Poland                  *
00021  *                                                                                *
00022  * Copyright (c) 2008:                                                            *
00023  *      CERN, Switzerland                                                         *
00024  *      MPI-K Heidelberg, Germany                                                 *
00025  *                                                                                *
00026  * Redistribution and use in source and binary forms, with or without             *
00027  * modification, are permitted according to the terms listed in LICENSE           *
00028  * (http://tmva.sourceforge.net/LICENSE)                                          *
00029  **********************************************************************************/
00030 
00031 //_______________________________________________________________________
00032 
00033 #include <iomanip>
00034 #include <cassert>
00035 
00036 #include "TMath.h"
00037 #include "Riostream.h"
00038 #include "TFile.h"
00039 
00040 #include "TMVA/MethodPDEFoam.h"
00041 #include "TMVA/Tools.h"
00042 #include "TMatrix.h"
00043 #include "TMVA/Ranking.h"
00044 #include "TMVA/Types.h"
00045 #include "TMVA/ClassifierFactory.h"
00046 #include "TMVA/Config.h"
00047 
00048 REGISTER_METHOD(PDEFoam)
00049 
00050 ClassImp(TMVA::MethodPDEFoam)
00051 
00052 //_______________________________________________________________________
00053 TMVA::MethodPDEFoam::MethodPDEFoam( const TString& jobName,
00054                                     const TString& methodTitle,
00055                                     DataSetInfo& dsi,
00056                                     const TString& theOption,
00057                                     TDirectory* theTargetDir ) :
00058    MethodBase( jobName, Types::kPDEFoam, methodTitle, dsi, theOption, theTargetDir )
00059    , fSigBgSeparated(kFALSE)
00060    , fFrac(0.001)
00061    , fDiscrErrCut(-1.0)
00062    , fVolFrac(30.0)
00063    , fVolFracInv(1.0/30.0)
00064    , fnCells(999)
00065    , fnActiveCells(500)
00066    , fnSampl(2000)
00067    , fnBin(5)
00068    , fEvPerBin(10000)
00069    , fCompress(kTRUE)
00070    , fMultiTargetRegression(kFALSE)
00071    , fNmin(100)
00072    , fCutNmin(kTRUE)
00073    , fMaxDepth(0)
00074    , fKernelStr("None")
00075    , fKernel(kNone)
00076    , fTargetSelectionStr("Mean")
00077    , fTargetSelection(kMean)
00078    , fFillFoamWithOrigWeights(kFALSE)
00079    , fUseYesNoCell(kFALSE)
00080    , fDTLogic("None")
00081    , fDTSeparation(kFoam)
00082    , fPeekMax(kTRUE)
00083    , fXmin(std::vector<Double_t>())
00084    , fXmax(std::vector<Double_t>())
00085    , fFoam(std::vector<PDEFoam*>())
00086 {
00087    // init PDEFoam objects
00088 }
00089 
00090 //_______________________________________________________________________
00091 TMVA::MethodPDEFoam::MethodPDEFoam( DataSetInfo& dsi,
00092                                     const TString& theWeightFile,
00093                                     TDirectory* theTargetDir ) :
00094    MethodBase( Types::kPDEFoam, dsi, theWeightFile, theTargetDir )
00095    , fSigBgSeparated(kFALSE)
00096    , fFrac(0.001)
00097    , fDiscrErrCut(-1.0)
00098    , fVolFrac(30.0)
00099    , fVolFracInv(1.0/30.0)
00100    , fnCells(999)
00101    , fnActiveCells(500)
00102    , fnSampl(2000)
00103    , fnBin(5)
00104    , fEvPerBin(10000)
00105    , fCompress(kTRUE)
00106    , fMultiTargetRegression(kFALSE)
00107    , fNmin(100)
00108    , fCutNmin(kTRUE)
00109    , fMaxDepth(0)
00110    , fKernelStr("None")
00111    , fKernel(kNone)
00112    , fTargetSelectionStr("Mean")
00113    , fTargetSelection(kMean)
00114    , fFillFoamWithOrigWeights(kFALSE)
00115    , fUseYesNoCell(kFALSE)
00116    , fDTLogic("None")
00117    , fDTSeparation(kFoam)
00118    , fPeekMax(kTRUE)
00119    , fXmin(std::vector<Double_t>())
00120    , fXmax(std::vector<Double_t>())
00121    , fFoam(std::vector<PDEFoam*>())
00122 {
00123    // constructor from weight file
00124 }
00125 
00126 //_______________________________________________________________________
00127 Bool_t TMVA::MethodPDEFoam::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ )
00128 {
00129    // PDEFoam can handle classification with 2 classes and regression
00130    // with one or more regression-targets
00131    if (type == Types::kClassification && numberClasses == 2) return kTRUE;
00132    if (type == Types::kRegression) return kTRUE;
00133    return kFALSE;
00134 }
00135 
00136 //_______________________________________________________________________
00137 void TMVA::MethodPDEFoam::Init( void )
00138 {
00139    // default initialization called by all constructors
00140 
00141    // init PDEFoam options
00142    fSigBgSeparated = kFALSE;   // default: unified foam
00143    fFrac           = 0.001;    // fraction of outlier events
00144    fDiscrErrCut    = -1.;      // cut on discriminator error
00145    fVolFrac        = 30.0;     // inverse range searching box size
00146    fVolFracInv     = 1./30.;   // range searching box size
00147    fnActiveCells   = 500;      // number of active cells to create
00148    fnCells         = fnActiveCells*2-1; // total number of cells
00149    fnSampl         = 2000;     // number of sampling points in cell
00150    fnBin           = 5;        // number of bins in edge histogram
00151    fEvPerBin       = 10000;    // number of events per bin
00152    fNmin           = 100;      // minimum number of events in cell
00153    fMaxDepth       = 0;        // cell tree depth (default: unlimited)
00154    fFillFoamWithOrigWeights = kFALSE; // fill orig. weights into foam
00155    fUseYesNoCell   = kFALSE;   // return -1 or 1 for bg or signal events
00156    fDTLogic        = "None";   // decision tree algorithmus
00157    fDTSeparation   = kFoam;    // separation type
00158    fPeekMax        = kTRUE;    // peek cell with max separation
00159 
00160    fKernel         = kNone; // default: use no kernel
00161    fTargetSelection= kMean; // default: use mean for target selection (only multi target regression!)
00162 
00163    fCompress              = kTRUE;  // compress ROOT output file
00164    fMultiTargetRegression = kFALSE; // multi-target regression
00165 
00166    for (UInt_t i=0; i<fFoam.size(); i++) 
00167       if (fFoam.at(i)) delete fFoam.at(i);
00168    fFoam.clear();
00169 
00170    if (fUseYesNoCell)
00171       SetSignalReferenceCut( 0.0 ); // MVA output in [-1, 1]
00172    else
00173       SetSignalReferenceCut( 0.5 ); // MVA output in [0, 1]
00174 }
00175 
00176 //_______________________________________________________________________
00177 void TMVA::MethodPDEFoam::DeclareOptions()
00178 {
00179    //
00180    // Declare MethodPDEFoam options
00181    //
00182    DeclareOptionRef( fSigBgSeparated = kFALSE, "SigBgSeparate", "Separate foams for signal and background" );
00183    DeclareOptionRef( fFrac = 0.001,           "TailCut",  "Fraction of outlier events that are excluded from the foam in each dimension" );
00184    DeclareOptionRef( fVolFracInv = 1./30.,    "VolFrac",  "Size of sampling box, used for density calculation during foam build-up (maximum value: 1.0 is equivalent to volume of entire foam)");
00185    DeclareOptionRef( fnActiveCells = 500,     "nActiveCells",  "Maximum number of active cells to be created by the foam");
00186    DeclareOptionRef( fnSampl = 2000,          "nSampl",   "Number of generated MC events per cell");
00187    DeclareOptionRef( fnBin = 5,               "nBin",     "Number of bins in edge histograms");
00188    DeclareOptionRef( fCompress = kTRUE,       "Compress", "Compress foam output file");
00189    DeclareOptionRef( fMultiTargetRegression = kFALSE,     "MultiTargetRegression", "Do regression with multiple targets");
00190    DeclareOptionRef( fNmin = 100,             "Nmin",     "Number of events in cell required to split cell");
00191    DeclareOptionRef( fMaxDepth = 0,           "MaxDepth",  "Maximum depth of cell tree (0=unlimited)");
00192    DeclareOptionRef( fFillFoamWithOrigWeights = kFALSE, "FillFoamWithOrigWeights", "Fill foam with original or boost weights");
00193    DeclareOptionRef( fUseYesNoCell = kFALSE, "UseYesNoCell", "Return -1 or 1 for bkg or signal like events");
00194    DeclareOptionRef( fDTLogic = "None", "DTLogic", "Use decision tree algorithm to split cells");
00195    AddPreDefVal(TString("None"));
00196    AddPreDefVal(TString("GiniIndex"));
00197    AddPreDefVal(TString("MisClassificationError"));
00198    AddPreDefVal(TString("CrossEntropy"));
00199    DeclareOptionRef( fPeekMax = kTRUE, "PeekMax", "Peek up cell with max. driver integral for the next split");
00200 
00201    DeclareOptionRef( fKernelStr = "None",     "Kernel",   "Kernel type used");
00202    AddPreDefVal(TString("None"));
00203    AddPreDefVal(TString("Gauss"));
00204    AddPreDefVal(TString("LinNeighbors"));
00205    DeclareOptionRef( fTargetSelectionStr = "Mean", "TargetSelection", "Target selection method");
00206    AddPreDefVal(TString("Mean"));
00207    AddPreDefVal(TString("Mpv"));
00208 }
00209 
00210 
00211 void TMVA::MethodPDEFoam::DeclareCompatibilityOptions() {
00212    MethodBase::DeclareCompatibilityOptions();
00213    DeclareOptionRef(fCutNmin = kTRUE, "CutNmin",  "Requirement for minimal number of events in cell");
00214 }
00215 
00216 //_______________________________________________________________________
00217 void TMVA::MethodPDEFoam::ProcessOptions()
00218 {
00219    // process user options
00220    if (!(fFrac>=0. && fFrac<=1.)) {
00221       Log() << kWARNING << "TailCut not in [0.,1] ==> using 0.001 instead" << Endl;
00222       fFrac = 0.001;
00223    }
00224 
00225    if (fnActiveCells < 1) {
00226       Log() << kWARNING << "invalid number of active cells specified: "
00227             << fnActiveCells << "; setting nActiveCells=2" << Endl;
00228       fnActiveCells = 2;
00229    }
00230    fnCells = fnActiveCells*2-1;
00231 
00232    fVolFrac = 1./fVolFracInv;
00233 
00234    // DT logic is only applicable if a single foam is trained
00235    if (fSigBgSeparated && fDTLogic != "None") {
00236       Log() << kWARNING << "Decision tree logic works only for a single foam (SigBgSeparate=F)" << Endl;
00237       fDTLogic = "None";
00238       fDTSeparation = kFoam;
00239    }
00240 
00241    // set separation to use
00242    if (fDTLogic == "None")
00243       fDTSeparation = kFoam;
00244    else if (fDTLogic == "GiniIndex")
00245       fDTSeparation = kGiniIndex;
00246    else if (fDTLogic == "MisClassificationError")
00247       fDTSeparation = kMisClassificationError;
00248    else if (fDTLogic == "CrossEntropy")
00249       fDTSeparation = kCrossEntropy;
00250    else {
00251       Log() << kWARNING << "Unknown separation type: " << fDTLogic 
00252             << ", setting to None" << Endl;
00253       fDTLogic = "None";
00254       fDTSeparation = kFoam;
00255    }
00256 
00257    if (fKernelStr == "None" ) fKernel = kNone;
00258    else if (fKernelStr == "Gauss" ) fKernel = kGaus;
00259    else if (fKernelStr == "LinNeighbors") fKernel = kLinN;
00260 
00261    if (fTargetSelectionStr == "Mean" ) fTargetSelection = kMean;
00262    else                                fTargetSelection = kMpv;
00263 }
00264 
00265 //_______________________________________________________________________
00266 TMVA::MethodPDEFoam::~MethodPDEFoam( void )
00267 {
00268    // destructor
00269    for (UInt_t i=0; i<fFoam.size(); i++) {
00270       if (fFoam.at(i)) delete fFoam.at(i);
00271    }
00272    fFoam.clear();
00273 }
00274 
00275 //_______________________________________________________________________
00276 void TMVA::MethodPDEFoam::CalcXminXmax() 
00277 {
00278    // Determine foam range [fXmin, fXmax] for all dimensions, such
00279    // that fFrac events lie outside the foam.
00280 
00281    fXmin.clear();
00282    fXmax.clear();
00283    UInt_t kDim = GetNvar(); // == Data()->GetNVariables();
00284    UInt_t tDim = Data()->GetNTargets();
00285    UInt_t vDim = Data()->GetNVariables();
00286    if (fMultiTargetRegression)
00287       kDim += tDim;
00288 
00289    Double_t *xmin = new Double_t[kDim];
00290    Double_t *xmax = new Double_t[kDim];
00291 
00292    // set default values
00293    for (UInt_t dim=0; dim<kDim; dim++) {
00294       xmin[dim] =  1.e100;
00295       xmax[dim] = -1.e100;
00296    }
00297 
00298    Log() << kDEBUG << "Number of training events: " << Data()->GetNTrainingEvents() << Endl;
00299    Int_t nevoutside = (Int_t)((Data()->GetNTrainingEvents())*(fFrac)); // number of events that are outside the range
00300    Int_t rangehistbins = 10000;                               // number of bins in histos
00301   
00302    // loop over all testing singnal and BG events and clac minimal and
00303    // maximal value of every variable
00304    for (Long64_t i=0; i<(GetNEvents()); i++) { // events loop
00305       const Event* ev = GetEvent(i);    
00306       for (UInt_t dim=0; dim<kDim; dim++) { // variables loop
00307          Double_t val;
00308          if (fMultiTargetRegression) {
00309             if (dim < vDim)
00310                val = ev->GetValue(dim);
00311             else 
00312                val = ev->GetTarget(dim-vDim);
00313          }
00314          else
00315             val = ev->GetValue(dim);
00316 
00317          if (val<xmin[dim])
00318             xmin[dim] = val;
00319          if (val>xmax[dim])
00320             xmax[dim] = val;
00321       }
00322    }
00323 
00324    // Create and fill histograms for each dimension (with same events
00325    // as before), to determine range based on number of events outside
00326    // the range
00327    TH1F **range_h = new TH1F*[kDim]; 
00328    for (UInt_t dim=0; dim<kDim; dim++) {
00329       range_h[dim]  = new TH1F(Form("range%i", dim), "range", rangehistbins, xmin[dim], xmax[dim]);
00330    }
00331 
00332    // fill all testing events into histos 
00333    for (Long64_t i=0; i<GetNEvents(); i++) {
00334       const Event* ev = GetEvent(i);
00335       for (UInt_t dim=0; dim<kDim; dim++) {
00336          if (fMultiTargetRegression) {
00337             if (dim < vDim)
00338                range_h[dim]->Fill(ev->GetValue(dim));
00339             else
00340                range_h[dim]->Fill(ev->GetTarget(dim-vDim));
00341          }
00342          else
00343             range_h[dim]->Fill(ev->GetValue(dim));
00344       }
00345    }
00346 
00347    // calc Xmin, Xmax from Histos
00348    for (UInt_t dim=0; dim<kDim; dim++) { 
00349       for (Int_t i=1; i<(rangehistbins+1); i++) { // loop over bins 
00350          if (range_h[dim]->Integral(0, i) > nevoutside) { // calc left limit (integral over bins 0..i = nevoutside)
00351             xmin[dim]=range_h[dim]->GetBinLowEdge(i);
00352             break;
00353          }
00354       }
00355       for (Int_t i=rangehistbins; i>0; i--) { // calc right limit (integral over bins i..max = nevoutside)
00356          if (range_h[dim]->Integral(i, (rangehistbins+1)) > nevoutside) {
00357             xmax[dim]=range_h[dim]->GetBinLowEdge(i+1);
00358             break;
00359          }
00360       }
00361    }  
00362    // now xmin[] and xmax[] contain upper/lower limits for every dimension
00363 
00364    // copy xmin[], xmax[] values to the class variable
00365    fXmin.clear();
00366    fXmax.clear();
00367    for (UInt_t dim=0; dim<kDim; dim++) { 
00368       fXmin.push_back(xmin[dim]);
00369       fXmax.push_back(xmax[dim]);
00370    }
00371 
00372 
00373    delete[] xmin;
00374    delete[] xmax;
00375 
00376    // delete histos
00377    for (UInt_t dim=0; dim<kDim; dim++)
00378       delete range_h[dim];
00379    delete[] range_h;
00380 
00381    return;
00382 }
00383 
00384 //_______________________________________________________________________
00385 void TMVA::MethodPDEFoam::Train( void )
00386 {
00387    // Train PDE-Foam depending on the set options
00388 
00389    Log() << kVERBOSE << "Calculate Xmin and Xmax for every dimension" << Endl;
00390    CalcXminXmax();
00391 
00392    // delete foams
00393    for (UInt_t i=0; i<fFoam.size(); i++) 
00394       if (fFoam.at(i)) delete fFoam.at(i);
00395    fFoam.clear();
00396 
00397    // start training
00398    if (DoRegression()) {
00399       if (fMultiTargetRegression)
00400          TrainMultiTargetRegression();
00401       else
00402          TrainMonoTargetRegression();
00403    }
00404    else {
00405       if (DataInfo().GetNormalization() != "EQUALNUMEVENTS" ) { 
00406          Log() << kINFO << "NormMode=" << DataInfo().GetNormalization() 
00407                << " chosen. Note that only NormMode=EqualNumEvents" 
00408                << " ensures that Discriminant values correspond to"
00409                << " signal probabilities." << Endl;
00410       }
00411 
00412       Log() << kDEBUG << "N_sig for training events: " << Data()->GetNEvtSigTrain() << Endl;
00413       Log() << kDEBUG << "N_bg for training events:  " << Data()->GetNEvtBkgdTrain() << Endl;
00414       Log() << kDEBUG << "User normalization: " << DataInfo().GetNormalization().Data() << Endl;
00415 
00416       if (fSigBgSeparated)
00417          TrainSeparatedClassification();
00418       else
00419          TrainUnifiedClassification();
00420    }
00421 
00422    // check cells and delete the binary search tree in order to save
00423    // memory
00424    for(UInt_t i=0; i<fFoam.size(); i++) {
00425       Log() << kVERBOSE << "Check all cells and remove cells with volume 0" << Endl;
00426       fFoam.at(i)->CheckCells(true);
00427       if(fFoam.at(i)) fFoam.at(i)->DeleteBinarySearchTree();
00428    }
00429 }
00430 
00431 //_______________________________________________________________________
00432 void TMVA::MethodPDEFoam::TrainSeparatedClassification() 
00433 {
00434    // Creation of 2 separated foams: one for signal events, one for 
00435    // backgound events.
00436 
00437    TString foamcaption[2];
00438    foamcaption[0] = "SignalFoam";
00439    foamcaption[1] = "BgFoam";
00440 
00441    for(int i=0; i<2; i++) {
00442       // create 2 PDEFoams
00443       fFoam.push_back( new PDEFoam(foamcaption[i]) );
00444       InitFoam(fFoam.back(), kSeparate);
00445 
00446       Log() << kVERBOSE << "Filling binary search tree of " << foamcaption[i] 
00447             << " with events" << Endl;
00448       // insert event to BinarySearchTree
00449       for (Long64_t k=0; k<GetNEvents(); k++) {
00450          const Event* ev = GetEvent(k);
00451          if ((i==0 && DataInfo().IsSignal(ev)) || (i==1 && !DataInfo().IsSignal(ev)))
00452             fFoam.back()->FillBinarySearchTree(ev, IgnoreEventsWithNegWeightsInTraining());
00453       }
00454 
00455       Log() << kINFO << "Build up " << foamcaption[i] << Endl;
00456       fFoam.back()->Create(); // build foam
00457 
00458       Log() << kVERBOSE << "Filling foam cells with events" << Endl;
00459       // loop over all events -> fill foam cells
00460       for (Long64_t k=0; k<GetNEvents(); k++) {
00461          const Event* ev = GetEvent(k); 
00462          if ((i==0 && DataInfo().IsSignal(ev)) || (i==1 && !DataInfo().IsSignal(ev)))
00463             fFoam.back()->FillFoamCells(ev, IgnoreEventsWithNegWeightsInTraining());
00464       }
00465    }
00466 }
00467 
00468 //_______________________________________________________________________
00469 void TMVA::MethodPDEFoam::TrainUnifiedClassification() 
00470 {
00471    // Create only one unified foam which contains discriminator
00472    // (N_sig)/(N_sig + N_bg)
00473 
00474    fFoam.push_back( new PDEFoam("DiscrFoam") );
00475    InitFoam(fFoam.back(), kDiscr);
00476 
00477    Log() << kVERBOSE << "Filling binary search tree of discriminator foam with events" << Endl;
00478    // insert event to BinarySearchTree
00479    for (Long64_t k=0; k<GetNEvents(); k++)
00480       fFoam.back()->FillBinarySearchTree(GetEvent(k), IgnoreEventsWithNegWeightsInTraining());
00481 
00482    Log() << kINFO << "Build up discriminator foam" << Endl;
00483    fFoam.back()->Create(); // build foam
00484 
00485    Log() << kVERBOSE << "Filling foam cells with events" << Endl;
00486    // loop over all training events -> fill foam cells with N_sig and N_Bg
00487    for (UInt_t k=0; k<GetNEvents(); k++)
00488       fFoam.back()->FillFoamCells(GetEvent(k), IgnoreEventsWithNegWeightsInTraining());
00489 
00490    Log() << kVERBOSE << "Calculate cell discriminator"<< Endl;
00491    // calc discriminator (and it's error) for each cell
00492    fFoam.back()->CalcCellDiscr();
00493 }
00494 
00495 //_______________________________________________________________________
00496 void TMVA::MethodPDEFoam::TrainMonoTargetRegression() 
00497 {
00498    // Training mono target regression foam
00499    // - foam density = average Target(0)
00500    // - dimension of foam = number of non-targets
00501    // - cell content = average target 0
00502 
00503    if (Data()->GetNTargets() < 1) {
00504       Log() << kFATAL << "Error: number of targets = " << Data()->GetNTargets() << Endl;
00505       return;
00506    }
00507    else if (Data()->GetNTargets() > 1) {
00508       Log() << kWARNING << "Warning: number of targets = " << Data()->GetNTargets()
00509             << "  --> using only first target" << Endl;
00510    }
00511    else 
00512       Log() << kDEBUG << "MethodPDEFoam: number of Targets: " << Data()->GetNTargets() << Endl;
00513 
00514    TString foamcaption = "MonoTargetRegressionFoam";
00515    fFoam.push_back( new PDEFoam(foamcaption) );
00516    InitFoam(fFoam.back(), kMonoTarget);
00517 
00518    Log() << kVERBOSE << "Filling binary search tree with events" << Endl;
00519    // insert event to BinarySearchTree
00520    for (Long64_t k=0; k<GetNEvents(); k++)
00521       fFoam.back()->FillBinarySearchTree(GetEvent(k), IgnoreEventsWithNegWeightsInTraining());
00522 
00523    Log() << kINFO << "Build mono target regression foam" << Endl;
00524    fFoam.back()->Create(); // build foam
00525 
00526    Log() << kVERBOSE << "Filling foam cells with events" << Endl;
00527    // loop over all events -> fill foam cells with target
00528    for (UInt_t k=0; k<GetNEvents(); k++)
00529       fFoam.back()->FillFoamCells(GetEvent(k), IgnoreEventsWithNegWeightsInTraining());
00530 
00531    Log() << kVERBOSE << "Calculate average cell targets"<< Endl;
00532    // calc weight (and it's error) for each cell
00533    fFoam.back()->CalcCellTarget();
00534 }
00535 
00536 //_______________________________________________________________________
00537 void TMVA::MethodPDEFoam::TrainMultiTargetRegression()
00538 {
00539    // Training multi target regression foam
00540    // - foam density = Event density
00541    // - dimension of foam = number of non-targets + number of targets
00542    // - cell content = event density
00543 
00544    Log() << kDEBUG << "Number of variables: " << Data()->GetNVariables() << Endl;
00545    Log() << kDEBUG << "Number of Targets:   " << Data()->GetNTargets()   << Endl;
00546    Log() << kDEBUG << "Dimension of foam:   " << Data()->GetNVariables()+Data()->GetNTargets() << Endl;
00547    if (fKernel==kLinN)
00548       Log() << kFATAL << "LinNeighbors kernel currently not supported" 
00549             << " for multi target regression" << Endl;
00550 
00551    TString foamcaption = "MultiTargetRegressionFoam";
00552    fFoam.push_back( new PDEFoam(foamcaption) );
00553    InitFoam(fFoam.back(), kMultiTarget);
00554 
00555    Log() << kVERBOSE << "Filling binary search tree of multi target regression foam with events" 
00556          << Endl;
00557    // insert event to BinarySearchTree
00558    for (Long64_t k=0; k<GetNEvents(); k++)
00559       fFoam.back()->FillBinarySearchTree(GetEvent(k), IgnoreEventsWithNegWeightsInTraining());
00560 
00561    Log() << kINFO << "Build multi target regression foam" << Endl;
00562    fFoam.back()->Create(); // build foam
00563 
00564    Log() << kVERBOSE << "Filling foam cells with events" << Endl;
00565    // loop over all events -> fill foam cells with number of events
00566    for (UInt_t k=0; k<GetNEvents(); k++)
00567       fFoam.back()->FillFoamCells(GetEvent(k), IgnoreEventsWithNegWeightsInTraining());
00568 }
00569 
00570 //_______________________________________________________________________
00571 Double_t TMVA::MethodPDEFoam::GetMvaValue( Double_t* err, Double_t* errUpper )
00572 {
00573    // Return Mva-Value.  In case of 'fSigBgSeparated==false' return
00574    // the cell content (D = N_sig/(N_bg+N_sig)).  In case of
00575    // 'fSigBgSeparated==false' return D =
00576    // Density_sig/(Density_sig+Density_bg).  In both cases the error
00577    // of the discriminant is stored in 'err'.
00578 
00579    const Event* ev = GetEvent();
00580    Double_t discr = 0.;
00581    Double_t discr_error = 0.;
00582 
00583    if (fSigBgSeparated) {
00584       std::vector<Float_t> xvec = ev->GetValues();
00585 
00586       Double_t density_sig = 0.;
00587       Double_t density_bg  = 0.;
00588 
00589       density_sig = fFoam.at(0)->GetCellDensity(xvec, fKernel); // get signal event density
00590       density_bg  = fFoam.at(1)->GetCellDensity(xvec, fKernel); // get background event density
00591 
00592       // calc disciminator (normed!)
00593       if ( (density_sig+density_bg) > 0 )
00594          discr = density_sig/(density_sig+density_bg);
00595       else
00596          discr = 0.5; // assume 50% signal probability, if no events found (bad assumption, but can be overruled by cut on error)
00597 
00598       // do error estimation (not jet used in TMVA)
00599       Double_t neventsB = fFoam.at(1)->GetCellValue(xvec, kNev);
00600       Double_t neventsS = fFoam.at(0)->GetCellValue(xvec, kNev);
00601       Double_t scaleB = 1.;
00602       Double_t errorS = TMath::Sqrt(neventsS); // estimation of statistical error on counted signal events
00603       Double_t errorB = TMath::Sqrt(neventsB); // estimation of statistical error on counted background events
00604 
00605       if (neventsS == 0) // no signal events in cell
00606          errorS = 1.;
00607       if (neventsB == 0) // no bg events in cell
00608          errorB = 1.;
00609 
00610       if ( (neventsS>1e-10) || (neventsB>1e-10) ) // eq. (5) in paper T.Carli, B.Koblitz 2002
00611          discr_error = TMath::Sqrt( Sqr ( scaleB*neventsB
00612                                           / Sqr(neventsS+scaleB*neventsB)
00613                                           * errorS) +
00614                                     Sqr ( scaleB*neventsS
00615                                           / Sqr(neventsS+scaleB*neventsB)
00616                                           * errorB) );
00617       else discr_error = 1.;
00618 
00619       if (discr_error < 1e-10) discr_error = 1.;
00620    }
00621    else { // Signal and Bg not separated
00622       std::vector<Float_t> xvec = ev->GetValues();
00623       
00624       // get discriminator direct from the foam
00625       discr       = fFoam.at(0)->GetCellDiscr(xvec, fKernel);
00626       discr_error = fFoam.at(0)->GetCellValue(xvec, kDiscriminatorError);
00627    }
00628 
00629    // attribute error
00630    if (err != 0) *err = discr_error;
00631    if (errUpper != 0) *errUpper = discr_error;
00632 
00633    if (fUseYesNoCell)
00634       return (discr < 0.5 ? -1 : 1);
00635    else
00636       return discr;
00637 }
00638 
00639 //_______________________________________________________________________
00640 void TMVA::MethodPDEFoam::SetXminXmax( TMVA::PDEFoam *pdefoam )
00641 {
00642    // Set Xmin, Xmax for every dimension in the given pdefoam object
00643 
00644    if (!pdefoam){
00645       Log() << kFATAL << "Null pointer given!" << Endl;
00646       return;
00647    }
00648 
00649    UInt_t num_vars = GetNvar();
00650    if (fMultiTargetRegression)
00651       num_vars += Data()->GetNTargets();
00652 
00653    for (UInt_t idim=0; idim<num_vars; idim++) { // set upper/ lower limit in foam
00654       Log()<< kDEBUG << "foam: SetXmin[dim="<<idim<<"]: " << fXmin.at(idim) << Endl;
00655       Log()<< kDEBUG << "foam: SetXmax[dim="<<idim<<"]: " << fXmax.at(idim) << Endl;
00656       pdefoam->SetXmin(idim, fXmin.at(idim));
00657       pdefoam->SetXmax(idim, fXmax.at(idim));
00658    }
00659 }
00660 
00661 //_______________________________________________________________________
00662 void TMVA::MethodPDEFoam::InitFoam(TMVA::PDEFoam *pdefoam, EFoamType ft)
00663 {
00664    // Set foam options (incl. Xmin, Xmax) and initialize foam via 
00665    // pdefoam->Init()
00666 
00667    if (!pdefoam){
00668       Log() << kFATAL << "Null pointer given!" << Endl;
00669       return;
00670    }
00671 
00672    // set fLogger attributes
00673    pdefoam->Log().SetMinType(this->Log().GetMinType());
00674 
00675    // Set foam fill value
00676    pdefoam->SetFoamType(ft);
00677    
00678    // set Options VolFrac, kDim, ...
00679    if (ft==kMultiTarget)
00680       // dimension of foam = number of targets + non-targets
00681       pdefoam->SetDim(      Data()->GetNTargets()+Data()->GetNVariables());
00682    else
00683       pdefoam->SetDim(      GetNvar());  // Mandatory!
00684    pdefoam->SetVolumeFraction(fVolFrac); // Mandatory!
00685    pdefoam->SetnCells(      fnCells);    // optional
00686    pdefoam->SetnSampl(      fnSampl);    // optional
00687    pdefoam->SetnBin(        fnBin);      // optional
00688    pdefoam->SetEvPerBin(    fEvPerBin);  // optional
00689    pdefoam->SetFillFoamWithOrigWeights(fFillFoamWithOrigWeights);
00690    pdefoam->SetDTSeparation(fDTSeparation);
00691    pdefoam->SetPeekMax(fPeekMax);
00692 
00693    // cuts
00694    pdefoam->SetNmin(fNmin);
00695    pdefoam->SetMaxDepth(fMaxDepth); // maximum cell tree depth
00696 
00697    // Init PDEFoam
00698    pdefoam->Init();
00699    
00700    // Set Xmin, Xmax
00701    SetXminXmax(pdefoam);
00702 }
00703 
00704 //_______________________________________________________________________
00705 const std::vector<Float_t>& TMVA::MethodPDEFoam::GetRegressionValues()
00706 {
00707    // Return regression values for both multi and mono target regression
00708 
00709    if (fRegressionReturnVal == 0) fRegressionReturnVal = new std::vector<Float_t>();
00710    fRegressionReturnVal->clear();
00711 
00712    const Event* ev = GetEvent();
00713    std::vector<Float_t> vals = ev->GetValues(); // get array of event variables (non-targets)   
00714 
00715    if (vals.size() == 0) {
00716       Log() << kWARNING << "<GetRegressionValues> value vector has size 0. " << Endl;
00717    }
00718 
00719    if (fMultiTargetRegression) {
00720       std::vector<Float_t> targets = fFoam.at(0)->GetProjectedRegValue(vals, fKernel, fTargetSelection);
00721       for(UInt_t i=0; i<(Data()->GetNTargets()); i++)
00722          fRegressionReturnVal->push_back(targets.at(i));
00723    }
00724    else {
00725       fRegressionReturnVal->push_back(fFoam.at(0)->GetCellRegValue0(vals, fKernel));   
00726    }
00727 
00728    // apply inverse transformation to regression values
00729    Event * evT = new Event(*ev);
00730    for (UInt_t itgt = 0; itgt < Data()->GetNTargets(); itgt++) {
00731       evT->SetTarget(itgt, fRegressionReturnVal->at(itgt) );
00732    }
00733    const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
00734    fRegressionReturnVal->clear();
00735    for (UInt_t itgt = 0; itgt < Data()->GetNTargets(); itgt++) {
00736       fRegressionReturnVal->push_back( evT2->GetTarget(itgt) );
00737    }
00738 
00739    delete evT;
00740 
00741    return (*fRegressionReturnVal);
00742 }
00743 
00744 //_______________________________________________________________________
00745 void TMVA::MethodPDEFoam::PrintCoefficients( void ) 
00746 {}
00747 
00748 //_______________________________________________________________________
00749 void TMVA::MethodPDEFoam::AddWeightsXMLTo( void* parent ) const 
00750 {
00751    // create XML output of PDEFoam method variables
00752 
00753    void* wght = gTools().AddChild(parent, "Weights");
00754    gTools().AddAttr( wght, "SigBgSeparated",  fSigBgSeparated );
00755    gTools().AddAttr( wght, "Frac",            fFrac );
00756    gTools().AddAttr( wght, "DiscrErrCut",     fDiscrErrCut );
00757    gTools().AddAttr( wght, "VolFrac",         fVolFrac );
00758    gTools().AddAttr( wght, "nCells",          fnCells );
00759    gTools().AddAttr( wght, "nSampl",          fnSampl );
00760    gTools().AddAttr( wght, "nBin",            fnBin );
00761    gTools().AddAttr( wght, "EvPerBin",        fEvPerBin );
00762    gTools().AddAttr( wght, "Compress",        fCompress );
00763    gTools().AddAttr( wght, "DoRegression",    DoRegression() );
00764    gTools().AddAttr( wght, "CutNmin",         fNmin>0 );
00765    gTools().AddAttr( wght, "Nmin",            fNmin );
00766    gTools().AddAttr( wght, "CutRMSmin",       false );
00767    gTools().AddAttr( wght, "RMSmin",          0.0 );
00768    gTools().AddAttr( wght, "Kernel",          KernelToUInt(fKernel) );
00769    gTools().AddAttr( wght, "TargetSelection", TargetSelectionToUInt(fTargetSelection) );
00770    gTools().AddAttr( wght, "FillFoamWithOrigWeights", fFillFoamWithOrigWeights );
00771    gTools().AddAttr( wght, "UseYesNoCell",    fUseYesNoCell );
00772    
00773    // save foam borders Xmin[i], Xmax[i]
00774    void *xmin_wrap;
00775    for (UInt_t i=0; i<fXmin.size(); i++){
00776       xmin_wrap = gTools().AddChild( wght, "Xmin" );
00777       gTools().AddAttr( xmin_wrap, "Index", i );
00778       gTools().AddAttr( xmin_wrap, "Value", fXmin.at(i) );
00779    }
00780    void *xmax_wrap;
00781    for (UInt_t i=0; i<fXmax.size(); i++){
00782       xmax_wrap = gTools().AddChild( wght, "Xmax" );
00783       gTools().AddAttr( xmax_wrap, "Index", i );
00784       gTools().AddAttr( xmax_wrap, "Value", fXmax.at(i) );
00785    }
00786 
00787    // write foams to xml file
00788    WriteFoamsToFile();
00789 }
00790 
00791 //_______________________________________________________________________
00792 void TMVA::MethodPDEFoam::WriteFoamsToFile() const 
00793 {
00794    // Write pure foams to file
00795 
00796    // fill variable names into foam
00797    FillVariableNamesToFoam();   
00798 
00799    TString rfname( GetWeightFileName() ); 
00800 
00801    // replace in case of txt weight file
00802    rfname.ReplaceAll( TString(".") + gConfig().GetIONames().fWeightFileExtension + ".txt", ".xml" );   
00803 
00804    // add foam indicator to distinguish from main weight file
00805    rfname.ReplaceAll( ".xml", "_foams.root" );
00806 
00807    TFile *rootFile = 0;
00808    if (fCompress) rootFile = new TFile(rfname, "RECREATE", "foamfile", 9);
00809    else           rootFile = new TFile(rfname, "RECREATE");
00810 
00811    fFoam.at(0)->Write(fFoam.at(0)->GetFoamName().Data());
00812    if (!DoRegression() && fSigBgSeparated) 
00813       fFoam.at(1)->Write(fFoam.at(1)->GetFoamName().Data());
00814    rootFile->Close();
00815    Log() << kINFO << "Foams written to file: " 
00816          << gTools().Color("lightblue") << rfname << gTools().Color("reset") << Endl;
00817 }
00818 
00819 //_______________________________________________________________________
00820 void  TMVA::MethodPDEFoam::ReadWeightsFromStream( istream& istr )
00821 {
00822    // read options and internal parameters
00823 
00824    istr >> fSigBgSeparated;                 // Seperate Sig and Bg, or not
00825    istr >> fFrac;                           // Fraction used for calc of Xmin, Xmax
00826    istr >> fDiscrErrCut;                    // cut on discrimant error
00827    istr >> fVolFrac;                        // volume fraction (used for density calculation during buildup)
00828    istr >> fnCells;                         // Number of Cells  (500)
00829    istr >> fnSampl;                         // Number of MC events per cell in build-up (1000)
00830    istr >> fnBin;                           // Number of bins in build-up (100)
00831    istr >> fEvPerBin;                       // Maximum events (equiv.) per bin in buid-up (1000) 
00832    istr >> fCompress;                       // compress output file
00833 
00834    Bool_t regr;
00835    istr >> regr;                            // regression foam
00836    SetAnalysisType( (regr ? Types::kRegression : Types::kClassification ) );
00837    
00838    Bool_t CutNmin, CutRMSmin; // dummy for backwards compatib.
00839    Float_t RMSmin;            // dummy for backwards compatib.
00840    istr >> CutNmin;                         // cut on minimal number of events in cell
00841    istr >> fNmin;
00842    istr >> CutRMSmin;                       // cut on minimal RMS in cell
00843    istr >> RMSmin;
00844 
00845    UInt_t ker = 0;
00846    istr >> ker;                             // used kernel for GetMvaValue()
00847    fKernel = UIntToKernel(ker);
00848 
00849    UInt_t ts = 0;
00850    istr >> ts;                             // used method for target selection
00851    fTargetSelection = UIntToTargetSelection(ts);
00852 
00853    istr >> fFillFoamWithOrigWeights;        // fill foam with original event weights
00854    istr >> fUseYesNoCell;                   // return -1 or 1 for bg or signal event
00855 
00856    // clear old range and prepare new range
00857    fXmin.clear();
00858    fXmax.clear();
00859    UInt_t kDim = GetNvar();
00860    if (fMultiTargetRegression)
00861       kDim += Data()->GetNTargets();
00862 
00863    for (UInt_t i=0; i<kDim; i++) {
00864       fXmin.push_back(0.);
00865       fXmax.push_back(0.);
00866    }
00867    // read range
00868    for (UInt_t i=0; i<kDim; i++) 
00869       istr >> fXmin.at(i);
00870    for (UInt_t i=0; i<kDim; i++) 
00871       istr >> fXmax.at(i);
00872 
00873    // read pure foams from file
00874    ReadFoamsFromFile();
00875 }
00876 
00877 //_______________________________________________________________________
00878 void TMVA::MethodPDEFoam::ReadWeightsFromXML( void* wghtnode ) 
00879 {
00880    // read PDEFoam variables from xml weight file
00881 
00882    gTools().ReadAttr( wghtnode, "SigBgSeparated",  fSigBgSeparated );
00883    gTools().ReadAttr( wghtnode, "Frac",            fFrac );
00884    gTools().ReadAttr( wghtnode, "DiscrErrCut",     fDiscrErrCut );
00885    gTools().ReadAttr( wghtnode, "VolFrac",         fVolFrac );
00886    gTools().ReadAttr( wghtnode, "nCells",          fnCells );
00887    gTools().ReadAttr( wghtnode, "nSampl",          fnSampl );
00888    gTools().ReadAttr( wghtnode, "nBin",            fnBin );
00889    gTools().ReadAttr( wghtnode, "EvPerBin",        fEvPerBin );
00890    gTools().ReadAttr( wghtnode, "Compress",        fCompress );
00891    Bool_t regr;
00892    gTools().ReadAttr( wghtnode, "DoRegression",    regr );
00893    SetAnalysisType( (regr ? Types::kRegression : Types::kClassification ) );
00894    Bool_t CutNmin; // dummy for backwards compatib.
00895    gTools().ReadAttr( wghtnode, "CutNmin",         CutNmin );
00896    gTools().ReadAttr( wghtnode, "Nmin",            fNmin );
00897    Bool_t CutRMSmin; // dummy for backwards compatib.
00898    Float_t RMSmin;   // dummy for backwards compatib.
00899    gTools().ReadAttr( wghtnode, "CutRMSmin",       CutRMSmin );
00900    gTools().ReadAttr( wghtnode, "RMSmin",          RMSmin );
00901    UInt_t ker = 0;
00902    gTools().ReadAttr( wghtnode, "Kernel",          ker );
00903    fKernel = UIntToKernel(ker);
00904    UInt_t ts = 0;
00905    gTools().ReadAttr( wghtnode, "TargetSelection", ts );
00906    fTargetSelection = UIntToTargetSelection(ts);
00907    if (gTools().HasAttr(wghtnode, "FillFoamWithOrigWeights"))
00908       gTools().ReadAttr( wghtnode, "FillFoamWithOrigWeights", fFillFoamWithOrigWeights );
00909    if (gTools().HasAttr(wghtnode, "UseYesNoCell"))
00910       gTools().ReadAttr( wghtnode, "UseYesNoCell", fUseYesNoCell );
00911    
00912    // clear old range [Xmin, Xmax] and prepare new range for reading
00913    fXmin.clear();
00914    fXmax.clear();
00915    UInt_t kDim = GetNvar();
00916    if (fMultiTargetRegression)
00917       kDim += Data()->GetNTargets();
00918 
00919    for (UInt_t i=0; i<kDim; i++) {
00920       fXmin.push_back(0.);
00921       fXmax.push_back(0.);
00922    }
00923 
00924    // read foam range
00925    void *xmin_wrap = gTools().GetChild( wghtnode );
00926    for (UInt_t counter=0; counter<kDim; counter++) {
00927       UInt_t i=0;
00928       gTools().ReadAttr( xmin_wrap , "Index", i );
00929       if (i>=kDim)
00930          Log() << kFATAL << "dimension index out of range:" << i << Endl;
00931       gTools().ReadAttr( xmin_wrap , "Value", fXmin.at(i) );
00932       xmin_wrap = gTools().GetNextChild( xmin_wrap );
00933    }
00934 
00935    void *xmax_wrap = xmin_wrap;
00936    for (UInt_t counter=0; counter<kDim; counter++) {
00937       UInt_t i=0;
00938       gTools().ReadAttr( xmax_wrap , "Index", i );
00939       if (i>=kDim)
00940          Log() << kFATAL << "dimension index out of range:" << i << Endl;
00941       gTools().ReadAttr( xmax_wrap , "Value", fXmax.at(i) );
00942       xmax_wrap = gTools().GetNextChild( xmax_wrap );
00943    }
00944 
00945    // if foams exist, delete them
00946    for (UInt_t i=0; i<fFoam.size(); i++)
00947       if (fFoam.at(i)) delete fFoam.at(i);
00948    fFoam.clear();
00949    
00950    // read pure foams from file
00951    ReadFoamsFromFile();
00952 }
00953 
00954 //_______________________________________________________________________
00955 void TMVA::MethodPDEFoam::ReadFoamsFromFile()
00956 {
00957    // read pure foams from file
00958 
00959    TString rfname( GetWeightFileName() ); 
00960 
00961    // replace in case of txt weight file
00962    rfname.ReplaceAll( TString(".") + gConfig().GetIONames().fWeightFileExtension + ".txt", ".xml" );
00963 
00964    // add foam indicator to distinguish from main weight file
00965    rfname.ReplaceAll( ".xml", "_foams.root" );
00966 
00967    Log() << kINFO << "Read foams from file: " << gTools().Color("lightblue") 
00968          << rfname << gTools().Color("reset") << Endl;
00969    TFile *rootFile = new TFile( rfname, "READ" );
00970    if (rootFile->IsZombie()) Log() << kFATAL << "Cannot open file \"" << rfname << "\"" << Endl;
00971 
00972    // read foams from file
00973    if (DoRegression()) {
00974       if (fMultiTargetRegression) 
00975          fFoam.push_back( dynamic_cast<PDEFoam*>(rootFile->Get("MultiTargetRegressionFoam")) );
00976       else                        
00977          fFoam.push_back( dynamic_cast<PDEFoam*>(rootFile->Get("MonoTargetRegressionFoam")) );
00978    }
00979    else {
00980       if (fSigBgSeparated) {
00981          fFoam.push_back( dynamic_cast<PDEFoam*>(rootFile->Get("SignalFoam")) );
00982          fFoam.push_back( dynamic_cast<PDEFoam*>(rootFile->Get("BgFoam")) );
00983       }
00984       else 
00985          fFoam.push_back( dynamic_cast<PDEFoam*>(rootFile->Get("DiscrFoam")) );
00986    }
00987    if (!fFoam.at(0) || (!DoRegression() && fSigBgSeparated && !fFoam.at(1)))
00988       Log() << kFATAL << "Could not load foam!" << Endl;
00989 }
00990 
00991 //_______________________________________________________________________
00992 TMVA::EKernel TMVA::MethodPDEFoam::UIntToKernel(UInt_t iker)
00993 {
00994    // convert UInt_t to EKernel (used for reading weight files)
00995    switch(iker) {
00996    case 0:  return kNone;
00997    case 1:  return kGaus;
00998    case 2:  return kLinN;
00999    default:
01000       Log() << kWARNING << "<UIntToKernel>: unknown kernel number: " << iker << Endl;
01001       return kNone;
01002    }
01003    return kNone;
01004 }
01005 
01006 //_______________________________________________________________________
01007 TMVA::ETargetSelection TMVA::MethodPDEFoam::UIntToTargetSelection(UInt_t its)
01008 {
01009    // convert UInt_t to ETargetSelection (used for reading weight files)
01010    switch(its) {
01011    case 0:  return kMean;
01012    case 1:  return kMpv;
01013    default:
01014       Log() << kWARNING << "<UIntToTargetSelection>: unknown method TargetSelection: " << its << Endl;
01015       return kMean;
01016    }
01017    return kMean;
01018 }
01019 
01020 //_______________________________________________________________________
01021 void TMVA::MethodPDEFoam::FillVariableNamesToFoam() const 
01022 {
01023    // fill variable names into foam(s)
01024    for (UInt_t ifoam=0; ifoam<fFoam.size(); ifoam++) {
01025       for (Int_t idim=0; idim<fFoam.at(ifoam)->GetTotDim(); idim++) {
01026          if(fMultiTargetRegression && (UInt_t)idim>=DataInfo().GetNVariables())
01027             fFoam.at(ifoam)->AddVariableName(DataInfo().GetTargetInfo(idim-DataInfo().GetNVariables()).GetExpression().Data());
01028          else
01029             fFoam.at(ifoam)->AddVariableName(DataInfo().GetVariableInfo(idim).GetExpression().Data());
01030       }
01031    }   
01032 }
01033 
01034 //_______________________________________________________________________
01035 void TMVA::MethodPDEFoam::MakeClassSpecific( std::ostream& /*fout*/, const TString& /*className*/ ) const
01036 {
01037    // write PDEFoam-specific classifier response
01038 }
01039 
01040 //_______________________________________________________________________
01041 void TMVA::MethodPDEFoam::GetHelpMessage() const
01042 {
01043    // provide help message
01044    Log() << Endl;
01045    Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
01046    Log() << Endl;
01047    Log() << "PDE-Foam is a variation of the PDE-RS method using a self-adapting" << Endl;
01048    Log() << "binning method to divide the multi-dimensional variable space into a" << Endl;
01049    Log() << "finite number of hyper-rectangles (cells). The binning algorithm " << Endl;
01050    Log() << "adjusts the size and position of a predefined number of cells such" << Endl;
01051    Log() << "that the variance of the signal and background densities inside the " << Endl;
01052    Log() << "cells reaches a minimum" << Endl;
01053    Log() << Endl;
01054    Log() << gTools().Color("bold") << "--- Use of booking options:" << gTools().Color("reset") << Endl;
01055    Log() << Endl;
01056    Log() << "The PDEFoam classifier supports two different algorithms: " << Endl;
01057    Log() << Endl;
01058    Log() << "  (1) Create one foam, which stores the signal over background" << Endl;
01059    Log() << "      probability density.  During foam buildup the variance of the" << Endl;
01060    Log() << "      discriminant inside the cells is minimised." << Endl;
01061    Log() << Endl;
01062    Log() << "      Booking option:   SigBgSeparated=F" << Endl;
01063    Log() << Endl;
01064    Log() << "  (2) Create two separate foams, one for the signal events and one for" << Endl;
01065    Log() << "      background events.  During foam buildup the variance of the" << Endl;
01066    Log() << "      event density inside the cells is minimised separately for" << Endl;
01067    Log() << "      signal and background." << Endl;
01068    Log() << Endl;
01069    Log() << "      Booking option:   SigBgSeparated=T" << Endl;
01070    Log() << Endl;
01071    Log() << "The following options can be set (the listed values are found to be a" << Endl;
01072    Log() << "good starting point for most applications):" << Endl;
01073    Log() << Endl;
01074    Log() << "        SigBgSeparate   False   Separate Signal and Background" << Endl;
01075    Log() << "              TailCut   0.001   Fraction of outlier events that excluded" << Endl;
01076    Log() << "                                from the foam in each dimension " << Endl;
01077    Log() << "              VolFrac  0.0333   Volume fraction (used for density calculation" << Endl;
01078    Log() << "                                during foam build-up) " << Endl;
01079    Log() << "         nActiveCells     500   Maximal number of active cells in final foam " << Endl;
01080    Log() << "               nSampl    2000   Number of MC events per cell in foam build-up " << Endl;
01081    Log() << "                 nBin       5   Number of bins used in foam build-up " << Endl;
01082    Log() << "                 Nmin     100   Number of events in cell required to split cell" << Endl;
01083    Log() << "               Kernel    None   Kernel type used (possible valuses are: None," << Endl;
01084    Log() << "                                Gauss)" << Endl;
01085    Log() << "             Compress    True   Compress foam output file " << Endl;
01086    Log() << Endl;
01087    Log() << "   Additional regression options:" << Endl;
01088    Log() << Endl;
01089    Log() << "MultiTargetRegression   False   Do regression with multiple targets " << Endl;
01090    Log() << "      TargetSelection    Mean   Target selection method (possible valuses are: " << Endl;
01091    Log() << "                                Mean, Mpv)" << Endl;
01092    Log() << Endl;
01093    Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
01094    Log() << Endl;
01095    Log() << "The performance of the two implementations was found to be similar for" << Endl;
01096    Log() << "most examples studied. For the same number of cells per foam, the two-" << Endl;
01097    Log() << "foam option approximately doubles the amount of computer memory needed" << Endl;
01098    Log() << "during classification. For special cases where the event-density" << Endl;
01099    Log() << "distribution of signal and background events is very different, the" << Endl;
01100    Log() << "two-foam option was found to perform significantly better than the" << Endl;
01101    Log() << "option with only one foam." << Endl;
01102    Log() << Endl;
01103    Log() << "In order to gain better classification performance we recommend to set" << Endl;
01104    Log() << "the parameter \"nActiveCells\" to a high value." << Endl;
01105    Log() << Endl;
01106    Log() << "The parameter \"VolFrac\" specifies the size of the sampling volume" << Endl;
01107    Log() << "during foam buildup and should be tuned in order to achieve optimal" << Endl;
01108    Log() << "performance.  A larger box leads to a reduced statistical uncertainty" << Endl;
01109    Log() << "for small training samples and to smoother sampling. A smaller box on" << Endl;
01110    Log() << "the other hand increases the sensitivity to statistical fluctuations" << Endl;
01111    Log() << "in the training samples, but for sufficiently large training samples" << Endl;
01112    Log() << "it will result in a more precise local estimate of the sampled" << Endl;
01113    Log() << "density. In general, higher dimensional problems require larger box" << Endl;
01114    Log() << "sizes, due to the reduced average number of events per box volume. The" << Endl;
01115    Log() << "default value of 0.0333 was optimised for an example with 5" << Endl;
01116    Log() << "observables and training samples of the order of 50000 signal and" << Endl;
01117    Log() << "background events each." << Endl;
01118    Log() << Endl;
01119    Log() << "Furthermore kernel weighting can be activated, which will lead to an" << Endl;
01120    Log() << "additional performance improvement. Note that Gauss weighting will" << Endl;
01121    Log() << "significantly increase the response time of the method. LinNeighbors" << Endl;
01122    Log() << "weighting performs a linear interpolation with direct neighbor cells" << Endl;
01123    Log() << "for each dimension and is much faster than Gauss weighting." << Endl;
01124    Log() << Endl;
01125    Log() << "The classification results were found to be rather insensitive to the" << Endl;
01126    Log() << "values of the parameters \"nSamples\" and \"nBin\"." << Endl;
01127 }

Generated on Tue Jul 5 15:25:04 2011 for ROOT_528-00b_version by  doxygen 1.5.1