#include <iomanip>
#include <cassert>

#include "TMath.h"
#include "Riostream.h"
#include "TFile.h"

#include "TMVA/MethodPDEFoam.h"
#include "TMVA/Tools.h"
#include "TMatrix.h"
#include "TMVA/Ranking.h"
#include "TMVA/Types.h"
#include "TMVA/ClassifierFactory.h"
#include "TMVA/Config.h"

REGISTER_METHOD(PDEFoam)

ClassImp(TMVA::MethodPDEFoam)

// standard constructor: set default values for all options and state
TMVA::MethodPDEFoam::MethodPDEFoam( const TString& jobName,
                                    const TString& methodTitle,
                                    DataSetInfo& dsi,
                                    const TString& theOption,
                                    TDirectory* theTargetDir ) :
   MethodBase( jobName, Types::kPDEFoam, methodTitle, dsi, theOption, theTargetDir )
   , fSigBgSeparated(kFALSE)
   , fFrac(0.001)
   , fDiscrErrCut(-1.0)
   , fVolFrac(30.0)
   , fVolFracInv(1.0/30.0)
   , fnCells(999)
   , fnActiveCells(500)
   , fnSampl(2000)
   , fnBin(5)
   , fEvPerBin(10000)
   , fCompress(kTRUE)
   , fMultiTargetRegression(kFALSE)
   , fNmin(100)
   , fCutNmin(kTRUE)
   , fMaxDepth(0)
   , fKernelStr("None")
   , fKernel(kNone)
   , fTargetSelectionStr("Mean")
   , fTargetSelection(kMean)
   , fFillFoamWithOrigWeights(kFALSE)
   , fUseYesNoCell(kFALSE)
   , fDTLogic("None")
   , fDTSeparation(kFoam)
   , fPeekMax(kTRUE)
   , fXmin(std::vector<Double_t>())
   , fXmax(std::vector<Double_t>())
   , fFoam(std::vector<PDEFoam*>())
{
   // nothing to do here; defaults are set in the initializer list
}

// constructor used when reading the method from a weight file
TMVA::MethodPDEFoam::MethodPDEFoam( DataSetInfo& dsi,
                                    const TString& theWeightFile,
                                    TDirectory* theTargetDir ) :
   MethodBase( Types::kPDEFoam, dsi, theWeightFile, theTargetDir )
   , fSigBgSeparated(kFALSE)
   , fFrac(0.001)
   , fDiscrErrCut(-1.0)
   , fVolFrac(30.0)
   , fVolFracInv(1.0/30.0)
   , fnCells(999)
   , fnActiveCells(500)
   , fnSampl(2000)
   , fnBin(5)
   , fEvPerBin(10000)
   , fCompress(kTRUE)
   , fMultiTargetRegression(kFALSE)
   , fNmin(100)
   , fCutNmin(kTRUE)
   , fMaxDepth(0)
   , fKernelStr("None")
   , fKernel(kNone)
   , fTargetSelectionStr("Mean")
   , fTargetSelection(kMean)
   , fFillFoamWithOrigWeights(kFALSE)
   , fUseYesNoCell(kFALSE)
   , fDTLogic("None")
   , fDTSeparation(kFoam)
   , fPeekMax(kTRUE)
   , fXmin(std::vector<Double_t>())
   , fXmax(std::vector<Double_t>())
   , fFoam(std::vector<PDEFoam*>())
{
   // nothing to do here; defaults are set in the initializer list
}

// PDEFoam can handle classification with two classes and regression
// with one or more regression targets
Bool_t TMVA::MethodPDEFoam::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t  )
{
   if (type == Types::kClassification && numberClasses == 2) return kTRUE;
   if (type == Types::kRegression) return kTRUE;
   return kFALSE;
}

// default initialization called by all constructors
void TMVA::MethodPDEFoam::Init( void )
{
   // set PDEFoam steering parameters to their default values
   fSigBgSeparated = kFALSE;
   fFrac           = 0.001;
   fDiscrErrCut    = -1.;
   fVolFrac        = 30.0;
   fVolFracInv     = 1./30.;
   fnActiveCells   = 500;
   fnCells         = fnActiveCells*2-1;
   fnSampl         = 2000;
   fnBin           = 5;
   fEvPerBin       = 10000;
   fNmin           = 100;
   fMaxDepth       = 0;
   fFillFoamWithOrigWeights = kFALSE;
   fUseYesNoCell   = kFALSE;
   fDTLogic        = "None";
   fDTSeparation   = kFoam;
   fPeekMax        = kTRUE;

   fKernel         = kNone;
   fTargetSelection= kMean;

   fCompress              = kTRUE;
   fMultiTargetRegression = kFALSE;

   // delete foams from a previous training, if any
   for (UInt_t i=0; i<fFoam.size(); i++)
      if (fFoam.at(i)) delete fFoam.at(i);
   fFoam.clear();

   if (fUseYesNoCell)
      SetSignalReferenceCut( 0.0 ); // MVA output is -1 or +1
   else
      SetSignalReferenceCut( 0.5 ); // MVA output is the discriminant in [0, 1]
}

// declare MethodPDEFoam options
void TMVA::MethodPDEFoam::DeclareOptions()
{
   DeclareOptionRef( fSigBgSeparated = kFALSE, "SigBgSeparate", "Separate foams for signal and background" );
   DeclareOptionRef( fFrac = 0.001,           "TailCut",  "Fraction of outlier events that are excluded from the foam in each dimension" );
   DeclareOptionRef( fVolFracInv = 1./30.,    "VolFrac",  "Size of sampling box, used for density calculation during foam build-up (maximum value: 1.0 is equivalent to volume of entire foam)");
   DeclareOptionRef( fnActiveCells = 500,     "nActiveCells",  "Maximum number of active cells to be created by the foam");
   DeclareOptionRef( fnSampl = 2000,          "nSampl",   "Number of generated MC events per cell");
   DeclareOptionRef( fnBin = 5,               "nBin",     "Number of bins in edge histograms");
   DeclareOptionRef( fCompress = kTRUE,       "Compress", "Compress foam output file");
   DeclareOptionRef( fMultiTargetRegression = kFALSE,     "MultiTargetRegression", "Do regression with multiple targets");
   DeclareOptionRef( fNmin = 100,             "Nmin",     "Number of events in cell required to split cell");
   DeclareOptionRef( fMaxDepth = 0,           "MaxDepth",  "Maximum depth of cell tree (0=unlimited)");
   DeclareOptionRef( fFillFoamWithOrigWeights = kFALSE, "FillFoamWithOrigWeights", "Fill foam with original or boost weights");
   DeclareOptionRef( fUseYesNoCell = kFALSE, "UseYesNoCell", "Return -1 or 1 for bkg or signal like events");
   DeclareOptionRef( fDTLogic = "None", "DTLogic", "Use decision tree algorithm to split cells");
   AddPreDefVal(TString("None"));
   AddPreDefVal(TString("GiniIndex"));
   AddPreDefVal(TString("MisClassificationError"));
   AddPreDefVal(TString("CrossEntropy"));
   DeclareOptionRef( fPeekMax = kTRUE, "PeekMax", "Pick the cell with the maximal driver integral for the next split");

   DeclareOptionRef( fKernelStr = "None",     "Kernel",   "Kernel type used");
   AddPreDefVal(TString("None"));
   AddPreDefVal(TString("Gauss"));
   AddPreDefVal(TString("LinNeighbors"));
   DeclareOptionRef( fTargetSelectionStr = "Mean", "TargetSelection", "Target selection method");
   AddPreDefVal(TString("Mean"));
   AddPreDefVal(TString("Mpv"));
}
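
// Note: these options are set by the user via the standard colon-separated
// TMVA booking string. A minimal sketch of such a string (illustrative values
// only, matching the defaults declared above):
//
//    "SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:"
//    "nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T"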

// declare obsolete options for backward-compatible reading of old weight files
void TMVA::MethodPDEFoam::DeclareCompatibilityOptions() {
   MethodBase::DeclareCompatibilityOptions();
   DeclareOptionRef(fCutNmin = kTRUE, "CutNmin",  "Requirement for minimal number of events in cell");
}

// process user options
void TMVA::MethodPDEFoam::ProcessOptions()
{
   if (!(fFrac>=0. && fFrac<=1.)) {
      Log() << kWARNING << "TailCut not in [0.,1.] ==> using 0.001 instead" << Endl;
      fFrac = 0.001;
   }

   if (fnActiveCells < 1) {
      Log() << kWARNING << "invalid number of active cells specified: "
            << fnActiveCells << "; setting nActiveCells=2" << Endl;
      fnActiveCells = 2;
   }
   fnCells = fnActiveCells*2-1;

   fVolFrac = 1./fVolFracInv;

   // decision tree logic is only applicable if a single foam is trained
   if (fSigBgSeparated && fDTLogic != "None") {
      Log() << kWARNING << "Decision tree logic works only for a single foam (SigBgSeparate=F)" << Endl;
      fDTLogic = "None";
      fDTSeparation = kFoam;
   }

   // translate the DTLogic string into the separation type
   if (fDTLogic == "None")
      fDTSeparation = kFoam;
   else if (fDTLogic == "GiniIndex")
      fDTSeparation = kGiniIndex;
   else if (fDTLogic == "MisClassificationError")
      fDTSeparation = kMisClassificationError;
   else if (fDTLogic == "CrossEntropy")
      fDTSeparation = kCrossEntropy;
   else {
      Log() << kWARNING << "Unknown separation type: " << fDTLogic
            << ", setting to None" << Endl;
      fDTLogic = "None";
      fDTSeparation = kFoam;
   }

   if (fKernelStr == "None" ) fKernel = kNone;
   else if (fKernelStr == "Gauss" ) fKernel = kGaus;
   else if (fKernelStr == "LinNeighbors") fKernel = kLinN;

   if (fTargetSelectionStr == "Mean" ) fTargetSelection = kMean;
   else                                fTargetSelection = kMpv;
}

// destructor: delete all foams
TMVA::MethodPDEFoam::~MethodPDEFoam( void )
{
   for (UInt_t i=0; i<fFoam.size(); i++) {
      if (fFoam.at(i)) delete fFoam.at(i);
   }
   fFoam.clear();
}

// determine the foam range [fXmin, fXmax] for all dimensions, such that
// a fraction of 'fFrac' outlier events is excluded in each dimension
void TMVA::MethodPDEFoam::CalcXminXmax()
{
   fXmin.clear();
   fXmax.clear();
   UInt_t kDim = GetNvar();
   UInt_t tDim = Data()->GetNTargets();
   UInt_t vDim = Data()->GetNVariables();
   if (fMultiTargetRegression)
      kDim += tDim;

   Double_t *xmin = new Double_t[kDim];
   Double_t *xmax = new Double_t[kDim];

   // set default values
   for (UInt_t dim=0; dim<kDim; dim++) {
      xmin[dim] =  1.e100;
      xmax[dim] = -1.e100;
   }

   Log() << kDEBUG << "Number of training events: " << Data()->GetNTrainingEvents() << Endl;
   Int_t nevoutside = (Int_t)((Data()->GetNTrainingEvents())*(fFrac)); // number of events to exclude on each side
   Int_t rangehistbins = 10000;                                        // number of bins in range histograms

   // loop over all training events and determine the full range of each
   // variable (and of each target, for multi-target regression)
   for (Long64_t i=0; i<(GetNEvents()); i++) {
      const Event* ev = GetEvent(i);
      for (UInt_t dim=0; dim<kDim; dim++) {
         Double_t val;
         if (fMultiTargetRegression) {
            if (dim < vDim)
               val = ev->GetValue(dim);
            else
               val = ev->GetTarget(dim-vDim);
         }
         else
            val = ev->GetValue(dim);

         if (val<xmin[dim])
            xmin[dim] = val;
         if (val>xmax[dim])
            xmax[dim] = val;
      }
   }

   // create histograms of the full variable ranges, used below to cut
   // off the tails
   TH1F **range_h = new TH1F*[kDim];
   for (UInt_t dim=0; dim<kDim; dim++) {
      range_h[dim]  = new TH1F(Form("range%i", dim), "range", rangehistbins, xmin[dim], xmax[dim]);
   }

   // fill the range histograms with the event values
   for (Long64_t i=0; i<GetNEvents(); i++) {
      const Event* ev = GetEvent(i);
      for (UInt_t dim=0; dim<kDim; dim++) {
         if (fMultiTargetRegression) {
            if (dim < vDim)
               range_h[dim]->Fill(ev->GetValue(dim));
            else
               range_h[dim]->Fill(ev->GetTarget(dim-vDim));
         }
         else
            range_h[dim]->Fill(ev->GetValue(dim));
      }
   }

   // cut off the tails: move xmin[dim] up and xmax[dim] down until about
   // 'nevoutside' events lie outside the range on each side
   for (UInt_t dim=0; dim<kDim; dim++) {
      for (Int_t i=1; i<(rangehistbins+1); i++) {
         if (range_h[dim]->Integral(0, i) > nevoutside) {
            xmin[dim]=range_h[dim]->GetBinLowEdge(i);
            break;
         }
      }
      for (Int_t i=rangehistbins; i>0; i--) {
         if (range_h[dim]->Integral(i, (rangehistbins+1)) > nevoutside) {
            xmax[dim]=range_h[dim]->GetBinLowEdge(i+1);
            break;
         }
      }
   }

   // store the resulting ranges
   fXmin.clear();
   fXmax.clear();
   for (UInt_t dim=0; dim<kDim; dim++) {
      fXmin.push_back(xmin[dim]);
      fXmax.push_back(xmax[dim]);
   }

   delete[] xmin;
   delete[] xmax;

   // clean up
   for (UInt_t dim=0; dim<kDim; dim++)
      delete range_h[dim];
   delete[] range_h;

   return;
}

// train the PDE-Foam(s)
void TMVA::MethodPDEFoam::Train( void )
{
   Log() << kVERBOSE << "Calculate Xmin and Xmax for every dimension" << Endl;
   CalcXminXmax();

   // delete foams from a previous training
   for (UInt_t i=0; i<fFoam.size(); i++)
      if (fFoam.at(i)) delete fFoam.at(i);
   fFoam.clear();

   // start the training
   if (DoRegression()) {
      if (fMultiTargetRegression)
         TrainMultiTargetRegression();
      else
         TrainMonoTargetRegression();
   }
   else {
      if (DataInfo().GetNormalization() != "EQUALNUMEVENTS" ) {
         Log() << kINFO << "NormMode=" << DataInfo().GetNormalization()
               << " chosen. Note that only NormMode=EqualNumEvents"
               << " ensures that Discriminant values correspond to"
               << " signal probabilities." << Endl;
      }

      Log() << kDEBUG << "N_sig for training events: " << Data()->GetNEvtSigTrain() << Endl;
      Log() << kDEBUG << "N_bg for training events:  " << Data()->GetNEvtBkgdTrain() << Endl;
      Log() << kDEBUG << "User normalization: " << DataInfo().GetNormalization().Data() << Endl;

      if (fSigBgSeparated)
         TrainSeparatedClassification();
      else
         TrainUnifiedClassification();
   }

   // check cells and delete the binary search trees, which are no
   // longer needed after training
   for(UInt_t i=0; i<fFoam.size(); i++) {
      Log() << kVERBOSE << "Check all cells and remove cells with volume 0" << Endl;
      fFoam.at(i)->CheckCells(true);
      if(fFoam.at(i)) fFoam.at(i)->DeleteBinarySearchTree();
   }
}

// train two separate foams: one filled with signal events and one
// filled with background events
void TMVA::MethodPDEFoam::TrainSeparatedClassification()
{
   TString foamcaption[2];
   foamcaption[0] = "SignalFoam";
   foamcaption[1] = "BgFoam";

   for(int i=0; i<2; i++) {
      // create the foam
      fFoam.push_back( new PDEFoam(foamcaption[i]) );
      InitFoam(fFoam.back(), kSeparate);

      Log() << kVERBOSE << "Filling binary search tree of " << foamcaption[i]
            << " with events" << Endl;
      // insert the training events (signal for i==0, background for i==1)
      // into the binary search tree
      for (Long64_t k=0; k<GetNEvents(); k++) {
         const Event* ev = GetEvent(k);
         if ((i==0 && DataInfo().IsSignal(ev)) || (i==1 && !DataInfo().IsSignal(ev)))
            fFoam.back()->FillBinarySearchTree(ev, IgnoreEventsWithNegWeightsInTraining());
      }

      Log() << kINFO << "Build up " << foamcaption[i] << Endl;
      fFoam.back()->Create(); // build the foam

      Log() << kVERBOSE << "Filling foam cells with events" << Endl;
      // loop over all events -> fill the foam cells
      for (Long64_t k=0; k<GetNEvents(); k++) {
         const Event* ev = GetEvent(k);
         if ((i==0 && DataInfo().IsSignal(ev)) || (i==1 && !DataInfo().IsSignal(ev)))
            fFoam.back()->FillFoamCells(ev, IgnoreEventsWithNegWeightsInTraining());
      }
   }
}

// train a single foam which stores the signal-over-background
// discriminant in its cells
void TMVA::MethodPDEFoam::TrainUnifiedClassification()
{
   fFoam.push_back( new PDEFoam("DiscrFoam") );
   InitFoam(fFoam.back(), kDiscr);

   Log() << kVERBOSE << "Filling binary search tree of discriminator foam with events" << Endl;
   // insert all training events into the binary search tree
   for (Long64_t k=0; k<GetNEvents(); k++)
      fFoam.back()->FillBinarySearchTree(GetEvent(k), IgnoreEventsWithNegWeightsInTraining());

   Log() << kINFO << "Build up discriminator foam" << Endl;
   fFoam.back()->Create(); // build the foam

   Log() << kVERBOSE << "Filling foam cells with events" << Endl;
   // loop over all training events -> fill the foam cells
   for (Long64_t k=0; k<GetNEvents(); k++)
      fFoam.back()->FillFoamCells(GetEvent(k), IgnoreEventsWithNegWeightsInTraining());

   Log() << kVERBOSE << "Calculate cell discriminator"<< Endl;
   // calculate the discriminator for each cell
   fFoam.back()->CalcCellDiscr();
}

// train a single foam which stores the average target value in its
// cells (regression with exactly one target)
void TMVA::MethodPDEFoam::TrainMonoTargetRegression()
{
   if (Data()->GetNTargets() < 1) {
      Log() << kFATAL << "Error: number of targets = " << Data()->GetNTargets() << Endl;
      return;
   }
   else if (Data()->GetNTargets() > 1) {
      Log() << kWARNING << "Warning: number of targets = " << Data()->GetNTargets()
            << "  --> using only first target" << Endl;
   }
   else
      Log() << kDEBUG << "MethodPDEFoam: number of Targets: " << Data()->GetNTargets() << Endl;

   TString foamcaption = "MonoTargetRegressionFoam";
   fFoam.push_back( new PDEFoam(foamcaption) );
   InitFoam(fFoam.back(), kMonoTarget);

   Log() << kVERBOSE << "Filling binary search tree with events" << Endl;
   // insert all training events into the binary search tree
   for (Long64_t k=0; k<GetNEvents(); k++)
      fFoam.back()->FillBinarySearchTree(GetEvent(k), IgnoreEventsWithNegWeightsInTraining());

   Log() << kINFO << "Build mono target regression foam" << Endl;
   fFoam.back()->Create(); // build the foam

   Log() << kVERBOSE << "Filling foam cells with events" << Endl;
   // loop over all training events -> fill the foam cells
   for (Long64_t k=0; k<GetNEvents(); k++)
      fFoam.back()->FillFoamCells(GetEvent(k), IgnoreEventsWithNegWeightsInTraining());

   Log() << kVERBOSE << "Calculate average cell targets"<< Endl;
   // calculate the average target value for each cell
   fFoam.back()->CalcCellTarget();
}

// train a single foam of dimension (number of variables + number of
// targets), used for regression with more than one target
void TMVA::MethodPDEFoam::TrainMultiTargetRegression()
{
   Log() << kDEBUG << "Number of variables: " << Data()->GetNVariables() << Endl;
   Log() << kDEBUG << "Number of Targets:   " << Data()->GetNTargets()   << Endl;
   Log() << kDEBUG << "Dimension of foam:   " << Data()->GetNVariables()+Data()->GetNTargets() << Endl;
   if (fKernel==kLinN)
      Log() << kFATAL << "LinNeighbors kernel currently not supported"
            << " for multi target regression" << Endl;

   TString foamcaption = "MultiTargetRegressionFoam";
   fFoam.push_back( new PDEFoam(foamcaption) );
   InitFoam(fFoam.back(), kMultiTarget);

   Log() << kVERBOSE << "Filling binary search tree of multi target regression foam with events"
         << Endl;
   // insert all training events into the binary search tree
   for (Long64_t k=0; k<GetNEvents(); k++)
      fFoam.back()->FillBinarySearchTree(GetEvent(k), IgnoreEventsWithNegWeightsInTraining());

   Log() << kINFO << "Build multi target regression foam" << Endl;
   fFoam.back()->Create(); // build the foam

   Log() << kVERBOSE << "Filling foam cells with events" << Endl;
   // loop over all training events -> fill the foam cells
   for (Long64_t k=0; k<GetNEvents(); k++)
      fFoam.back()->FillFoamCells(GetEvent(k), IgnoreEventsWithNegWeightsInTraining());
}

// return the discriminant for the current event; for two separate foams
// it is calculated from the signal and background densities, otherwise
// it is read directly from the discriminator foam
Double_t TMVA::MethodPDEFoam::GetMvaValue( Double_t* err, Double_t* errUpper )
{
   const Event* ev = GetEvent();
   Double_t discr = 0.;
   Double_t discr_error = 0.;

   if (fSigBgSeparated) {
      std::vector<Float_t> xvec = ev->GetValues();

      Double_t density_sig = 0.;
      Double_t density_bg  = 0.;

      density_sig = fFoam.at(0)->GetCellDensity(xvec, fKernel); // signal density
      density_bg  = fFoam.at(1)->GetCellDensity(xvec, fKernel); // background density

      // calculate the discriminant D = rho_sig / (rho_sig + rho_bg)
      if ( (density_sig+density_bg) > 0 )
         discr = density_sig/(density_sig+density_bg);
      else
         discr = 0.5; // assume 50% signal probability, if no events found

      // error on the discriminant from Gaussian error propagation of the
      // cell event counts:
      //   sigma_D = sqrt( (dD/dN_S * sigma_S)^2 + (dD/dN_B * sigma_B)^2 )
      // with dD/dN_S = N_B/(N_S+N_B)^2, dD/dN_B = -N_S/(N_S+N_B)^2 and
      // sigma_{S,B} = sqrt(N_{S,B})
      Double_t neventsB = fFoam.at(1)->GetCellValue(xvec, kNev);
      Double_t neventsS = fFoam.at(0)->GetCellValue(xvec, kNev);
      Double_t scaleB = 1.;
      Double_t errorS = TMath::Sqrt(neventsS); // estimated statistical error on N_S
      Double_t errorB = TMath::Sqrt(neventsB); // estimated statistical error on N_B

      if (neventsS == 0) // protection against empty cells
         errorS = 1.;
      if (neventsB == 0)
         errorB = 1.;

      if ( (neventsS>1e-10) || (neventsB>1e-10) )
         discr_error = TMath::Sqrt( Sqr ( scaleB*neventsB
                                          / Sqr(neventsS+scaleB*neventsB)
                                          * errorS) +
                                    Sqr ( scaleB*neventsS
                                          / Sqr(neventsS+scaleB*neventsB)
                                          * errorB) );
      else discr_error = 1.;

      if (discr_error < 1e-10) discr_error = 1.;
   }
   else { // unified classification foam
      std::vector<Float_t> xvec = ev->GetValues();
      // get the discriminant and its error directly from the foam cells
      discr       = fFoam.at(0)->GetCellDiscr(xvec, fKernel);
      discr_error = fFoam.at(0)->GetCellValue(xvec, kDiscriminatorError);
   }

   // attribute the error to the given pointers
   if (err != 0) *err = discr_error;
   if (errUpper != 0) *errUpper = discr_error;

   if (fUseYesNoCell)
      return (discr < 0.5 ? -1 : 1);
   else
      return discr;
}

// copy the foam ranges fXmin, fXmax into the given foam
void TMVA::MethodPDEFoam::SetXminXmax( TMVA::PDEFoam *pdefoam )
{
   if (!pdefoam){
      Log() << kFATAL << "Null pointer given!" << Endl;
      return;
   }

   UInt_t num_vars = GetNvar();
   if (fMultiTargetRegression)
      num_vars += Data()->GetNTargets();

   for (UInt_t idim=0; idim<num_vars; idim++) {
      Log()<< kDEBUG << "foam: SetXmin[dim="<<idim<<"]: " << fXmin.at(idim) << Endl;
      Log()<< kDEBUG << "foam: SetXmax[dim="<<idim<<"]: " << fXmax.at(idim) << Endl;
      pdefoam->SetXmin(idim, fXmin.at(idim));
      pdefoam->SetXmax(idim, fXmax.at(idim));
   }
}

// set the foam type, the steering parameters and the variable ranges
// of the given foam, and initialise it
void TMVA::MethodPDEFoam::InitFoam(TMVA::PDEFoam *pdefoam, EFoamType ft)
{
   if (!pdefoam){
      Log() << kFATAL << "Null pointer given!" << Endl;
      return;
   }

   // set the verbosity of the foam to the verbosity of this method
   pdefoam->Log().SetMinType(this->Log().GetMinType());

   // set the foam type (kSeparate, kDiscr, kMonoTarget or kMultiTarget)
   pdefoam->SetFoamType(ft);

   // set dimension: for multi-target regression the targets span
   // additional foam dimensions
   if (ft==kMultiTarget)
      pdefoam->SetDim(      Data()->GetNTargets()+Data()->GetNVariables());
   else
      pdefoam->SetDim(      GetNvar());
   pdefoam->SetVolumeFraction(fVolFrac); // sampling box size
   pdefoam->SetnCells(      fnCells);    // total number of cells
   pdefoam->SetnSampl(      fnSampl);    // number of MC events per cell
   pdefoam->SetnBin(        fnBin);      // number of bins in edge histograms
   pdefoam->SetEvPerBin(    fEvPerBin);  // events per bin
   pdefoam->SetFillFoamWithOrigWeights(fFillFoamWithOrigWeights);
   pdefoam->SetDTSeparation(fDTSeparation);
   pdefoam->SetPeekMax(fPeekMax);

   // cell-split requirements
   pdefoam->SetNmin(fNmin);
   pdefoam->SetMaxDepth(fMaxDepth);

   // initialise the internal foam structures
   pdefoam->Init();

   // set the foam ranges
   SetXminXmax(pdefoam);
}

// return the regression value(s) for the current event
const std::vector<Float_t>& TMVA::MethodPDEFoam::GetRegressionValues()
{
   if (fRegressionReturnVal == 0) fRegressionReturnVal = new std::vector<Float_t>();
   fRegressionReturnVal->clear();

   const Event* ev = GetEvent();
   std::vector<Float_t> vals = ev->GetValues(); // input variables

   if (vals.size() == 0) {
      Log() << kWARNING << "<GetRegressionValues> value vector has size 0. " << Endl;
   }

   if (fMultiTargetRegression) {
      // multi-target regression: get all projected target values from the foam
      std::vector<Float_t> targets = fFoam.at(0)->GetProjectedRegValue(vals, fKernel, fTargetSelection);
      for(UInt_t i=0; i<(Data()->GetNTargets()); i++)
         fRegressionReturnVal->push_back(targets.at(i));
   }
   else {
      fRegressionReturnVal->push_back(fFoam.at(0)->GetCellRegValue0(vals, fKernel));
   }

   // apply the inverse transformation to the targets
   Event * evT = new Event(*ev);
   for (UInt_t itgt = 0; itgt < Data()->GetNTargets(); itgt++) {
      evT->SetTarget(itgt, fRegressionReturnVal->at(itgt) );
   }
   const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
   fRegressionReturnVal->clear();
   for (UInt_t itgt = 0; itgt < Data()->GetNTargets(); itgt++) {
      fRegressionReturnVal->push_back( evT2->GetTarget(itgt) );
   }

   delete evT;

   return (*fRegressionReturnVal);
}

// PDEFoam has no coefficients to print
void TMVA::MethodPDEFoam::PrintCoefficients( void )
{}

// write options and foam ranges to the XML weight file; the foams
// themselves are written to a separate ROOT file
void TMVA::MethodPDEFoam::AddWeightsXMLTo( void* parent ) const
{
   void* wght = gTools().AddChild(parent, "Weights");
   gTools().AddAttr( wght, "SigBgSeparated",  fSigBgSeparated );
   gTools().AddAttr( wght, "Frac",            fFrac );
   gTools().AddAttr( wght, "DiscrErrCut",     fDiscrErrCut );
   gTools().AddAttr( wght, "VolFrac",         fVolFrac );
   gTools().AddAttr( wght, "nCells",          fnCells );
   gTools().AddAttr( wght, "nSampl",          fnSampl );
   gTools().AddAttr( wght, "nBin",            fnBin );
   gTools().AddAttr( wght, "EvPerBin",        fEvPerBin );
   gTools().AddAttr( wght, "Compress",        fCompress );
   gTools().AddAttr( wght, "DoRegression",    DoRegression() );
   gTools().AddAttr( wght, "CutNmin",         fNmin>0 );
   gTools().AddAttr( wght, "Nmin",            fNmin );
   gTools().AddAttr( wght, "CutRMSmin",       false );
   gTools().AddAttr( wght, "RMSmin",          0.0 );
   gTools().AddAttr( wght, "Kernel",          KernelToUInt(fKernel) );
   gTools().AddAttr( wght, "TargetSelection", TargetSelectionToUInt(fTargetSelection) );
   gTools().AddAttr( wght, "FillFoamWithOrigWeights", fFillFoamWithOrigWeights );
   gTools().AddAttr( wght, "UseYesNoCell",    fUseYesNoCell );

   // save foam borders Xmin[i], Xmax[i]
   void *xmin_wrap;
   for (UInt_t i=0; i<fXmin.size(); i++){
      xmin_wrap = gTools().AddChild( wght, "Xmin" );
      gTools().AddAttr( xmin_wrap, "Index", i );
      gTools().AddAttr( xmin_wrap, "Value", fXmin.at(i) );
   }
   void *xmax_wrap;
   for (UInt_t i=0; i<fXmax.size(); i++){
      xmax_wrap = gTools().AddChild( wght, "Xmax" );
      gTools().AddAttr( xmax_wrap, "Index", i );
      gTools().AddAttr( xmax_wrap, "Value", fXmax.at(i) );
   }

   // write foams to a separate ROOT file
   WriteFoamsToFile();
}

// write the foams to a ROOT file whose name is derived from the weight
// file name
void TMVA::MethodPDEFoam::WriteFoamsToFile() const
{
   // fill variable names into the foams
   FillVariableNamesToFoam();

   TString rfname( GetWeightFileName() );

   // replace in case of a txt weight file
   rfname.ReplaceAll( TString(".") + gConfig().GetIONames().fWeightFileExtension + ".txt", ".xml" );

   // add a foam indicator to distinguish from the main weight file
   rfname.ReplaceAll( ".xml", "_foams.root" );

   TFile *rootFile = 0;
   if (fCompress) rootFile = new TFile(rfname, "RECREATE", "foamfile", 9);
   else           rootFile = new TFile(rfname, "RECREATE");

   fFoam.at(0)->Write(fFoam.at(0)->GetFoamName().Data());
   if (!DoRegression() && fSigBgSeparated)
      fFoam.at(1)->Write(fFoam.at(1)->GetFoamName().Data());
   rootFile->Close();
   Log() << kINFO << "Foams written to file: "
         << gTools().Color("lightblue") << rfname << gTools().Color("reset") << Endl;
}
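
// Note: the foams written here can be inspected interactively. A minimal
// sketch (the file name below is illustrative; the actual name is derived
// from the weight file name as done above):
//
//    TFile f("weights/TMVAClassification_PDEFoam.weights_foams.root");
//    TMVA::PDEFoam *foam = dynamic_cast<TMVA::PDEFoam*>(f.Get("DiscrFoam"));
//    if (foam) foam->Print();
//
// "DiscrFoam" is the name used for the unified classification foam;
// separated foams are stored as "SignalFoam" and "BgFoam".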

// read options and foam ranges from an old-style text weight stream
void TMVA::MethodPDEFoam::ReadWeightsFromStream( istream& istr )
{
   istr >> fSigBgSeparated;                 // separate foams for signal and background
   istr >> fFrac;                           // fraction used for calculation of Xmin, Xmax
   istr >> fDiscrErrCut;                    // cut on discriminant error
   istr >> fVolFrac;                        // sampling box size
   istr >> fnCells;                         // total number of cells
   istr >> fnSampl;                         // number of MC events per cell in foam build-up
   istr >> fnBin;                           // number of bins in edge histograms
   istr >> fEvPerBin;                       // events per bin
   istr >> fCompress;                       // compress output file

   Bool_t regr;
   istr >> regr;                            // regression foam
   SetAnalysisType( (regr ? Types::kRegression : Types::kClassification ) );

   Bool_t CutNmin, CutRMSmin; // dummies for backward compatibility
   Float_t RMSmin;            // dummy for backward compatibility
   istr >> CutNmin;                         // cut on minimal number of events in cell
   istr >> fNmin;
   istr >> CutRMSmin;                       // cut on minimal RMS in cell
   istr >> RMSmin;

   UInt_t ker = 0;
   istr >> ker;                             // kernel used for GetMvaValue()
   fKernel = UIntToKernel(ker);

   UInt_t ts = 0;
   istr >> ts;                              // method used for target selection
   fTargetSelection = UIntToTargetSelection(ts);

   istr >> fFillFoamWithOrigWeights;        // fill foam with original event weights
   istr >> fUseYesNoCell;                   // return -1 or 1 for background or signal events

   // clear the old ranges and prepare the new ones
   fXmin.clear();
   fXmax.clear();
   UInt_t kDim = GetNvar();
   if (fMultiTargetRegression)
      kDim += Data()->GetNTargets();

   for (UInt_t i=0; i<kDim; i++) {
      fXmin.push_back(0.);
      fXmax.push_back(0.);
   }

   // read the ranges
   for (UInt_t i=0; i<kDim; i++)
      istr >> fXmin.at(i);
   for (UInt_t i=0; i<kDim; i++)
      istr >> fXmax.at(i);

   // read the foams from file
   ReadFoamsFromFile();
}

// read options and foam ranges from the XML weight file
void TMVA::MethodPDEFoam::ReadWeightsFromXML( void* wghtnode )
{
   gTools().ReadAttr( wghtnode, "SigBgSeparated",  fSigBgSeparated );
   gTools().ReadAttr( wghtnode, "Frac",            fFrac );
   gTools().ReadAttr( wghtnode, "DiscrErrCut",     fDiscrErrCut );
   gTools().ReadAttr( wghtnode, "VolFrac",         fVolFrac );
   gTools().ReadAttr( wghtnode, "nCells",          fnCells );
   gTools().ReadAttr( wghtnode, "nSampl",          fnSampl );
   gTools().ReadAttr( wghtnode, "nBin",            fnBin );
   gTools().ReadAttr( wghtnode, "EvPerBin",        fEvPerBin );
   gTools().ReadAttr( wghtnode, "Compress",        fCompress );
   Bool_t regr;
   gTools().ReadAttr( wghtnode, "DoRegression",    regr );
   SetAnalysisType( (regr ? Types::kRegression : Types::kClassification ) );
   Bool_t CutNmin; // dummy for backward compatibility
   gTools().ReadAttr( wghtnode, "CutNmin",         CutNmin );
   gTools().ReadAttr( wghtnode, "Nmin",            fNmin );
   Bool_t CutRMSmin; // dummy for backward compatibility
   Float_t RMSmin;   // dummy for backward compatibility
   gTools().ReadAttr( wghtnode, "CutRMSmin",       CutRMSmin );
   gTools().ReadAttr( wghtnode, "RMSmin",          RMSmin );
   UInt_t ker = 0;
   gTools().ReadAttr( wghtnode, "Kernel",          ker );
   fKernel = UIntToKernel(ker);
   UInt_t ts = 0;
   gTools().ReadAttr( wghtnode, "TargetSelection", ts );
   fTargetSelection = UIntToTargetSelection(ts);
   if (gTools().HasAttr(wghtnode, "FillFoamWithOrigWeights"))
      gTools().ReadAttr( wghtnode, "FillFoamWithOrigWeights", fFillFoamWithOrigWeights );
   if (gTools().HasAttr(wghtnode, "UseYesNoCell"))
      gTools().ReadAttr( wghtnode, "UseYesNoCell", fUseYesNoCell );

   // clear the old ranges and prepare the new ones
   fXmin.clear();
   fXmax.clear();
   UInt_t kDim = GetNvar();
   if (fMultiTargetRegression)
      kDim += Data()->GetNTargets();

   for (UInt_t i=0; i<kDim; i++) {
      fXmin.push_back(0.);
      fXmax.push_back(0.);
   }

   // read the foam range Xmin
   void *xmin_wrap = gTools().GetChild( wghtnode );
   for (UInt_t counter=0; counter<kDim; counter++) {
      UInt_t i=0;
      gTools().ReadAttr( xmin_wrap , "Index", i );
      if (i>=kDim)
         Log() << kFATAL << "dimension index out of range:" << i << Endl;
      gTools().ReadAttr( xmin_wrap , "Value", fXmin.at(i) );
      xmin_wrap = gTools().GetNextChild( xmin_wrap );
   }

   // read the foam range Xmax
   void *xmax_wrap = xmin_wrap;
   for (UInt_t counter=0; counter<kDim; counter++) {
      UInt_t i=0;
      gTools().ReadAttr( xmax_wrap , "Index", i );
      if (i>=kDim)
         Log() << kFATAL << "dimension index out of range:" << i << Endl;
      gTools().ReadAttr( xmax_wrap , "Value", fXmax.at(i) );
      xmax_wrap = gTools().GetNextChild( xmax_wrap );
   }

   // delete foams from a previous training
   for (UInt_t i=0; i<fFoam.size(); i++)
      if (fFoam.at(i)) delete fFoam.at(i);
   fFoam.clear();

   // read the foams from file
   ReadFoamsFromFile();
}

// read the foams from the ROOT file whose name is derived from the
// weight file name
void TMVA::MethodPDEFoam::ReadFoamsFromFile()
{
   TString rfname( GetWeightFileName() );

   // replace in case of a txt weight file
   rfname.ReplaceAll( TString(".") + gConfig().GetIONames().fWeightFileExtension + ".txt", ".xml" );

   // add a foam indicator to distinguish from the main weight file
   rfname.ReplaceAll( ".xml", "_foams.root" );

   Log() << kINFO << "Read foams from file: " << gTools().Color("lightblue")
         << rfname << gTools().Color("reset") << Endl;
   TFile *rootFile = new TFile( rfname, "READ" );
   if (rootFile->IsZombie()) Log() << kFATAL << "Cannot open file \"" << rfname << "\"" << Endl;

   // read the foams, depending on the analysis type
   if (DoRegression()) {
      if (fMultiTargetRegression)
         fFoam.push_back( dynamic_cast<PDEFoam*>(rootFile->Get("MultiTargetRegressionFoam")) );
      else
         fFoam.push_back( dynamic_cast<PDEFoam*>(rootFile->Get("MonoTargetRegressionFoam")) );
   }
   else {
      if (fSigBgSeparated) {
         fFoam.push_back( dynamic_cast<PDEFoam*>(rootFile->Get("SignalFoam")) );
         fFoam.push_back( dynamic_cast<PDEFoam*>(rootFile->Get("BgFoam")) );
      }
      else
         fFoam.push_back( dynamic_cast<PDEFoam*>(rootFile->Get("DiscrFoam")) );
   }
   if (!fFoam.at(0) || (!DoRegression() && fSigBgSeparated && !fFoam.at(1)))
      Log() << kFATAL << "Could not load foam!" << Endl;
}

// convert a UInt_t (read from the weight file) into the corresponding
// kernel type
TMVA::EKernel TMVA::MethodPDEFoam::UIntToKernel(UInt_t iker)
{
   switch(iker) {
   case 0:  return kNone;
   case 1:  return kGaus;
   case 2:  return kLinN;
   default:
      Log() << kWARNING << "<UIntToKernel>: unknown kernel number: " << iker << Endl;
      return kNone;
   }
   return kNone;
}

// convert a UInt_t (read from the weight file) into the corresponding
// target selection method
TMVA::ETargetSelection TMVA::MethodPDEFoam::UIntToTargetSelection(UInt_t its)
{
   switch(its) {
   case 0:  return kMean;
   case 1:  return kMpv;
   default:
      Log() << kWARNING << "<UIntToTargetSelection>: unknown method TargetSelection: " << its << Endl;
      return kMean;
   }
   return kMean;
}

// store the variable (and target) names in all foams
void TMVA::MethodPDEFoam::FillVariableNamesToFoam() const
{
   for (UInt_t ifoam=0; ifoam<fFoam.size(); ifoam++) {
      for (Int_t idim=0; idim<fFoam.at(ifoam)->GetTotDim(); idim++) {
         if(fMultiTargetRegression && (UInt_t)idim>=DataInfo().GetNVariables())
            fFoam.at(ifoam)->AddVariableName(DataInfo().GetTargetInfo(idim-DataInfo().GetNVariables()).GetExpression().Data());
         else
            fFoam.at(ifoam)->AddVariableName(DataInfo().GetVariableInfo(idim).GetExpression().Data());
      }
   }
}

// write PDEFoam-specific classifier response (not implemented)
void TMVA::MethodPDEFoam::MakeClassSpecific( std::ostream& , const TString&  ) const
{
}

// print a help message with a short description of the method and a
// discussion of the most important booking options
void TMVA::MethodPDEFoam::GetHelpMessage() const
{
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "PDE-Foam is a variation of the PDE-RS method using a self-adapting" << Endl;
   Log() << "binning method to divide the multi-dimensional variable space into a" << Endl;
   Log() << "finite number of hyper-rectangles (cells). The binning algorithm" << Endl;
   Log() << "adjusts the size and position of a predefined number of cells such" << Endl;
   Log() << "that the variance of the signal and background densities inside the" << Endl;
   Log() << "cells reaches a minimum." << Endl;
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Use of booking options:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "The PDEFoam classifier supports two different algorithms: " << Endl;
   Log() << Endl;
   Log() << "  (1) Create one foam, which stores the signal over background" << Endl;
   Log() << "      probability density.  During foam buildup the variance of the" << Endl;
   Log() << "      discriminant inside the cells is minimised." << Endl;
   Log() << Endl;
   Log() << "      Booking option:   SigBgSeparate=F" << Endl;
   Log() << Endl;
   Log() << "  (2) Create two separate foams, one for the signal events and one for" << Endl;
   Log() << "      background events.  During foam buildup the variance of the" << Endl;
   Log() << "      event density inside the cells is minimised separately for" << Endl;
   Log() << "      signal and background." << Endl;
   Log() << Endl;
   Log() << "      Booking option:   SigBgSeparate=T" << Endl;
   Log() << Endl;
   Log() << "The following options can be set (the listed values are found to be a" << Endl;
   Log() << "good starting point for most applications):" << Endl;
   Log() << Endl;
   Log() << "        SigBgSeparate   False   Separate Signal and Background" << Endl;
   Log() << "              TailCut   0.001   Fraction of outlier events that are excluded" << Endl;
   Log() << "                                from the foam in each dimension" << Endl;
   Log() << "              VolFrac  0.0333   Volume fraction (used for density calculation" << Endl;
   Log() << "                                during foam build-up)" << Endl;
   Log() << "         nActiveCells     500   Maximal number of active cells in final foam" << Endl;
   Log() << "               nSampl    2000   Number of MC events per cell in foam build-up" << Endl;
   Log() << "                 nBin       5   Number of bins used in foam build-up" << Endl;
   Log() << "                 Nmin     100   Number of events in cell required to split cell" << Endl;
   Log() << "               Kernel    None   Kernel type used (possible values are: None," << Endl;
   Log() << "                                Gauss, LinNeighbors)" << Endl;
   Log() << "             Compress    True   Compress foam output file" << Endl;
   Log() << Endl;
   Log() << "   Additional regression options:" << Endl;
   Log() << Endl;
   Log() << "MultiTargetRegression   False   Do regression with multiple targets" << Endl;
   Log() << "      TargetSelection    Mean   Target selection method (possible values are:" << Endl;
   Log() << "                                Mean, Mpv)" << Endl;
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "The performance of the two implementations was found to be similar for" << Endl;
   Log() << "most examples studied. For the same number of cells per foam, the two-" << Endl;
   Log() << "foam option approximately doubles the amount of computer memory needed" << Endl;
   Log() << "during classification. For special cases where the event-density" << Endl;
   Log() << "distribution of signal and background events is very different, the" << Endl;
   Log() << "two-foam option was found to perform significantly better than the" << Endl;
   Log() << "option with only one foam." << Endl;
   Log() << Endl;
   Log() << "In order to gain better classification performance we recommend to set" << Endl;
   Log() << "the parameter \"nActiveCells\" to a high value." << Endl;
   Log() << Endl;
   Log() << "The parameter \"VolFrac\" specifies the size of the sampling volume" << Endl;
   Log() << "during foam buildup and should be tuned in order to achieve optimal" << Endl;
   Log() << "performance.  A larger box leads to a reduced statistical uncertainty" << Endl;
   Log() << "for small training samples and to smoother sampling. A smaller box on" << Endl;
   Log() << "the other hand increases the sensitivity to statistical fluctuations" << Endl;
   Log() << "in the training samples, but for sufficiently large training samples" << Endl;
   Log() << "it will result in a more precise local estimate of the sampled" << Endl;
   Log() << "density. In general, higher dimensional problems require larger box" << Endl;
   Log() << "sizes, due to the reduced average number of events per box volume. The" << Endl;
   Log() << "default value of 0.0333 was optimised for an example with 5" << Endl;
   Log() << "observables and training samples of the order of 50000 signal and" << Endl;
   Log() << "background events each." << Endl;
   Log() << Endl;
   Log() << "Furthermore kernel weighting can be activated, which will lead to an" << Endl;
   Log() << "additional performance improvement. Note that Gauss weighting will" << Endl;
   Log() << "significantly increase the response time of the method. LinNeighbors" << Endl;
   Log() << "weighting performs a linear interpolation with direct neighbor cells" << Endl;
   Log() << "for each dimension and is much faster than Gauss weighting." << Endl;
   Log() << Endl;
   Log() << "The classification results were found to be rather insensitive to the" << Endl;
   Log() << "values of the parameters \"nSampl\" and \"nBin\"." << Endl;
}
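
// A minimal, illustrative booking sketch for the regression use case discussed
// in the help message above (assumption: an existing TMVA::Factory instance
// named 'factory' with an already prepared regression dataset):
//
//    factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
//       "MultiTargetRegression=T:TargetSelection=Mean:TailCut=0.001:"
//       "VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:Compress=T" );
//
// The option names and values correspond to the defaults declared in
// DeclareOptions() and to the starting values recommended by GetHelpMessage().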