//_______________________________________________________________________
//
// MethodPDEFoam
//
// The PDEFoam method is an extension of the PDE-RS method.  It uses a
// self-adapting binning algorithm to divide the multi-dimensional
// variable space into a finite number of hyper-rectangles (cells), such
// that the variance of the signal and background densities inside the
// cells is minimised.  The event density estimated from the cells is
// used to compute the discriminant or the regression target.
//_______________________________________________________________________

#include <iomanip>
#include <cassert>

#include "TMath.h"
#include "Riostream.h"
#include "TFile.h"

#include "TMVA/MethodPDEFoam.h"
#include "TMVA/Tools.h"
#include "TMatrix.h"
#include "TMVA/Ranking.h"
#include "TMVA/Types.h"
#include "TMVA/ClassifierFactory.h"
#include "TMVA/Config.h"

REGISTER_METHOD(PDEFoam)

ClassImp(TMVA::MethodPDEFoam)

//_______________________________________________________________________
TMVA::MethodPDEFoam::MethodPDEFoam( const TString& jobName,
                                    const TString& methodTitle,
                                    DataSetInfo& dsi,
                                    const TString& theOption,
                                    TDirectory* theTargetDir ) :
   MethodBase( jobName, Types::kPDEFoam, methodTitle, dsi, theOption, theTargetDir )
   , fSigBgSeparated(kFALSE)
   , fFrac(0.001)
   , fDiscrErrCut(-1.0)
   , fVolFrac(30.0)
   , fVolFracInv(1.0/30.0)
   , fnCells(999)
   , fnActiveCells(500)
   , fnSampl(2000)
   , fnBin(5)
   , fEvPerBin(10000)
   , fCompress(kTRUE)
   , fMultiTargetRegression(kFALSE)
   , fNmin(100)
   , fCutNmin(kTRUE)
   , fMaxDepth(0)
   , fKernelStr("None")
   , fKernel(kNone)
   , fTargetSelectionStr("Mean")
   , fTargetSelection(kMean)
   , fFillFoamWithOrigWeights(kFALSE)
   , fUseYesNoCell(kFALSE)
   , fDTLogic("None")
   , fDTSeparation(kFoam)
   , fPeekMax(kTRUE)
   , fXmin(std::vector<Double_t>())
   , fXmax(std::vector<Double_t>())
   , fFoam(std::vector<PDEFoam*>())
{
   // standard constructor; all data members are initialised to their
   // default values (they are set again in Init())
}

//_______________________________________________________________________
TMVA::MethodPDEFoam::MethodPDEFoam( DataSetInfo& dsi,
                                    const TString& theWeightFile,
                                    TDirectory* theTargetDir ) :
   MethodBase( Types::kPDEFoam, dsi, theWeightFile, theTargetDir )
   , fSigBgSeparated(kFALSE)
   , fFrac(0.001)
   , fDiscrErrCut(-1.0)
   , fVolFrac(30.0)
   , fVolFracInv(1.0/30.0)
   , fnCells(999)
   , fnActiveCells(500)
   , fnSampl(2000)
   , fnBin(5)
   , fEvPerBin(10000)
   , fCompress(kTRUE)
   , fMultiTargetRegression(kFALSE)
   , fNmin(100)
   , fCutNmin(kTRUE)
   , fMaxDepth(0)
   , fKernelStr("None")
   , fKernel(kNone)
   , fTargetSelectionStr("Mean")
   , fTargetSelection(kMean)
   , fFillFoamWithOrigWeights(kFALSE)
   , fUseYesNoCell(kFALSE)
   , fDTLogic("None")
   , fDTSeparation(kFoam)
   , fPeekMax(kTRUE)
   , fXmin(std::vector<Double_t>())
   , fXmax(std::vector<Double_t>())
   , fFoam(std::vector<PDEFoam*>())
{
   // constructor used when reading a trained method back from a weight file
}

//_______________________________________________________________________
Bool_t TMVA::MethodPDEFoam::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t )
{
   // PDEFoam can handle classification with two classes and regression
   // with one or more regression targets
   if (type == Types::kClassification && numberClasses == 2) return kTRUE;
   if (type == Types::kRegression) return kTRUE;
   return kFALSE;
}

//_______________________________________________________________________
void TMVA::MethodPDEFoam::Init( void )
{
   // default initialization, called by all constructors

   fSigBgSeparated = kFALSE;
   fFrac           = 0.001;
   fDiscrErrCut    = -1.;
   fVolFrac        = 30.0;
   fVolFracInv     = 1./30.;
   fnActiveCells   = 500;
   fnCells         = fnActiveCells*2-1;
   fnSampl         = 2000;
   fnBin           = 5;
   fEvPerBin       = 10000;
   fNmin           = 100;
   fMaxDepth       = 0;
   fFillFoamWithOrigWeights = kFALSE;
   fUseYesNoCell   = kFALSE;
   fDTLogic        = "None";
   fDTSeparation   = kFoam;
   fPeekMax        = kTRUE;

   fKernel          = kNone;
   fTargetSelection = kMean;

   fCompress              = kTRUE;
   fMultiTargetRegression = kFALSE;

   // delete foams from a previous training, if any
   for (UInt_t i=0; i<fFoam.size(); i++)
      if (fFoam.at(i)) delete fFoam.at(i);
   fFoam.clear();

   if (fUseYesNoCell)
      SetSignalReferenceCut( 0.0 ); // MVA output is -1 or +1
   else
      SetSignalReferenceCut( 0.5 ); // MVA output is the signal probability
}

//_______________________________________________________________________
void TMVA::MethodPDEFoam::DeclareOptions()
{
   // declare the options of the PDEFoam method and their default values

   DeclareOptionRef( fSigBgSeparated = kFALSE, "SigBgSeparate", "Separate foams for signal and background" );
   DeclareOptionRef( fFrac = 0.001, "TailCut", "Fraction of outlier events that are excluded from the foam in each dimension" );
   DeclareOptionRef( fVolFracInv = 1./30., "VolFrac", "Size of sampling box, used for density calculation during foam build-up (maximum value: 1.0 is equivalent to volume of entire foam)");
   DeclareOptionRef( fnActiveCells = 500, "nActiveCells", "Maximum number of active cells to be created by the foam");
   DeclareOptionRef( fnSampl = 2000, "nSampl", "Number of generated MC events per cell");
   DeclareOptionRef( fnBin = 5, "nBin", "Number of bins in edge histograms");
   DeclareOptionRef( fCompress = kTRUE, "Compress", "Compress foam output file");
   DeclareOptionRef( fMultiTargetRegression = kFALSE, "MultiTargetRegression", "Do regression with multiple targets");
   DeclareOptionRef( fNmin = 100, "Nmin", "Number of events in cell required to split cell");
   DeclareOptionRef( fMaxDepth = 0, "MaxDepth", "Maximum depth of cell tree (0=unlimited)");
   DeclareOptionRef( fFillFoamWithOrigWeights = kFALSE, "FillFoamWithOrigWeights", "Fill foam with original or boost weights");
   DeclareOptionRef( fUseYesNoCell = kFALSE, "UseYesNoCell", "Return -1 or 1 for background or signal like events");
   DeclareOptionRef( fDTLogic = "None", "DTLogic", "Use decision tree algorithm to split cells");
   AddPreDefVal(TString("None"));
   AddPreDefVal(TString("GiniIndex"));
   AddPreDefVal(TString("MisClassificationError"));
   AddPreDefVal(TString("CrossEntropy"));
   DeclareOptionRef( fPeekMax = kTRUE, "PeekMax", "Pick the cell with the largest driver integral for the next split");

   DeclareOptionRef( fKernelStr = "None", "Kernel", "Kernel type used");
   AddPreDefVal(TString("None"));
   AddPreDefVal(TString("Gauss"));
   AddPreDefVal(TString("LinNeighbors"));
   DeclareOptionRef( fTargetSelectionStr = "Mean", "TargetSelection", "Target selection method");
   AddPreDefVal(TString("Mean"));
   AddPreDefVal(TString("Mpv"));
}
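
// Illustrative sketch (not part of this class): the options declared above
// are normally passed as a single option string when booking the method
// through the TMVA::Factory.  The factory object, input trees and input
// variables are assumed to have been set up beforehand; the option values
// shown are simply the defaults listed above.
//
//    factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
//                         "SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:"
//                         "nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:"
//                         "Kernel=None:Compress=T" );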

//_______________________________________________________________________
void TMVA::MethodPDEFoam::DeclareCompatibilityOptions() {
   // declare options kept only for backward compatibility with old weight files
   MethodBase::DeclareCompatibilityOptions();
   DeclareOptionRef(fCutNmin = kTRUE, "CutNmin", "Requirement for minimal number of events in cell");
}

//_______________________________________________________________________
void TMVA::MethodPDEFoam::ProcessOptions()
{
   // process the options set by the user
   if (!(fFrac>=0. && fFrac<=1.)) {
      Log() << kWARNING << "TailCut not in [0,1] ==> using 0.001 instead" << Endl;
      fFrac = 0.001;
   }

   if (fnActiveCells < 1) {
      Log() << kWARNING << "invalid number of active cells specified: "
            << fnActiveCells << "; setting nActiveCells=2" << Endl;
      fnActiveCells = 2;
   }
   fnCells = fnActiveCells*2-1;

   // calculate the sampling box size from its inverse, given by the user
   fVolFrac = 1./fVolFracInv;

   // decision tree logic is only applicable if a single foam is trained
   if (fSigBgSeparated && fDTLogic != "None") {
      Log() << kWARNING << "Decision tree logic works only for a single foam (SigBgSeparate=F)" << Endl;
      fDTLogic = "None";
      fDTSeparation = kFoam;
   }

   // set the cell-split separation type
   if (fDTLogic == "None")
      fDTSeparation = kFoam;
   else if (fDTLogic == "GiniIndex")
      fDTSeparation = kGiniIndex;
   else if (fDTLogic == "MisClassificationError")
      fDTSeparation = kMisClassificationError;
   else if (fDTLogic == "CrossEntropy")
      fDTSeparation = kCrossEntropy;
   else {
      Log() << kWARNING << "Unknown separation type: " << fDTLogic
            << ", setting to None" << Endl;
      fDTLogic = "None";
      fDTSeparation = kFoam;
   }

   if      (fKernelStr == "None"        ) fKernel = kNone;
   else if (fKernelStr == "Gauss"       ) fKernel = kGaus;
   else if (fKernelStr == "LinNeighbors") fKernel = kLinN;

   if (fTargetSelectionStr == "Mean" ) fTargetSelection = kMean;
   else                                fTargetSelection = kMpv;
}

//_______________________________________________________________________
TMVA::MethodPDEFoam::~MethodPDEFoam( void )
{
   // destructor: delete all trained foams
   for (UInt_t i=0; i<fFoam.size(); i++) {
      if (fFoam.at(i)) delete fFoam.at(i);
   }
   fFoam.clear();
}

//_______________________________________________________________________
void TMVA::MethodPDEFoam::CalcXminXmax()
{
   // determine the foam range [fXmin, fXmax] for all dimensions, such
   // that a fraction 'fFrac' of outlier events is cut away on each side
   // of every dimension

   fXmin.clear();
   fXmax.clear();
   UInt_t kDim = GetNvar();
   UInt_t tDim = Data()->GetNTargets();
   UInt_t vDim = Data()->GetNVariables();
   if (fMultiTargetRegression)
      kDim += tDim;

   Double_t *xmin = new Double_t[kDim];
   Double_t *xmax = new Double_t[kDim];

   // set default values
   for (UInt_t dim=0; dim<kDim; dim++) {
      xmin[dim] =  1.e100;
      xmax[dim] = -1.e100;
   }

   Log() << kDEBUG << "Number of training events: " << Data()->GetNTrainingEvents() << Endl;
   Int_t nevoutside = (Int_t)((Data()->GetNTrainingEvents())*(fFrac)); // number of events cut off on each side
   Int_t rangehistbins = 10000;                                        // number of bins in the range histograms

   // loop over all training events and determine the absolute minimum
   // and maximum in every dimension
   for (Long64_t i=0; i<(GetNEvents()); i++) {
      const Event* ev = GetEvent(i);
      for (UInt_t dim=0; dim<kDim; dim++) {
         Double_t val;
         if (fMultiTargetRegression) {
            if (dim < vDim)
               val = ev->GetValue(dim);
            else
               val = ev->GetTarget(dim-vDim);
         }
         else
            val = ev->GetValue(dim);

         if (val<xmin[dim])
            xmin[dim] = val;
         if (val>xmax[dim])
            xmax[dim] = val;
      }
   }

   // create one histogram per dimension, spanning the full range found
   // above; they are used below to cut off the outlier events
   TH1F **range_h = new TH1F*[kDim];
   for (UInt_t dim=0; dim<kDim; dim++) {
      range_h[dim] = new TH1F(Form("range%i", dim), "range", rangehistbins, xmin[dim], xmax[dim]);
   }

   // fill the range histograms with the training events
   for (Long64_t i=0; i<GetNEvents(); i++) {
      const Event* ev = GetEvent(i);
      for (UInt_t dim=0; dim<kDim; dim++) {
         if (fMultiTargetRegression) {
            if (dim < vDim)
               range_h[dim]->Fill(ev->GetValue(dim));
            else
               range_h[dim]->Fill(ev->GetTarget(dim-vDim));
         }
         else
            range_h[dim]->Fill(ev->GetValue(dim));
      }
   }

   // determine the new range, such that 'nevoutside' events lie outside
   // the foam on each side of every dimension
   for (UInt_t dim=0; dim<kDim; dim++) {
      for (Int_t i=1; i<(rangehistbins+1); i++) {
         if (range_h[dim]->Integral(0, i) > nevoutside) {
            xmin[dim]=range_h[dim]->GetBinLowEdge(i);
            break;
         }
      }
      for (Int_t i=rangehistbins; i>0; i--) {
         if (range_h[dim]->Integral(i, (rangehistbins+1)) > nevoutside) {
            xmax[dim]=range_h[dim]->GetBinLowEdge(i+1);
            break;
         }
      }
   }

   // store the new ranges
   fXmin.clear();
   fXmax.clear();
   for (UInt_t dim=0; dim<kDim; dim++) {
      fXmin.push_back(xmin[dim]);
      fXmax.push_back(xmax[dim]);
   }

   delete[] xmin;
   delete[] xmax;

   // clean up
   for (UInt_t dim=0; dim<kDim; dim++)
      delete range_h[dim];
   delete[] range_h;

   return;
}
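
// Worked example (illustration only): with the default TailCut fFrac =
// 0.001 and 50000 training events, nevoutside = 50000 * 0.001 = 50, i.e.
// the range of each dimension is shrunk until roughly 50 events lie below
// xmin and another 50 lie above xmax.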

//_______________________________________________________________________
void TMVA::MethodPDEFoam::Train( void )
{
   // train the PDEFoam(s) according to the set options

   Log() << kVERBOSE << "Calculate Xmin and Xmax for every dimension" << Endl;
   CalcXminXmax();

   // delete foams from a previous training
   for (UInt_t i=0; i<fFoam.size(); i++)
      if (fFoam.at(i)) delete fFoam.at(i);
   fFoam.clear();

   // train regression or classification foams
   if (DoRegression()) {
      if (fMultiTargetRegression)
         TrainMultiTargetRegression();
      else
         TrainMonoTargetRegression();
   }
   else {
      if (DataInfo().GetNormalization() != "EQUALNUMEVENTS" ) {
         Log() << kINFO << "NormMode=" << DataInfo().GetNormalization()
               << " chosen. Note that only NormMode=EqualNumEvents"
               << " ensures that Discriminant values correspond to"
               << " signal probabilities." << Endl;
      }

      Log() << kDEBUG << "N_sig for training events: " << Data()->GetNEvtSigTrain() << Endl;
      Log() << kDEBUG << "N_bg for training events: " << Data()->GetNEvtBkgdTrain() << Endl;
      Log() << kDEBUG << "User normalization: " << DataInfo().GetNormalization().Data() << Endl;

      if (fSigBgSeparated)
         TrainSeparatedClassification();
      else
         TrainUnifiedClassification();
   }

   // check the foams, remove cells with zero volume and delete the
   // binary search trees, which are no longer needed
   for(UInt_t i=0; i<fFoam.size(); i++) {
      Log() << kVERBOSE << "Check all cells and remove cells with volume 0" << Endl;
      fFoam.at(i)->CheckCells(true);
      if(fFoam.at(i)) fFoam.at(i)->DeleteBinarySearchTree();
   }
}

//_______________________________________________________________________
void TMVA::MethodPDEFoam::TrainSeparatedClassification()
{
   // create two separate foams: one for signal events and one for
   // background events

   TString foamcaption[2];
   foamcaption[0] = "SignalFoam";
   foamcaption[1] = "BgFoam";

   for(int i=0; i<2; i++) {
      // create PDEFoam
      fFoam.push_back( new PDEFoam(foamcaption[i]) );
      InitFoam(fFoam.back(), kSeparate);

      Log() << kVERBOSE << "Filling binary search tree of " << foamcaption[i]
            << " with events" << Endl;
      // insert the events into the binary search tree
      for (Long64_t k=0; k<GetNEvents(); k++) {
         const Event* ev = GetEvent(k);
         if ((i==0 && DataInfo().IsSignal(ev)) || (i==1 && !DataInfo().IsSignal(ev)))
            fFoam.back()->FillBinarySearchTree(ev, IgnoreEventsWithNegWeightsInTraining());
      }

      Log() << kINFO << "Build up " << foamcaption[i] << Endl;
      fFoam.back()->Create(); // build the foam

      Log() << kVERBOSE << "Filling foam cells with events" << Endl;
      // loop over all events and fill the foam cells
      for (Long64_t k=0; k<GetNEvents(); k++) {
         const Event* ev = GetEvent(k);
         if ((i==0 && DataInfo().IsSignal(ev)) || (i==1 && !DataInfo().IsSignal(ev)))
            fFoam.back()->FillFoamCells(ev, IgnoreEventsWithNegWeightsInTraining());
      }
   }
}

//_______________________________________________________________________
void TMVA::MethodPDEFoam::TrainUnifiedClassification()
{
   // create a single foam, which stores the discriminant
   // D = N_sig / (N_sig + N_bg) in every cell

   fFoam.push_back( new PDEFoam("DiscrFoam") );
   InitFoam(fFoam.back(), kDiscr);

   Log() << kVERBOSE << "Filling binary search tree of discriminator foam with events" << Endl;
   // insert the events into the binary search tree
   for (Long64_t k=0; k<GetNEvents(); k++)
      fFoam.back()->FillBinarySearchTree(GetEvent(k), IgnoreEventsWithNegWeightsInTraining());

   Log() << kINFO << "Build up discriminator foam" << Endl;
   fFoam.back()->Create(); // build the foam

   Log() << kVERBOSE << "Filling foam cells with events" << Endl;
   // loop over all events and fill the foam cells
   for (Long64_t k=0; k<GetNEvents(); k++)
      fFoam.back()->FillFoamCells(GetEvent(k), IgnoreEventsWithNegWeightsInTraining());

   Log() << kVERBOSE << "Calculate cell discriminator" << Endl;
   // calculate the discriminator (and its error) for each cell
   fFoam.back()->CalcCellDiscr();
}

//_______________________________________________________________________
void TMVA::MethodPDEFoam::TrainMonoTargetRegression()
{
   // train a mono target regression foam: the dimension of the foam
   // equals the number of input variables, and every cell stores the
   // average value of the (single) target

   if (Data()->GetNTargets() < 1) {
      Log() << kFATAL << "Error: number of targets = " << Data()->GetNTargets() << Endl;
      return;
   }
   else if (Data()->GetNTargets() > 1) {
      Log() << kWARNING << "Warning: number of targets = " << Data()->GetNTargets()
            << " --> using only first target" << Endl;
   }
   else
      Log() << kDEBUG << "MethodPDEFoam: number of Targets: " << Data()->GetNTargets() << Endl;

   TString foamcaption = "MonoTargetRegressionFoam";
   fFoam.push_back( new PDEFoam(foamcaption) );
   InitFoam(fFoam.back(), kMonoTarget);

   Log() << kVERBOSE << "Filling binary search tree with events" << Endl;
   // insert the events into the binary search tree
   for (Long64_t k=0; k<GetNEvents(); k++)
      fFoam.back()->FillBinarySearchTree(GetEvent(k), IgnoreEventsWithNegWeightsInTraining());

   Log() << kINFO << "Build mono target regression foam" << Endl;
   fFoam.back()->Create(); // build the foam

   Log() << kVERBOSE << "Filling foam cells with events" << Endl;
   // loop over all events and fill the foam cells with the target values
   for (Long64_t k=0; k<GetNEvents(); k++)
      fFoam.back()->FillFoamCells(GetEvent(k), IgnoreEventsWithNegWeightsInTraining());

   Log() << kVERBOSE << "Calculate average cell targets" << Endl;
   // calculate the average target (and its error) for each cell
   fFoam.back()->CalcCellTarget();
}

//_______________________________________________________________________
void TMVA::MethodPDEFoam::TrainMultiTargetRegression()
{
   // train a multi target regression foam: the targets are treated as
   // additional foam dimensions, so the foam dimension equals the number
   // of input variables plus the number of targets

   Log() << kDEBUG << "Number of variables: " << Data()->GetNVariables() << Endl;
   Log() << kDEBUG << "Number of Targets:   " << Data()->GetNTargets() << Endl;
   Log() << kDEBUG << "Dimension of foam:   " << Data()->GetNVariables()+Data()->GetNTargets() << Endl;
   if (fKernel==kLinN)
      Log() << kFATAL << "LinNeighbors kernel currently not supported"
            << " for multi target regression" << Endl;

   TString foamcaption = "MultiTargetRegressionFoam";
   fFoam.push_back( new PDEFoam(foamcaption) );
   InitFoam(fFoam.back(), kMultiTarget);

   Log() << kVERBOSE << "Filling binary search tree of multi target regression foam with events"
         << Endl;
   // insert the events into the binary search tree
   for (Long64_t k=0; k<GetNEvents(); k++)
      fFoam.back()->FillBinarySearchTree(GetEvent(k), IgnoreEventsWithNegWeightsInTraining());

   Log() << kINFO << "Build multi target regression foam" << Endl;
   fFoam.back()->Create(); // build the foam

   Log() << kVERBOSE << "Filling foam cells with events" << Endl;
   // loop over all events and fill the foam cells
   for (Long64_t k=0; k<GetNEvents(); k++)
      fFoam.back()->FillFoamCells(GetEvent(k), IgnoreEventsWithNegWeightsInTraining());
}

//_______________________________________________________________________
Double_t TMVA::MethodPDEFoam::GetMvaValue( Double_t* err, Double_t* errUpper )
{
   // return the discriminant D = N_sig / (N_sig + N_bg) for the current
   // event; if error pointers are given, the statistical error of the
   // discriminant is returned as well

   const Event* ev = GetEvent();
   Double_t discr       = 0.;
   Double_t discr_error = 0.;

   if (fSigBgSeparated) {
      std::vector<Float_t> xvec = ev->GetValues();

      Double_t density_sig = 0.;
      Double_t density_bg  = 0.;
      density_sig = fFoam.at(0)->GetCellDensity(xvec, fKernel); // signal event density
      density_bg  = fFoam.at(1)->GetCellDensity(xvec, fKernel); // background event density

      // calculate the normalised discriminant
      if ( (density_sig+density_bg) > 0 )
         discr = density_sig/(density_sig+density_bg);
      else
         discr = 0.5; // no events found in either foam -> assume 50% signal probability

      // propagate the statistical errors of the event counts in both
      // foams to the discriminant
      Double_t neventsB = fFoam.at(1)->GetCellValue(xvec, kNev);
      Double_t neventsS = fFoam.at(0)->GetCellValue(xvec, kNev);
      Double_t scaleB = 1.;
      Double_t errorS = TMath::Sqrt(neventsS); // Poisson error on the signal count
      Double_t errorB = TMath::Sqrt(neventsB); // Poisson error on the background count

      if (neventsS == 0) // avoid a vanishing error when no events were counted
         errorS = 1.;
      if (neventsB == 0)
         errorB = 1.;

      if ( (neventsS>1e-10) || (neventsB>1e-10) )
         // error propagation on D = N_S / (N_S + scaleB*N_B)
         discr_error = TMath::Sqrt( Sqr ( scaleB*neventsB
                                          / Sqr(neventsS+scaleB*neventsB)
                                          * errorS) +
                                    Sqr ( scaleB*neventsS
                                          / Sqr(neventsS+scaleB*neventsB)
                                          * errorB) );
      else discr_error = 1.;

      if (discr_error < 1e-10) discr_error = 1.;
   }
   else { // signal and background are not separated
      std::vector<Float_t> xvec = ev->GetValues();

      // get the discriminant (and its error) directly from the foam
      discr       = fFoam.at(0)->GetCellDiscr(xvec, fKernel);
      discr_error = fFoam.at(0)->GetCellValue(xvec, kDiscriminatorError);
   }

   // attribute the error
   if (err != 0)      *err      = discr_error;
   if (errUpper != 0) *errUpper = discr_error;

   if (fUseYesNoCell)
      return (discr < 0.5 ? -1 : 1); // hard -1/+1 classification
   else
      return discr;                  // signal probability
}
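
// Illustrative sketch (not part of this class): during application,
// GetMvaValue() is reached through the TMVA::Reader.  The variable name
// and weight file path below are assumptions made for the example only.
//
//    TMVA::Reader reader( "!Color:!Silent" );
//    Float_t var1 = 0;
//    reader.AddVariable( "var1", &var1 );
//    reader.BookMVA( "PDEFoam", "weights/TMVAClassification_PDEFoam.weights.xml" );
//    var1 = 1.23;                                      // set the event values ...
//    Double_t discr = reader.EvaluateMVA( "PDEFoam" ); // ... and get the discriminant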

//_______________________________________________________________________
void TMVA::MethodPDEFoam::SetXminXmax( TMVA::PDEFoam *pdefoam )
{
   // set the pre-calculated ranges [fXmin, fXmax] on the given foam

   if (!pdefoam){
      Log() << kFATAL << "Null pointer given!" << Endl;
      return;
   }

   UInt_t num_vars = GetNvar();
   if (fMultiTargetRegression)
      num_vars += Data()->GetNTargets();

   for (UInt_t idim=0; idim<num_vars; idim++) {
      Log()<< kDEBUG << "foam: SetXmin[dim="<<idim<<"]: " << fXmin.at(idim) << Endl;
      Log()<< kDEBUG << "foam: SetXmax[dim="<<idim<<"]: " << fXmax.at(idim) << Endl;
      pdefoam->SetXmin(idim, fXmin.at(idim));
      pdefoam->SetXmax(idim, fXmax.at(idim));
   }
}

//_______________________________________________________________________
void TMVA::MethodPDEFoam::InitFoam(TMVA::PDEFoam *pdefoam, EFoamType ft)
{
   // pass the user options to the given foam and initialise it

   if (!pdefoam){
      Log() << kFATAL << "Null pointer given!" << Endl;
      return;
   }

   // set the message level of the foam
   pdefoam->Log().SetMinType(this->Log().GetMinType());

   // set the foam type (kSeparate, kDiscr, kMonoTarget or kMultiTarget)
   pdefoam->SetFoamType(ft);

   // set the dimension; for multi target regression the targets are
   // additional foam dimensions
   if (ft==kMultiTarget)
      pdefoam->SetDim( Data()->GetNTargets()+Data()->GetNVariables());
   else
      pdefoam->SetDim( GetNvar());
   pdefoam->SetVolumeFraction(fVolFrac);
   pdefoam->SetnCells(fnCells);
   pdefoam->SetnSampl(fnSampl);
   pdefoam->SetnBin(fnBin);
   pdefoam->SetEvPerBin(fEvPerBin);
   pdefoam->SetFillFoamWithOrigWeights(fFillFoamWithOrigWeights);
   pdefoam->SetDTSeparation(fDTSeparation);
   pdefoam->SetPeekMax(fPeekMax);

   // cell-split stopping options
   pdefoam->SetNmin(fNmin);
   pdefoam->SetMaxDepth(fMaxDepth);

   // initialise the foam
   pdefoam->Init();

   // set the foam range
   SetXminXmax(pdefoam);
}

//_______________________________________________________________________
const std::vector<Float_t>& TMVA::MethodPDEFoam::GetRegressionValues()
{
   // return the regression value(s) for the current event

   if (fRegressionReturnVal == 0) fRegressionReturnVal = new std::vector<Float_t>();
   fRegressionReturnVal->clear();

   const Event* ev = GetEvent();
   std::vector<Float_t> vals = ev->GetValues(); // input values of the current event

   if (vals.size() == 0) {
      Log() << kWARNING << "<GetRegressionValues> value vector has size 0. " << Endl;
   }

   if (fMultiTargetRegression) {
      // multi target regression: project the foam onto the target dimensions
      std::vector<Float_t> targets = fFoam.at(0)->GetProjectedRegValue(vals, fKernel, fTargetSelection);
      for(UInt_t i=0; i<(Data()->GetNTargets()); i++)
         fRegressionReturnVal->push_back(targets.at(i));
   }
   else {
      // mono target regression: return the average target stored in the cell
      fRegressionReturnVal->push_back(fFoam.at(0)->GetCellRegValue0(vals, fKernel));
   }

   // apply the inverse transformation to the targets
   Event * evT = new Event(*ev);
   for (UInt_t itgt = 0; itgt < Data()->GetNTargets(); itgt++) {
      evT->SetTarget(itgt, fRegressionReturnVal->at(itgt) );
   }
   const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
   fRegressionReturnVal->clear();
   for (UInt_t itgt = 0; itgt < Data()->GetNTargets(); itgt++) {
      fRegressionReturnVal->push_back( evT2->GetTarget(itgt) );
   }

   delete evT;

   return (*fRegressionReturnVal);
}

//_______________________________________________________________________
void TMVA::MethodPDEFoam::PrintCoefficients( void )
{}

//_______________________________________________________________________
void TMVA::MethodPDEFoam::AddWeightsXMLTo( void* parent ) const
{
   // write the foam options to the XML weight file

   void* wght = gTools().AddChild(parent, "Weights");
   gTools().AddAttr( wght, "SigBgSeparated",  fSigBgSeparated );
   gTools().AddAttr( wght, "Frac",            fFrac );
   gTools().AddAttr( wght, "DiscrErrCut",     fDiscrErrCut );
   gTools().AddAttr( wght, "VolFrac",         fVolFrac );
   gTools().AddAttr( wght, "nCells",          fnCells );
   gTools().AddAttr( wght, "nSampl",          fnSampl );
   gTools().AddAttr( wght, "nBin",            fnBin );
   gTools().AddAttr( wght, "EvPerBin",        fEvPerBin );
   gTools().AddAttr( wght, "Compress",        fCompress );
   gTools().AddAttr( wght, "DoRegression",    DoRegression() );
   gTools().AddAttr( wght, "CutNmin",         fNmin>0 );
   gTools().AddAttr( wght, "Nmin",            fNmin );
   gTools().AddAttr( wght, "CutRMSmin",       false );
   gTools().AddAttr( wght, "RMSmin",          0.0 );
   gTools().AddAttr( wght, "Kernel",          KernelToUInt(fKernel) );
   gTools().AddAttr( wght, "TargetSelection", TargetSelectionToUInt(fTargetSelection) );
   gTools().AddAttr( wght, "FillFoamWithOrigWeights", fFillFoamWithOrigWeights );
   gTools().AddAttr( wght, "UseYesNoCell",    fUseYesNoCell );

   // save the foam borders Xmin[i], Xmax[i]
   void *xmin_wrap;
   for (UInt_t i=0; i<fXmin.size(); i++){
      xmin_wrap = gTools().AddChild( wght, "Xmin" );
      gTools().AddAttr( xmin_wrap, "Index", i );
      gTools().AddAttr( xmin_wrap, "Value", fXmin.at(i) );
   }
   void *xmax_wrap;
   for (UInt_t i=0; i<fXmax.size(); i++){
      xmax_wrap = gTools().AddChild( wght, "Xmax" );
      gTools().AddAttr( xmax_wrap, "Index", i );
      gTools().AddAttr( xmax_wrap, "Value", fXmax.at(i) );
   }

   // the foams themselves are stored in a separate ROOT file
   WriteFoamsToFile();
}

//_______________________________________________________________________
void TMVA::MethodPDEFoam::WriteFoamsToFile() const
{
   // write the trained foams to a ROOT file, whose name is derived from
   // the weight file name

   // fill the variable names into the foams
   FillVariableNamesToFoam();

   TString rfname( GetWeightFileName() );

   // replace in case of a text weight file
   rfname.ReplaceAll( TString(".") + gConfig().GetIONames().fWeightFileExtension + ".txt", ".xml" );

   // add a foam indicator to distinguish from the main weight file
   rfname.ReplaceAll( ".xml", "_foams.root" );

   TFile *rootFile = 0;
   if (fCompress) rootFile = new TFile(rfname, "RECREATE", "foamfile", 9);
   else           rootFile = new TFile(rfname, "RECREATE");

   fFoam.at(0)->Write(fFoam.at(0)->GetFoamName().Data());
   if (!DoRegression() && fSigBgSeparated)
      fFoam.at(1)->Write(fFoam.at(1)->GetFoamName().Data());
   rootFile->Close();
   Log() << kINFO << "Foams written to file: "
         << gTools().Color("lightblue") << rfname << gTools().Color("reset") << Endl;
}
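
// Worked example (illustration only, assuming the default weight file
// extension "weights"): a weight file named
//    weights/TMVAClassification_PDEFoam.weights.xml
// leads to the foam file
//    weights/TMVAClassification_PDEFoam.weights_foams.root
// because the trailing ".xml" is replaced by "_foams.root" above.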

//_______________________________________________________________________
void TMVA::MethodPDEFoam::ReadWeightsFromStream( istream& istr )
{
   // read the foam options from a plain text (stream) weight file

   istr >> fSigBgSeparated;          // separate foams for signal and background?
   istr >> fFrac;                    // tail cut fraction
   istr >> fDiscrErrCut;             // cut on the discriminant error
   istr >> fVolFrac;                 // volume fraction of the sampling box
   istr >> fnCells;                  // number of cells
   istr >> fnSampl;                  // number of MC events per cell in build-up
   istr >> fnBin;                    // number of bins in edge histograms
   istr >> fEvPerBin;                // maximum events (equivalent) per bin
   istr >> fCompress;                // compress output file?

   Bool_t regr;
   istr >> regr;                     // regression foam?
   SetAnalysisType( (regr ? Types::kRegression : Types::kClassification ) );

   Bool_t CutNmin, CutRMSmin;        // obsolete options, read for backward compatibility
   Float_t RMSmin;
   istr >> CutNmin;
   istr >> fNmin;
   istr >> CutRMSmin;
   istr >> RMSmin;

   UInt_t ker = 0;
   istr >> ker;                      // kernel used in GetMvaValue()
   fKernel = UIntToKernel(ker);

   UInt_t ts = 0;
   istr >> ts;                       // target selection method
   fTargetSelection = UIntToTargetSelection(ts);

   istr >> fFillFoamWithOrigWeights; // fill foam with original event weights?
   istr >> fUseYesNoCell;            // return -1 or 1 for background or signal events?

   // clear the old range [fXmin, fXmax] and prepare a new one
   fXmin.clear();
   fXmax.clear();
   UInt_t kDim = GetNvar();
   if (fMultiTargetRegression)
      kDim += Data()->GetNTargets();

   for (UInt_t i=0; i<kDim; i++) {
      fXmin.push_back(0.);
      fXmax.push_back(0.);
   }

   // read the foam range
   for (UInt_t i=0; i<kDim; i++)
      istr >> fXmin.at(i);
   for (UInt_t i=0; i<kDim; i++)
      istr >> fXmax.at(i);

   // read the foams from the ROOT file
   ReadFoamsFromFile();
}

//_______________________________________________________________________
void TMVA::MethodPDEFoam::ReadWeightsFromXML( void* wghtnode )
{
   // read the foam options from the XML weight file

   gTools().ReadAttr( wghtnode, "SigBgSeparated",  fSigBgSeparated );
   gTools().ReadAttr( wghtnode, "Frac",            fFrac );
   gTools().ReadAttr( wghtnode, "DiscrErrCut",     fDiscrErrCut );
   gTools().ReadAttr( wghtnode, "VolFrac",         fVolFrac );
   gTools().ReadAttr( wghtnode, "nCells",          fnCells );
   gTools().ReadAttr( wghtnode, "nSampl",          fnSampl );
   gTools().ReadAttr( wghtnode, "nBin",            fnBin );
   gTools().ReadAttr( wghtnode, "EvPerBin",        fEvPerBin );
   gTools().ReadAttr( wghtnode, "Compress",        fCompress );
   Bool_t regr;
   gTools().ReadAttr( wghtnode, "DoRegression",    regr );
   SetAnalysisType( (regr ? Types::kRegression : Types::kClassification ) );
   Bool_t CutNmin;   // obsolete, read for backward compatibility
   gTools().ReadAttr( wghtnode, "CutNmin",         CutNmin );
   gTools().ReadAttr( wghtnode, "Nmin",            fNmin );
   Bool_t CutRMSmin; // obsolete, read for backward compatibility
   Float_t RMSmin;
   gTools().ReadAttr( wghtnode, "CutRMSmin",       CutRMSmin );
   gTools().ReadAttr( wghtnode, "RMSmin",          RMSmin );
   UInt_t ker = 0;
   gTools().ReadAttr( wghtnode, "Kernel",          ker );
   fKernel = UIntToKernel(ker);
   UInt_t ts = 0;
   gTools().ReadAttr( wghtnode, "TargetSelection", ts );
   fTargetSelection = UIntToTargetSelection(ts);
   if (gTools().HasAttr(wghtnode, "FillFoamWithOrigWeights"))
      gTools().ReadAttr( wghtnode, "FillFoamWithOrigWeights", fFillFoamWithOrigWeights );
   if (gTools().HasAttr(wghtnode, "UseYesNoCell"))
      gTools().ReadAttr( wghtnode, "UseYesNoCell", fUseYesNoCell );

   // clear the old range and prepare a new one
   fXmin.clear();
   fXmax.clear();
   UInt_t kDim = GetNvar();
   if (fMultiTargetRegression)
      kDim += Data()->GetNTargets();

   for (UInt_t i=0; i<kDim; i++) {
      fXmin.push_back(0.);
      fXmax.push_back(0.);
   }

   // read the foam range from the XML node
   void *xmin_wrap = gTools().GetChild( wghtnode );
   for (UInt_t counter=0; counter<kDim; counter++) {
      UInt_t i=0;
      gTools().ReadAttr( xmin_wrap , "Index", i );
      if (i>=kDim)
         Log() << kFATAL << "dimension index out of range:" << i << Endl;
      gTools().ReadAttr( xmin_wrap , "Value", fXmin.at(i) );
      xmin_wrap = gTools().GetNextChild( xmin_wrap );
   }

   void *xmax_wrap = xmin_wrap;
   for (UInt_t counter=0; counter<kDim; counter++) {
      UInt_t i=0;
      gTools().ReadAttr( xmax_wrap , "Index", i );
      if (i>=kDim)
         Log() << kFATAL << "dimension index out of range:" << i << Endl;
      gTools().ReadAttr( xmax_wrap , "Value", fXmax.at(i) );
      xmax_wrap = gTools().GetNextChild( xmax_wrap );
   }

   // delete foams from a previous training
   for (UInt_t i=0; i<fFoam.size(); i++)
      if (fFoam.at(i)) delete fFoam.at(i);
   fFoam.clear();

   // read the foams from the ROOT file
   ReadFoamsFromFile();
}

//_______________________________________________________________________
void TMVA::MethodPDEFoam::ReadFoamsFromFile()
{
   // read the foams from the ROOT file, whose name is derived from the
   // weight file name (see WriteFoamsToFile())

   TString rfname( GetWeightFileName() );

   // replace in case of a text weight file
   rfname.ReplaceAll( TString(".") + gConfig().GetIONames().fWeightFileExtension + ".txt", ".xml" );

   // add a foam indicator to distinguish from the main weight file
   rfname.ReplaceAll( ".xml", "_foams.root" );

   Log() << kINFO << "Read foams from file: " << gTools().Color("lightblue")
         << rfname << gTools().Color("reset") << Endl;
   TFile *rootFile = new TFile( rfname, "READ" );
   if (rootFile->IsZombie()) Log() << kFATAL << "Cannot open file \"" << rfname << "\"" << Endl;

   // read the foams, depending on the analysis type
   if (DoRegression()) {
      if (fMultiTargetRegression)
         fFoam.push_back( dynamic_cast<PDEFoam*>(rootFile->Get("MultiTargetRegressionFoam")) );
      else
         fFoam.push_back( dynamic_cast<PDEFoam*>(rootFile->Get("MonoTargetRegressionFoam")) );
   }
   else {
      if (fSigBgSeparated) {
         fFoam.push_back( dynamic_cast<PDEFoam*>(rootFile->Get("SignalFoam")) );
         fFoam.push_back( dynamic_cast<PDEFoam*>(rootFile->Get("BgFoam")) );
      }
      else
         fFoam.push_back( dynamic_cast<PDEFoam*>(rootFile->Get("DiscrFoam")) );
   }
   if (!fFoam.at(0) || (!DoRegression() && fSigBgSeparated && !fFoam.at(1)))
      Log() << kFATAL << "Could not load foam!" << Endl;
}

//_______________________________________________________________________
TMVA::EKernel TMVA::MethodPDEFoam::UIntToKernel(UInt_t iker)
{
   // convert the UInt_t stored in the weight file into an EKernel
   switch(iker) {
   case 0: return kNone;
   case 1: return kGaus;
   case 2: return kLinN;
   default:
      Log() << kWARNING << "<UIntToKernel>: unknown kernel number: " << iker << Endl;
      return kNone;
   }
   return kNone;
}

//_______________________________________________________________________
TMVA::ETargetSelection TMVA::MethodPDEFoam::UIntToTargetSelection(UInt_t its)
{
   // convert the UInt_t stored in the weight file into an ETargetSelection
   switch(its) {
   case 0: return kMean;
   case 1: return kMpv;
   default:
      Log() << kWARNING << "<UIntToTargetSelection>: unknown method TargetSelection: " << its << Endl;
      return kMean;
   }
   return kMean;
}

//_______________________________________________________________________
void TMVA::MethodPDEFoam::FillVariableNamesToFoam() const
{
   // store the variable (and target) names in all foams
   for (UInt_t ifoam=0; ifoam<fFoam.size(); ifoam++) {
      for (Int_t idim=0; idim<fFoam.at(ifoam)->GetTotDim(); idim++) {
         if(fMultiTargetRegression && (UInt_t)idim>=DataInfo().GetNVariables())
            fFoam.at(ifoam)->AddVariableName(DataInfo().GetTargetInfo(idim-DataInfo().GetNVariables()).GetExpression().Data());
         else
            fFoam.at(ifoam)->AddVariableName(DataInfo().GetVariableInfo(idim).GetExpression().Data());
      }
   }
}

//_______________________________________________________________________
void TMVA::MethodPDEFoam::MakeClassSpecific( std::ostream& , const TString& ) const
{
   // write PDEFoam-specific classifier response (not implemented)
}

//_______________________________________________________________________
void TMVA::MethodPDEFoam::GetHelpMessage() const
{
   // provide a help message
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "PDE-Foam is a variation of the PDE-RS method using a self-adapting" << Endl;
   Log() << "binning method to divide the multi-dimensional variable space into a" << Endl;
   Log() << "finite number of hyper-rectangles (cells). The binning algorithm" << Endl;
   Log() << "adjusts the size and position of a predefined number of cells such" << Endl;
   Log() << "that the variance of the signal and background densities inside the" << Endl;
   Log() << "cells reaches a minimum." << Endl;
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Use of booking options:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "The PDEFoam classifier supports two different algorithms: " << Endl;
   Log() << Endl;
   Log() << "  (1) Create one foam, which stores the signal over background" << Endl;
   Log() << "      probability density. During foam buildup the variance of the" << Endl;
   Log() << "      discriminant inside the cells is minimised." << Endl;
   Log() << Endl;
   Log() << "      Booking option:   SigBgSeparated=F" << Endl;
   Log() << Endl;
   Log() << "  (2) Create two separate foams, one for the signal events and one for" << Endl;
   Log() << "      background events. During foam buildup the variance of the" << Endl;
   Log() << "      event density inside the cells is minimised separately for" << Endl;
   Log() << "      signal and background." << Endl;
   Log() << Endl;
   Log() << "      Booking option:   SigBgSeparated=T" << Endl;
   Log() << Endl;
   Log() << "The following options can be set (the listed values are found to be a" << Endl;
   Log() << "good starting point for most applications):" << Endl;
   Log() << Endl;
   Log() << "        SigBgSeparate   False   Separate Signal and Background" << Endl;
   Log() << "              TailCut   0.001   Fraction of outlier events that are excluded" << Endl;
   Log() << "                                from the foam in each dimension" << Endl;
   Log() << "              VolFrac  0.0333   Volume fraction (used for density calculation" << Endl;
   Log() << "                                during foam build-up)" << Endl;
   Log() << "         nActiveCells     500   Maximum number of active cells in the final foam" << Endl;
   Log() << "               nSampl    2000   Number of MC events per cell in foam build-up" << Endl;
   Log() << "                 nBin       5   Number of bins used in foam build-up" << Endl;
   Log() << "                 Nmin     100   Number of events in cell required to split cell" << Endl;
   Log() << "               Kernel    None   Kernel type used (possible values are: None," << Endl;
   Log() << "                                Gauss)" << Endl;
   Log() << "             Compress    True   Compress foam output file" << Endl;
   Log() << Endl;
   Log() << "   Additional regression options:" << Endl;
   Log() << Endl;
   Log() << "MultiTargetRegression   False   Do regression with multiple targets" << Endl;
   Log() << "      TargetSelection    Mean   Target selection method (possible values are:" << Endl;
   Log() << "                                Mean, Mpv)" << Endl;
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "The performance of the two implementations was found to be similar for" << Endl;
   Log() << "most examples studied. For the same number of cells per foam, the two-" << Endl;
   Log() << "foam option approximately doubles the amount of computer memory needed" << Endl;
   Log() << "during classification. For special cases where the event-density" << Endl;
   Log() << "distribution of signal and background events is very different, the" << Endl;
   Log() << "two-foam option was found to perform significantly better than the" << Endl;
   Log() << "option with only one foam." << Endl;
   Log() << Endl;
   Log() << "In order to gain better classification performance we recommend to set" << Endl;
   Log() << "the parameter \"nActiveCells\" to a high value." << Endl;
   Log() << Endl;
   Log() << "The parameter \"VolFrac\" specifies the size of the sampling volume" << Endl;
   Log() << "during foam buildup and should be tuned in order to achieve optimal" << Endl;
   Log() << "performance. A larger box leads to a reduced statistical uncertainty" << Endl;
   Log() << "for small training samples and to smoother sampling. A smaller box on" << Endl;
   Log() << "the other hand increases the sensitivity to statistical fluctuations" << Endl;
   Log() << "in the training samples, but for sufficiently large training samples" << Endl;
   Log() << "it will result in a more precise local estimate of the sampled" << Endl;
   Log() << "density. In general, higher dimensional problems require larger box" << Endl;
   Log() << "sizes, due to the reduced average number of events per box volume. The" << Endl;
   Log() << "default value of 0.0333 was optimised for an example with 5" << Endl;
   Log() << "observables and training samples of the order of 50000 signal and" << Endl;
   Log() << "background events each." << Endl;
   Log() << Endl;
   Log() << "Furthermore kernel weighting can be activated, which will lead to an" << Endl;
   Log() << "additional performance improvement. Note that Gauss weighting will" << Endl;
   Log() << "significantly increase the response time of the method. LinNeighbors" << Endl;
   Log() << "weighting performs a linear interpolation with direct neighbor cells" << Endl;
   Log() << "for each dimension and is much faster than Gauss weighting." << Endl;
   Log() << Endl;
   Log() << "The classification results were found to be rather insensitive to the" << Endl;
   Log() << "values of the parameters \"nSampl\" and \"nBin\"." << Endl;
}