00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071 #include <assert.h>
00072 #include <algorithm>
00073
00074 #include "TFile.h"
00075 #include "TObjString.h"
00076 #include "TMath.h"
00077
00078 #include "TMVA/ClassifierFactory.h"
00079 #include "TMVA/MethodPDERS.h"
00080 #include "TMVA/Tools.h"
00081 #include "TMVA/RootFinder.h"
00082
00083 #define TMVA_MethodPDERS__countByHand__Debug__
00084 #undef TMVA_MethodPDERS__countByHand__Debug__
00085
namespace TMVA {
   // When kTRUE, the adaptive volume search in GetSample() uses the generic
   // RootFinder machinery instead of the hand-rolled secant iteration.
   const Bool_t MethodPDERS_UseFindRoot = kFALSE;
};

// Static "this" pointer: routes the C-style RootFinder callback
// (IGetVolumeContentForRoot) back to the current method instance.
TMVA::MethodPDERS* TMVA::MethodPDERS::fgThisPDERS = NULL;

REGISTER_METHOD(PDERS)

ClassImp(TMVA::MethodPDERS)
00095
00096
TMVA::MethodPDERS::MethodPDERS( const TString& jobName,
                                const TString& methodTitle,
                                DataSetInfo& theData,
                                const TString& theOption,
                                TDirectory* theTargetDir ) :
   MethodBase( jobName, Types::kPDERS, methodTitle, theData, theOption, theTargetDir ),
   fFcnCall(0),
   fVRangeMode(kAdaptive),
   fKernelEstimator(kBox),
   fDelta(0),
   fShift(0),
   fScaleS(0),
   fScaleB(0),
   fDeltaFrac(0),
   fGaussSigma(0),
   fGaussSigmaNorm(0),
   fNRegOut(0),
   fNEventsMin(0),
   fNEventsMax(0),
   fMaxVIterations(0),
   fInitialScale(0),
   fInitializedVolumeEle(0),
   fkNNMin(0),
   fkNNMax(0),
   fMax_distance(0),
   fPrinted(0),
   fNormTree(0)
{
   // standard constructor for the PDERS method
   // (members are zero-initialised here; the actual defaults are set in
   // Init(), which the framework invokes)
}
00127
00128
TMVA::MethodPDERS::MethodPDERS( DataSetInfo& theData,
                                const TString& theWeightFile,
                                TDirectory* theTargetDir ) :
   MethodBase( Types::kPDERS, theData, theWeightFile, theTargetDir ),
   fFcnCall(0),
   fVRangeMode(kAdaptive),
   fKernelEstimator(kBox),
   fDelta(0),
   fShift(0),
   fScaleS(0),
   fScaleB(0),
   fDeltaFrac(0),
   fGaussSigma(0),
   fGaussSigmaNorm(0),
   fNRegOut(0),
   fNEventsMin(0),
   fNEventsMax(0),
   fMaxVIterations(0),
   fInitialScale(0),
   fInitializedVolumeEle(0),
   fkNNMin(0),
   fkNNMax(0),
   fMax_distance(0),
   fPrinted(0),
   fNormTree(0)
{
   // constructor used when reading the method back from a weight file
}
00157
00158
00159 Bool_t TMVA::MethodPDERS::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t )
00160 {
00161
00162 if (type == Types::kClassification && numberClasses == 2) return kTRUE;
00163 if (type == Types::kRegression) return kTRUE;
00164 return kFALSE;
00165 }
00166
00167
void TMVA::MethodPDERS::Init( void )
{
   // default initialisation routine called by all constructors

   fBinaryTree = NULL;

   // register this instance for the static root-finder callback
   UpdateThis();

   // default options
   fDeltaFrac       = 3.0;
   fVRangeMode      = kAdaptive;
   fKernelEstimator = kBox;

   // special options for adaptive volume adjustment
   fNEventsMin      = 100;
   fNEventsMax      = 200;
   fMaxVIterations  = 150;
   fInitialScale    = 0.99;
   fGaussSigma      = 0.1;
   fNormTree        = kFALSE;

   // kNN bounds follow the adaptive event-count window
   fkNNMin = Int_t(fNEventsMin);
   fkNNMax = Int_t(fNEventsMax);

   fInitializedVolumeEle = kFALSE;
   fAverageRMS.clear();

   // the minimum requirement to declare an event signal-like
   SetSignalReferenceCut( 0.0 );
}
00198
00199
00200 TMVA::MethodPDERS::~MethodPDERS( void )
00201 {
00202
00203 if (fDelta) delete fDelta;
00204 if (fShift) delete fShift;
00205
00206 if (NULL != fBinaryTree) delete fBinaryTree;
00207 }
00208
00209
void TMVA::MethodPDERS::DeclareOptions()
{
   // Define the configuration options understood by PDERS:
   //
   //   VolumeRangeMode <string>  method to determine the search-volume size;
   //                             one of: Unscaled, MinMax, RMS, Adaptive
   //                             (default), kNN
   //   KernelEstimator <string>  kernel weighting function; one of: Box
   //                             (default), Sphere, Teepee, Gauss,
   //                             Sinc3..Sinc11, Lanczos2..Lanczos8, Trim
   //   DeltaFrac       <float>   volume scale for the MinMax/RMS modes
   //   NEventsMin/Max  <float>   target event-count window (adaptive mode)
   //   MaxVIterations  <int>     max. iterations for adaptive volume scaling
   //   InitialScale    <float>   first scale factor tried in adaptive mode
   //   GaussSigma      <float>   width (wrt volume size) of the Gauss kernel
   //   NormTree        <bool>    normalise the binary search tree

   DeclareOptionRef(fVolumeRange="Adaptive", "VolumeRangeMode", "Method to determine volume size");
   AddPreDefVal(TString("Unscaled"));
   AddPreDefVal(TString("MinMax"));
   AddPreDefVal(TString("RMS"));
   AddPreDefVal(TString("Adaptive"));
   AddPreDefVal(TString("kNN"));

   DeclareOptionRef(fKernelString="Box", "KernelEstimator", "Kernel estimation function");
   AddPreDefVal(TString("Box"));
   AddPreDefVal(TString("Sphere"));
   AddPreDefVal(TString("Teepee"));
   AddPreDefVal(TString("Gauss"));
   AddPreDefVal(TString("Sinc3"));
   AddPreDefVal(TString("Sinc5"));
   AddPreDefVal(TString("Sinc7"));
   AddPreDefVal(TString("Sinc9"));
   AddPreDefVal(TString("Sinc11"));
   AddPreDefVal(TString("Lanczos2"));
   AddPreDefVal(TString("Lanczos3"));
   AddPreDefVal(TString("Lanczos5"));
   AddPreDefVal(TString("Lanczos8"));
   AddPreDefVal(TString("Trim"));

   DeclareOptionRef(fDeltaFrac     , "DeltaFrac",      "nEventsMin/Max for minmax and rms volume range");
   DeclareOptionRef(fNEventsMin    , "NEventsMin",     "nEventsMin for adaptive volume range");
   DeclareOptionRef(fNEventsMax    , "NEventsMax",     "nEventsMax for adaptive volume range");
   DeclareOptionRef(fMaxVIterations, "MaxVIterations", "MaxVIterations for adaptive volume range");
   DeclareOptionRef(fInitialScale  , "InitialScale",   "InitialScale for adaptive volume range");
   DeclareOptionRef(fGaussSigma    , "GaussSigma",     "Width (wrt volume size) of Gaussian kernel estimator");
   DeclareOptionRef(fNormTree      , "NormTree",       "Normalize binary search tree");
}
00274
00275
void TMVA::MethodPDERS::ProcessOptions()
{
   // process the options specified by the user: translate the option strings
   // into the internal enumerators and report the interpreted configuration

   // PDERS does not (yet) support ignoring negative-weight training events
   if (IgnoreEventsWithNegWeightsInTraining()) {
      Log() << kFATAL << "Mechanism to ignore events with negative weights in training not yet available for method: "
            << GetMethodTypeName()
            << " --> please remove \"IgnoreNegWeightsInTraining\" option from booking string."
            << Endl;
   }

   // kernel width actually used by the Gauss kernel
   fGaussSigmaNorm = fGaussSigma;

   // translate the volume-range option string into the enumerator;
   // stays kUnsupported (and aborts below) for unknown strings
   fVRangeMode = MethodPDERS::kUnsupported;

   if      (fVolumeRange == "MinMax"    ) fVRangeMode = kMinMax;
   else if (fVolumeRange == "RMS"       ) fVRangeMode = kRMS;
   else if (fVolumeRange == "Adaptive"  ) fVRangeMode = kAdaptive;
   else if (fVolumeRange == "Unscaled"  ) fVRangeMode = kUnscaled;
   else if (fVolumeRange == "kNN"       ) fVRangeMode = kkNN;
   else {
      Log() << kFATAL << "VolumeRangeMode parameter '" << fVolumeRange << "' unknown" << Endl;
   }

   // translate the kernel-estimator option string into the enumerator
   if      (fKernelString == "Box"      ) fKernelEstimator = kBox;
   else if (fKernelString == "Sphere"   ) fKernelEstimator = kSphere;
   else if (fKernelString == "Teepee"   ) fKernelEstimator = kTeepee;
   else if (fKernelString == "Gauss"    ) fKernelEstimator = kGauss;
   else if (fKernelString == "Sinc3"    ) fKernelEstimator = kSinc3;
   else if (fKernelString == "Sinc5"    ) fKernelEstimator = kSinc5;
   else if (fKernelString == "Sinc7"    ) fKernelEstimator = kSinc7;
   else if (fKernelString == "Sinc9"    ) fKernelEstimator = kSinc9;
   else if (fKernelString == "Sinc11"   ) fKernelEstimator = kSinc11;
   else if (fKernelString == "Lanczos2" ) fKernelEstimator = kLanczos2;
   else if (fKernelString == "Lanczos3" ) fKernelEstimator = kLanczos3;
   else if (fKernelString == "Lanczos5" ) fKernelEstimator = kLanczos5;
   else if (fKernelString == "Lanczos8" ) fKernelEstimator = kLanczos8;
   else if (fKernelString == "Trim"     ) fKernelEstimator = kTrim;
   else {
      Log() << kFATAL << "KernelEstimator parameter '" << fKernelString << "' unknown" << Endl;
   }

   // echo the interpreted configuration
   Log() << kVERBOSE << "interpreted option string: vRangeMethod: '"
         << (const char*)((fVRangeMode == kMinMax)   ? "MinMax" :
                          (fVRangeMode == kUnscaled) ? "Unscaled" :
                          (fVRangeMode == kRMS  )    ? "RMS" : "Adaptive") << "'" << Endl;
   if (fVRangeMode == kMinMax || fVRangeMode == kRMS)
      Log() << kVERBOSE << "deltaFrac: " << fDeltaFrac << Endl;
   else
      Log() << kVERBOSE << "nEventsMin/Max, maxVIterations, initialScale: "
            << fNEventsMin << "  " << fNEventsMax
            << "  " << fMaxVIterations << "  " << fInitialScale << Endl;
   Log() << kVERBOSE << "KernelEstimator = " << fKernelString << Endl;
}
00332
00333
void TMVA::MethodPDERS::Train( void )
{
   // "Training": fill the binary search tree with the training events and
   // pre-compute the per-variable averages and the volume element.
   // PDERS is a lazy learner - the actual density estimation happens
   // event-by-event at evaluation time.

   // PDERS relies on the binary-tree normalisation instead
   if (IsNormalised()) Log() << kFATAL << "\"Normalise\" option cannot be used with PDERS; "
                             << "please remove the option from the configuration string, or "
                             << "use \"!Normalise\""
                             << Endl;

   CreateBinarySearchTree( Types::kTraining );

   CalcAverages();
   SetVolumeElement();

   fInitializedVolumeEle = kTRUE;
}
00353
00354
00355 Double_t TMVA::MethodPDERS::GetMvaValue( Double_t* err, Double_t* errUpper )
00356 {
00357
00358
00359 if (fInitializedVolumeEle == kFALSE) {
00360 fInitializedVolumeEle = kTRUE;
00361
00362
00363 assert( fBinaryTree );
00364
00365 CalcAverages();
00366 SetVolumeElement();
00367 }
00368
00369
00370 NoErrorCalc(err, errUpper);
00371
00372 return this->CRScalc( *GetEvent() );
00373 }
00374
00375
00376 const std::vector< Float_t >& TMVA::MethodPDERS::GetRegressionValues()
00377 {
00378 if (fRegressionReturnVal == 0) fRegressionReturnVal = new std::vector<Float_t>;
00379 fRegressionReturnVal->clear();
00380
00381
00382 if (fInitializedVolumeEle == kFALSE) {
00383 fInitializedVolumeEle = kTRUE;
00384
00385
00386 assert( fBinaryTree );
00387
00388 CalcAverages();
00389
00390 SetVolumeElement();
00391 }
00392
00393 const Event* ev = GetEvent();
00394 this->RRScalc( *ev, fRegressionReturnVal );
00395
00396 Event * evT = new Event(*ev);
00397 UInt_t ivar = 0;
00398 for (std::vector<Float_t>::iterator it = fRegressionReturnVal->begin(); it != fRegressionReturnVal->end(); it++ ) {
00399 evT->SetTarget(ivar,(*it));
00400 ivar++;
00401 }
00402
00403 const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
00404 fRegressionReturnVal->clear();
00405
00406 for (ivar = 0; ivar<evT2->GetNTargets(); ivar++) {
00407 fRegressionReturnVal->push_back(evT2->GetTarget(ivar));
00408 }
00409
00410 delete evT;
00411
00412
00413 return (*fRegressionReturnVal);
00414 }
00415
00416
00417 void TMVA::MethodPDERS::CalcAverages()
00418 {
00419
00420 if (fVRangeMode == kAdaptive || fVRangeMode == kRMS || fVRangeMode == kkNN ) {
00421 fAverageRMS.clear();
00422 fBinaryTree->CalcStatistics();
00423
00424 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00425 if (!DoRegression()){
00426 Float_t rmsS = fBinaryTree->RMS(Types::kSignal, ivar);
00427 Float_t rmsB = fBinaryTree->RMS(Types::kBackground, ivar);
00428 fAverageRMS.push_back( (rmsS + rmsB)*0.5 );
00429 } else {
00430 Float_t rms = fBinaryTree->RMS( ivar );
00431 fAverageRMS.push_back( rms );
00432 }
00433 }
00434 }
00435 }
00436
00437
00438 void TMVA::MethodPDERS::CreateBinarySearchTree( Types::ETreeType type )
00439 {
00440
00441 if (NULL != fBinaryTree) delete fBinaryTree;
00442 fBinaryTree = new BinarySearchTree();
00443 if (fNormTree) {
00444 fBinaryTree->SetNormalize( kTRUE );
00445 }
00446
00447 fBinaryTree->Fill( GetEventCollection(type) );
00448
00449 if (fNormTree) {
00450 fBinaryTree->NormalizeTree();
00451 }
00452
00453 if (!DoRegression()) {
00454
00455 fScaleS = 1.0/fBinaryTree->GetSumOfWeights( Types::kSignal );
00456 fScaleB = 1.0/fBinaryTree->GetSumOfWeights( Types::kBackground );
00457
00458 Log() << kVERBOSE << "Signal and background scales: " << fScaleS << " " << fScaleB << Endl;
00459 }
00460 }
00461
00462
00463 void TMVA::MethodPDERS::SetVolumeElement( void ) {
00464
00465
00466 if (GetNvar()==0) {
00467 Log() << kFATAL << "GetNvar() == 0" << Endl;
00468 return;
00469 }
00470
00471
00472 fkNNMin = Int_t(fNEventsMin);
00473 fkNNMax = Int_t(fNEventsMax);
00474
00475 if (fDelta) delete fDelta;
00476 if (fShift) delete fShift;
00477 fDelta = new std::vector<Float_t>( GetNvar() );
00478 fShift = new std::vector<Float_t>( GetNvar() );
00479
00480 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00481 switch (fVRangeMode) {
00482
00483 case kRMS:
00484 case kkNN:
00485 case kAdaptive:
00486
00487 if (fAverageRMS.size() != GetNvar())
00488 Log() << kFATAL << "<SetVolumeElement> RMS not computed: " << fAverageRMS.size() << Endl;
00489 (*fDelta)[ivar] = fAverageRMS[ivar]*fDeltaFrac;
00490 Log() << kVERBOSE << "delta of var[" << (*fInputVars)[ivar]
00491 << "\t]: " << fAverageRMS[ivar]
00492 << "\t | comp with |max - min|: " << (GetXmax( ivar ) - GetXmin( ivar ))
00493 << Endl;
00494 break;
00495 case kMinMax:
00496 (*fDelta)[ivar] = (GetXmax( ivar ) - GetXmin( ivar ))*fDeltaFrac;
00497 break;
00498 case kUnscaled:
00499 (*fDelta)[ivar] = fDeltaFrac;
00500 break;
00501 default:
00502 Log() << kFATAL << "<SetVolumeElement> unknown range-set mode: "
00503 << fVRangeMode << Endl;
00504 }
00505 (*fShift)[ivar] = 0.5;
00506 }
00507
00508 }
00509
00510
00511 Double_t TMVA::MethodPDERS::IGetVolumeContentForRoot( Double_t scale )
00512 {
00513
00514 return ThisPDERS()->GetVolumeContentForRoot( scale );
00515 }
00516
00517
00518 Double_t TMVA::MethodPDERS::GetVolumeContentForRoot( Double_t scale )
00519 {
00520
00521
00522 Volume v( *fHelpVolume );
00523 v.ScaleInterval( scale );
00524
00525 Double_t count = GetBinaryTree()->SearchVolume( &v );
00526
00527 v.Delete();
00528 return count;
00529 }
00530
00531 void TMVA::MethodPDERS::GetSample( const Event& e,
00532 std::vector<const BinarySearchTreeNode*>& events,
00533 Volume *volume )
00534 {
00535 Float_t count = 0;
00536
00537
00538
00539
00540
00541
00542
00543 #ifdef TMVA_MethodPDERS__countByHand__Debug__
00544
00545
00546 count = fBinaryTree->SearchVolume( volume );
00547
00548 Int_t iS = 0, iB = 0;
00549 UInt_t nvar = GetNvar();
00550 for (UInt_t ievt_=0; ievt_<Data()->GetNTrainingEvents(); ievt_++) {
00551 const Event * ev = GetTrainingEvent(ievt_);
00552 Bool_t inV;
00553 for (Int_t ivar=0; ivar<nvar; ivar++) {
00554 Float_t x = ev->GetValue(ivar);
00555 inV = (x > (*volume->Lower)[ivar] && x <= (*volume->Upper)[ivar]);
00556 if (!inV) break;
00557 }
00558 if (inV) {
00559 in++;
00560 }
00561 }
00562 Log() << kVERBOSE << "debug: my test: " << in << Endl;
00563 Log() << kVERBOSE << "debug: binTree: " << count << Endl << Endl;
00564
00565 #endif
00566
00567
00568
00569 if (fVRangeMode == kRMS || fVRangeMode == kMinMax || fVRangeMode == kUnscaled) {
00570
00571 std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
00572 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetValue(ivar);
00573 std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
00574 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00575 (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
00576 (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
00577 }
00578 Volume* svolume = new Volume( lb, ub );
00579
00580
00581 fBinaryTree->SearchVolume( svolume, &events );
00582 }
00583 else if (fVRangeMode == kAdaptive) {
00584
00585
00586
00587
00588
00589 if (MethodPDERS_UseFindRoot) {
00590
00591
00592
00593 fHelpVolume = volume;
00594
00595 UpdateThis();
00596 RootFinder rootFinder( &IGetVolumeContentForRoot, 0.01, 50, 200, 10 );
00597 Double_t scale = rootFinder.Root( (fNEventsMin + fNEventsMax)/2.0 );
00598
00599 volume->ScaleInterval( scale );
00600
00601 fBinaryTree->SearchVolume( volume, &events );
00602
00603 fHelpVolume = NULL;
00604 }
00605
00606 else {
00607
00608
00609 count = fBinaryTree->SearchVolume( volume );
00610
00611 Float_t nEventsO = count;
00612 Int_t i_=0;
00613
00614 while (nEventsO < fNEventsMin) {
00615 volume->ScaleInterval( 1.15 );
00616 count = fBinaryTree->SearchVolume( volume );
00617 nEventsO = count;
00618 i_++;
00619 }
00620 if (i_ > 50) Log() << kWARNING << "warning in event: " << e
00621 << ": adaptive volume pre-adjustment reached "
00622 << ">50 iterations in while loop (" << i_ << ")" << Endl;
00623
00624 Float_t nEventsN = nEventsO;
00625 Float_t nEventsE = 0.5*(fNEventsMin + fNEventsMax);
00626 Float_t scaleO = 1.0;
00627 Float_t scaleN = fInitialScale;
00628 Float_t scale = scaleN;
00629 Float_t scaleBest = scaleN;
00630 Float_t nEventsBest = nEventsN;
00631
00632 for (Int_t ic=1; ic<fMaxVIterations; ic++) {
00633 if (nEventsN < fNEventsMin || nEventsN > fNEventsMax) {
00634
00635
00636 Volume* v = new Volume( *volume );
00637 v->ScaleInterval( scale );
00638 nEventsN = fBinaryTree->SearchVolume( v );
00639
00640
00641 if (nEventsN > 1 && nEventsN - nEventsO != 0)
00642 if (scaleN - scaleO != 0)
00643 scale += (scaleN - scaleO)/(nEventsN - nEventsO)*(nEventsE - nEventsN);
00644 else
00645 scale += (-0.01);
00646 else
00647 scale += 0.5;
00648
00649
00650 scaleN = scale;
00651
00652
00653 if (TMath::Abs(nEventsN - nEventsE) < TMath::Abs(nEventsBest - nEventsE) &&
00654 (nEventsN >= fNEventsMin || nEventsBest < nEventsN)) {
00655 nEventsBest = nEventsN;
00656 scaleBest = scale;
00657 }
00658
00659 v->Delete();
00660 delete v;
00661 }
00662 else break;
00663 }
00664
00665
00666 nEventsN = nEventsBest;
00667
00668 if (nEventsN < fNEventsMin-1 || nEventsN > fNEventsMax+1)
00669 Log() << kWARNING << "warning in event " << e
00670 << ": adaptive volume adjustment reached "
00671 << "max. #iterations (" << fMaxVIterations << ")"
00672 << "[ nEvents: " << nEventsN << " " << fNEventsMin << " " << fNEventsMax << "]"
00673 << Endl;
00674
00675 volume->ScaleInterval( scaleBest );
00676 fBinaryTree->SearchVolume( volume, &events );
00677 }
00678
00679
00680
00681 } else if (fVRangeMode == kkNN) {
00682 Volume v(*volume);
00683
00684 events.clear();
00685
00686 Int_t kNNcount = fBinaryTree->SearchVolumeWithMaxLimit( &v, &events, fkNNMax+1 );
00687
00688
00689 Int_t t_times = 0;
00690
00691 while ( !(kNNcount >= fkNNMin && kNNcount <= fkNNMax) ) {
00692 if (kNNcount < fkNNMin) {
00693 Float_t scale = 2;
00694 volume->ScaleInterval( scale );
00695 }
00696 else if (kNNcount > fkNNMax) {
00697 Float_t scale = 0.1;
00698 volume->ScaleInterval( scale );
00699 }
00700 events.clear();
00701
00702 v = *volume ;
00703
00704 kNNcount = fBinaryTree->SearchVolumeWithMaxLimit( &v, &events, fkNNMax+1 );
00705
00706 t_times++;
00707
00708 if (t_times == fMaxVIterations) {
00709 Log() << kWARNING << "warining in event" << e
00710 << ": kNN volume adjustment reached "
00711 << "max. #iterations (" << fMaxVIterations << ")"
00712 << "[ kNN: " << fkNNMin << " " << fkNNMax << Endl;
00713 break;
00714 }
00715 }
00716
00717
00718 Double_t *dim_normalization = new Double_t[GetNvar()];
00719 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00720 dim_normalization [ivar] = 1.0 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
00721 }
00722
00723 std::vector<const BinarySearchTreeNode*> tempVector;
00724
00725 if (kNNcount >= fkNNMin) {
00726 std::vector<Double_t> *distances = new std::vector<Double_t>( kNNcount );
00727
00728
00729 for (Int_t j=0;j< Int_t(events.size()) ;j++)
00730 (*distances)[j] = GetNormalizedDistance ( e, *events[j], dim_normalization );
00731
00732
00733 std::vector<Double_t>::iterator wsk = distances->begin();
00734 for (Int_t j=0;j<fkNNMin-1;j++) wsk++;
00735 std::nth_element( distances->begin(), wsk, distances->end() );
00736
00737
00738
00739 for (Int_t j=0;j<Int_t(events.size());j++) {
00740 Double_t dist = GetNormalizedDistance( e, *events[j], dim_normalization );
00741
00742 if (dist <= (*distances)[fkNNMin-1])
00743 tempVector.push_back( events[j] );
00744 }
00745 fMax_distance = (*distances)[fkNNMin-1];
00746 delete distances;
00747 }
00748 delete[] dim_normalization;
00749 events = tempVector;
00750
00751 } else {
00752
00753
00754 Log() << kFATAL << "<GetSample> unknown RangeMode: " << fVRangeMode << Endl;
00755 }
00756
00757 }
00758
00759
00760 Double_t TMVA::MethodPDERS::CRScalc( const Event& e )
00761 {
00762 std::vector<const BinarySearchTreeNode*> events;
00763
00764
00765
00766
00767 std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
00768 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetValue(ivar);
00769
00770 std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
00771 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00772 (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
00773 (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
00774 }
00775
00776 Volume *volume = new Volume( lb, ub );
00777
00778 GetSample( e, events, volume );
00779 Double_t count = CKernelEstimate( e, events, *volume );
00780 delete volume;
00781 delete lb;
00782 delete ub;
00783
00784 return count;
00785 }
00786
00787
00788 void TMVA::MethodPDERS::RRScalc( const Event& e, std::vector<Float_t>* count )
00789 {
00790 std::vector<const BinarySearchTreeNode*> events;
00791
00792
00793
00794
00795 std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
00796 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetValue(ivar);
00797
00798 std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
00799 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00800 (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
00801 (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
00802 }
00803 Volume *volume = new Volume( lb, ub );
00804
00805 GetSample( e, events, volume );
00806 RKernelEstimate( e, events, *volume, count );
00807
00808 delete volume;
00809
00810 return;
00811 }
00812
00813 Double_t TMVA::MethodPDERS::CKernelEstimate( const Event & event,
00814 std::vector<const BinarySearchTreeNode*>& events, Volume& v )
00815 {
00816
00817 Double_t *dim_normalization = new Double_t[GetNvar()];
00818 for (UInt_t ivar=0; ivar<GetNvar(); ivar++)
00819 dim_normalization [ivar] = 2 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
00820
00821 Double_t pdfSumS = 0;
00822 Double_t pdfSumB = 0;
00823
00824
00825 for (std::vector<const BinarySearchTreeNode*>::iterator iev = events.begin(); iev != events.end(); iev++) {
00826
00827
00828 Double_t normalized_distance = GetNormalizedDistance (event, *(*iev), dim_normalization);
00829
00830
00831
00832 if (normalized_distance > 1 && fKernelEstimator != kBox) continue;
00833
00834 if ( (*iev)->IsSignal() )
00835 pdfSumS += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
00836 else
00837 pdfSumB += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
00838 }
00839 pdfSumS = KernelNormalization( pdfSumS < 0. ? 0. : pdfSumS );
00840 pdfSumB = KernelNormalization( pdfSumB < 0. ? 0. : pdfSumB );
00841
00842 delete[] dim_normalization;
00843
00844 if (pdfSumS < 1e-20 && pdfSumB < 1e-20) return 0.5;
00845 if (pdfSumB < 1e-20) return 1.0;
00846 if (pdfSumS < 1e-20) return 0.0;
00847
00848 Float_t r = pdfSumB*fScaleB/(pdfSumS*fScaleS);
00849 return 1.0/(r + 1.0);
00850 }
00851
00852
00853 void TMVA::MethodPDERS::RKernelEstimate( const Event & event,
00854 std::vector<const BinarySearchTreeNode*>& events, Volume& v,
00855 std::vector<Float_t>* pdfSum )
00856 {
00857
00858 Double_t *dim_normalization = new Double_t[GetNvar()];
00859 for (UInt_t ivar=0; ivar<GetNvar(); ivar++)
00860 dim_normalization [ivar] = 2 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
00861
00862
00863 pdfSum->clear();
00864 Float_t pdfDiv = 0;
00865 fNRegOut = 1;
00866
00867 for (Int_t ivar = 0; ivar < fNRegOut ; ivar++)
00868 pdfSum->push_back( 0 );
00869
00870
00871 for (std::vector<const BinarySearchTreeNode*>::iterator iev = events.begin(); iev != events.end(); iev++) {
00872
00873
00874 Double_t normalized_distance = GetNormalizedDistance (event, *(*iev), dim_normalization);
00875
00876
00877
00878 if (normalized_distance > 1 && fKernelEstimator != kBox) continue;
00879
00880 for (Int_t ivar = 0; ivar < fNRegOut ; ivar++) {
00881 pdfSum->at(ivar) += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight() * (*iev)->GetTargets()[ivar];
00882 pdfDiv += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
00883 }
00884 }
00885
00886 delete[] dim_normalization;
00887
00888 if (pdfDiv == 0)
00889 return;
00890
00891 for (Int_t ivar = 0; ivar < fNRegOut ; ivar++)
00892 pdfSum->at(ivar) /= pdfDiv;
00893
00894 return;
00895 }
00896
00897
00898 Double_t TMVA::MethodPDERS::ApplyKernelFunction (Double_t normalized_distance)
00899 {
00900
00901
00902 switch (fKernelEstimator) {
00903 case kBox:
00904 case kSphere:
00905 return 1;
00906 break;
00907 case kTeepee:
00908 return (1 - normalized_distance);
00909 break;
00910 case kGauss:
00911 return TMath::Gaus( normalized_distance, 0, fGaussSigmaNorm, kFALSE);
00912 break;
00913 case kSinc3:
00914 case kSinc5:
00915 case kSinc7:
00916 case kSinc9:
00917 case kSinc11: {
00918 Double_t side_crossings = 2 + ((int) fKernelEstimator) - ((int) kSinc3);
00919 return NormSinc (side_crossings * normalized_distance);
00920 }
00921 break;
00922 case kLanczos2:
00923 return LanczosFilter (2, normalized_distance);
00924 break;
00925 case kLanczos3:
00926 return LanczosFilter (3, normalized_distance);
00927 break;
00928 case kLanczos5:
00929 return LanczosFilter (5, normalized_distance);
00930 break;
00931 case kLanczos8:
00932 return LanczosFilter (8, normalized_distance);
00933 break;
00934 case kTrim: {
00935 Double_t x = normalized_distance / fMax_distance;
00936 x = 1 - x*x*x;
00937 return x*x*x;
00938 }
00939 break;
00940 default:
00941 Log() << kFATAL << "Kernel estimation function unsupported. Enumerator is " << fKernelEstimator << Endl;
00942 break;
00943 }
00944
00945 return 0;
00946 }
00947
00948
Double_t TMVA::MethodPDERS::KernelNormalization (Double_t pdf)
{
   // Multiply the kernel sum by a kernel-dependent normalisation factor.
   // The factor is cached in a function-local static so that it is computed
   // only once per job.
   //
   // NOTE(review): 'ret' is initialised to 1.0, so the early return below is
   // ALWAYS taken and the switch never executes - the kernel-specific factor
   // is effectively never applied. For classification this cancels in the
   // signal/background ratio taken by CKernelEstimate, but confirm before
   // relying on absolute densities. Also note that a static cache is shared
   // across all MethodPDERS instances and kernel choices.
   static Double_t ret = 1.0;

   if (ret != 0.0) return ret*pdf;

   // normalisation factors for the various kernels (dead code, see above)
   switch (fKernelEstimator) {
   case kBox:
   case kSphere:
      ret = 1.;
      break;
   case kTeepee:
      ret = (GetNvar() * (GetNvar() + 1) * TMath::Gamma (((Double_t) GetNvar()) / 2.)) /
         ( TMath::Power (2., (Double_t) GetNvar() + 1) * TMath::Power (TMath::Pi(), ((Double_t) GetNvar()) / 2.));
      break;
   case kGauss:
      // classical Gauss normalisation in GetNvar() dimensions
      ret = 1. / TMath::Power ( 2 * TMath::Pi() * fGaussSigmaNorm * fGaussSigmaNorm, ((Double_t) GetNvar()) / 2.);
      break;
   case kSinc3:
   case kSinc5:
   case kSinc7:
   case kSinc9:
   case kSinc11:
   case kLanczos2:
   case kLanczos3:
   case kLanczos5:
   case kLanczos8:
      // sinc and Lanczos share the same approximate normalisation
      ret = 1 / TMath::Power ( 2., (Double_t) GetNvar() );
      break;
   default:
      Log() << kFATAL << "Kernel estimation function unsupported. Enumerator is " << fKernelEstimator << Endl;
   }

   // hyperspherical volume correction
   ret *= ( TMath::Power (2., GetNvar()) * TMath::Gamma (1 + (((Double_t) GetNvar()) / 2.)) ) /
      TMath::Power (TMath::Pi(), ((Double_t) GetNvar()) / 2.);

   return ret*pdf;
}
00996
00997
00998 Double_t TMVA::MethodPDERS::GetNormalizedDistance ( const Event &base_event,
00999 const BinarySearchTreeNode &sample_event,
01000 Double_t *dim_normalization)
01001 {
01002
01003 Double_t ret=0;
01004 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
01005 Double_t dist = dim_normalization[ivar] * (sample_event.GetEventV()[ivar] - base_event.GetValue(ivar));
01006 ret += dist*dist;
01007 }
01008
01009 ret /= GetNvar();
01010 return TMath::Sqrt (ret);
01011 }
01012
01013
01014 Double_t TMVA::MethodPDERS::NormSinc (Double_t x)
01015 {
01016
01017 if (x < 10e-10 && x > -10e-10) {
01018 return 1;
01019 }
01020
01021 Double_t pix = TMath::Pi() * x;
01022 Double_t sinc = TMath::Sin(pix) / pix;
01023 Double_t ret;
01024
01025 if (GetNvar() % 2)
01026 ret = TMath::Power (sinc, GetNvar());
01027 else
01028 ret = TMath::Abs (sinc) * TMath::Power (sinc, GetNvar() - 1);
01029
01030 return ret;
01031 }
01032
01033
01034 Double_t TMVA::MethodPDERS::LanczosFilter (Int_t level, Double_t x)
01035 {
01036
01037 if (x < 10e-10 && x > -10e-10) {
01038 return 1;
01039 }
01040
01041 Double_t pix = TMath::Pi() * x;
01042 Double_t pixtimesn = pix * ((Double_t) level);
01043 Double_t lanczos = (TMath::Sin(pix) / pix) * (TMath::Sin(pixtimesn) / pixtimesn);
01044 Double_t ret;
01045
01046 if (GetNvar() % 2) ret = TMath::Power (lanczos, GetNvar());
01047 else ret = TMath::Abs (lanczos) * TMath::Power (lanczos, GetNvar() - 1);
01048
01049 return ret;
01050 }
01051
01052
01053 Float_t TMVA::MethodPDERS::GetError( Float_t countS, Float_t countB,
01054 Float_t sumW2S, Float_t sumW2B ) const
01055 {
01056
01057
01058 Float_t c = fScaleB/fScaleS;
01059 Float_t d = countS + c*countB; d *= d;
01060
01061 if (d < 1e-10) return 1;
01062
01063 Float_t f = c*c/d/d;
01064 Float_t err = f*countB*countB*sumW2S + f*countS*countS*sumW2B;
01065
01066 if (err < 1e-10) return 1;
01067
01068 return sqrt(err);
01069 }
01070
01071
01072 void TMVA::MethodPDERS::AddWeightsXMLTo( void* parent ) const
01073 {
01074
01075 void* wght = gTools().AddChild(parent, "Weights");
01076 if (fBinaryTree)
01077 fBinaryTree->AddXMLTo(wght);
01078 else
01079 Log() << kFATAL << "Signal and background binary search tree not available" << Endl;
01080
01081 }
01082
01083
01084 void TMVA::MethodPDERS::ReadWeightsFromXML( void* wghtnode)
01085 {
01086 if (NULL != fBinaryTree) delete fBinaryTree;
01087 void* treenode = gTools().GetChild(wghtnode);
01088 fBinaryTree = TMVA::BinarySearchTree::CreateFromXML(treenode);
01089 if(!fBinaryTree)
01090 Log() << kFATAL << "Could not create BinarySearchTree from XML" << Endl;
01091 if(!fBinaryTree)
01092 Log() << kFATAL << "Could not create BinarySearchTree from XML" << Endl;
01093 fBinaryTree->SetPeriode( GetNvar() );
01094 fBinaryTree->CalcStatistics();
01095 fBinaryTree->CountNodes();
01096 fScaleS = 1.0/fBinaryTree->GetSumOfWeights( Types::kSignal );
01097 fScaleB = 1.0/fBinaryTree->GetSumOfWeights( Types::kBackground );
01098 Log() << kINFO << "signal and background scales: " << fScaleS << " " << fScaleB << Endl;
01099 CalcAverages();
01100 SetVolumeElement();
01101 fInitializedVolumeEle = kTRUE;
01102 }
01103
01104
01105 void TMVA::MethodPDERS::ReadWeightsFromStream( istream& istr)
01106 {
01107
01108 if (NULL != fBinaryTree) delete fBinaryTree;
01109
01110 fBinaryTree = new BinarySearchTree();
01111
01112 istr >> *fBinaryTree;
01113
01114 fBinaryTree->SetPeriode( GetNvar() );
01115
01116 fBinaryTree->CalcStatistics();
01117
01118 fBinaryTree->CountNodes();
01119
01120
01121 fScaleS = 1.0/fBinaryTree->GetSumOfWeights( Types::kSignal );
01122 fScaleB = 1.0/fBinaryTree->GetSumOfWeights( Types::kBackground );
01123
01124 Log() << kINFO << "signal and background scales: " << fScaleS << " " << fScaleB << Endl;
01125
01126 CalcAverages();
01127
01128 SetVolumeElement();
01129
01130 fInitializedVolumeEle = kTRUE;
01131 }
01132
01133
void TMVA::MethodPDERS::WriteWeightsToStream( TFile& ) const
{
   // no-op: persistence is handled via the XML interface (AddWeightsXMLTo)
}
01138
01139
void TMVA::MethodPDERS::ReadWeightsFromStream( TFile& )
{
   // no-op: reading is handled via ReadWeightsFromXML /
   // ReadWeightsFromStream(istream&)
}
01144
01145
TMVA::MethodPDERS* TMVA::MethodPDERS::ThisPDERS( void )
{
   // static pointer to the current instance, required by the static
   // root-finder callback IGetVolumeContentForRoot
   return fgThisPDERS;
}
01151
void TMVA::MethodPDERS::UpdateThis( void )
{
   // register this instance as the target of the static root-finder callback
   fgThisPDERS = this;
}
01157
01158
void TMVA::MethodPDERS::MakeClassSpecific( std::ostream& fout, const TString& className ) const
{
   // write PDERS-specific code for the standalone C++ response class
   // (not implemented - only a stub is emitted)
   fout << " // not implemented for class: \"" << className << "\"" << std::endl;
   fout << "};" << std::endl;
}
01165
01166
void TMVA::MethodPDERS::GetHelpMessage() const
{
   // print the help message describing the method, its performance
   // characteristics and tuning hints (shown by the Factory on request)

   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "PDERS is a generalization of the projective likelihood classifier " << Endl;
   Log() << "to N dimensions, where N is the number of input variables used." << Endl;
   Log() << "In its adaptive form it is mostly equivalent to k-Nearest-Neighbor" << Endl;
   Log() << "(k-NN) methods. If the multidimensional PDF for signal and background" << Endl;
   Log() << "were known, this classifier would exploit the full information" << Endl;
   Log() << "contained in the input variables, and would hence be optimal. In " << Endl;
   Log() << "practice however, huge training samples are necessary to sufficiently " << Endl;
   Log() << "populate the multidimensional phase space. " << Endl;
   Log() << Endl;
   Log() << "The simplest implementation of PDERS counts the number of signal" << Endl;
   Log() << "and background events in the vicinity of a test event, and returns" << Endl;
   Log() << "a weight according to the majority species of the neighboring events." << Endl;
   Log() << "A more involved version of PDERS (selected by the option \"KernelEstimator\")" << Endl;
   Log() << "uses Kernel estimation methods to approximate the shape of the PDF." << Endl;
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "PDERS can be very powerful in case of strongly non-linear problems, " << Endl;
   Log() << "e.g., distinct islands of signal and background regions. Because of " << Endl;
   Log() << "the exponential growth of the phase space, it is important to restrict" << Endl;
   Log() << "the number of input variables (dimension) to the strictly necessary." << Endl;
   Log() << Endl;
   Log() << "Note that PDERS is a slowly responding classifier. Moreover, the necessity" << Endl;
   Log() << "to store the entire binary tree in memory, to avoid accessing virtual " << Endl;
   Log() << "memory, limits the number of training events that can effectively be " << Endl;
   Log() << "used to model the multidimensional PDF." << Endl;
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "If the PDERS response is found too slow when using the adaptive volume " << Endl;
   Log() << "size (option \"VolumeRangeMode=Adaptive\"), it might be found beneficial" << Endl;
   Log() << "to reduce the number of events required in the volume, and/or to enlarge" << Endl;
   Log() << "the allowed range (\"NeventsMin/Max\"). PDERS is relatively insensitive" << Endl;
   Log() << "to the width (\"GaussSigma\") of the Gaussian kernel (if used)." << Endl;
}