MethodPDERS.cxx

Go to the documentation of this file.
00001 // @(#)root/tmva $Id: MethodPDERS.cxx 36966 2010-11-26 09:50:13Z evt $
00002 // Author: Andreas Hoecker, Yair Mahalalel, Joerg Stelzer, Helge Voss, Kai Voss
00003 
00004 /***********************************************************************************
00005  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis        *
00006  * Package: TMVA                                                                   *
00007  * Class  : MethodPDERS                                                            *
00008  * Web    : http://tmva.sourceforge.net                                            *
00009  *                                                                                 *
00010  * Description:                                                                    *
00011  *      Implementation                                                             *
00012  *                                                                                 *
00013  * Authors (alphabetical):                                                         *
00014  *      Krzysztof Danielowski <danielow@cern.ch>       - IFJ PAN & AGH, Poland     *
00015  *      Andreas Hoecker       <Andreas.Hocker@cern.ch> - CERN, Switzerland         *
00016  *      Kamil Kraszewski      <kalq@cern.ch>           - IFJ PAN & UJ, Poland      *
00017  *      Maciej Kruk           <mkruk@cern.ch>          - IFJ PAN & AGH, Poland     *
00018  *      Yair Mahalalel        <Yair.Mahalalel@cern.ch> - CERN, Switzerland         *
00019  *      Helge Voss            <Helge.Voss@cern.ch>     - MPI-K Heidelberg, Germany *
00020  *      Kai Voss              <Kai.Voss@cern.ch>       - U. of Victoria, Canada    *
00021  *                                                                                 *
00022  * Copyright (c) 2005:                                                             *
00023  *      CERN, Switzerland                                                          *
00024  *      U. of Victoria, Canada                                                     *
00025  *      MPI-K Heidelberg, Germany                                                  *
00026  *                                                                                 *
00027  * Redistribution and use in source and binary forms, with or without              *
00028  * modification, are permitted according to the terms listed in LICENSE            *
00029  * (http://tmva.sourceforge.net/LICENSE)                                           *
00030  ***********************************************************************************/
00031 
00032 //_______________________________________________________________________
00033 // Begin_Html
00034 /*
00035   This is a generalization of the above Likelihood methods to <i>N</i><sub>var</sub>
00036   dimensions, where <i>N</i><sub>var</sub> is the number of input variables
00037   used in the MVA. If the multi-dimensional probability density functions
00038   (PDFs) for signal and background were known, this method contains the entire
00039   physical information, and is therefore optimal. Usually, kernel estimation
00040   methods are used to approximate the PDFs using the events from the
00041   training sample. <br><p></p>
00042 
00043   A very simple probability density estimator (PDE) has been suggested
00044   in <a href="http://arxiv.org/abs/hep-ex/0211019">hep-ex/0211019</a>. The
00045   PDE for a given test event is obtained from counting the (normalized)
00046   number of signal and background (training) events that occur in the
00047   "vicinity" of the test event. The volume that describes "vicinity" is
00048   user-defined. A <a href="http://arxiv.org/abs/hep-ex/0211019">search
00049   method based on binary-trees</a> is used to effectively reduce the
00050   selection time for the range search. Three different volume definitions
00051   are optional: <br><p></p>
00052   <ul>
00053   <li><u>MinMax:</u>
00054   the volume is defined in each dimension with respect
00055   to the full variable range found in the training sample. </li>
00056   <li><u>RMS:</u>
00057   the volume is defined in each dimensions with respect
00058   to the RMS estimated from the training sample. </li>
00059   <li><u>Adaptive:</u>
00060   a volume element is defined in each dimensions with
00061   respect to the RMS estimated from the training sample. The overall
00062   scale of the volume element is then determined for each event so
00063   that the total number of events confined in the volume be within
00064   a user-defined range.</li>
00065   </ul><p></p>
00066   The adaptive range search is used by default.
00067   // End_Html
00068   */
00069 //_______________________________________________________________________
00070 
00071 #include <assert.h>
00072 #include <algorithm>
00073 
00074 #include "TFile.h"
00075 #include "TObjString.h"
00076 #include "TMath.h"
00077 
00078 #include "TMVA/ClassifierFactory.h"
00079 #include "TMVA/MethodPDERS.h"
00080 #include "TMVA/Tools.h"
00081 #include "TMVA/RootFinder.h"
00082 
00083 #define TMVA_MethodPDERS__countByHand__Debug__
00084 #undef  TMVA_MethodPDERS__countByHand__Debug__
00085 
00086 namespace TMVA {
00087    const Bool_t MethodPDERS_UseFindRoot = kFALSE;
00088 };
00089 
00090 TMVA::MethodPDERS* TMVA::MethodPDERS::fgThisPDERS = NULL;
00091 
00092 REGISTER_METHOD(PDERS)
00093 
00094 ClassImp(TMVA::MethodPDERS)
00095 
00096 //_______________________________________________________________________
00097 TMVA::MethodPDERS::MethodPDERS( const TString& jobName,
00098                                 const TString& methodTitle,
00099                                 DataSetInfo& theData,
00100                                 const TString& theOption,
00101                                 TDirectory* theTargetDir ) :
00102    MethodBase( jobName, Types::kPDERS, methodTitle, theData, theOption, theTargetDir ),
00103    fFcnCall(0),
00104    fVRangeMode(kAdaptive),
00105    fKernelEstimator(kBox),
00106    fDelta(0),
00107    fShift(0),
00108    fScaleS(0),
00109    fScaleB(0),
00110    fDeltaFrac(0),
00111    fGaussSigma(0),
00112    fGaussSigmaNorm(0),
00113    fNRegOut(0),
00114    fNEventsMin(0),
00115    fNEventsMax(0),
00116    fMaxVIterations(0),
00117    fInitialScale(0),
00118    fInitializedVolumeEle(0),
00119    fkNNMin(0),
00120    fkNNMax(0),
00121    fMax_distance(0),
00122    fPrinted(0),
00123    fNormTree(0)
00124 {
00125    // standard constructor for the PDERS method
00126 }
00127 
00128 //_______________________________________________________________________
00129 TMVA::MethodPDERS::MethodPDERS( DataSetInfo& theData,
00130                                 const TString& theWeightFile,
00131                                 TDirectory* theTargetDir ) :
00132    MethodBase( Types::kPDERS, theData, theWeightFile, theTargetDir ),
00133    fFcnCall(0),
00134    fVRangeMode(kAdaptive),
00135    fKernelEstimator(kBox),
00136    fDelta(0),
00137    fShift(0),
00138    fScaleS(0),
00139    fScaleB(0),
00140    fDeltaFrac(0),
00141    fGaussSigma(0),
00142    fGaussSigmaNorm(0),
00143    fNRegOut(0),
00144    fNEventsMin(0),
00145    fNEventsMax(0),
00146    fMaxVIterations(0),
00147    fInitialScale(0),
00148    fInitializedVolumeEle(0),
00149    fkNNMin(0),
00150    fkNNMax(0),
00151    fMax_distance(0),
00152    fPrinted(0),
00153    fNormTree(0)
00154 {
00155    // construct MethodPDERS through from file
00156 }
00157 
00158 //_______________________________________________________________________
00159 Bool_t TMVA::MethodPDERS::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ )
00160 {
00161    // PDERS can handle classification with 2 classes and regression with one or more regression-targets
00162    if (type == Types::kClassification && numberClasses == 2) return kTRUE;
00163    if (type == Types::kRegression) return kTRUE;
00164    return kFALSE;
00165 }
00166 
00167 //_______________________________________________________________________
00168 void TMVA::MethodPDERS::Init( void )
00169 {
00170    // default initialisation routine called by all constructors
00171 
00172    fBinaryTree = NULL;
00173 
00174    UpdateThis();
00175 
00176    // default options
00177    fDeltaFrac       = 3.0;
00178    fVRangeMode      = kAdaptive;
00179    fKernelEstimator = kBox;
00180 
00181    // special options for Adaptive mode
00182    fNEventsMin      = 100;
00183    fNEventsMax      = 200;
00184    fMaxVIterations  = 150;
00185    fInitialScale    = 0.99;
00186    fGaussSigma      = 0.1;
00187    fNormTree        = kFALSE;
00188     
00189    fkNNMin      = Int_t(fNEventsMin);
00190    fkNNMax      = Int_t(fNEventsMax);
00191 
00192    fInitializedVolumeEle = kFALSE;
00193    fAverageRMS.clear();
00194 
00195    // the minimum requirement to declare an event signal-like
00196    SetSignalReferenceCut( 0.0 );
00197 }
00198 
00199 //_______________________________________________________________________
00200 TMVA::MethodPDERS::~MethodPDERS( void )
00201 {
00202    // destructor
00203    if (fDelta) delete fDelta;
00204    if (fShift) delete fShift;
00205 
00206    if (NULL != fBinaryTree) delete fBinaryTree;
00207 }
00208 
00209 //_______________________________________________________________________
00210 void TMVA::MethodPDERS::DeclareOptions() 
00211 {
00212    // define the options (their key words) that can be set in the option string 
00213    // know options:
00214    // VolumeRangeMode   <string>  Method to determine volume range
00215    //    available values are:        MinMax 
00216    //                                 Unscaled
00217    //                                 RMS
00218    //                                 kNN
00219    //                                 Adaptive <default>
00220    //
00221    // KernelEstimator   <string>  Kernel estimation function
00222    //    available values are:        Box <default>
00223    //                                 Sphere
00224    //                                 Teepee
00225    //                                 Gauss
00226    //                                 Sinc3
00227    //                                 Sinc5
00228    //                                 Sinc7
00229    //                                 Sinc9
00230    //                                 Sinc11
00231    //                                 Lanczos2
00232    //                                 Lanczos3
00233    //                                 Lanczos5
00234    //                                 Lanczos8
00235    //                                 Trim
00236    //
00237    // DeltaFrac         <float>   Ratio of #EventsMin/#EventsMax for MinMax and RMS volume range
00238    // NEventsMin        <int>     Minimum number of events for adaptive volume range             
00239    // NEventsMax        <int>     Maximum number of events for adaptive volume range
00240    // MaxVIterations    <int>     Maximum number of iterations for adaptive volume range
00241    // InitialScale      <float>   Initial scale for adaptive volume range           
00242    // GaussSigma        <float>   Width with respect to the volume size of Gaussian kernel estimator
00243    DeclareOptionRef(fVolumeRange="Adaptive", "VolumeRangeMode", "Method to determine volume size");
00244    AddPreDefVal(TString("Unscaled"));
00245    AddPreDefVal(TString("MinMax"));
00246    AddPreDefVal(TString("RMS"));
00247    AddPreDefVal(TString("Adaptive"));
00248    AddPreDefVal(TString("kNN"));
00249 
00250    DeclareOptionRef(fKernelString="Box", "KernelEstimator", "Kernel estimation function");
00251    AddPreDefVal(TString("Box"));
00252    AddPreDefVal(TString("Sphere"));
00253    AddPreDefVal(TString("Teepee"));
00254    AddPreDefVal(TString("Gauss"));
00255    AddPreDefVal(TString("Sinc3"));
00256    AddPreDefVal(TString("Sinc5"));
00257    AddPreDefVal(TString("Sinc7"));
00258    AddPreDefVal(TString("Sinc9"));
00259    AddPreDefVal(TString("Sinc11"));
00260    AddPreDefVal(TString("Lanczos2"));
00261    AddPreDefVal(TString("Lanczos3"));
00262    AddPreDefVal(TString("Lanczos5"));
00263    AddPreDefVal(TString("Lanczos8"));
00264    AddPreDefVal(TString("Trim"));
00265 
00266    DeclareOptionRef(fDeltaFrac     , "DeltaFrac",      "nEventsMin/Max for minmax and rms volume range");
00267    DeclareOptionRef(fNEventsMin    , "NEventsMin",     "nEventsMin for adaptive volume range");
00268    DeclareOptionRef(fNEventsMax    , "NEventsMax",     "nEventsMax for adaptive volume range");
00269    DeclareOptionRef(fMaxVIterations, "MaxVIterations", "MaxVIterations for adaptive volume range");
00270    DeclareOptionRef(fInitialScale  , "InitialScale",   "InitialScale for adaptive volume range");
00271    DeclareOptionRef(fGaussSigma    , "GaussSigma",     "Width (wrt volume size) of Gaussian kernel estimator");
00272    DeclareOptionRef(fNormTree      , "NormTree",       "Normalize binary search tree");
00273 }
00274 
00275 //_______________________________________________________________________
00276 void TMVA::MethodPDERS::ProcessOptions() 
00277 {
00278    // process the options specified by the user
00279    
00280    if (IgnoreEventsWithNegWeightsInTraining()) {
00281       Log() << kFATAL << "Mechanism to ignore events with negative weights in training not yet available for method: "
00282             << GetMethodTypeName() 
00283             << " --> please remove \"IgnoreNegWeightsInTraining\" option from booking string."
00284             << Endl;
00285    }
00286 
00287    fGaussSigmaNorm = fGaussSigma; // * TMath::Sqrt( Double_t(GetNvar()) );
00288 
00289    fVRangeMode = MethodPDERS::kUnsupported;
00290 
00291    if      (fVolumeRange == "MinMax"    ) fVRangeMode = kMinMax;
00292    else if (fVolumeRange == "RMS"       ) fVRangeMode = kRMS;
00293    else if (fVolumeRange == "Adaptive"  ) fVRangeMode = kAdaptive;
00294    else if (fVolumeRange == "Unscaled"  ) fVRangeMode = kUnscaled;
00295    else if (fVolumeRange == "kNN"   ) fVRangeMode = kkNN;
00296    else {
00297       Log() << kFATAL << "VolumeRangeMode parameter '" << fVolumeRange << "' unknown" << Endl;
00298    }
00299 
00300    if      (fKernelString == "Box"      ) fKernelEstimator = kBox;
00301    else if (fKernelString == "Sphere"   ) fKernelEstimator = kSphere;
00302    else if (fKernelString == "Teepee"   ) fKernelEstimator = kTeepee;
00303    else if (fKernelString == "Gauss"    ) fKernelEstimator = kGauss;
00304    else if (fKernelString == "Sinc3"    ) fKernelEstimator = kSinc3;
00305    else if (fKernelString == "Sinc5"    ) fKernelEstimator = kSinc5;
00306    else if (fKernelString == "Sinc7"    ) fKernelEstimator = kSinc7;
00307    else if (fKernelString == "Sinc9"    ) fKernelEstimator = kSinc9;
00308    else if (fKernelString == "Sinc11"   ) fKernelEstimator = kSinc11;
00309    else if (fKernelString == "Lanczos2" ) fKernelEstimator = kLanczos2;
00310    else if (fKernelString == "Lanczos3" ) fKernelEstimator = kLanczos3;
00311    else if (fKernelString == "Lanczos5" ) fKernelEstimator = kLanczos5;
00312    else if (fKernelString == "Lanczos8" ) fKernelEstimator = kLanczos8;
00313    else if (fKernelString == "Trim"     ) fKernelEstimator = kTrim;
00314    else {
00315       Log() << kFATAL << "KernelEstimator parameter '" << fKernelString << "' unknown" << Endl;
00316    }
00317 
00318    // TODO: Add parameter validation
00319 
00320    Log() << kVERBOSE << "interpreted option string: vRangeMethod: '"
00321            << (const char*)((fVRangeMode == kMinMax) ? "MinMax" :
00322                             (fVRangeMode == kUnscaled) ? "Unscaled" :
00323                             (fVRangeMode == kRMS   ) ? "RMS" : "Adaptive") << "'" << Endl;
00324    if (fVRangeMode == kMinMax || fVRangeMode == kRMS)
00325       Log() << kVERBOSE << "deltaFrac: " << fDeltaFrac << Endl;
00326    else
00327       Log() << kVERBOSE << "nEventsMin/Max, maxVIterations, initialScale: "
00328               << fNEventsMin << "  " << fNEventsMax
00329               << "  " << fMaxVIterations << "  " << fInitialScale << Endl;
00330    Log() << kVERBOSE << "KernelEstimator = " << fKernelString << Endl;
00331 }
00332 
00333 //_______________________________________________________________________
00334 void TMVA::MethodPDERS::Train( void )
00335 {
00336    // this is a dummy training: the preparation work to do is the construction
00337    // of the binary tree as a pointer chain. It is easier to directly save the
00338    // trainingTree in the weight file, and to rebuild the binary tree in the
00339    // test phase from scratch
00340 
00341    if (IsNormalised()) Log() << kFATAL << "\"Normalise\" option cannot be used with PDERS; " 
00342                                << "please remove the option from the configuration string, or "
00343                                << "use \"!Normalise\""
00344                                << Endl;
00345 
00346    CreateBinarySearchTree( Types::kTraining );
00347     
00348    CalcAverages();
00349    SetVolumeElement();
00350 
00351    fInitializedVolumeEle = kTRUE;
00352 }
00353 
00354 //_______________________________________________________________________
00355 Double_t TMVA::MethodPDERS::GetMvaValue( Double_t* err, Double_t* errUpper )
00356 {
00357    // init the size of a volume element using a defined fraction of the
00358    // volume containing the entire events
00359    if (fInitializedVolumeEle == kFALSE) {
00360       fInitializedVolumeEle = kTRUE;
00361 
00362       // binary trees must exist
00363       assert( fBinaryTree );
00364 
00365       CalcAverages();
00366       SetVolumeElement();
00367    }
00368 
00369    // cannot determine error
00370    NoErrorCalc(err, errUpper);
00371 
00372    return this->CRScalc( *GetEvent() );
00373 }
00374 
00375 //_______________________________________________________________________
00376 const std::vector< Float_t >& TMVA::MethodPDERS::GetRegressionValues()
00377 {
00378    if (fRegressionReturnVal == 0) fRegressionReturnVal = new std::vector<Float_t>;
00379    fRegressionReturnVal->clear();
00380    // init the size of a volume element using a defined fraction of the
00381    // volume containing the entire events
00382    if (fInitializedVolumeEle == kFALSE) {
00383       fInitializedVolumeEle = kTRUE;
00384 
00385       // binary trees must exist
00386       assert( fBinaryTree );
00387 
00388       CalcAverages();
00389 
00390       SetVolumeElement();
00391    }
00392 
00393    const Event* ev = GetEvent();
00394    this->RRScalc( *ev, fRegressionReturnVal );
00395 
00396    Event * evT = new Event(*ev);
00397    UInt_t ivar = 0;
00398    for (std::vector<Float_t>::iterator it = fRegressionReturnVal->begin(); it != fRegressionReturnVal->end(); it++ ) {
00399       evT->SetTarget(ivar,(*it));
00400       ivar++;
00401    }
00402 
00403    const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
00404    fRegressionReturnVal->clear();
00405 
00406    for (ivar = 0; ivar<evT2->GetNTargets(); ivar++) {
00407       fRegressionReturnVal->push_back(evT2->GetTarget(ivar));
00408    }
00409 
00410    delete evT;
00411 
00412 
00413    return (*fRegressionReturnVal);
00414 }
00415 
00416 //_______________________________________________________________________
00417 void TMVA::MethodPDERS::CalcAverages()
00418 {
00419    // compute also average RMS values required for adaptive Gaussian
00420    if (fVRangeMode == kAdaptive || fVRangeMode == kRMS || fVRangeMode == kkNN  ) {
00421       fAverageRMS.clear();
00422       fBinaryTree->CalcStatistics();
00423 
00424       for (UInt_t ivar=0; ivar<GetNvar(); ivar++) { 
00425          if (!DoRegression()){ //why there are separate rms for signal and background?
00426             Float_t rmsS = fBinaryTree->RMS(Types::kSignal, ivar);
00427             Float_t rmsB = fBinaryTree->RMS(Types::kBackground, ivar);
00428             fAverageRMS.push_back( (rmsS + rmsB)*0.5 );
00429          } else {
00430             Float_t rms = fBinaryTree->RMS( ivar );
00431             fAverageRMS.push_back( rms );
00432          }
00433       }
00434    }
00435 }   
00436 
00437 //_______________________________________________________________________
00438 void TMVA::MethodPDERS::CreateBinarySearchTree( Types::ETreeType type )
00439 {
00440    // create binary search trees for signal and background
00441    if (NULL != fBinaryTree) delete fBinaryTree;
00442    fBinaryTree = new BinarySearchTree();
00443    if (fNormTree) {
00444       fBinaryTree->SetNormalize( kTRUE );
00445    }
00446 
00447    fBinaryTree->Fill( GetEventCollection(type) );
00448 
00449    if (fNormTree) {
00450       fBinaryTree->NormalizeTree();
00451    }
00452 
00453    if (!DoRegression()) {
00454       // these are the signal and background scales for the weights
00455       fScaleS = 1.0/fBinaryTree->GetSumOfWeights( Types::kSignal );
00456       fScaleB = 1.0/fBinaryTree->GetSumOfWeights( Types::kBackground );
00457 
00458       Log() << kVERBOSE << "Signal and background scales: " << fScaleS << " " << fScaleB << Endl;
00459    }
00460 }
00461 
00462 //_______________________________________________________________________
00463 void TMVA::MethodPDERS::SetVolumeElement( void ) {
00464    // defines volume dimensions
00465 
00466    if (GetNvar()==0) {
00467       Log() << kFATAL << "GetNvar() == 0" << Endl;
00468       return;
00469    }
00470 
00471    // init relative scales
00472    fkNNMin      = Int_t(fNEventsMin);
00473    fkNNMax      = Int_t(fNEventsMax);   
00474 
00475    if (fDelta) delete fDelta;
00476    if (fShift) delete fShift;
00477    fDelta = new std::vector<Float_t>( GetNvar() );
00478    fShift = new std::vector<Float_t>( GetNvar() );
00479 
00480    for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00481       switch (fVRangeMode) {
00482          
00483       case kRMS:
00484       case kkNN:
00485       case kAdaptive:
00486          // sanity check
00487          if (fAverageRMS.size() != GetNvar())
00488             Log() << kFATAL << "<SetVolumeElement> RMS not computed: " << fAverageRMS.size() << Endl;
00489          (*fDelta)[ivar] = fAverageRMS[ivar]*fDeltaFrac;
00490          Log() << kVERBOSE << "delta of var[" << (*fInputVars)[ivar]
00491                  << "\t]: " << fAverageRMS[ivar]
00492                  << "\t  |  comp with |max - min|: " << (GetXmax( ivar ) - GetXmin( ivar ))
00493                  << Endl;
00494          break;
00495       case kMinMax:
00496          (*fDelta)[ivar] = (GetXmax( ivar ) - GetXmin( ivar ))*fDeltaFrac;
00497          break;
00498       case kUnscaled:
00499          (*fDelta)[ivar] = fDeltaFrac;
00500          break;
00501       default:
00502          Log() << kFATAL << "<SetVolumeElement> unknown range-set mode: "
00503                  << fVRangeMode << Endl;
00504       }
00505       (*fShift)[ivar] = 0.5; // volume is centered around test value
00506    }
00507 
00508 }
00509 
00510 //_______________________________________________________________________
00511 Double_t TMVA::MethodPDERS::IGetVolumeContentForRoot( Double_t scale )
00512 {
00513    // Interface to RootFinder
00514    return ThisPDERS()->GetVolumeContentForRoot( scale );
00515 }
00516 
00517 //_______________________________________________________________________
00518 Double_t TMVA::MethodPDERS::GetVolumeContentForRoot( Double_t scale )
00519 {
00520    // count number of events in rescaled volume
00521 
00522    Volume v( *fHelpVolume );
00523    v.ScaleInterval( scale );
00524 
00525    Double_t count = GetBinaryTree()->SearchVolume( &v );
00526 
00527    v.Delete();
00528    return count;
00529 }
00530 
00531 void TMVA::MethodPDERS::GetSample( const Event& e,
00532                                    std::vector<const BinarySearchTreeNode*>& events,
00533                                    Volume *volume )
00534 {
00535    Float_t count = 0;
00536 
00537    // -------------------------------------------------------------------------
00538    //
00539    // ==== test of volume search =====
00540    //
00541    // #define TMVA::MethodPDERS__countByHand__Debug__
00542 
00543 #ifdef  TMVA_MethodPDERS__countByHand__Debug__
00544 
00545    // starting values
00546    count = fBinaryTree->SearchVolume( volume );
00547 
00548    Int_t iS = 0, iB = 0;
00549    UInt_t nvar = GetNvar();
00550    for (UInt_t ievt_=0; ievt_<Data()->GetNTrainingEvents(); ievt_++) {
00551       const Event * ev = GetTrainingEvent(ievt_);
00552       Bool_t inV;
00553       for (Int_t ivar=0; ivar<nvar; ivar++) {
00554          Float_t x = ev->GetValue(ivar);
00555          inV = (x > (*volume->Lower)[ivar] && x <= (*volume->Upper)[ivar]);
00556          if (!inV) break;
00557       }
00558       if (inV) {
00559          in++;
00560       }
00561    }
00562    Log() << kVERBOSE << "debug: my test: " << in << Endl;// <- ***********tree
00563    Log() << kVERBOSE << "debug: binTree: " << count << Endl << Endl;// <- ***********tree
00564 
00565 #endif
00566 
00567    // -------------------------------------------------------------------------
00568 
00569    if (fVRangeMode == kRMS || fVRangeMode == kMinMax || fVRangeMode == kUnscaled) { // Constant volume
00570 
00571       std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
00572       for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetValue(ivar);
00573       std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
00574       for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00575          (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
00576          (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
00577       }
00578       Volume* svolume = new Volume( lb, ub );
00579       // starting values
00580 
00581       fBinaryTree->SearchVolume( svolume, &events );
00582    }
00583    else if (fVRangeMode == kAdaptive) {      // adaptive volume
00584 
00585       // -----------------------------------------------------------------------
00586 
00587       // TODO: optimize, perhaps multi stage with broadening limits,
00588       // or a different root finding method entirely,
00589       if (MethodPDERS_UseFindRoot) {
00590 
00591          // that won't need to search through large volume, where the bottle neck probably is
00592 
00593          fHelpVolume = volume;
00594 
00595          UpdateThis(); // necessary update of static pointer
00596          RootFinder rootFinder( &IGetVolumeContentForRoot, 0.01, 50, 200, 10 );
00597          Double_t scale = rootFinder.Root( (fNEventsMin + fNEventsMax)/2.0 );
00598 
00599          volume->ScaleInterval( scale );
00600 
00601          fBinaryTree->SearchVolume( volume, &events );
00602 
00603          fHelpVolume = NULL;
00604       }
00605       // -----------------------------------------------------------------------
00606       else {
00607 
00608          // starting values
00609          count = fBinaryTree->SearchVolume( volume );
00610 
00611          Float_t nEventsO = count;
00612          Int_t i_=0;
00613 
00614          while (nEventsO < fNEventsMin) { // this isn't a sain start... try again
00615             volume->ScaleInterval( 1.15 );
00616             count = fBinaryTree->SearchVolume( volume );
00617             nEventsO = count;
00618             i_++;
00619          }
00620          if (i_ > 50) Log() << kWARNING << "warning in event: " << e
00621                             << ": adaptive volume pre-adjustment reached "
00622                             << ">50 iterations in while loop (" << i_ << ")" << Endl;
00623 
00624          Float_t nEventsN    = nEventsO;
00625          Float_t nEventsE    = 0.5*(fNEventsMin + fNEventsMax);
00626          Float_t scaleO      = 1.0;
00627          Float_t scaleN      = fInitialScale;
00628          Float_t scale       = scaleN;
00629          Float_t scaleBest   = scaleN;
00630          Float_t nEventsBest = nEventsN;
00631 
00632          for (Int_t ic=1; ic<fMaxVIterations; ic++) {
00633             if (nEventsN < fNEventsMin || nEventsN > fNEventsMax) {
00634 
00635                // search for events in rescaled volume
00636                Volume* v = new Volume( *volume );
00637                v->ScaleInterval( scale );
00638                nEventsN  = fBinaryTree->SearchVolume( v );
00639 
00640                // determine next iteration (linear approximation)
00641                if (nEventsN > 1 && nEventsN - nEventsO != 0)
00642                   if (scaleN - scaleO != 0)
00643                      scale += (scaleN - scaleO)/(nEventsN - nEventsO)*(nEventsE - nEventsN);
00644                   else
00645                      scale += (-0.01); // should not actually occur...
00646                else
00647                   scale += 0.5; // use much larger volume
00648 
00649                // save old scale
00650                scaleN   = scale;
00651 
00652                // take if better (don't accept it if too small number of events)
00653                if (TMath::Abs(nEventsN - nEventsE) < TMath::Abs(nEventsBest - nEventsE) &&
00654                    (nEventsN >= fNEventsMin || nEventsBest < nEventsN)) {
00655                   nEventsBest = nEventsN;
00656                   scaleBest   = scale;
00657                }
00658 
00659                v->Delete();
00660                delete v;
00661             }
00662             else break;
00663          }
00664 
00665          // last sanity check
00666          nEventsN = nEventsBest;
00667          // include "1" to cover float precision
00668          if (nEventsN < fNEventsMin-1 || nEventsN > fNEventsMax+1)
00669             Log() << kWARNING << "warning in event " << e
00670                   << ": adaptive volume adjustment reached "
00671                   << "max. #iterations (" << fMaxVIterations << ")"
00672                   << "[ nEvents: " << nEventsN << "  " << fNEventsMin << "  " << fNEventsMax << "]"
00673                   << Endl;
00674 
00675          volume->ScaleInterval( scaleBest );
00676          fBinaryTree->SearchVolume( volume, &events );
00677       }
00678 
00679       // end of adaptive method
00680 
00681    } else if (fVRangeMode == kkNN) {
00682       Volume v(*volume);
00683 
00684       events.clear();
00685       // check number of signals in begining volume
00686       Int_t kNNcount = fBinaryTree->SearchVolumeWithMaxLimit( &v, &events, fkNNMax+1 );
00687       //if this number is too large return fkNNMax+1
00688 
00689       Int_t t_times = 0;  // number of iterations
00690 
00691       while ( !(kNNcount >= fkNNMin && kNNcount <= fkNNMax) ) {
00692          if (kNNcount < fkNNMin) {         //if we have too less points
00693             Float_t scale = 2;      //better scale needed
00694             volume->ScaleInterval( scale );
00695          }
00696          else if (kNNcount > fkNNMax) {    //if we have too many points
00697             Float_t scale = 0.1;      //better scale needed
00698             volume->ScaleInterval( scale );
00699          }
00700          events.clear();
00701 
00702          v = *volume ;
00703 
00704          kNNcount = fBinaryTree->SearchVolumeWithMaxLimit( &v, &events, fkNNMax+1 );  //new search
00705 
00706          t_times++;
00707 
00708          if (t_times == fMaxVIterations) {
00709             Log() << kWARNING << "warining in event" << e
00710                   << ": kNN volume adjustment reached "
00711                   << "max. #iterations (" << fMaxVIterations << ")"
00712                   << "[ kNN: " << fkNNMin << " " << fkNNMax << Endl;
00713             break;
00714          }
00715       }
00716 
00717       //vector to normalize distance in each dimension
00718       Double_t *dim_normalization = new Double_t[GetNvar()];
00719       for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00720          dim_normalization [ivar] = 1.0 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
00721       }
00722 
00723       std::vector<const BinarySearchTreeNode*> tempVector;    // temporary vector for events
00724 
00725       if (kNNcount >= fkNNMin) {
00726          std::vector<Double_t> *distances = new std::vector<Double_t>( kNNcount );
00727 
00728          //counting the distance for each event
00729          for (Int_t j=0;j< Int_t(events.size()) ;j++)
00730             (*distances)[j] = GetNormalizedDistance ( e, *events[j], dim_normalization );
00731 
00732          //counting the fkNNMin-th element
00733          std::vector<Double_t>::iterator wsk = distances->begin();
00734          for (Int_t j=0;j<fkNNMin-1;j++) wsk++;
00735          std::nth_element( distances->begin(), wsk, distances->end() );
00736 
00737          //getting all elements that are closer than fkNNMin-th element
00738          //signals
00739          for (Int_t j=0;j<Int_t(events.size());j++) {
00740             Double_t dist = GetNormalizedDistance( e, *events[j], dim_normalization );
00741 
00742             if (dist <= (*distances)[fkNNMin-1])
00743                tempVector.push_back( events[j] );
00744          }
00745          fMax_distance = (*distances)[fkNNMin-1];
00746          delete distances;
00747       }
00748       delete[] dim_normalization;
00749       events = tempVector;
00750 
00751    } else {
00752 
00753       // troubles ahead...
00754       Log() << kFATAL << "<GetSample> unknown RangeMode: " << fVRangeMode << Endl;
00755    }
00756    // -----------------------------------------------------------------------
00757 }
00758 
00759 //_______________________________________________________________________
00760 Double_t TMVA::MethodPDERS::CRScalc( const Event& e )
00761 {
00762    std::vector<const BinarySearchTreeNode*> events;
00763 
00764    // computes event weight by counting number of signal and background
00765    // events (of reference sample) that are found within given volume
00766    // defined by the event
00767    std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
00768    for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetValue(ivar);
00769 
00770    std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
00771    for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00772       (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
00773       (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
00774    }
00775 
00776    Volume *volume = new Volume( lb, ub );
00777    
00778    GetSample( e, events, volume );
00779    Double_t count = CKernelEstimate( e, events, *volume );
00780    delete volume;
00781    delete lb;
00782    delete ub;
00783 
00784    return count;
00785 }
00786 
00787 //_______________________________________________________________________
00788 void TMVA::MethodPDERS::RRScalc( const Event& e, std::vector<Float_t>* count )
00789 {
00790    std::vector<const BinarySearchTreeNode*> events;
00791 
00792    // computes event weight by counting number of signal and background
00793    // events (of reference sample) that are found within given volume
00794    // defined by the event
00795    std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
00796    for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetValue(ivar);
00797 
00798    std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
00799    for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00800       (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
00801       (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
00802    }
00803    Volume *volume = new Volume( lb, ub );
00804 
00805    GetSample( e, events, volume );
00806    RKernelEstimate( e, events, *volume, count );
00807 
00808    delete volume;
00809 
00810    return;
00811 }
00812 //_______________________________________________________________________
00813 Double_t TMVA::MethodPDERS::CKernelEstimate( const Event & event,
00814                                              std::vector<const BinarySearchTreeNode*>& events, Volume& v )
00815 {
00816    // normalization factors so we can work with radius 1 hyperspheres
00817    Double_t *dim_normalization = new Double_t[GetNvar()];
00818    for (UInt_t ivar=0; ivar<GetNvar(); ivar++)
00819       dim_normalization [ivar] = 2 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
00820 
00821    Double_t pdfSumS = 0;
00822    Double_t pdfSumB = 0;
00823 
00824    // Iteration over sample points
00825    for (std::vector<const BinarySearchTreeNode*>::iterator iev = events.begin(); iev != events.end(); iev++) {
00826 
00827    // First switch to the one dimensional distance
00828    Double_t normalized_distance = GetNormalizedDistance (event, *(*iev), dim_normalization);
00829 
00830    // always working within the hyperelipsoid, except for when we don't
00831    // note that rejection ratio goes to 1 as nvar goes to infinity
00832    if (normalized_distance > 1 && fKernelEstimator != kBox) continue;
00833 
00834    if ( (*iev)->IsSignal() )
00835       pdfSumS += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
00836    else
00837       pdfSumB += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
00838    }
00839    pdfSumS = KernelNormalization( pdfSumS < 0. ? 0. : pdfSumS );
00840    pdfSumB = KernelNormalization( pdfSumB < 0. ? 0. : pdfSumB );
00841 
00842    delete[] dim_normalization;
00843 
00844    if (pdfSumS < 1e-20 && pdfSumB < 1e-20) return 0.5;
00845    if (pdfSumB < 1e-20) return 1.0;
00846    if (pdfSumS < 1e-20) return 0.0;
00847 
00848    Float_t r = pdfSumB*fScaleB/(pdfSumS*fScaleS);
00849    return 1.0/(r + 1.0);   // TODO: propagate errors from here
00850 }
00851 
00852 //_______________________________________________________________________
00853 void TMVA::MethodPDERS::RKernelEstimate( const Event & event,
00854                                          std::vector<const BinarySearchTreeNode*>& events, Volume& v,
00855                                          std::vector<Float_t>* pdfSum )
00856 {
00857    // normalization factors so we can work with radius 1 hyperspheres
00858    Double_t *dim_normalization = new Double_t[GetNvar()];
00859    for (UInt_t ivar=0; ivar<GetNvar(); ivar++)
00860       dim_normalization [ivar] = 2 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
00861 
00862    //   std::vector<Float_t> pdfSum;
00863    pdfSum->clear();
00864    Float_t pdfDiv = 0;
00865     fNRegOut = 1; // for now, regression is just for 1 dimension
00866 
00867    for (Int_t ivar = 0; ivar < fNRegOut ; ivar++)
00868       pdfSum->push_back( 0 );
00869 
00870    // Iteration over sample points
00871    for (std::vector<const BinarySearchTreeNode*>::iterator iev = events.begin(); iev != events.end(); iev++) {
00872 
00873       // First switch to the one dimensional distance
00874       Double_t normalized_distance = GetNormalizedDistance (event, *(*iev), dim_normalization);
00875 
00876       // always working within the hyperelipsoid, except for when we don't
00877       // note that rejection ratio goes to 1 as nvar goes to infinity
00878       if (normalized_distance > 1 && fKernelEstimator != kBox) continue;
00879 
00880       for (Int_t ivar = 0; ivar < fNRegOut ; ivar++) {
00881          pdfSum->at(ivar) += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight() * (*iev)->GetTargets()[ivar];
00882          pdfDiv           += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
00883       }
00884    }
00885 
00886    delete[] dim_normalization;
00887 
00888    if (pdfDiv == 0)
00889       return;
00890 
00891    for (Int_t ivar = 0; ivar < fNRegOut ; ivar++)
00892       pdfSum->at(ivar) /= pdfDiv;
00893 
00894    return;
00895 }
00896 
00897 //_______________________________________________________________________
00898 Double_t TMVA::MethodPDERS::ApplyKernelFunction (Double_t normalized_distance)
00899 {
00900    // from the normalized euclidean distance calculate the distance
00901    // for a certain kernel
00902    switch (fKernelEstimator) {
00903    case kBox:
00904    case kSphere:
00905       return 1;
00906       break;
00907    case kTeepee:
00908       return (1 - normalized_distance);
00909       break;
00910    case kGauss:
00911       return TMath::Gaus( normalized_distance, 0, fGaussSigmaNorm, kFALSE);
00912       break;
00913    case kSinc3:
00914    case kSinc5:
00915    case kSinc7:
00916    case kSinc9:
00917    case kSinc11: {
00918       Double_t side_crossings = 2 + ((int) fKernelEstimator) - ((int) kSinc3);
00919       return NormSinc (side_crossings * normalized_distance);
00920    }
00921       break;
00922    case kLanczos2:
00923       return LanczosFilter (2, normalized_distance);
00924       break;
00925    case kLanczos3:
00926       return LanczosFilter (3, normalized_distance);
00927       break;
00928    case kLanczos5:
00929       return LanczosFilter (5, normalized_distance);
00930       break;
00931    case kLanczos8:
00932       return LanczosFilter (8, normalized_distance);
00933       break;
00934    case kTrim: {
00935       Double_t x = normalized_distance / fMax_distance;
00936       x = 1 - x*x*x;
00937       return x*x*x;
00938    }
00939       break;
00940    default:
00941       Log() << kFATAL << "Kernel estimation function unsupported. Enumerator is " << fKernelEstimator << Endl;
00942       break;
00943    }
00944 
00945    return 0;
00946 }
00947       
00948 //_______________________________________________________________________
00949 Double_t TMVA::MethodPDERS::KernelNormalization (Double_t pdf) 
00950 {
00951    // Calculating the normalization factor only once (might need a reset at some point. 
00952    // Can the method be restarted with different params?)
00953 
00954    // Caching jammed to disable function. 
00955    // It's not really useful afterall, badly implemented and untested :-)
00956    static Double_t ret = 1.0; 
00957    
00958    if (ret != 0.0) return ret*pdf; 
00959 
00960    // We first normalize by the volume of the hypersphere.
00961    switch (fKernelEstimator) {
00962    case kBox:
00963    case kSphere:
00964       ret = 1.;
00965       break;
00966    case kTeepee:
00967       ret =   (GetNvar() * (GetNvar() + 1) * TMath::Gamma (((Double_t) GetNvar()) / 2.)) /
00968          ( TMath::Power (2., (Double_t) GetNvar() + 1) * TMath::Power (TMath::Pi(), ((Double_t) GetNvar()) / 2.));
00969       break;
00970    case kGauss:
00971       // We use full range integral here. Reasonable because of the fast function decay.
00972       ret = 1. / TMath::Power ( 2 * TMath::Pi() * fGaussSigmaNorm * fGaussSigmaNorm, ((Double_t) GetNvar()) / 2.);
00973       break;
00974    case kSinc3:
00975    case kSinc5:
00976    case kSinc7:
00977    case kSinc9:
00978    case kSinc11:
00979    case kLanczos2:
00980    case kLanczos3:
00981    case kLanczos5:
00982    case kLanczos8:
00983       // We use the full range integral here. Reasonable because the central lobe domintes it.
00984       ret = 1 / TMath::Power ( 2., (Double_t) GetNvar() );
00985       break;
00986    default:
00987       Log() << kFATAL << "Kernel estimation function unsupported. Enumerator is " << fKernelEstimator << Endl;
00988    }
00989 
00990    // Normalizing by the full volume
00991    ret *= ( TMath::Power (2., GetNvar()) * TMath::Gamma (1 + (((Double_t) GetNvar()) / 2.)) ) /
00992       TMath::Power (TMath::Pi(), ((Double_t) GetNvar()) / 2.);
00993 
00994    return ret*pdf;
00995 }
00996 
00997 //_______________________________________________________________________
00998 Double_t TMVA::MethodPDERS::GetNormalizedDistance ( const Event &base_event,
00999                                                     const BinarySearchTreeNode &sample_event,
01000                                                     Double_t *dim_normalization) 
01001 {
01002    // We use Euclidian metric here. Might not be best or most efficient.
01003    Double_t ret=0;
01004    for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
01005       Double_t dist = dim_normalization[ivar] * (sample_event.GetEventV()[ivar] - base_event.GetValue(ivar));
01006       ret += dist*dist;
01007    }
01008    // DD 26.11.2008: bugfix: division by (GetNvar()) was missing for correct normalisation
01009    ret /= GetNvar();
01010    return TMath::Sqrt (ret);
01011 }
01012 
01013 //_______________________________________________________________________
01014 Double_t TMVA::MethodPDERS::NormSinc (Double_t x)
01015 {
01016    // NormSinc
01017    if (x < 10e-10 && x > -10e-10) {
01018       return 1; // Poor man's l'Hopital
01019    }
01020 
01021    Double_t pix = TMath::Pi() * x;
01022    Double_t sinc = TMath::Sin(pix) / pix;
01023    Double_t ret;
01024 
01025    if (GetNvar() % 2)
01026       ret = TMath::Power (sinc, GetNvar());
01027    else
01028       ret = TMath::Abs (sinc) * TMath::Power (sinc, GetNvar() - 1);
01029 
01030    return ret;
01031 }
01032 
01033 //_______________________________________________________________________
01034 Double_t TMVA::MethodPDERS::LanczosFilter (Int_t level, Double_t x)
01035 {
01036    // Lanczos Filter
01037    if (x < 10e-10 && x > -10e-10) {
01038       return 1; // Poor man's l'Hopital
01039    }
01040 
01041    Double_t pix = TMath::Pi() * x;
01042    Double_t pixtimesn = pix * ((Double_t) level);
01043    Double_t lanczos = (TMath::Sin(pix) / pix) * (TMath::Sin(pixtimesn) / pixtimesn);
01044    Double_t ret;
01045 
01046    if (GetNvar() % 2) ret = TMath::Power (lanczos, GetNvar());
01047    else               ret = TMath::Abs (lanczos) * TMath::Power (lanczos, GetNvar() - 1);
01048 
01049    return ret;
01050 }
01051 
01052 //_______________________________________________________________________
01053 Float_t TMVA::MethodPDERS::GetError( Float_t countS, Float_t countB,
01054                                      Float_t sumW2S, Float_t sumW2B ) const
01055 {
01056    // statistical error estimate for RS estimator
01057 
01058    Float_t c = fScaleB/fScaleS;
01059    Float_t d = countS + c*countB; d *= d;
01060 
01061    if (d < 1e-10) return 1; // Error is zero because of B = S = 0
01062 
01063    Float_t f = c*c/d/d;
01064    Float_t err = f*countB*countB*sumW2S + f*countS*countS*sumW2B;
01065 
01066    if (err < 1e-10) return 1; // Error is zero because of B or S = 0
01067 
01068    return sqrt(err);
01069 }
01070 
01071 //_______________________________________________________________________
01072 void TMVA::MethodPDERS::AddWeightsXMLTo( void* parent ) const
01073 {
01074    // write weights to xml file
01075    void* wght = gTools().AddChild(parent, "Weights");
01076    if (fBinaryTree)
01077       fBinaryTree->AddXMLTo(wght);
01078    else
01079       Log() << kFATAL << "Signal and background binary search tree not available" << Endl;
01080    //Log() << kFATAL << "Please implement writing of weights as XML" << Endl;
01081 }
01082 
01083 //_______________________________________________________________________
01084 void TMVA::MethodPDERS::ReadWeightsFromXML( void* wghtnode)
01085 {
01086    if (NULL != fBinaryTree) delete fBinaryTree;
01087    void* treenode = gTools().GetChild(wghtnode);
01088    fBinaryTree = TMVA::BinarySearchTree::CreateFromXML(treenode);
01089    if(!fBinaryTree)
01090       Log() << kFATAL << "Could not create BinarySearchTree from XML" << Endl;
01091    if(!fBinaryTree)
01092       Log() << kFATAL << "Could not create BinarySearchTree from XML" << Endl;
01093    fBinaryTree->SetPeriode( GetNvar() );
01094    fBinaryTree->CalcStatistics();
01095    fBinaryTree->CountNodes();
01096    fScaleS = 1.0/fBinaryTree->GetSumOfWeights( Types::kSignal );
01097    fScaleB = 1.0/fBinaryTree->GetSumOfWeights( Types::kBackground );
01098    Log() << kINFO << "signal and background scales: " << fScaleS << " " << fScaleB << Endl;
01099    CalcAverages();
01100    SetVolumeElement();
01101    fInitializedVolumeEle = kTRUE;
01102 }
01103 
01104 //_______________________________________________________________________
01105 void TMVA::MethodPDERS::ReadWeightsFromStream( istream& istr)
01106 {
01107    // read weight info from file
01108    if (NULL != fBinaryTree) delete fBinaryTree;
01109 
01110    fBinaryTree = new BinarySearchTree();
01111 
01112    istr >> *fBinaryTree;
01113 
01114    fBinaryTree->SetPeriode( GetNvar() );
01115 
01116    fBinaryTree->CalcStatistics();
01117 
01118    fBinaryTree->CountNodes();
01119 
01120    // these are the signal and background scales for the weights
01121    fScaleS = 1.0/fBinaryTree->GetSumOfWeights( Types::kSignal );
01122    fScaleB = 1.0/fBinaryTree->GetSumOfWeights( Types::kBackground );
01123 
01124    Log() << kINFO << "signal and background scales: " << fScaleS << " " << fScaleB << Endl;
01125 
01126    CalcAverages();
01127 
01128    SetVolumeElement();
01129 
01130    fInitializedVolumeEle = kTRUE;
01131 }
01132 
01133 //_______________________________________________________________________
01134 void TMVA::MethodPDERS::WriteWeightsToStream( TFile& ) const
01135 {
01136    // write training sample (TTree) to file
01137 }
01138 
01139 //_______________________________________________________________________
01140 void TMVA::MethodPDERS::ReadWeightsFromStream( TFile& /*rf*/ )
01141 {
01142    // read training sample from file
01143 }
01144 
01145 //_______________________________________________________________________
01146 TMVA::MethodPDERS* TMVA::MethodPDERS::ThisPDERS( void ) 
01147 { 
01148    // static pointer to this object
01149    return fgThisPDERS; 
01150 }
01151 //_______________________________________________________________________
01152 void TMVA::MethodPDERS::UpdateThis( void ) 
01153 {
01154    // update static this pointer
01155    fgThisPDERS = this; 
01156 }
01157 
01158 //_______________________________________________________________________
01159 void TMVA::MethodPDERS::MakeClassSpecific( std::ostream& fout, const TString& className ) const
01160 {
01161    // write specific classifier response
01162    fout << "   // not implemented for class: \"" << className << "\"" << std::endl;
01163    fout << "};" << std::endl;
01164 }
01165 
01166 //_______________________________________________________________________
01167 void TMVA::MethodPDERS::GetHelpMessage() const
01168 {
01169    // get help message text
01170    //
01171    // typical length of text line: 
01172    //         "|--------------------------------------------------------------|"
01173    Log() << Endl;
01174    Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
01175    Log() << Endl;
01176    Log() << "PDERS is a generalization of the projective likelihood classifier " << Endl;
01177    Log() << "to N dimensions, where N is the number of input variables used." << Endl;
01178    Log() << "In its adaptive form it is mostly equivalent to k-Nearest-Neighbor" << Endl;
01179    Log() << "(k-NN) methods. If the multidimensional PDF for signal and background" << Endl;
01180    Log() << "were known, this classifier would exploit the full information" << Endl;
01181    Log() << "contained in the input variables, and would hence be optimal. In " << Endl;
01182    Log() << "practice however, huge training samples are necessary to sufficiently " << Endl;
01183    Log() << "populate the multidimensional phase space. " << Endl;
01184    Log() << Endl;
01185    Log() << "The simplest implementation of PDERS counts the number of signal" << Endl;
01186    Log() << "and background events in the vicinity of a test event, and returns" << Endl;
01187    Log() << "a weight according to the majority species of the neighboring events." << Endl;
01188    Log() << "A more involved version of PDERS (selected by the option \"KernelEstimator\")" << Endl;
01189    Log() << "uses Kernel estimation methods to approximate the shape of the PDF." << Endl;
01190    Log() << Endl;
01191    Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
01192    Log() << Endl;
01193    Log() << "PDERS can be very powerful in case of strongly non-linear problems, " << Endl;
01194    Log() << "e.g., distinct islands of signal and background regions. Because of " << Endl;
01195    Log() << "the exponential growth of the phase space, it is important to restrict" << Endl;
01196    Log() << "the number of input variables (dimension) to the strictly necessary." << Endl;
01197    Log() << Endl;
01198    Log() << "Note that PDERS is a slowly responding classifier. Moreover, the necessity" << Endl;
01199    Log() << "to store the entire binary tree in memory, to avoid accessing virtual " << Endl;
01200    Log() << "memory, limits the number of training events that can effectively be " << Endl;
01201    Log() << "used to model the multidimensional PDF." << Endl;
01202    Log() << Endl;
01203    Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
01204    Log() << Endl;
01205    Log() << "If the PDERS response is found too slow when using the adaptive volume " << Endl;
01206    Log() << "size (option \"VolumeRangeMode=Adaptive\"), it might be found beneficial" << Endl;
01207    Log() << "to reduce the number of events required in the volume, and/or to enlarge" << Endl;
01208    Log() << "the allowed range (\"NeventsMin/Max\"). PDERS is relatively insensitive" << Endl;
01209    Log() << "to the width (\"GaussSigma\") of the Gaussian kernel (if used)." << Endl;
01210 }

Generated on Tue Jul 5 15:25:04 2011 for ROOT_528-00b_version by  doxygen 1.5.1