TKDE.cxx

Go to the documentation of this file.
00001 // // @(#)root/hist:$Id: TKDE.cxx 37563 2010-12-13 13:13:31Z moneta $
00002 // Authors: Bartolomeu Rabacal    07/2010
00003 /**********************************************************************
00004  *                                                                    *
00005  * Copyright (c) 2006 , ROOT MathLib Team                             *
00006  *                                                                    *
00007  * For the licensing terms see $ROOTSYS/LICENSE.                      *
00008  * For the list of contributors see $ROOTSYS/README/CREDITS.          *
00009  *                                                                    *
00010  **********************************************************************/
00011 //////////////////////////////////////////////////////////////////////////////
00012 /*
00013     Kernel Density Estimation class.
00014     The three main references are (1) "Scott DW, Multivariate Density Estimation. Theory, Practice and Visualization. New York: Wiley",
00015     (2) "Jann Ben - ETH Zurich, Switzerland -, Univariate kernel density estimation document for KDENS:
00016          Stata module for univariate kernel density estimation."
00017     (3) "Hardle W, Muller M, Sperlich S, Werwatz A, Nonparametric and Semiparametric Models. Springer."
00018    The algorithm is briefly described in (4) "Cranmer KS, Kernel Estimation in High-Energy
00019    Physics. Computer Physics Communications 136:198-207,2001" - e-Print Archive: hep ex/0011057.
00020    A binned version is also implemented to address the performance issue due to its data size dependance.
00021 */
00022 
00023 
00024 #include <functional>
00025 #include <algorithm>
00026 #include <numeric>
00027 #include <limits>
00028 
00029 #include "Math/Error.h"
00030 #include "TMath.h"
00031 #include "Math/Functor.h"
00032 #include "Math/Integrator.h"
00033 #include "Math/QuantFuncMathCore.h"
00034 #include "Math/RichardsonDerivator.h"
00035 #include "TGraphErrors.h"
00036 #include "TF1.h"
00037 #include "TCanvas.h"
00038 #include "TKDE.h"
00039 
00040 
00041 ClassImp(TKDE)
00042 
00043 class TKDE::TKernel {
00044    TKDE* fKDE;
00045    UInt_t fNWeights; // Number of kernel weights (bandwidth as vectorized for binning)
00046    std::vector<Double_t> fWeights; // Kernel weights (bandwidth)
00047 public:
00048    TKernel(Double_t weight, TKDE* kde);
00049    void ComputeAdaptiveWeights();
00050    Double_t operator()(Double_t x) const;
00051    Double_t GetWeight(Double_t x) const;
00052    Double_t GetFixedWeight() const;
00053    const std::vector<Double_t> & GetAdaptiveWeights() const;
00054 };
00055 
00056 struct TKDE::KernelIntegrand {
00057    enum EIntegralResult{kNorm, kMu, kSigma2, kUnitIntegration};
00058    KernelIntegrand(const TKDE* kde, EIntegralResult intRes);
00059    Double_t operator()(Double_t x) const;
00060 private:
00061    const TKDE* fKDE;
00062    EIntegralResult fIntegralResult;
00063 };
00064 
00065 TKDE::TKDE(UInt_t events, const Double_t* data, Double_t xMin, Double_t xMax, const Option_t* option, Double_t rho) :
00066    fData(events, 0.0),
00067    fEvents(events, 0.0),
00068    fPDF(0),
00069    fUpperPDF(0),
00070    fLowerPDF(0),
00071    fApproximateBias(0),
00072    fGraph(0),
00073    fNewData(false),
00074    fUseMinMaxFromData((xMin >= xMax)),
00075    fNBins(events < 10000 ? 100: events / 10),
00076    fNEvents(events),
00077    fUseBinsNEvents(10000),
00078    fMean(0.0),
00079    fSigma(0.0),
00080    fXMin(xMin),
00081    fXMax(xMax),
00082    fAdaptiveBandwidthFactor(1.0),
00083    fCanonicalBandwidths(std::vector<Double_t>(kTotalKernels, 0.0)),
00084    fKernelSigmas2(std::vector<Double_t>(kTotalKernels, -1.0)),
00085    fSettedOptions(std::vector<Bool_t>(4, kFALSE))
00086 {
00087    //Class constructor
00088    SetOptions(option, rho);
00089    CheckOptions();
00090    SetMirror();
00091    SetUseBins();
00092    SetKernelFunction();
00093    SetData(data);
00094    SetCanonicalBandwidths();
00095    SetKernelSigmas2();
00096    SetKernel();
00097 }
00098 
00099 TKDE::~TKDE() {
00100    //Class destructor
00101    if (fPDF)              delete fPDF;
00102    if (fUpperPDF)         delete fUpperPDF;
00103    if (fLowerPDF)         delete fLowerPDF;
00104    if (fGraph)         delete fGraph;
00105    if (fApproximateBias)  delete fApproximateBias;
00106    delete fKernelFunction;
00107    delete fKernel;
00108 }
00109 
00110 void TKDE::Instantiate(KernelFunction_Ptr kernfunc, UInt_t events, const Double_t* data, Double_t xMin, Double_t xMax, const Option_t* option, Double_t rho) {
00111    // Template's constructor surrogate
00112    fData = std::vector<Double_t>(events, 0.0);
00113    fEvents = std::vector<Double_t>(events, 0.0);
00114    fPDF = 0;
00115    fUpperPDF = 0;
00116    fLowerPDF = 0;
00117    fApproximateBias = 0;
00118    fNBins = events < 10000 ? 100 : events / 10;
00119    fNEvents = events;
00120    fUseBinsNEvents = 10000;
00121    fMean = 0.0;
00122    fSigma = 0.0;
00123    fXMin = xMin;
00124    fXMax = xMax;
00125    fUseMinMaxFromData = (fXMin >= fXMax);
00126    fAdaptiveBandwidthFactor = 1.;
00127    fCanonicalBandwidths = std::vector<Double_t>(kTotalKernels, 0.0);
00128    fKernelSigmas2 = std::vector<Double_t>(kTotalKernels, -1.0);
00129    fSettedOptions = std::vector<Bool_t>(4, kFALSE);
00130    SetOptions(option, rho);
00131    CheckOptions(kTRUE);
00132    SetMirror();
00133    SetUseBins();
00134    SetKernelFunction(kernfunc);
00135    SetData(data);
00136    SetCanonicalBandwidths();
00137    SetKernelSigmas2();
00138    SetKernel();
00139 }
00140 
00141 void TKDE::SetOptions(const Option_t* option, Double_t rho) {
00142    //Sets User defined construction options
00143    TString opt = option;
00144    opt.ToLower();
00145    std::string options = opt.Data();
00146    size_t numOpt = 4;
00147    std::vector<std::string> voption(numOpt, "");
00148    for (std::vector<std::string>::iterator it = voption.begin(); it != voption.end() && !options.empty(); ++it) {
00149       size_t pos = options.find_last_of(';');
00150       if (pos == std::string::npos) {
00151          *it = options;
00152          break;
00153       }
00154       *it = options.substr(pos + 1);
00155       options = options.substr(0, pos);
00156    }
00157    for (std::vector<std::string>::iterator it = voption.begin(); it != voption.end(); ++it) {
00158       size_t pos = (*it).find(':');
00159       if (pos != std::string::npos) {
00160          GetOptions((*it).substr(0, pos) , (*it).substr(pos + 1));
00161       }
00162    }
00163    AssureOptions();
00164    fRho = rho;
00165 }
00166 
00167 void TKDE::SetDrawOptions(const Option_t* option, TString& plotOpt, TString& drawOpt) {
00168    // Sets User defined drawing options
00169    size_t numOpt = 2;
00170    std::string options = TString(option).Data();
00171    std::vector<std::string> voption(numOpt, "");
00172    for (std::vector<std::string>::iterator it = voption.begin(); it != voption.end() && !options.empty(); ++it) {
00173       size_t pos = options.find_last_of(';');
00174       if (pos == std::string::npos) {
00175          *it = options;
00176          break;
00177       }
00178       *it = options.substr(pos + 1);
00179       options = options.substr(0, pos);
00180    }
00181    Bool_t foundPlotOPt = kFALSE;
00182    Bool_t foundDrawOPt = kFALSE;
00183    for (std::vector<std::string>::iterator it = voption.begin(); it != voption.end() && !options.empty(); ++it) {
00184       size_t pos = (*it).find(':');
00185       if (pos == std::string::npos) break;
00186       TString optionType = (*it).substr(0, pos);
00187       TString optionInstance = (*it).substr(pos + 1);
00188       optionType.ToLower();
00189       optionInstance.ToLower();
00190       if (optionType.Contains("plot")) {
00191          foundPlotOPt = kTRUE;
00192          if (optionInstance.Contains("estimate") || optionInstance.Contains("errors") || optionInstance.Contains("confidenceinterval"))
00193             plotOpt = optionInstance;
00194          else
00195             this->Warning("SetDrawOptions", "Unknown plotting option: setting to KDE estimate plot.");
00196       } else if (optionType.Contains("drawoptions")) {
00197          foundDrawOPt = kTRUE;
00198          drawOpt = optionInstance;
00199       }
00200    }
00201    if (!foundPlotOPt) {
00202       this->Warning("SetDrawOptions", "No plotting option: setting to KDE estimate plot.");
00203       plotOpt = "estimate";
00204    }
00205    if (!foundDrawOPt) {
00206       this->Warning("SetDrawOptions", "No drawing options: setting to default ones.");
00207       drawOpt = "apl4";
00208    }
00209 }
00210 
00211 void TKDE::GetOptions(std::string optionType, std::string option) {
00212    // Gets User defined KDE construction options
00213    if (optionType.compare("kerneltype") == 0) {
00214       fSettedOptions[0] = kTRUE;
00215       if (option.compare("gaussian") == 0) {
00216          fKernelType = kGaussian;
00217       } else if (option.compare("epanechnikov") == 0) {
00218          fKernelType = kEpanechnikov;
00219       } else if (option.compare("biweight") == 0) {
00220          fKernelType = kBiweight;
00221       } else if (option.compare("cosinearch") == 0) {
00222          fKernelType = kCosineArch;
00223       } else if (option.compare("userdefined") == 0) {
00224          fKernelType = kUserDefined;
00225       } else {
00226          this->Warning("GetOptions", "Unknown kernel type option: setting to Gaussian");
00227          fKernelType = kGaussian;
00228       }
00229    } else if (optionType.compare("iteration") == 0) {
00230       fSettedOptions[1] = kTRUE;
00231       if (option.compare("adaptive") == 0) {
00232          fIteration = kAdaptive;
00233       } else if (option.compare("fixed") == 0) {
00234          fIteration = kFixed;
00235       } else {
00236          this->Warning("GetOptions", "Unknown iteration option: setting to Adaptive");
00237          fIteration = kAdaptive;
00238       }
00239    } else if (optionType.compare("mirror") == 0) {
00240       fSettedOptions[2] = kTRUE;
00241       if (option.compare("nomirror") == 0) {
00242          fMirror = kNoMirror;
00243       } else if (option.compare("mirrorleft") == 0) {
00244          fMirror = kMirrorLeft;
00245       } else if (option.compare("mirrorright") == 0) {
00246          fMirror = kMirrorRight;
00247       } else if (option.compare("mirrorboth") == 0) {
00248          fMirror = kMirrorBoth;
00249       } else if (option.compare("mirrorasymleft") == 0) {
00250          fMirror = kMirrorAsymLeft;
00251       } else if (option.compare("mirrorasymleftright") == 0) {
00252          fMirror = kMirrorAsymLeftRight;
00253       } else if (option.compare("mirrorasymright") == 0) {
00254          fMirror = kMirrorAsymRight;
00255       } else if (option.compare("mirrorleftasymright") == 0) {
00256          fMirror = kMirrorLeftAsymRight;
00257       } else if (option.compare("mirrorasymboth") == 0) {
00258          fMirror = kMirrorAsymBoth;
00259       } else {
00260          this->Warning("GetOptions", "Unknown mirror option: setting to NoMirror");
00261          fMirror = kNoMirror;
00262       }
00263    } else if (optionType.compare("binning") == 0) {
00264       fSettedOptions[3] = kTRUE;
00265       if (option.compare("unbinned") == 0) {
00266          fBinning = kUnbinned;
00267       } else if (option.compare("relaxedbinning") == 0) {
00268          fBinning = kRelaxedBinning;
00269       } else if (option.compare("forcedbinning") == 0) {
00270          fBinning = kForcedBinning;
00271       } else {
00272          this->Warning("GetOptions", "Unknown binning option: setting to RelaxedBinning");
00273          fBinning = kRelaxedBinning;
00274       }
00275    }
00276 }
00277 
00278 void TKDE::AssureOptions() {
00279    // Sets missing construction options to default ones
00280    if (!fSettedOptions[0]) {
00281       fKernelType = kGaussian;
00282    }
00283    if (!fSettedOptions[1]) {
00284       fIteration = kAdaptive;
00285    }
00286    if (!fSettedOptions[2]) {
00287       fMirror = kNoMirror;
00288    }
00289    if (!fSettedOptions[3]) {
00290       fBinning = kRelaxedBinning;
00291    }
00292 }
00293 
00294 void TKDE::CheckOptions(Bool_t isUserDefinedKernel) {
00295    // Sets User global options
00296    if (!(isUserDefinedKernel) && !(fKernelType >= kGaussian && fKernelType < kUserDefined)) {
00297       Error("CheckOptions", "Illegal user kernel type input! Use template constructor for user defined kernel.");
00298    }
00299    if (fIteration != kAdaptive && fIteration != kFixed) {
00300       Warning("CheckOptions", "Illegal user iteration type input - use default value !");
00301       fIteration = kAdaptive;
00302    }
00303    if (!(fMirror >= kNoMirror && fMirror <= kMirrorAsymBoth)) {
00304       Warning("CheckOptions", "Illegal user mirroring type input - use default value !");
00305       fMirror = kNoMirror;
00306    }
00307    if (!(fBinning >= kUnbinned && fBinning <= kForcedBinning)) {
00308       Warning("CheckOptions", "Illegal user binning type input - use default value !");
00309       fBinning = kRelaxedBinning;
00310    }
00311    if (fRho <= 0.0) {
00312       Warning("CheckOptions", "Tuning factor rho cannot be non-positive - use default value !");
00313       fRho = 1.0;
00314    }
00315 }
00316 
00317 void TKDE::SetKernelType(EKernelType kern) {
00318    // Sets User option for the choice of kernel estimator
00319    fKernelType = kern;
00320    CheckOptions();
00321    SetKernel();
00322 }
00323 
00324 void TKDE::SetIteration(EIteration iter) {
00325    // Sets User option for fixed or adaptive iteration
00326    fIteration = iter;
00327    CheckOptions();
00328    SetKernel();
00329 }
00330 
00331 
00332 void TKDE::SetMirror(EMirror mir) {
00333    // Sets User option for mirroring the data
00334    fMirror = mir;
00335    CheckOptions();
00336    SetMirror();
00337    if (fUseMirroring) {
00338       SetMirroredEvents();
00339    }
00340    SetKernel();
00341 }
00342 
00343 
00344 void TKDE::SetBinning(EBinning bin) {
00345    // Sets User option for binning the weights
00346    fBinning = bin;
00347    CheckOptions();
00348    SetUseBins();
00349    SetKernel();
00350 }
00351 
00352 void TKDE::SetNBins(UInt_t nbins) {
00353    // Sets User option for number of bins
00354    if (!nbins) {
00355       Error("SetNBins", "Number of bins must be greater than zero.");
00356       return;
00357    }
00358    fNBins = nbins;
00359    fWeightSize = fNBins / (fXMax - fXMin);
00360    SetBinCentreData(fXMin, fXMax);
00361    SetBinCountData();
00362 
00363    if (fBinning == kUnbinned) {
00364       Warning("SetNBins", "Bin type using SetBinning must set for using a binned evaluation");
00365       return;
00366    }
00367 
00368    SetKernel();
00369 }
00370 
00371 void TKDE::SetUseBinsNEvents(UInt_t nEvents) {
00372    // Sets User option for the minimum number of events for allowing automatic binning
00373    fUseBinsNEvents = nEvents;
00374    SetUseBins();
00375    SetKernel();
00376 }
00377 
00378 void TKDE::SetTuneFactor(Double_t rho) {
00379    // Factor which can be used to tune the smoothing.
00380    // It is used as multiplicative factor for the fixed and adaptive bandwidth.
00381    // A value < 1 will reproduce better the tails but oversmooth the peak
00382    // while a factor > 1 will overestimate the tail
00383    fRho = rho;
00384    CheckOptions();
00385    SetKernel();
00386 }
00387 
00388 void TKDE::SetRange(Double_t xMin, Double_t xMax) {
00389    // Sets minimum range value and maximum range value
00390    if (xMin >= xMax) {
00391       Error("SetRange", "Minimum range cannot be bigger or equal than the maximum range! Present range values remain the same.");
00392       return;
00393    }
00394    fXMin = xMin;
00395    fXMax = xMax;
00396    fUseMinMaxFromData = false;
00397    SetKernel();
00398 }
00399 
00400 // private methods
00401 
00402 void TKDE::SetUseBins() {
00403    // Sets User option for using binned weights
00404    switch (fBinning) {
00405       default:
00406       case kRelaxedBinning:
00407          if (fNEvents >= fUseBinsNEvents) {
00408             fUseBins = kTRUE;
00409          } else {
00410             fUseBins = kFALSE;
00411          }
00412          break;
00413       case kForcedBinning:
00414          fUseBins = kTRUE;
00415          break;
00416       case kUnbinned:
00417          fUseBins = kFALSE;
00418    }
00419 }
00420 
00421 void TKDE::SetMirror() {
00422    // Sets the mirroring
00423    fMirrorLeft   = fMirror == kMirrorLeft      || fMirror == kMirrorBoth          || fMirror == kMirrorLeftAsymRight;
00424    fMirrorRight  = fMirror == kMirrorRight     || fMirror == kMirrorBoth          || fMirror == kMirrorAsymLeftRight;
00425    fAsymLeft     = fMirror == kMirrorAsymLeft  || fMirror == kMirrorAsymLeftRight || fMirror == kMirrorAsymBoth;
00426    fAsymRight    = fMirror == kMirrorAsymRight || fMirror == kMirrorLeftAsymRight || fMirror == kMirrorAsymBoth;
00427    fUseMirroring = fMirrorLeft                 || fMirrorRight ;
00428 }
00429 
00430 void TKDE::SetData(const Double_t* data) {
00431    // Sets the data events input sample or bin centres for binned option and computes basic estimators
00432    if (!data) {
00433       if (fNEvents) fData.reserve(fNEvents);
00434       return;
00435    }
00436    fEvents.assign(data, data + fNEvents);
00437    if (fUseMinMaxFromData) {
00438       fXMin = *std::min_element(fEvents.begin(), fEvents.end());
00439       fXMax = *std::max_element(fEvents.begin(), fEvents.end());
00440    }
00441    Double_t midspread = ComputeMidspread();
00442    SetMean();
00443    SetSigma(midspread);
00444    if (fUseBins) {
00445       if (fNBins >= fNEvents) {
00446          this->Warning("SetData", "Default number of bins is greater or equal to number of events. Use SetNBins(UInt_t) to set the appropriate number of bins");
00447       }
00448       fWeightSize = fNBins / (fXMax - fXMin);
00449       SetBinCentreData(fXMin, fXMax);
00450       SetBinCountData();
00451    } else {
00452       fWeightSize = fNEvents / (fXMax - fXMin);
00453       fData = fEvents;
00454    }
00455    if (fUseMirroring) {
00456       SetMirroredEvents();
00457    }
00458 }
00459 
00460 void TKDE::InitFromNewData() {
00461    // re-initialize when new data have been filled in TKDE
00462    // re-compute kernel quantities and mean and sigma
00463    fNewData = false;
00464    fEvents = fData;
00465    if (fUseMinMaxFromData) {
00466       fXMin = *std::min_element(fEvents.begin(), fEvents.end());
00467       fXMax = *std::max_element(fEvents.begin(), fEvents.end());
00468    }
00469    Double_t midspread = ComputeMidspread();
00470    SetMean();
00471    SetSigma(midspread);
00472 //    if (fUseBins) {
00473 // } // bin usage is not supported in this case
00474 //
00475    fWeightSize = fNEvents / (fXMax - fXMin);
00476    if (fUseMirroring) {
00477       SetMirroredEvents();
00478    }
00479    SetKernel();
00480 }
00481 
00482 void TKDE::SetMirroredEvents() {
00483    // Mirrors the data
00484    std::vector<Double_t> originalEvents = fEvents;
00485    if (fMirrorLeft) {
00486       fEvents.resize(2 * fNEvents, 0.0);
00487       transform(fEvents.begin(), fEvents.begin() + fNEvents, fEvents.begin() + fNEvents, std::bind1st(std::minus<Double_t>(), 2 * fXMin));
00488    }
00489    if (fMirrorRight) {
00490       fEvents.resize((fMirrorLeft + 2) * fNEvents, 0.0);
00491       transform(fEvents.begin(), fEvents.begin() + fNEvents, fEvents.begin() + (fMirrorLeft + 1) * fNEvents, std::bind1st(std::minus<Double_t>(), 2 * fXMax));
00492    }
00493    if(fUseBins) {
00494       fNBins *= (fMirrorLeft + fMirrorRight + 1);
00495       Double_t xmin = fMirrorLeft  ? 2 * fXMin - fXMax : fXMin;
00496       Double_t xmax = fMirrorRight ? 2 * fXMax - fXMin : fXMax;
00497       SetBinCentreData(xmin, xmax);
00498       SetBinCountData();
00499    } else {
00500       fData = fEvents;
00501    }
00502    fEvents = originalEvents;
00503 }
00504 
00505 void TKDE::SetMean() {
00506    // Computes input data's mean
00507    fMean = std::accumulate(fEvents.begin(), fEvents.end(), 0.0) / fEvents.size();
00508 }
00509 
00510 void TKDE::SetSigma(Double_t R) {
00511    // Computes input data's sigma
00512    fSigma = std::sqrt(1. / (fEvents.size() - 1.) * (std::inner_product(fEvents.begin(), fEvents.end(), fEvents.begin(), 0.0) - fEvents.size() * std::pow(fMean, 2.)));
00513    fSigmaRob = std::min(fSigma, R / 1.349); // Sigma's robust estimator
00514 }
00515 
00516 void TKDE::SetKernel() {
00517    // Sets the kernel density estimator
00518    UInt_t n = fData.size();
00519    if (n == 0) return;
00520    // Optimal bandwidth (Silverman's rule of thumb with assumed Gaussian density)
00521    Double_t weight(fCanonicalBandwidths[kGaussian] * fSigmaRob * std::pow(3. / (8. * std::sqrt(M_PI)) * n, -0.2));
00522    weight *= fRho * fCanonicalBandwidths[fKernelType] / fCanonicalBandwidths[kGaussian];
00523    fKernel = new TKernel(weight, this);
00524    if (fIteration == kAdaptive) {
00525       fKernel->ComputeAdaptiveWeights();
00526    }
00527 }
00528 
00529 void TKDE::SetKernelFunction(KernelFunction_Ptr kernfunc) {
00530    // Sets kernel estimator
00531    switch (fKernelType) {
00532       case kGaussian :
00533          fKernelFunction = new ROOT::Math::WrappedMemFunction<TKDE, Double_t (TKDE::*)(Double_t) const>(*this, &TKDE::GaussianKernel);
00534          break;
00535       case kEpanechnikov :
00536          fKernelFunction = new ROOT::Math::WrappedMemFunction<TKDE, Double_t (TKDE::*)(Double_t) const>(*this, &TKDE::EpanechnikovKernel);
00537          break;
00538       case kBiweight :
00539          fKernelFunction = new ROOT::Math::WrappedMemFunction<TKDE, Double_t (TKDE::*)(Double_t) const>(*this, &TKDE::BiweightKernel);
00540          break;
00541       case kCosineArch :
00542          fKernelFunction = new ROOT::Math::WrappedMemFunction<TKDE, Double_t (TKDE::*)(Double_t) const>(*this, &TKDE::CosineArchKernel);
00543          break;
00544       case kUserDefined :
00545       case kTotalKernels :
00546       default:
00547          fKernelFunction = kernfunc;
00548          if (fKernelFunction) {
00549             CheckKernelValidity();
00550             SetCanonicalBandwidth();
00551             SetKernelSigma2();
00552             SetKernel();
00553          } else {
00554             Error("SetKernelFunction", "Undefined user kernel function input!");
00555             //exit(EXIT_FAILURE);
00556          }
00557    }
00558 }
00559 
00560 void TKDE::SetCanonicalBandwidths() {
00561    // Sets the canonical bandwidths according to the kernel type
00562    fCanonicalBandwidths[kGaussian] = 0.7764;     // Checked in Mathematica
00563    fCanonicalBandwidths[kEpanechnikov] = 1.7188; // Checked in Mathematica
00564    fCanonicalBandwidths[kBiweight] = 2.03617;    // Checked in Mathematica
00565    fCanonicalBandwidths[kCosineArch] = 1.7663;   // Checked in Mathematica
00566 }
00567 
00568 void TKDE::SetKernelSigmas2() {
00569    // Sets the kernel sigmas2 according to the kernel type
00570    fKernelSigmas2[kGaussian] = 1.0;
00571    fKernelSigmas2[kEpanechnikov] = 1.0 / 5.0;
00572    fKernelSigmas2[kBiweight] = 1.0 / 7.0;
00573    fKernelSigmas2[kCosineArch] = 1.0 - 8.0 / std::pow(M_PI, 2);
00574 }
00575 
00576 TF1* TKDE::GetFunction(UInt_t npx, Double_t xMin, Double_t xMax) {
00577    // Returns the PDF estimate as a function sampled in npx between xMin and xMax
00578    // the KDE is not re-normalized to the xMin/xMax range.
00579    // The user manages the returned function
00580    // For getting a non-sampled TF1, one can create a TF1 directly from the TKDE by doing
00581    // TF1 * f1  = new TF1("f1",kde,xMin,xMax,0);
00582    return GetKDEFunction(npx,xMin,xMax);
00583 }
00584 
00585 TF1* TKDE::GetUpperFunction(Double_t confidenceLevel, UInt_t npx, Double_t xMin, Double_t xMax) {
00586     // Returns the PDF upper estimate (upper confidence interval limit)
00587     return GetPDFUpperConfidenceInterval(confidenceLevel,npx,xMin,xMax);
00588 }
00589 
00590 TF1* TKDE::GetLowerFunction(Double_t confidenceLevel, UInt_t npx, Double_t xMin, Double_t xMax) {
00591     // Returns the PDF lower estimate (lower confidence interval limit)
00592     return GetPDFLowerConfidenceInterval(confidenceLevel,npx,xMin,xMax);
00593 }
00594 
00595 TF1* TKDE::GetApproximateBias(UInt_t npx, Double_t xMin, Double_t xMax) {
00596    // Returns the PDF estimate bias
00597    return GetKDEApproximateBias(npx,xMin,xMax);
00598 }
00599 
00600 void TKDE::Fill(Double_t data) {
00601    // Fills data member with User input data event for the unbinned option
00602    if (fUseBins) {
00603       this->Warning("Fill", "Cannot fill data with data binned option. Data input ignored.");
00604       return;
00605    }
00606    fData.push_back(data);
00607    fNEvents++;
00608    fNewData = kTRUE;
00609 }
00610 
00611 Double_t TKDE::operator()(const Double_t* x, const Double_t*) const {
00612    // The class's unary function: returns the kernel density estimate
00613    return (*this)(*x);
00614 }
00615 
00616 Double_t TKDE::operator()(Double_t x) const {
00617    // The class's unary function: returns the kernel density estimate
00618    if (fNewData) (const_cast<TKDE*>(this))->InitFromNewData();
00619    return (*fKernel)(x);
00620 }
00621 
00622 Double_t TKDE::GetMean() const {
00623    // return the mean of the data
00624    if (fNewData) (const_cast<TKDE*>(this))->InitFromNewData();
00625    return fMean;
00626 }
00627 
00628 Double_t TKDE::GetSigma() const {
00629    // return the standard deviation  of the data
00630    if (fNewData) (const_cast<TKDE*>(this))->InitFromNewData();
00631    return fSigma;
00632 }
00633 
00634 Double_t TKDE::GetRAMISE() const {
00635    // Returns the Root Asymptotic Mean Integrated Squared Error according to Silverman's rule of thumb with assumed Gaussian density
00636    Double_t result = 5. / 4. * fKernelSigmas2[fKernelType] * std::pow(fCanonicalBandwidths[fKernelType], 4) * std::pow(3. / (8. * std::sqrt(M_PI)) , -0.2) * fSigmaRob * std::pow(fNEvents, -0.8);
00637    return std::sqrt(result);
00638 }
00639 
00640 TKDE::TKernel::TKernel(Double_t weight, TKDE* kde) :
00641    // Internal class constructor
00642    fKDE(kde),
00643    fNWeights(kde->fData.size()),
00644    fWeights(fNWeights, weight)
00645 {}
00646 
00647 void TKDE::TKernel::ComputeAdaptiveWeights() {
00648    // Gets the adaptive weights (bandwidths) for TKernel internal computation
00649    std::vector<Double_t> weights = fWeights;
00650    std::vector<Double_t>::iterator weight = weights.begin();
00651    Double_t minWeight = *weight * 0.05;
00652    std::vector<Double_t>::iterator data = fKDE->fData.begin();
00653    Double_t f = 0.0;
00654    for (; weight != weights.end(); ++weight, ++data) {
00655       f = (*fKDE->fKernel)(*data);
00656       *weight = std::max(*weight /= std::sqrt(f), minWeight);
00657       fKDE->fAdaptiveBandwidthFactor += std::log(f);
00658    }
00659    Double_t kAPPROX_GEO_MEAN = 0.241970724519143365; // 1 / TMath::Power(2 * TMath::Pi(), .5) * TMath::Exp(-.5). Approximated geometric mean over pointwise data (the KDE function is substituted by the "real Gaussian" pdf) and proportional to sigma. Used directly when the mirroring is enabled, otherwise computed from the data
00660    fKDE->fAdaptiveBandwidthFactor = fKDE->fUseMirroring ? kAPPROX_GEO_MEAN / fKDE->fSigmaRob : std::sqrt(std::exp(fKDE->fAdaptiveBandwidthFactor / fKDE->fData.size()));
00661    transform(weights.begin(), weights.end(), fWeights.begin(), std::bind2nd(std::multiplies<Double_t>(), fKDE->fAdaptiveBandwidthFactor));
00662  }
00663 
00664 Double_t TKDE::TKernel::GetWeight(Double_t x) const {
00665    // Returns the bandwidth
00666    return fWeights[fKDE->Index(x)];
00667 }
00668 
00669 void TKDE::SetBinCentreData(Double_t xmin, Double_t xmax) {
00670    // Returns the bins' centres from the data for using with the binned option
00671    fData.assign(fNBins, 0.0);
00672    Double_t binWidth((xmax - xmin) / fNBins);
00673    for (UInt_t i = 0; i < fNBins; ++i) {
00674       fData[i] = xmin + (i + 0.5) * binWidth;
00675    }
00676 }
00677 
00678 void TKDE::SetBinCountData() {
00679    // Returns the bins' count from the data for using with the binned option
00680    fBinCount.resize(fNBins);
00681    for (UInt_t i = 0; i < fNEvents; ++i) {
00682       if (fEvents[i] >= fXMin && fEvents[i] < fXMax)
00683          fBinCount[Index(fEvents[i])]++;
00684    }
00685 }
00686 
00687 void TKDE::Draw(const Option_t* opt) {
00688    // Draws either the KDE functions or its errors
00689    // Possible options: 
00690    //                    ""  (default) - draw just the kde
00691    //                    "same" draw on top of existing pad
00692    //                    "Errors" draw a TGraphErrors with the point and errors
00693    //                    "confidenceinterval" draw KDE + conf interval functions (default is 95%)
00694    //                    "confidenceinterval@0.90" draw KDE + conf interval functions at 90%
00695    //                      Extra options can be passed in opt for drawing the TF1 or the TGraph 
00696    //
00697    //NOTE:  The functions GetDrawnFunction(), GetDrawnUpperFunction(), GetDrawnLowerFunction() 
00698    //  and GetGraphWithErrors() return the corresponding drawn objects (which are maneged by the TKDE) 
00699    // They can be used to changes style, color, etc...
00700 
00701    // TString plotOpt = "";
00702    // TString drawOpt = "";
00703    // LM : this is too complicates - skip it - not needed for just 
00704    // three options
00705    // SetDrawOptions(opt, plotOpt, drawOpt);
00706    TString plotOpt = opt; 
00707    plotOpt.ToLower();
00708    TString drawOpt = plotOpt;
00709    if(gPad && !plotOpt.Contains("same")) {
00710       gPad->Clear();
00711    }
00712    if (plotOpt.Contains("errors"))  {
00713       drawOpt.ReplaceAll("errors","");
00714       DrawErrors(drawOpt);
00715    }
00716    else if (plotOpt.Contains("confidenceinterval") || 
00717             plotOpt.Contains("confinterval")) {
00718       // parse level option
00719       drawOpt.ReplaceAll("confidenceinterval","");
00720       drawOpt.ReplaceAll("confinterval","");
00721       Double_t level = 0.95;
00722       const char * s = strstr(plotOpt.Data(),"interval@");
00723       // coverity [secure_coding : FALSE]
00724       if (s != 0) sscanf(s,"interval@%lf",&level);
00725       if((level <= 0) || (level >= 1)) {
00726          Warning("Draw","given confidence level %.3lf is invalid - use default 0.95",level);
00727          level = 0.95;
00728       }
00729       DrawConfidenceInterval(drawOpt,level);
00730    }
00731    else {
00732       if (fPDF) delete fPDF; 
00733       fPDF = GetKDEFunction();
00734       fPDF->Draw(drawOpt);
00735    }
00736 }
00737 
00738 void TKDE::DrawErrors(TString& drawOpt) {
00739    // Draws a TGraphErrors for the KDE errors
00740    if (fGraph) delete fGraph;
00741    fGraph = GetGraphWithErrors();
00742    fGraph->Draw(drawOpt.Data());
00743 }
00744 
00745 TGraphErrors* TKDE::GetGraphWithErrors(UInt_t npx, double xmin, double xmax) {
00746    if (xmin>= xmax) { xmin = fXMin; xmax = fXMax; }
00747    // return a TGraphErrors for the KDE errors
00748    UInt_t n = npx;
00749    Double_t* x = new Double_t[n + 1];
00750    Double_t* ex = new Double_t[n + 1];
00751    Double_t* y = new Double_t[n + 1];
00752    Double_t* ey = new Double_t[n + 1];
00753    for (UInt_t i = 0; i <= n; ++i) {
00754       x[i] = xmin + i * (xmax - xmin) / n;
00755       y[i] = (*this)(x[i]);
00756       ey[i] = this->GetError(x[i]);
00757    }
00758    TGraphErrors* ge = new TGraphErrors(n, &x[0], &y[0], &ex[0], &ey[0]);
00759    ge->SetName("kde_graph_error");
00760    ge->SetTitle("Errors");
00761    delete [] x;
00762    delete [] ex;
00763    delete [] y;
00764    delete [] ey;
00765    return ge;
00766 }
00767 
00768 void TKDE::DrawConfidenceInterval(TString& drawOpt,double cl) {
00769    // Draws the KDE and its confidence interval
00770    GetKDEFunction()->Draw(drawOpt.Data());
00771    TF1* upper = GetPDFUpperConfidenceInterval(cl);
00772    upper->SetLineColor(kBlue);
00773    upper->Draw(("same" + drawOpt).Data());
00774    TF1* lower = GetPDFLowerConfidenceInterval(cl);
00775    lower->SetLineColor(kRed);
00776    lower->Draw(("same" + drawOpt).Data());
00777    if (fUpperPDF) delete fUpperPDF;
00778    if (fLowerPDF) delete fLowerPDF;
00779    fUpperPDF = upper;
00780    fLowerPDF = lower;
00781 }
00782 
00783 Double_t TKDE::GetFixedWeight() const {
00784    // Returns the bandwidth for the non adaptive KDE
00785    Double_t result = -1.;
00786    if (fIteration == TKDE::kAdaptive) {
00787       this->Warning("GetFixedWeight()", "Fixed iteration option not enabled. Returning %f.", result);
00788    } else {
00789       result = fKernel->GetFixedWeight();
00790    }
00791    return result;
00792 }
00793 
00794 const Double_t *  TKDE::GetAdaptiveWeights() const {
00795    // Returns the bandwidths for the adaptive KDE
00796    if (fIteration != TKDE::kAdaptive) {
00797       this->Warning("GetFixedWeight()", "Adaptive iteration option not enabled. Returning a NULL pointer<");
00798       return 0;
00799    }
00800    if (fNewData) (const_cast<TKDE*>(this))->InitFromNewData();
00801    return &(fKernel->GetAdaptiveWeights()).front();
00802 }
00803 
00804 Double_t TKDE::TKernel::GetFixedWeight() const {
00805    // Returns the bandwidth for the non adaptive KDE
00806    return fWeights[0];
00807 }
00808 
00809 const std::vector<Double_t> & TKDE::TKernel::GetAdaptiveWeights() const {
00810    // Returns the bandwidth for the non adaptive KDE
00811    return fWeights;
00812 }
00813 
00814 Double_t TKDE::TKernel::operator()(Double_t x) const {
00815    // The internal class's unary function: returns the kernel density estimate
00816    Double_t result(0.0);
00817    UInt_t n = fKDE->fData.size();
00818    Bool_t useBins = (fKDE->fBinCount.size() == n);
00819    for (UInt_t i = 0; i < n; ++i) {
00820       Double_t binCount = (useBins) ? fKDE->fBinCount[i] : 1.0;
00821       result += binCount / fWeights[i] * (*fKDE->fKernelFunction)((x - fKDE->fData[i]) / fWeights[i]);
00822       if (fKDE->fAsymLeft) {
00823          result -= binCount / fWeights[i] * (*fKDE->fKernelFunction)((x - (2. * fKDE->fXMin - fKDE->fData[i])) / fWeights[i]);
00824       }
00825       if (fKDE->fAsymRight) {
00826          result -= binCount / fWeights[i] * (*fKDE->fKernelFunction)((x - (2. * fKDE->fXMax - fKDE->fData[i])) / fWeights[i]);
00827       }
00828    }
00829    return result / fKDE->fNEvents;
00830 }
00831 
00832 UInt_t TKDE::Index(Double_t x) const {
00833    // Returns the indices (bins) for the binned weights
00834    Int_t bin = Int_t((x - fXMin) * fWeightSize);
00835    if (bin == (Int_t)fData.size()) return --bin;
00836    if (fUseMirroring && (fMirrorLeft || !fMirrorRight)) {
00837       bin += fData.size() / (fMirrorLeft + fMirrorRight + 1);
00838    }
00839    if (bin > (Int_t)fData.size()) {
00840       return (Int_t)(fData.size()) - 1;
00841    } else if (bin <= 0) {
00842       return 0;
00843    }
00844    return bin;
00845 }
00846 
00847 Double_t TKDE::UpperConfidenceInterval(const Double_t* x, const Double_t* p) const {
00848    // Returns the pointwise upper estimated density
00849    Double_t f = (*this)(x);
00850    Double_t sigma = GetError(*x);
00851    Double_t prob = 1. - (1.-*p)/2;   // this is 1.-alpha/2
00852    Double_t z = ROOT::Math::normal_quantile(prob, 1.0);
00853    return f + z * sigma;
00854 }
00855 
00856 Double_t TKDE::LowerConfidenceInterval(const Double_t* x, const Double_t* p) const {
00857    // Returns the pointwise lower estimated density
00858    Double_t f = (*this)(x);
00859    Double_t sigma = GetError(*x);
00860    Double_t prob = (1.-*p)/2;    // this is alpha/2
00861    Double_t z = ROOT::Math::normal_quantile(prob, 1.0);
00862    return f + z * sigma;
00863 }
00864 
00865 
00866 Double_t TKDE::GetBias(Double_t x) const {
00867    // Returns the pointwise approximate estimated density bias
00868    ROOT::Math::WrappedFunction<const TKDE&> kern(*this);
00869    ROOT::Math::RichardsonDerivator rd;
00870    rd.SetFunction(kern);
00871    Double_t df2 = rd.Derivative2(x);
00872    Double_t weight = fKernel->GetWeight(x); // Bandwidth
00873    return  0.5 * fKernelSigmas2[fKernelType] * std::pow(weight, 2) * df2;
00874 }
00875 Double_t TKDE::GetError(Double_t x) const {
00876    // Returns the pointwise sigma of estimated density
00877    Double_t kernelL2Norm = ComputeKernelL2Norm();
00878    Double_t f = (*this)(x);
00879    Double_t weight = fKernel->GetWeight(x); // Bandwidth
00880    Double_t result = f * kernelL2Norm / (fNEvents * weight);
00881    return std::sqrt(result);
00882 }
00883 
00884 void TKDE::CheckKernelValidity() {
00885    // Checks if kernel has unit integral, mu = 0 and positive finite sigma conditions
00886    Double_t valid = kTRUE;
00887    Double_t unity = ComputeKernelIntegral();
00888    valid = valid && unity == 1.;
00889    if (!valid) {
00890       Error("CheckKernelValidity", "Kernel's integral is %f",unity);
00891    }
00892    Double_t mu = ComputeKernelMu();
00893    valid = valid && mu == 0.;
00894    if (!valid) {
00895       Error("CheckKernelValidity", "Kernel's mu is %f" ,mu);
00896    }
00897    Double_t sigma2 = ComputeKernelSigma2();
00898    valid = valid && sigma2 > 0 && sigma2 != std::numeric_limits<Double_t>::infinity();
00899    if (!valid) {
00900       Error("CheckKernelValidity", "Kernel's sigma2 is %f",sigma2);
00901    }
00902    if (!valid) {
00903       Error("CheckKernelValidity", "Validation conditions: the kernel's integral must be 1, the kernel's mu must be zero and the kernel's sigma2 must be finite positive to be a suitable kernel.");
00904       //exit(EXIT_FAILURE);
00905    }
00906 }
00907 
00908 Double_t TKDE::ComputeKernelL2Norm() const {
00909    // Computes the kernel's L2 norm
00910    ROOT::Math::IntegratorOneDim ig(ROOT::Math::IntegrationOneDim::kGAUSS);
00911    KernelIntegrand kernel(this, TKDE::KernelIntegrand::kNorm);
00912    ig.SetFunction(kernel);
00913    Double_t result = ig.Integral();
00914    return result;
00915 }
00916 
00917 Double_t TKDE::ComputeKernelSigma2() const {
00918    // Computes the kernel's sigma squared
00919    ROOT::Math::IntegratorOneDim ig( ROOT::Math::IntegrationOneDim::kGAUSS);
00920    KernelIntegrand kernel(this, TKDE::KernelIntegrand::kSigma2);
00921    ig.SetFunction(kernel);
00922    Double_t result = ig.Integral();
00923    return result;
00924 }
00925 
00926 Double_t TKDE::ComputeKernelMu() const {
00927    // Computes the kernel's mu
00928    ROOT::Math::IntegratorOneDim ig(ROOT::Math::IntegrationOneDim::kGAUSS);
00929    KernelIntegrand kernel(this, TKDE::KernelIntegrand::kMu);
00930    ig.SetFunction(kernel);
00931    Double_t result = ig.Integral();
00932    return result;
00933 }
00934 
00935 Double_t TKDE::ComputeKernelIntegral() const {
00936    // Computes the kernel's integral which ought to be unity
00937    ROOT::Math::IntegratorOneDim ig(ROOT::Math::IntegrationOneDim::kGAUSS);
00938    KernelIntegrand kernel(this, TKDE::KernelIntegrand::kUnitIntegration);
00939    ig.SetFunction(kernel);
00940    Double_t result = ig.Integral();
00941    return result;
00942 }
00943 
00944 Double_t TKDE::ComputeMidspread () {
00945    // Computes the inter-quartile range from the data
00946    std::sort(fEvents.begin(), fEvents.end());
00947    Double_t quantiles[2] = {0.0, 0.0};
00948    Double_t prob[2] = {0.25, 0.75};
00949    TMath::Quantiles(fEvents.size(), 2, &fEvents[0], quantiles, prob);
00950    Double_t lowerquartile = quantiles[0];
00951    Double_t upperquartile = quantiles[1];
00952    return upperquartile - lowerquartile;
00953 }
00954 
00955 void TKDE::SetCanonicalBandwidth() {
00956    // Computes the user's input kernel function canonical bandwidth
00957    fCanonicalBandwidths[kUserDefined] = std::pow(ComputeKernelL2Norm() / std::pow(ComputeKernelSigma2(), 2), 1. / 5.);
00958 }
00959 
00960 void TKDE::SetKernelSigma2() {
00961    // Computes the user's input kernel function sigma2
00962    fKernelSigmas2[kUserDefined] = ComputeKernelSigma2();
00963 }
00964 
00965 TKDE::KernelIntegrand::KernelIntegrand(const TKDE* kde, EIntegralResult intRes) : fKDE(kde), fIntegralResult(intRes) {
00966    // Internal class constructor
00967 }
00968 
00969 Double_t TKDE::KernelIntegrand::operator()(Double_t x) const {
00970    // Internal class unary function
00971    if (fIntegralResult == kNorm) {
00972      return std::pow((*fKDE->fKernelFunction)(x), 2);
00973    } else if (fIntegralResult == kMu) {
00974       return x * (*fKDE->fKernelFunction)(x);
00975    } else if (fIntegralResult == kSigma2) {
00976       return std::pow(x, 2) * (*fKDE->fKernelFunction)(x);
00977    } else if (fIntegralResult == kUnitIntegration) {
00978       return (*fKDE->fKernelFunction)(x);
00979    } else {
00980       return -1;
00981    }
00982 }
00983 
00984 TF1* TKDE::GetKDEFunction(UInt_t npx, Double_t xMin , Double_t xMax) {
00985    //Returns the estimated density
00986    TString name = "KDEFunc_"; name+= GetName();
00987    TString title = "KDE "; title+= GetTitle();
00988    if (xMin >= xMax) { xMin = fXMin; xMax = fXMax; }
00989    TF1 * pdf = new TF1(name.Data(), this, xMin, xMax, 0);
00990    if (npx > 0) pdf->SetNpx(npx);
00991    pdf->SetTitle(title);
00992    TF1 * f =  (TF1*)pdf->Clone();
00993    delete pdf; 
00994    return f;
00995 }
00996 
00997 TF1* TKDE::GetPDFUpperConfidenceInterval(Double_t confidenceLevel, UInt_t npx, Double_t xMin , Double_t xMax) {
00998    // Returns the upper estimated density
00999    TString name;
01000    name.Form("KDE_UpperCL%f5.3_%s",confidenceLevel,GetName());
01001    if (xMin >= xMax) { xMin = fXMin; xMax = fXMax; }
01002    TF1 * upperPDF = new TF1(name, this, &TKDE::UpperConfidenceInterval, xMin, xMax, 1, "TKDE", "UpperConfidenceInterval");
01003    upperPDF->SetParameter(0, confidenceLevel);
01004    if (npx > 0) upperPDF->SetNpx(npx);
01005    TF1 * f =  (TF1*)upperPDF->Clone();
01006    delete upperPDF;
01007    return f;
01008    }
01009 
01010 TF1* TKDE::GetPDFLowerConfidenceInterval(Double_t confidenceLevel, UInt_t npx, Double_t xMin , Double_t xMax) {
01011    // Returns the upper estimated density
01012    TString name;
01013    name.Form("KDE_LowerCL%f5.3_%s",confidenceLevel,GetName());
01014    if (xMin >= xMax) { xMin = fXMin; xMax = fXMax; }
01015    TF1 * lowerPDF = new TF1(name, this, &TKDE::LowerConfidenceInterval, xMin, xMax, 1);
01016    lowerPDF->SetParameter(0, confidenceLevel);
01017    if (npx > 0) lowerPDF->SetNpx(npx);
01018    TF1 * f = (TF1*)lowerPDF->Clone();
01019    delete lowerPDF;
01020    return f;
01021 }
01022 
01023 TF1* TKDE::GetKDEApproximateBias(UInt_t npx, Double_t xMin , Double_t xMax) {
01024    // Returns the approximate bias
01025    TString name = "KDE_Bias_"; name += GetName();
01026    if (xMin >= xMax) { xMin = fXMin; xMax = fXMax; }
01027    TF1 * approximateBias = new TF1(name, this, &TKDE::ApproximateBias, xMin, xMax, 0);
01028    if (npx > 0) approximateBias->SetNpx(npx);
01029    TF1 * f =  (TF1*)approximateBias->Clone();
01030    delete approximateBias;
01031    return f;
01032 }

Generated on Tue Jul 5 14:24:21 2011 for ROOT_528-00b_version by  doxygen 1.5.1