ROOT_528-00b_version: include/TKDE.h Source File

00001 // @(#)root/hist:$Id: TKDE.h 37543 2010-12-11 14:14:05Z moneta $
00002 // Authors: Bartolomeu Rabacal    07/2010
00003 /**********************************************************************
00004  *                                                                    *
00005  * Copyright (c) 2006 , LCG ROOT MathLib Team                         *
00006  *                                                                    *
00007  *                                                                    *
00008  **********************************************************************/
00009 // Header file for TKDE
00010 
00011 #ifndef ROOT_TKDE
00012 #define ROOT_TKDE
00013 
00014 #ifndef ROOT_Math_WrappedFunction
00015    #include "Math/WrappedFunction.h"
00016 #endif
00017 
00018 #ifndef ROOT_TNamed
00019    #include "TNamed.h"
00020 #endif
00021 
00022 #ifndef ROOT_Math_Math
00023 #include "Math/Math.h"
00024 #endif
00025 
00026 //#include "TF1.h"
00027 class TGraphErrors;
00028 class TF1;
00029 
00030 /*
00031    Kernel Density Estimation class. The three main references are (1) "Scott DW, Multivariate Density Estimation.
00032 Theory, Practice and Visualization. New York: Wiley", (2) "Jann Ben - ETH Zurich, Switzerland -, Univariate kernel density estimation document for KDENS: Stata module for univariate kernel density estimation." and (3) "Hardle W, Muller M, Sperlich S, Werwatz A, Nonparametric and Semiparametric Models. Springer."
00033    The algorithm is briefly described in (4) "Cranmer KS, Kernel Estimation in High-Energy
00034 Physics. Computer Physics Communications 136:198-207,2001" - e-Print Archive: hep ex/0011057.
00035    A binned version is also implemented to address the performance issue due to its data size dependence.
00036 */
00037 class TKDE : public TNamed  {
00038 public:
00039 
00040    enum EKernelType { // Kernel function type option
00041       kGaussian,
00042       kEpanechnikov,
00043       kBiweight,
00044       kCosineArch,
00045       kUserDefined, // Internal use only for the class's template constructor
00046       kTotalKernels // Internal use only for member initialization
00047    };
00048 
00049    enum EIteration { // KDE fitting option
00050       kAdaptive,
00051       kFixed
00052    };
00053 
00054    enum EMirror { // Data "mirroring" option to address the probability "spill out" boundary effect
00055       kNoMirror,
00056       kMirrorLeft,
00057       kMirrorRight,
00058       kMirrorBoth,
00059       kMirrorAsymLeft,
00060       kMirrorAsymLeftRight,
00061       kMirrorAsymRight,
00062       kMirrorLeftAsymRight,
00063       kMirrorAsymBoth
00064    };
00065 
00066    enum EBinning{ // Data binning option
00067       kUnbinned,
00068       kRelaxedBinning, // The algorithm is allowed to use binning if the data is large enough
00069       kForcedBinning
00070    };
00071 
00072    explicit TKDE(UInt_t events = 0, const Double_t* data = 0, Double_t xMin = 0.0, Double_t xMax = 0.0, const Option_t* option = "KernelType:Gaussian;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho = 1.0);
00073 
00074    template<class KernelFunction>
00075    TKDE(const Char_t* /*name*/, const KernelFunction& kernfunc, UInt_t events, const Double_t* data, Double_t xMin = 0.0, Double_t xMax = 0.0, const Option_t* option = "KernelType:UserDefined;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho = 1.0)  {
00076       Instantiate(new ROOT::Math::WrappedFunction<const KernelFunction&>(kernfunc), events, data, xMin, xMax, option, rho);
00077    }
00078 
00079    virtual ~TKDE();
00080 
00081    void Fill(Double_t data);
00082    void SetKernelType(EKernelType kern);
00083    void SetIteration(EIteration iter);
00084    void SetMirror(EMirror mir);
00085    void SetBinning(EBinning);
00086    void SetNBins(UInt_t nbins);
00087    void SetUseBinsNEvents(UInt_t nEvents);
00088    void SetTuneFactor(Double_t rho);
00089    void SetRange(Double_t xMin, Double_t xMax); // By default computed from the data
00090 
00091    virtual void Draw(const Option_t* option = "");
00092 
00093    Double_t operator()(Double_t x) const;
00094    Double_t operator()(const Double_t* x, const Double_t* p=0) const;  // Needed for creating TF1
00095 
00096    Double_t GetValue(Double_t x) const { return (*this)(x); }
00097    Double_t GetError(Double_t x) const;
00098 
00099    Double_t GetBias(Double_t x) const;
00100    Double_t GetMean() const;
00101    Double_t GetSigma() const;
00102    Double_t GetRAMISE() const;
00103 
00104    Double_t GetFixedWeight() const;
00105 
00106    TF1* GetFunction(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
00107    TF1* GetUpperFunction(Double_t confidenceLevel = 0.95, UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
00108    TF1* GetLowerFunction(Double_t confidenceLevel = 0.95, UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
00109    TF1* GetApproximateBias(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
00110    TGraphErrors * GetGraphWithErrors(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
00111 
00112    // get the drawn object to chanage settings
00113    // These objects are managed by TKDE and should not be deleted by the user
00114    TF1 * GetDrawnFunction() { return fPDF;}  
00115    TF1 * GetDrawnUpperFunction() { return fUpperPDF;}  
00116    TF1 * GetDrawnLowerFunction() { return fLowerPDF;}  
00117    TGraphErrors * GetDrawnGraph() { return fGraph;}  
00118 
00119    const Double_t * GetAdaptiveWeights() const;
00120 
00121 
00122 private:
00123 
00124    TKDE(TKDE& kde);           // Disallowed copy constructor
00125    TKDE operator=(TKDE& kde); // Disallowed assign operator
00126 
00127    typedef ROOT::Math::IBaseFunctionOneDim* KernelFunction_Ptr;
00128    KernelFunction_Ptr fKernelFunction;
00129 
00130    class TKernel;
00131    friend class TKernel;
00132 
00133    TKernel* fKernel;
00134 
00135    std::vector<Double_t> fData;   // Data events
00136    std::vector<Double_t> fEvents; // Original data storage
00137 
00138    TF1* fPDF;             // Output Kernel Density Estimation PDF function
00139    TF1* fUpperPDF;        // Output Kernel Density Estimation upper confidence interval PDF function
00140    TF1* fLowerPDF;        // Output Kernel Density Estimation lower confidence interval PDF function
00141    TF1* fApproximateBias; // Output Kernel Density Estimation approximate bias
00142    TGraphErrors* fGraph;  // Graph with the errors
00143 
00144    EKernelType fKernelType;
00145    EIteration fIteration;
00146    EMirror fMirror;
00147    EBinning fBinning;
00148 
00149    Bool_t fUseMirroring, fMirrorLeft, fMirrorRight, fAsymLeft, fAsymRight;
00150    Bool_t fUseBins;
00151    Bool_t fNewData;        // flag to control when new data are given
00152    Bool_t fUseMinMaxFromData; // flag top control if min and max must be used from data
00153 
00154    UInt_t fNBins;          // Number of bins for binned data option
00155    UInt_t fNEvents;        // Data's number of events
00156    UInt_t fUseBinsNEvents; // If the algorithm is allowed to use binning this is the minimum number of events to do so
00157 
00158    Double_t fMean;  // Data mean
00159    Double_t fSigma; // Data std deviation
00160    Double_t fSigmaRob; // Data std deviation (robust estimation)
00161    Double_t fXMin;  // Data minimum value
00162    Double_t fXMax;  // Data maximum value
00163    Double_t fRho;   // Adjustment factor for sigma
00164    Double_t fAdaptiveBandwidthFactor; // Geometric mean of the kernel density estimation from the data for adaptive iteration
00165 
00166    Double_t fWeightSize; // Caches the weight size
00167 
00168    std::vector<Double_t> fCanonicalBandwidths;
00169    std::vector<Double_t> fKernelSigmas2;
00170 
00171    std::vector<UInt_t> fBinCount; // Number of events per bin for binned data option
00172 
00173    std::vector<Bool_t> fSettedOptions; // User input options flag
00174 
00175    struct KernelIntegrand;
00176    friend struct KernelIntegrand;
00177 
00178    void Instantiate(KernelFunction_Ptr kernfunc, UInt_t events, const Double_t* data,
00179                     Double_t xMin, Double_t xMax, const Option_t* option, Double_t rho);
00180 
00181    inline Double_t GaussianKernel(Double_t x) const {
00182       // Returns the kernel evaluation at x
00183       Double_t k2_PI_ROOT_INV = 0.398942280401432703; // (2 * M_PI)**-0.5
00184       return (x > -9. && x < 9.) ? k2_PI_ROOT_INV * std::exp(-.5 * x * x) : 0.0;
00185    }
00186    inline Double_t EpanechnikovKernel(Double_t x) const {
00187       return (x > -1. &&  x < 1.) ? 3. / 4. * (1. - x * x) : 0.0;
00188    }
00189    inline Double_t BiweightKernel(Double_t x) const {
00190       // Returns the kernel evaluation at x
00191       return (x > -1. &&  x < 1.) ? 15. / 16. * (1. - x * x) * (1. - x * x) : 0.0;
00192    }
00193    inline Double_t CosineArchKernel(Double_t x) const {
00194       // Returns the kernel evaluation at x
00195       return (x > -1. &&  x < 1.) ? M_PI_4 * std::cos(M_PI_2 * x) : 0.0;
00196    }
00197    Double_t UpperConfidenceInterval(const Double_t* x, const Double_t* p) const; // Valid if the bandwidth is small compared to nEvents**1/5
00198    Double_t LowerConfidenceInterval(const Double_t* x, const Double_t* p) const; // Valid if the bandwidth is small compared to nEvents**1/5
00199    Double_t ApproximateBias(const Double_t* x, const Double_t* ) const { return GetBias(*x); }
00200    Double_t ComputeKernelL2Norm() const;
00201    Double_t ComputeKernelSigma2() const;
00202    Double_t ComputeKernelMu() const;
00203    Double_t ComputeKernelIntegral() const;
00204    Double_t ComputeMidspread() ;
00205 
00206    UInt_t Index(Double_t x) const;
00207 
00208    void SetBinCentreData(Double_t xmin, Double_t xmax);
00209    void SetBinCountData();
00210    void CheckKernelValidity();
00211    void SetCanonicalBandwidth();
00212    void SetKernelSigma2();
00213    void SetCanonicalBandwidths();
00214    void SetKernelSigmas2();
00215    void SetHistogram();
00216    void SetUseBins();
00217    void SetMirror();
00218    void SetMean();
00219    void SetSigma(Double_t R);
00220    void SetKernel();
00221    void SetKernelFunction(KernelFunction_Ptr kernfunc = 0);
00222    void SetOptions(const Option_t* option, Double_t rho);
00223    void CheckOptions(Bool_t isUserDefinedKernel = kFALSE);
00224    void GetOptions(std::string optionType, std::string option);
00225    void AssureOptions();
00226    void SetData(const Double_t* data);
00227    void InitFromNewData();
00228    void SetMirroredEvents();
00229    void SetDrawOptions(const Option_t* option, TString& plotOpt, TString& drawOpt);
00230    void DrawErrors(TString& drawOpt);
00231    void DrawConfidenceInterval(TString& drawOpt, double cl=0.95);
00232 
00233    TF1* GetKDEFunction(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
00234    TF1* GetKDEApproximateBias(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
00235    // The density to estimate should be at least twice differentiable.
00236    TF1* GetPDFUpperConfidenceInterval(Double_t confidenceLevel = 0.95, UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
00237    TF1* GetPDFLowerConfidenceInterval(Double_t confidenceLevel = 0.95, UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
00238 
00239    ClassDef(TKDE, 1) // One dimensional semi-parametric Kernel Density Estimation
00240 
00241 };
00242 
00243 #endif