GoFTest.h

Go to the documentation of this file.
00001 // @(#)root/mathcore:$Id: GoFTest.h 36911 2010-11-24 17:08:43Z moneta $
00002 // Authors: Bartolomeu Rabacal    05/2010 
00003 /**********************************************************************
00004  *                                                                    *
00005  * Copyright (c) 2006 , LCG ROOT MathLib Team                         *
00006  *                                                                    *
00007  *                                                                    *
00008  **********************************************************************/
00009 // Header file for GoFTest
00010 
00011 #include <memory>
00012 
00013 #ifndef ROOT_Math_GoFTest
00014 #define ROOT_Math_GoFTest
00015 
00016 #ifndef ROOT_Math_WrappedFunction
00017 #include "Math/WrappedFunction.h"
00018 #endif
00019 #ifndef ROOT_TMath
00020 #include "TMath.h"
00021 #endif
00022 
00023 /*
00024   Goodness of Fit Statistical Tests Toolkit -- Anderson-Darling and Kolmogorov-Smirnov 1- and 2-Samples Tests
00025 */
00026                    
00027 namespace ROOT {
00028 namespace Math {
00029 // Goodness-of-fit test class: Anderson-Darling and Kolmogorov-Smirnov tests in 1-sample and 2-sample variants
00030 class GoFTest {
00031 public:
00032 
00033    enum EDistribution { // H0 (null-hypothesis) distributions, for use with 1-sample tests only
00034       kUndefined,       // Default value for the non-templated 1-sample constructor. Set a real one with SetDistribution
00035       kUserDefined,     // For internal use only within the class's template constructor
00036       kGaussian,        // Gaussian distribution
00037       kLogNormal,       // Log-normal distribution
00038       kExponential      // Exponential distribution
00039    };
00040    
00041    enum EUserDistribution { // Kind of function the user supplies as input distribution
00042       kCDF,                 // Cumulative distribution function
00043       kPDF                  // Probability density function. Default value
00044    };
00045    
00046    enum ETestType { // Goodness of Fit test types, used with the class's function-call operators as a shorthand for the built-in test methods
00047       kAD,   // Anderson-Darling Test. Default value
00048       kAD2s, // Anderson-Darling 2-Samples Test
00049       kKS,   // Kolmogorov-Smirnov Test
00050       kKS2s  // Kolmogorov-Smirnov 2-Samples Test
00051    };
00052    
00053    /* Constructor for 2-sample tests only: each sample is given as a (size, pointer to data) pair */
00054    GoFTest(UInt_t sample1Size, const Double_t* sample1, UInt_t sample2Size, const Double_t* sample2);
00055   
00056    /* Constructor for 1-sample tests only, testing against one of the predefined distributions (see EDistribution) */
00057    GoFTest(UInt_t sampleSize, const Double_t* sample, EDistribution dist = kUndefined);
00058   
00059    /* Templated constructor for 1-sample tests only, testing against a user specified distribution given as any callable type Dist */  
00060    template<class Dist>
00061    GoFTest(UInt_t sampleSize, const Double_t* sample, Dist& dist, EUserDistribution userDist = kPDF,
00062            Double_t xmin = 1, Double_t xmax = 0) // NOTE(review): defaults have xmin > xmax; presumably this means "no range given" - confirm in the implementation
00063    {
00064       Instantiate(sample, sampleSize);
00065       SetUserDistribution<Dist>(dist, userDist, xmin, xmax);
00066    }
00067 
00068    /* Non-template overload of the constructor above, taking the distribution through the IGenFunction interface */
00069    GoFTest(UInt_t sampleSize, const Double_t* sample, const IGenFunction& dist, EUserDistribution userDist = kPDF, 
00070            Double_t xmin = 1, Double_t xmax = 0) // same inverted default range as the templated constructor
00071    {
00072       Instantiate(sample, sampleSize);
00073       SetUserDistribution(dist, userDist, xmin, xmax);
00074    }
00075 
00076    /* Sets the user input distribution function for 1-sample tests: wraps dist and forwards it to SetDistributionFunction */
00077    template<class Dist>
00078    void SetUserDistribution(Dist& dist, EUserDistribution userDist = kPDF, Double_t xmin = 1, Double_t xmax = 0) {
00079       WrappedFunction<Dist&> wdist(dist); // adapts the callable so it can be passed where an IGenFunction is expected
00080       SetDistributionFunction(wdist, userDist, xmin, xmax);
00081    }
00082 
00083    /* Non-template overload to set the user input distribution for 1-sample tests; forwards directly, no wrapping needed */
00084    void SetUserDistribution(const IGenFunction& dist, GoFTest::EUserDistribution userDist = kPDF, Double_t xmin = 1, Double_t xmax = 0) {
00085       SetDistributionFunction(dist, userDist, xmin, xmax); 
00086    }
00087    
00088    /* Sets the user input distribution as a probability density function for 1-sample tests (SetUserDistribution with kPDF) */
00089    template<class Dist>
00090    void SetUserPDF(Dist& pdf, Double_t xmin = 1, Double_t xmax = 0) {
00091       SetUserDistribution<Dist>(pdf, kPDF, xmin, xmax);
00092    }
00093 
00094    /* Non-template overload to set the user input distribution as a probability density function for 1-sample tests */
00095    void SetUserPDF(const IGenFunction& pdf, Double_t xmin = 1, Double_t xmax = 0) {
00096       SetUserDistribution(pdf, kPDF, xmin, xmax);
00097    }
00098 
00099    /* Sets the user input distribution as a cumulative distribution function for 1-sample tests (SetUserDistribution with kCDF)
00100       The CDF must return zero -- NOTE(review): this sentence is truncated in the original; presumably "zero at xmin (and one at xmax)" - confirm
00101     */
00102    template<class Dist>
00103    void SetUserCDF(Dist& cdf, Double_t xmin = 1, Double_t xmax = 0) {
00104       SetUserDistribution<Dist>(cdf, kCDF, xmin, xmax);
00105    }
00106 
00107    /* Non-template overload to set the user input distribution as a cumulative distribution function for 1-sample tests */
00108    void SetUserCDF(const IGenFunction& cdf, Double_t xmin = 1, Double_t xmax = 0)  {
00109       SetUserDistribution(cdf, kCDF, xmin, xmax);
00110    }
00111 
00112    
00113    /* Sets the distribution for the predefined distribution types (see EDistribution) */
00114    void SetDistribution(EDistribution dist);
00115 
00116    
00117    virtual ~GoFTest();
00118 
00119 /*
00120   The Anderson-Darling K-Sample Test algorithm is described and taken from 
00121   http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/andeksam.htm
00122   and described and taken from (1)
00123   Scholz F.W., Stephens M.A. (1987), K-sample Anderson-Darling Tests, Journal of the American Statistical Association, 82, 918–924. (2-samples variant implemented)
00124 */ void AndersonDarling2SamplesTest(Double_t& pvalue, Double_t& testStat) const; // Results are passed back through the two reference arguments
00125    Double_t AndersonDarling2SamplesTest(const Char_t* option = "p") const; // Returns the p-value by default; option "t" returns the test statistic value "A2"
00126 
00127 /*
00128   The Anderson-Darling 1-Sample Test algorithm for a specific distribution is described at 
00129   http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/andedarl.htm
00130   and described and taken from (2)
00131   Marsaglia J.C.W., Marsaglia G. (2004), Evaluating the Anderson-Darling Distribution, Journal of Statistical Software, Volume 09, Issue i02.
00132   and described and taken from (3)
00133   Lewis P.A.W. (1961), The Annals of Mathematical Statistics, Distribution of the Anderson-Darling Statistic, Volume 32, Number 4, 1118-1124. 
00134 */ void AndersonDarlingTest(Double_t& pvalue, Double_t& testStat) const; // Results are passed back through the two reference arguments
00135    Double_t AndersonDarlingTest(const Char_t* option = "p") const; // Returns the p-value by default; option "t" returns the test statistic value "A2"
00136 
00137 /*
00138   The Kolmogorov-Smirnov 2-Samples Test algorithm is described at
00139   http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/ks2samp.htm
00140   and described and taken from
00141   http://root.cern.ch/root/html/TMath.html#TMath:KolmogorovTest
00142 */ void KolmogorovSmirnov2SamplesTest(Double_t& pvalue, Double_t& testStat) const; // Results are passed back through the two reference arguments
00143    Double_t KolmogorovSmirnov2SamplesTest(const Char_t* option = "p") const; // Returns the p-value by default; option "t" returns the test statistic value "Dn"
00144 
00145 /*
00146   The Kolmogorov-Smirnov 1-Sample Test algorithm for a specific distribution is described at
00147   http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/kstest.htm
00148   and described and taken from (4)
00149   Press W. H., Teukolsky S.A., Vetterling W.T., Flannery B.P. (2007), Numerical Recipes - The Art of Scientific Computing (Third Edition), Cambridge University Press
00150 */ void KolmogorovSmirnovTest(Double_t& pvalue, Double_t& testStat) const; // Results are passed back through the two reference arguments
00151    Double_t KolmogorovSmirnovTest(const Char_t* option = "p") const; // Returns the p-value by default; option "t" returns the test statistic value "Dn"
00152 
00153    // The class's function-call operators: select one of the tests above through an ETestType value
00154    void operator()(ETestType test, Double_t& pvalue, Double_t& testStat) const;
00155    Double_t operator()(ETestType test = kAD, const Char_t* option = "p") const; // Defaults to the Anderson-Darling 1-Sample Test and to returning the p-value; option "t" returns the test statistic value specific to the test type
00156 
00157 private:
00158   
00159    GoFTest();                       // Disallowed default constructor
00160    GoFTest(GoFTest& gof);           // Disallowed copy constructor. NOTE(review): takes a non-const reference
00161    GoFTest operator=(GoFTest& gof); // Disallowed assignment operator. NOTE(review): returns by value and takes a non-const reference
00162 
00163    std::auto_ptr<IGenFunction> fCDF; // NOTE(review): std::auto_ptr is deprecated in C++11 and removed in C++17
00164 
00165   
00166    EDistribution fDist; // H0 distribution type (see EDistribution)
00167   
00168    Double_t fMean;  // presumably the estimated mean set by SetParameters - confirm
00169    Double_t fSigma; // presumably the estimated standard deviation set by SetParameters - confirm
00170 
00171    std::vector<Double_t> fCombinedSamples; // NOTE(review): std::vector is used here but <vector> is not included directly by this header
00172   
00173    std::vector<std::vector<Double_t> > fSamples;
00174   
00175    Bool_t fTestSampleFromH0;
00176    
00177    void SetCDF();
00178    void SetDistributionFunction(const IGenFunction& cdf, Bool_t isPDF, Double_t xmin, Double_t xmax); // Callers in this header pass an EUserDistribution as isPDF; it converts implicitly (kCDF = 0 -> false, kPDF = 1 -> true)
00179   
00180    void Instantiate(const Double_t* sample, UInt_t sampleSize); // Called first by both inline 1-sample constructors
00181     
00182   
00183    Double_t LogNormalCDF(Double_t x) const;
00184    Double_t GaussianCDF(Double_t x) const;
00185    Double_t ExponentialCDF(Double_t x) const;
00186   
00187    Double_t GetSigmaN(UInt_t N) const; // Computation of sigma_N as described in (1) 
00188   
00189    Double_t InterpolatePValues(Double_t dA2, Int_t bin) const; // Linear interpolation used in GoFTest::PValueAD2Samples
00190   
00191    Double_t PValueAD2Samples(Double_t& A2, UInt_t N) const; // Computation of the 2-Sample Anderson-Darling Test's p-value as described in (1)
00192   
00193    Double_t PValueAD1Sample(Double_t A2) const; // Computation of the 1-Sample Anderson-Darling Test's p-value 
00194     
00195    void LogSample(); // Applies the logarithm to the sample when the specified distribution to test is LogNormal
00196     
00197    void SetSamples(std::vector<const Double_t*> samples, const std::vector<UInt_t> samplesSizes); // NOTE(review): both vectors are taken by value, so each call copies them
00198   
00199    void SetParameters(); // Sets the estimated mean and standard-deviation from the samples 
00200 }; // end GoFTest class
00201 
00202 
00203 } // Math namespace
00204 } // ROOT namespace
00205 #endif

Generated on Tue Jul 5 14:25:25 2011 for ROOT_528-00b_version by  doxygen 1.5.1