ROOT_528-00b_version: math/mathcore/src/GoFTest.cxx Source File

00001 // @(#)root/mathcore:$Id: GoFTest.cxx 36911 2010-11-24 17:08:43Z moneta $
00002 // Authors: Bartolomeu Rabacal    05/2010 
00003 /**********************************************************************
00004  *                                                                    *
00005  * Copyright (c) 2006 , LCG ROOT MathLib Team                         *
00006  *                                                                    *
00007  *                                                                    *
00008  **********************************************************************/
00009 // implementation file for GoFTest
00010 
00011 
00012 #include <algorithm>
00013 #include <functional>
00014 #include <iostream>
00015 #include <map>
00016 #include <numeric>
00017 #include <string.h>
00018 #include <cassert>
00019 
00020 #include "Math/Error.h"
00021 #include "Math/Integrator.h"
00022 #include "Math/ProbFuncMathCore.h"
00023 
00024 #include "Math/GoFTest.h"
00025 
00026 
00027 /* Note: The references mentioned here are stated in GoFTest.h */
00028 
00029 namespace ROOT {
00030 namespace Math {
00031    
00032    struct CDFWrapper : public IGenFunction { 
00033       // wrapper around a cdf funciton to re-scale for the range
00034       Double_t fXmin; // lower range for x 
00035       Double_t fXmax; // lower range for x 
00036       Double_t fNorm; // normalization
00037       const IGenFunction* fCDF; // cdf pointer (owned by the class) 
00038 
00039       
00040       virtual ~CDFWrapper() { if (fCDF) delete fCDF; }
00041 
00042       CDFWrapper(const IGenFunction& cdf, Double_t xmin=0, Double_t xmax=-1) : 
00043          fCDF(cdf.Clone())
00044       {
00045          if (xmin >= xmax) { 
00046             fNorm = 1; 
00047             fXmin = -std::numeric_limits<double>::infinity();
00048             fXmax = std::numeric_limits<double>::infinity();
00049          }
00050          else {
00051             fNorm = cdf(xmax) - cdf(xmin); 
00052             fXmin = xmin; 
00053             fXmax = xmax; 
00054          }
00055       }
00056 
00057       Double_t DoEval(Double_t x) const {
00058          if (x <= fXmin) return 0; 
00059          if (x >= fXmax) return 1.0; 
00060          return (*fCDF)(x)/fNorm;
00061       }
00062       
00063       IGenFunction* Clone() const {
00064          return new CDFWrapper(*fCDF,fXmin,fXmax);
00065       }
00066    };
00067 
00068 
00069    class PDFIntegral : public IGenFunction { 
00070       Double_t fXmin; // lower range for x 
00071       Double_t fXmax; // lower range for x 
00072       Double_t fNorm; // normalization
00073       mutable IntegratorOneDim fIntegral;
00074       const IGenFunction* fPDF; // pdf pointer (owned by the class) 
00075    public:
00076 
00077       virtual ~PDFIntegral() { if (fPDF) delete fPDF; }
00078 
00079       PDFIntegral(const IGenFunction& pdf, Double_t xmin = 0, Double_t xmax = -1) : 
00080          fXmin(xmin), 
00081          fXmax(xmax),
00082          fNorm(1),
00083          fPDF(pdf.Clone())
00084       {
00085          // compute normalization
00086          fIntegral.SetFunction(*fPDF);  // N.B. must be fPDF (the cloned copy) and not pdf which can disappear
00087          if (fXmin >= fXmax) {  
00088             fXmin = -std::numeric_limits<double>::infinity();
00089             fXmax = std::numeric_limits<double>::infinity();
00090          }
00091          if (fXmin == -std::numeric_limits<double>::infinity() && fXmax == std::numeric_limits<double>::infinity() ) { 
00092             fNorm = fIntegral.Integral();
00093          }
00094          else if (fXmin == -std::numeric_limits<double>::infinity() )
00095             fNorm = fIntegral.IntegralLow(fXmax);
00096          else if (fXmax == std::numeric_limits<double>::infinity() )
00097             fNorm = fIntegral.IntegralUp(fXmin);
00098          else
00099             fNorm = fIntegral.Integral(fXmin, fXmax);
00100       }
00101             
00102       Double_t DoEval(Double_t x) const {
00103          if (x <= fXmin) return 0; 
00104          if (x >= fXmax) return 1.0; 
00105          if (fXmin == -std::numeric_limits<double>::infinity() )
00106             return fIntegral.IntegralLow(x)/fNorm;
00107          else 
00108             return fIntegral.Integral(fXmin,x)/fNorm;
00109       }
00110       
00111       IGenFunction* Clone() const {
00112          return new PDFIntegral(*fPDF, fXmin, fXmax);
00113       }
00114    };
00115 
00116    void GoFTest::SetDistribution(EDistribution dist) {
00117       if (!(kGaussian <= dist && dist <= kExponential)) {
00118          MATH_ERROR_MSG("SetDistribution", "Cannot set distribution type! Distribution type option must be ennabled.");
00119          return;
00120       }
00121       fDist = dist;
00122       SetCDF();
00123    }
00124   
00125    GoFTest::GoFTest( UInt_t sample1Size, const Double_t* sample1, UInt_t sample2Size, const Double_t* sample2 )
00126    : fCDF(std::auto_ptr<IGenFunction>((IGenFunction*)0)), 
00127      fDist(kUndefined), 
00128      fSamples(std::vector<std::vector<Double_t> >(2)), 
00129      fTestSampleFromH0(kFALSE) {
00130       Bool_t badSampleArg = sample1 == 0 || sample1Size == 0;
00131       if (badSampleArg) { 
00132          std::string msg = "'sample1";
00133          msg += !sample1Size ? "Size' cannot be zero" : "' cannot be zero-length";
00134          MATH_ERROR_MSG("GoFTest", msg.c_str());
00135          assert(!badSampleArg);
00136       }
00137       badSampleArg = sample2 == 0 || sample2Size == 0;
00138       if (badSampleArg) { 
00139          std::string msg = "'sample2";         
00140          msg += !sample2Size ? "Size' cannot be zero" : "' cannot be zero-length";
00141          MATH_ERROR_MSG("GoFTest", msg.c_str());
00142          assert(!badSampleArg);
00143       }
00144       std::vector<const Double_t*> samples(2);
00145       std::vector<UInt_t> samplesSizes(2);
00146       samples[0] = sample1;
00147       samples[1] = sample2;
00148       samplesSizes[0] = sample1Size;
00149       samplesSizes[1] = sample2Size;
00150       SetSamples(samples, samplesSizes);
00151       SetParameters();
00152    }
00153 
00154    GoFTest::GoFTest(UInt_t sampleSize, const Double_t* sample, EDistribution dist)   
00155    : fCDF(std::auto_ptr<IGenFunction>((IGenFunction*)0)), 
00156      fDist(dist), 
00157      fSamples(std::vector<std::vector<Double_t> >(1)), 
00158      fTestSampleFromH0(kTRUE) {
00159       Bool_t badSampleArg = sample == 0 || sampleSize == 0;
00160       if (badSampleArg) { 
00161          std::string msg = "'sample";
00162          msg += !sampleSize ? "Size' cannot be zero" : "' cannot be zero-length";
00163          MATH_ERROR_MSG("GoFTest", msg.c_str());
00164          assert(!badSampleArg);
00165       }
00166       std::vector<const Double_t*> samples(1, sample);
00167       std::vector<UInt_t> samplesSizes(1, sampleSize);
00168       SetSamples(samples, samplesSizes);
00169       SetParameters();
00170       SetCDF();
00171    }
00172 
00173    GoFTest::~GoFTest() {}
00174 
00175    void GoFTest::SetSamples(std::vector<const Double_t*> samples, const std::vector<UInt_t> samplesSizes) {
00176       fCombinedSamples.assign(std::accumulate(samplesSizes.begin(), samplesSizes.end(), 0), 0.0);
00177       UInt_t combinedSamplesSize = 0;
00178       for (UInt_t i = 0; i < samples.size(); ++i) {
00179          fSamples[i].assign(samples[i], samples[i] + samplesSizes[i]);
00180          std::sort(fSamples[i].begin(), fSamples[i].end());
00181          for (UInt_t j = 0; j < samplesSizes[i]; ++j) {
00182             fCombinedSamples[combinedSamplesSize + j] = samples[i][j];
00183          }
00184          combinedSamplesSize += samplesSizes[i];
00185       }
00186       std::sort(fCombinedSamples.begin(), fCombinedSamples.end());
00187    
00188       Bool_t degenerateSamples = *(fCombinedSamples.begin()) == *(fCombinedSamples.end() - 1);
00189       if (degenerateSamples) { 
00190          std::string msg = "Degenerate sample";
00191          msg += samplesSizes.size() > 1 ? "s!" : "!";
00192          msg += " Sampling values all identical.";
00193          MATH_ERROR_MSG("SetSamples", msg.c_str());
00194          assert(!degenerateSamples);
00195       }
00196    }
00197 
00198    void GoFTest::SetParameters() {
00199       fMean = std::accumulate(fSamples[0].begin(), fSamples[0].end(), 0.0) / fSamples[0].size();
00200       fSigma = TMath::Sqrt(1. / (fSamples[0].size() - 1) * (std::inner_product(fSamples[0].begin(), fSamples[0].end(),     fSamples[0].begin(), 0.0) - fSamples[0].size() * TMath::Power(fMean, 2)));
00201    }
00202    
00203    void GoFTest::operator()(ETestType test, Double_t& pvalue, Double_t& testStat) const {
00204       switch (test) {
00205          default:
00206          case kAD:
00207             AndersonDarlingTest(pvalue, testStat);
00208             break;
00209          case kAD2s:
00210             AndersonDarling2SamplesTest(pvalue, testStat);
00211             break;
00212          case kKS:
00213             KolmogorovSmirnovTest(pvalue, testStat);
00214             break;
00215          case kKS2s:
00216             KolmogorovSmirnov2SamplesTest(pvalue, testStat);
00217       }
00218    }
00219    
00220    Double_t GoFTest::operator()(ETestType test, const Char_t* option) const {
00221       Double_t result = 0.0;
00222       switch (test) {
00223          default:
00224          case kAD:
00225             result = AndersonDarlingTest(option);
00226             break;
00227          case kAD2s:
00228             result = AndersonDarling2SamplesTest(option);
00229             break;
00230          case kKS:
00231             result = KolmogorovSmirnovTest(option);
00232             break;
00233          case kKS2s:
00234             result = KolmogorovSmirnov2SamplesTest(option);
00235       }
00236       return result;
00237    }
00238 
00239    void GoFTest::SetCDF() { // Setting parameter-free distributions 
00240       IGenFunction* cdf = 0;
00241       switch (fDist) {
00242       case kLogNormal:
00243          LogSample();
00244       case kGaussian :
00245          cdf = new ROOT::Math::WrappedMemFunction<GoFTest, Double_t (GoFTest::*)(Double_t) const>(*this, &GoFTest::GaussianCDF);
00246          break;
00247       case kExponential:
00248          cdf = new ROOT::Math::WrappedMemFunction<GoFTest, Double_t (GoFTest::*)(Double_t) const>(*this, &GoFTest::ExponentialCDF);
00249          break;
00250       case kUserDefined:
00251       case kUndefined:
00252       default:
00253          break;   
00254       }
00255       fCDF = std::auto_ptr<IGenFunction>(cdf);
00256    }
00257    
00258    void GoFTest::SetDistributionFunction(const IGenFunction& f, Bool_t isPDF, Double_t xmin, Double_t xmax) {
00259       if (fDist > kUserDefined) {
00260          MATH_WARN_MSG("SetDistributionFunction","Distribution type is changed to user defined");
00261       }
00262       fDist = kUserDefined; 
00263       // function will be cloned inside the wrapper PDFIntegral of CDFWrapper classes
00264       if (isPDF) 
00265          fCDF = std::auto_ptr<IGenFunction>(new PDFIntegral(f, xmin, xmax) ); 
00266       else 
00267          fCDF = std::auto_ptr<IGenFunction>(new CDFWrapper(f, xmin, xmax) ); 
00268    }
00269 
00270    void GoFTest::Instantiate(const Double_t* sample, UInt_t sampleSize) {
00271       // initialization function for the template constructors
00272       Bool_t badSampleArg = sample == 0 || sampleSize == 0;
00273       if (badSampleArg) { 
00274          std::string msg = "'sample";
00275          msg += !sampleSize ? "Size' cannot be zero" : "' cannot be zero-length";
00276          MATH_ERROR_MSG("GoFTest", msg.c_str());
00277          assert(!badSampleArg);
00278       }
00279       fCDF = std::auto_ptr<IGenFunction>((IGenFunction*)0);
00280       fDist = kUserDefined;
00281       fMean = 0;
00282       fSigma = 0; 
00283       fSamples = std::vector<std::vector<Double_t> >(1);
00284       fTestSampleFromH0 = kTRUE;
00285       SetSamples(std::vector<const Double_t*>(1, sample), std::vector<UInt_t>(1, sampleSize));
00286    }
00287    
00288    Double_t GoFTest::GaussianCDF(Double_t x) const {
00289       return ROOT::Math::normal_cdf(x, fSigma, fMean);
00290    }
00291 
00292    Double_t GoFTest::ExponentialCDF(Double_t x) const {
00293       return ROOT::Math::exponential_cdf(x, 1.0 / fMean);
00294    }
00295 
00296    void GoFTest::LogSample() {
00297       transform(fSamples[0].begin(), fSamples[0].end(), fSamples[0].begin(), std::ptr_fun<Double_t, Double_t>(TMath::Log));
00298       SetParameters();
00299    }
00300 
00301 /*
00302   Taken from (1)
00303 */ Double_t GoFTest::GetSigmaN(UInt_t N) const {
00304       Double_t sigmaN = 0.0, h = 0.0, H = 0.0, g = 0.0, a, b, c, d, k = fSamples.size();
00305       for (UInt_t i = 0; i < k; ++i) {
00306          H += 1.0 / fSamples[i].size();
00307       }
00308       for (UInt_t i = 1; i <= N - 1; ++i) {
00309          h += 1.0 / i;
00310       }
00311       for (UInt_t i = 1; i <= N - 2; ++i) {
00312          for (UInt_t j = i + 1; j <= N - 1; ++j) {
00313             g += 1.0 / ((N - i) * j);
00314          }
00315       }
00316       a = (4 * g - 6) * k + (10 - 6 * g) * H - 4 * g + 6;
00317       b = (2 * g - 4) * TMath::Power(k, 2) + 8 * h * k + (2 * g - 14 * h - 4) * H - 8 * h + 4 * g - 6;
00318       c = (6 * h + 2 * g - 2) * TMath::Power(k, 2) + (4 * h - 4 *g + 6) * k + (2 * h - 6) * H + 4 * h;
00319       d = (2 * h + 6) * TMath::Power(k, 2) - 4 * h * k;
00320       sigmaN +=  a * TMath::Power(N, 3) + b * TMath::Power(N, 2) + c * N + d;
00321       sigmaN /= (N - 1) * (N - 2) * (N - 3);
00322       sigmaN = TMath::Sqrt(sigmaN);
00323       return sigmaN;
00324    }
00325 
00326    Double_t GoFTest::InterpolatePValues(Double_t dA2, Int_t bin) const {
00327       static const Double_t pvalue[450] = { // The p-value table for the 2-sample Anderson-Darling Anderson-Darling test statistic's asymtotic distribution
00328          1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 0.9999, 0.9996, 0.9987, 0.9968, 0.9936,
00329          0.9900, 0.9836, 0.9747, 0.9638, 0.9505, 0.9363, 0.9182, 0.9003, 0.8802, 0.8608,
00330          0.8437, 0.8251, 0.8033, 0.7839, 0.7643, 0.7452, 0.7273, 0.7081, 0.6900, 0.6704,
00331          0.6544, 0.6370, 0.6196, 0.6021, 0.5845, 0.5677, 0.5518, 0.5357, 0.5219, 0.5083,
00332          0.4917, 0.4779, 0.4632, 0.4507, 0.4405, 0.4263, 0.4131, 0.4010, 0.3892, 0.3780,
00333          0.3680, 0.3563, 0.3467, 0.3359, 0.3277, 0.3191, 0.3096, 0.3019, 0.2937, 0.2858,
00334          0.2793, 0.2720, 0.2642, 0.2564, 0.2490, 0.2415, 0.2348, 0.2288, 0.2219, 0.2176,
00335          0.2126, 0.2068, 0.2020, 0.1963, 0.1907, 0.1856, 0.1803, 0.1752, 0.1708, 0.1659,
00336          0.1615, 0.1569, 0.1515, 0.1463, 0.1431, 0.1405, 0.1377, 0.1338, 0.1303, 0.1278,
00337          0.1250, 0.1218, 0.1190, 0.1161, 0.1138, 0.1112, 0.1082, 0.1055, 0.1033, 0.1007,
00338          0.0981, 0.0951, 0.0930, 0.0907, 0.0888, 0.0865, 0.0844, 0.0829, 0.0812, 0.0795,
00339          0.0774, 0.0754, 0.0739, 0.0723, 0.0709, 0.0692, 0.0673, 0.0658, 0.0645, 0.0626,
00340          0.0614, 0.0601, 0.0591, 0.0578, 0.0566, 0.0551, 0.0541, 0.0531, 0.0516, 0.0506,
00341          0.0494, 0.0486, 0.0473, 0.0457, 0.0448, 0.0439, 0.0428, 0.0417, 0.0408, 0.0392,
00342          0.0385, 0.0376, 0.0364, 0.0352, 0.0347, 0.0338, 0.0334, 0.0330, 0.0324, 0.0318,
00343          0.0310, 0.0304, 0.0300, 0.0293, 0.0290, 0.0285, 0.0280, 0.0273, 0.0268, 0.0259,
00344          0.0256, 0.0250, 0.0242, 0.0236, 0.0225, 0.0224, 0.0219, 0.0214, 0.0211, 0.0203,
00345          0.0196, 0.0193, 0.0188, 0.0182, 0.0179, 0.0175, 0.0172, 0.0168, 0.0164, 0.0160,
00346          0.0154, 0.0151, 0.0144, 0.0140, 0.0137, 0.0137, 0.0133, 0.0130, 0.0128, 0.0126,
00347          0.0126, 0.0125, 0.0123, 0.0122, 0.0120, 0.0120, 0.0117, 0.0114, 0.0112, 0.0111,
00348          0.0109, 0.0107, 0.0106, 0.0106, 0.0105, 0.0105, 0.0102, 0.0100, 0.0097, 0.0096,
00349          0.0095, 0.0092, 0.0089, 0.0087, 0.0084, 0.0082, 0.0079, 0.0078, 0.0077, 0.0074,
00350          0.0073, 0.0073, 0.0069, 0.0069, 0.0067, 0.0066, 0.0065, 0.0064, 0.0063, 0.0062,
00351          0.0060, 0.0060, 0.0057, 0.0057, 0.0056, 0.0054, 0.0054, 0.0053, 0.0052, 0.0050,
00352          0.0050, 0.0049, 0.0048, 0.0046, 0.0042, 0.0041, 0.0039, 0.0038, 0.0036, 0.0036,
00353          0.0036, 0.0036, 0.0033, 0.0032, 0.0031, 0.0030, 0.0029, 0.0028, 0.0027, 0.0027,
00354          0.0027, 0.0027, 0.0027, 0.0027, 0.0027, 0.0027, 0.0027, 0.0027, 0.0027, 0.0027,
00355          0.0027, 0.0026, 0.0026, 0.0025, 0.0025, 0.0024, 0.0024, 0.0024, 0.0023, 0.0023,
00356          0.0022, 0.0022, 0.0022, 0.0022, 0.0022, 0.0022, 0.0022, 0.0021, 0.0021, 0.0021,
00357          0.0021, 0.0017, 0.0016, 0.0015, 0.0015, 0.0015, 0.0015, 0.0015, 0.0015, 0.0015,
00358          0.0014, 0.0014, 0.0014, 0.0014, 0.0014, 0.0013, 0.0011, 0.0011, 0.0011, 0.0011,
00359          0.0011, 0.0011, 0.0011, 0.0010, 0.0010, 0.0010, 0.0010, 0.0010, 0.0009, 0.0009,
00360          0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009,
00361          0.0009, 0.0008, 0.0008, 0.0007, 0.0007, 0.0007, 0.0007, 0.0007, 0.0007, 0.0006,
00362          0.0006, 0.0006, 0.0006, 0.0006, 0.0006, 0.0006, 0.0006, 0.0005, 0.0004, 0.0004,
00363          0.0004, 0.0004, 0.0004, 0.0004, 0.0003, 0.0003, 0.0003, 0.0002, 0.0002, 0.0002,
00364          0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002,
00365          0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002,
00366          0.0002, 0.0002, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001,
00367          0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001,
00368          0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001,
00369          0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001,
00370          0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001,
00371          0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001,
00372          0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0000
00373       };
00374       Double_t pvl, pvr;
00375       if (dA2 >= 0.0) {
00376          pvl = pvalue[bin];
00377          pvr = pvalue[bin - 1];
00378       } else {
00379          dA2 *= -1;
00380          pvl = pvalue[bin + 1];
00381          pvr = pvalue[bin];
00382       }
00383       return pvl + dA2 * (pvr - pvl);
00384    }
00385 
00386    Double_t GoFTest::PValueAD2Samples(Double_t& A2, UInt_t N) const {
00387       Double_t pvalue, dA2, W2 = A2, sigmaN = GetSigmaN(N);
00388       A2 -= fSamples.size() - 1;
00389       A2 /= sigmaN; // standartized test statistic
00390       if (W2 >= 8.0)
00391          return 0.0;
00392       else if (W2 <= 0.0)
00393          return 1.0;
00394       if (A2 <= 0.0) A2 = W2;
00395       Int_t bin = Int_t(50 * A2);
00396       dA2 = Double_t(bin) / 50 + 0.01 - A2; // Difference between the bin center and A2 
00397       pvalue = InterpolatePValues(dA2, bin);
00398       return pvalue;
00399    }
00400 
00401 /*
00402   Taken from (2)
00403 */ Double_t GoFTest::PValueAD1Sample(Double_t A2) const {
00404       Double_t pvalue = 0.0;
00405       if (A2 <= 0.0) {
00406          return pvalue;
00407       } else if (A2 < 2.) {
00408          pvalue = std::pow(A2, -0.5) * std::exp(-1.2337141 / A2) * (2.00012 + (0.247105 - (0.0649821 - (0.0347962 - (0.011672 - 0.00168691 * A2) * A2) * A2) * A2) * A2);
00409       } else {
00410          pvalue = std::exp(-1. * std::exp(1.0776 - (2.30695 - (0.43424 - (.082433 - (0.008056 - 0.0003146 * A2) * A2) * A2) * A2) * A2));
00411       }   
00412       return 1. - pvalue;
00413    }
00414 
00415 /*
00416   Taken from (1) -- Named for 2 samples but implemented for K. Restricted to K = 2 by the class's constructors 
00417 */ void GoFTest::AndersonDarling2SamplesTest(Double_t& pvalue, Double_t& testStat) const {
00418       pvalue = -1;
00419       testStat = -1;
00420       if (fTestSampleFromH0) {
00421          MATH_ERROR_MSG("AndersonDarling2SamplesTest", "Only 1-sample tests can be issued with a 1-sample constructed GoFTest object!");
00422          return;
00423       }
00424       std::vector<Double_t> z(fCombinedSamples); 
00425       std::vector<Double_t>::iterator endUnique = std::unique(z.begin(), z.end()); //z_j's in (1)
00426       std::vector<UInt_t> h; // h_j's in (1)
00427       std::vector<Double_t> H; // H_j's in (1)
00428       UInt_t N = fCombinedSamples.size();
00429       for (std::vector<Double_t>::iterator data = z.begin(); data != endUnique; ++data) {
00430          UInt_t n = std::count(fCombinedSamples.begin(), fCombinedSamples.end(), *data);
00431          h.push_back(n);
00432          H.push_back(std::count_if(fCombinedSamples.begin(), fCombinedSamples.end(), bind2nd(std::less<Double_t>(), *data)) + n / 2.);
00433       }
00434       std::vector<std::vector<Double_t> > F(fSamples.size()); // F_ij's in (1)
00435       for (UInt_t i = 0; i < fSamples.size(); ++i) {
00436          for (std::vector<Double_t>::iterator data = z.begin(); data != endUnique; ++data) {
00437             UInt_t n = std::count(fSamples[i].begin(), fSamples[i].end(), *data);
00438             F[i].push_back(std::count_if(fSamples[i].begin(), fSamples[i].end(), bind2nd(std::less<Double_t>(), *data)) + n / 2.);
00439          }
00440       }
00441       Double_t A2 = 0.0; // Anderson-Darling A^2 Test Statistic
00442       for (UInt_t i = 0; i < fSamples.size(); ++i) {
00443          Double_t sum_result = 0.0;
00444          UInt_t j = 0;
00445          for (std::vector<Double_t>::iterator data = z.begin(); data != endUnique; ++data) {
00446             sum_result += h[j] *  TMath::Power(N * F[i][j]- fSamples[i].size() * H[j], 2) / (H[j] * (N - H[j]) - N * h[j] / 4.0); 
00447             ++j;
00448          }
00449          A2 += 1.0 / fSamples[i].size() * sum_result;
00450       }
00451       A2 *= (N - 1) / (TMath::Power(N, 2)); // A2_akN in (1)
00452       pvalue = PValueAD2Samples(A2, N); // standartized A2
00453       testStat = A2;
00454    }
00455    
00456    Double_t GoFTest::AndersonDarling2SamplesTest(const Char_t* option) const {
00457       Double_t pvalue, testStat;
00458       AndersonDarling2SamplesTest(pvalue, testStat);
00459       return (strncmp(option, "p", 1) == 0 || strncmp(option, "t", 1) != 0) ? pvalue : testStat;   
00460    }
00461    
00462 /*
00463   Taken from (3)
00464 */ void GoFTest::AndersonDarlingTest(Double_t& pvalue, Double_t& testStat) const {
00465       pvalue = -1;
00466       testStat = -1;
00467       if (!fTestSampleFromH0) {
00468          MATH_ERROR_MSG("AndersonDarlingTest", "Only 2-sample tests can be issued with a 2-sample constructed GoFTest object!");
00469          return;
00470       } 
00471       if (fDist == kUndefined) {
00472          MATH_ERROR_MSG("AndersonDarlingTest", "Distribution type is undefined! Please use SetDistribution(GoFTest::EDistribution).");
00473          return;
00474       }
00475       Double_t A2 = 0.0;
00476       Int_t n = fSamples[0].size();
00477       for (Int_t i = 0; i < n ; ++i) {
00478          Double_t x1 = fSamples[0][i];
00479          Double_t w1 = (*fCDF)(x1);
00480          Double_t result = (2 * (i + 1) - 1) * TMath::Log(w1) + (2 * (n - (i + 1)) + 1) * TMath::Log(1 - w1);
00481          A2 += result; 
00482       }
00483       (A2 /= -n) -= n;
00484       if (A2 != A2) {
00485          MATH_ERROR_MSG("AndersonDarlingTest", "Cannot compute p-value: data below or above the distribution's thresholds. Check sample consistency.");
00486          return;
00487       }
00488       pvalue = PValueAD1Sample(A2);
00489       testStat = A2;
00490    }
00491    
00492    Double_t GoFTest::AndersonDarlingTest(const Char_t* option) const {
00493       Double_t pvalue, testStat;
00494       AndersonDarlingTest(pvalue, testStat);
00495       return (strncmp(option, "p", 1) == 0 || strncmp(option, "t", 1) != 0) ? pvalue : testStat;
00496    }
00497 
00498    void GoFTest::KolmogorovSmirnov2SamplesTest(Double_t& pvalue, Double_t& testStat) const {
00499       pvalue = -1;
00500       testStat = -1;
00501       if (fTestSampleFromH0) {
00502          MATH_ERROR_MSG("KolmogorovSmirnov2SamplesTest", "Only 1-sample tests can be issued with a 1-sample constructed GoFTest object!");
00503          return;
00504       }
00505       const UInt_t na = fSamples[0].size();
00506       const UInt_t nb = fSamples[1].size();
00507       Double_t* a = new Double_t[na];
00508       Double_t* b = new Double_t[nb]; 
00509       std::copy(fSamples[0].begin(), fSamples[0].end(), a);
00510       std::copy(fSamples[1].begin(), fSamples[1].end(), b);
00511       pvalue = TMath::KolmogorovTest(na, a, nb, b, 0);
00512       testStat = TMath::KolmogorovTest(na, a, nb, b, "M");
00513    }
00514    
00515    Double_t GoFTest::KolmogorovSmirnov2SamplesTest(const Char_t* option) const {
00516       Double_t pvalue, testStat;
00517       KolmogorovSmirnov2SamplesTest(pvalue, testStat);
00518       return (strncmp(option, "p", 1) == 0 || strncmp(option, "t", 1) != 0) ? pvalue : testStat;    
00519    }
00520 
00521 /* 
00522    Algorithm taken from (3) in page 737
00523 */ void GoFTest::KolmogorovSmirnovTest(Double_t& pvalue, Double_t& testStat) const {
00524       pvalue = -1;
00525       testStat = -1;
00526       if (!fTestSampleFromH0) {
00527          MATH_ERROR_MSG("KolmogorovSmirnovTest", "Only 2-sample tests can be issued with a 2-sample constructed GoFTest object!");
00528          return;
00529       }
00530       if (fDist == kUndefined) {
00531          MATH_ERROR_MSG("KolmogorovSmirnovTest", "Distribution type is undefined! Please use SetDistribution(GoFTest::EDistribution).");
00532          return;
00533       }
00534       Double_t Fo = 0.0, Dn = 0.0;
00535       UInt_t n = fSamples[0].size();
00536       for (UInt_t i = 0; i < n; ++i) {
00537          Double_t Fn = (i + 1.0) / n;
00538          Double_t F = (*fCDF)(fSamples[0][i]);
00539          Double_t result = std::max(TMath::Abs(Fn - F), TMath::Abs(Fo - Fn));
00540          if (result > Dn) Dn = result;
00541          Fo = Fn;
00542       }
00543       pvalue = TMath::KolmogorovProb(Dn * (TMath::Sqrt(n) + 0.12 + 0.11 / TMath::Sqrt(n)));
00544       testStat = Dn;
00545    }
00546    
00547    Double_t GoFTest::KolmogorovSmirnovTest(const Char_t* option) const {
00548       Double_t pvalue, testStat;
00549       KolmogorovSmirnovTest(pvalue, testStat);
00550       return (strncmp(option, "p", 1) == 0 || strncmp(option, "t", 1) != 0) ? pvalue : testStat;
00551    }
00552 
00553 } // ROOT namespace
00554 } // Math namespace
00555