ROOT_528-00b_version: tutorials/math/goftest.C Source File

00001 // ------------------------------------------------------------------------
00002 //
00003 // GoFTest tutorial macro
00004 // 
00005 // Using Anderson-Darling and Kolmogorov-Smirnov goodness of fit tests
00006 // 1 sample test is performed comparing data with a log-normal distribution
00007 // and a 2 sample test is done comparing two gaussian data sets. 
00008 //
00009 //
00010 // Author:   Bartolomeu Rabacal    6/2010 
00011 // 
00012 // ------------------------------------------------------------------------
00013 
00014 #include <cassert>
00015 #include "TCanvas.h"
00016 #include "TPaveText.h"
00017 #include "TH1.h"
00018 #include "TF1.h"
00019 #include "Math/GoFTest.h"
00020 #include "Math/Functor.h"
00021 #include "TRandom3.h"
00022 #include "Math/DistFunc.h"
00023 
00024 void goftest() {
00025 
00026    // ------------------------------------------------------------------------
00027    // C a s e  1 :  C r e a t e   l o g N o r m a l  r a n d o m  s a m p l e
00028    // ------------------------------------------------------------------------
00029    
00030    UInt_t nEvents1 = 1000;
00031 
00032    //ROOT::Math::Random<ROOT::Math::GSLRngMT> r;
00033    TF1 * f1 = new TF1("logNormal","ROOT::Math::lognormal_pdf(x,[0],[1])",0,500);
00034    // set the lognormal parameters (m and s) 
00035    f1->SetParameters(5.0,2.0);
00036    f1->SetNpx(1000);
00037       
00038 
00039    Double_t* sample1 = new Double_t[nEvents1];
00040 
00041    TH1D* h1smp = new TH1D("h1smp", "LogNormal distribution histogram", 100, 0, 500);
00042    h1smp->SetStats(kFALSE);
00043    
00044    for (UInt_t i = 0; i < nEvents1; ++i) { 
00045       Double_t data = f1->GetRandom();
00046       sample1[i] = data;
00047       h1smp->Fill(data);
00048    }
00049    // normalize correctly the histogram
00050    h1smp->Scale( ROOT::Math::lognormal_cdf(500.,5.,2) / nEvents1, "width");
00051 
00052    TCanvas* c = new TCanvas("c","1-Sample and 2-Samples GoF Tests");
00053    c->Divide(1, 2);
00054    c->cd(1);
00055    h1smp->Draw();
00056    h1smp->SetLineColor(kBlue);
00057    f1->SetNpx(100); // use same points as histo for drawing
00058    f1->SetLineColor(kRed);
00059    f1->Draw("SAME");
00060       
00061    // -----------------------------------------
00062    // C r e a t e   G o F T e s t  o b j e c t 
00063    // -----------------------------------------
00064    
00065    ROOT::Math::GoFTest* goftest_1 = new ROOT::Math::GoFTest(nEvents1, sample1, ROOT::Math::GoFTest::kLogNormal);
00066       
00067    /* Possible calls for the Anderson - DarlingTest test */
00068    /*----------------------------------------------------*/
00069    
00070    /* a) Returning the Anderson-Darling standardized test statistic */
00071    Double_t A2_1 = goftest_1-> AndersonDarlingTest("t"); 
00072    Double_t A2_2 = (*goftest_1)(ROOT::Math::GoFTest::kAD, "t");
00073    assert(A2_1 == A2_2);
00074   
00075    /* b) Returning the p-value for the Anderson-Darling test statistic */
00076    Double_t pvalueAD_1 = goftest_1-> AndersonDarlingTest(); // p-value is the default choice
00077    Double_t pvalueAD_2 = (*goftest_1)(); // p-value and Anderson - Darling Test are the default choices
00078    assert(pvalueAD_1 == pvalueAD_2);
00079    
00080    /* Rebuild the test using the default 1-sample construtor */
00081    delete goftest_1;
00082    goftest_1 = new ROOT::Math::GoFTest(nEvents1, sample1 ); // User must then input a distribution type option
00083    goftest_1->SetDistribution(ROOT::Math::GoFTest::kLogNormal);
00084    
00085    
00086    /* Possible calls for the Kolmogorov - Smirnov test */
00087    /*--------------------------------------------------*/              
00088        
00089    /* a) Returning the Kolmogorov-Smirnov standardized test statistic */
00090    Double_t Dn_1 = goftest_1-> KolmogorovSmirnovTest("t");
00091    Double_t Dn_2 = (*goftest_1)(ROOT::Math::GoFTest::kKS, "t");
00092    assert(Dn_1 == Dn_2);
00093    
00094    /* b) Returning the p-value for the Kolmogorov-Smirnov test statistic */
00095    Double_t pvalueKS_1 = goftest_1-> KolmogorovSmirnovTest();
00096    Double_t pvalueKS_2 = (*goftest_1)(ROOT::Math::GoFTest::kKS);
00097    assert(pvalueKS_1 == pvalueKS_2);
00098    
00099    /* Valid but incorrect calls for the 2-samples methods of the 1-samples constucted goftest_1 */
00100 #ifdef TEST_ERROR_MESSAGE
00101     Double_t A2 = (*goftest_1)(ROOT::Math::GoFTest::kAD2s, "t");     // Issues error message
00102     Double_t pvalueKS = (*goftest_1)(ROOT::Math::GoFTest::kKS2s);    // Issues error message
00103     assert(A2 == pvalueKS);
00104 #endif
00105   
00106    TPaveText* pt1 = new TPaveText(0.58, 0.6, 0.88, 0.80, "brNDC");
00107    Char_t str1[50];
00108    sprintf(str1, "p-value for A-D 1-smp test: %f", pvalueAD_1);
00109    pt1->AddText(str1);
00110    pt1->SetFillColor(18);
00111    pt1->SetTextFont(20);
00112    pt1->SetTextColor(4);
00113    Char_t str2[50];
00114    sprintf(str2, "p-value for K-S 1-smp test: %f", pvalueKS_1);
00115    pt1->AddText(str2);
00116    pt1->Draw();
00117    
00118    // ------------------------------------------------------------------------
00119    // C a s e  2 :  C r e a t e   G a u s s i a n  r a n d o m  s a m p l e s
00120    // ------------------------------------------------------------------------
00121 
00122    UInt_t nEvents2 = 2000;
00123 
00124    Double_t* sample2 = new Double_t[nEvents2];
00125 
00126    TH1D* h2smps_1 = new TH1D("h2smps_1", "Gaussian distribution histograms", 100, 0, 500);
00127    h2smps_1->SetStats(kFALSE);   
00128    
00129    TH1D* h2smps_2 = new TH1D("h2smps_2", "Gaussian distribution histograms", 100, 0, 500);
00130    h2smps_2->SetStats(kFALSE);
00131    
00132    TRandom3 r;
00133    for (UInt_t i = 0; i < nEvents1; ++i) { 
00134       Double_t data = r.Gaus(300, 50);
00135       sample1[i] = data;
00136       h2smps_1->Fill(data);
00137    }
00138    h2smps_1->Scale(1. / nEvents1, "width");
00139    c->cd(2);
00140    h2smps_1->Draw();
00141    h2smps_1->SetLineColor(kBlue);
00142    
00143    for (UInt_t i = 0; i < nEvents2; ++i) { 
00144       Double_t data = r.Gaus(300, 50);
00145       sample2[i] = data;
00146       h2smps_2->Fill(data);
00147    }
00148    h2smps_2->Scale(1. / nEvents2, "width");
00149    h2smps_2->Draw("SAME");
00150    h2smps_2->SetLineColor(kRed);
00151 
00152    // -----------------------------------------
00153    // C r e a t e   G o F T e s t  o b j e c t 
00154    // -----------------------------------------
00155    
00156    ROOT::Math::GoFTest* goftest_2 = new ROOT::Math::GoFTest(nEvents1, sample1, nEvents2, sample2);
00157    
00158    /* Possible calls for the Anderson - DarlingTest test */
00159    /*----------------------------------------------------*/
00160    
00161    /* a) Returning the Anderson-Darling standardized test statistic */
00162    A2_1 = goftest_2->AndersonDarling2SamplesTest("t"); 
00163    A2_2 = (*goftest_2)(ROOT::Math::GoFTest::kAD2s, "t");
00164    assert(A2_1 == A2_2);
00165   
00166    /* b) Returning the p-value for the Anderson-Darling test statistic */
00167    pvalueAD_1 = goftest_2-> AndersonDarling2SamplesTest(); // p-value is the default choice
00168    pvalueAD_2 = (*goftest_2)(ROOT::Math::GoFTest::kAD2s);  // p-value is the default choices
00169    assert(pvalueAD_1 == pvalueAD_2);
00170    
00171    /* Possible calls for the Kolmogorov - Smirnov test */
00172    /*--------------------------------------------------*/              
00173        
00174    /* a) Returning the Kolmogorov-Smirnov standardized test statistic */
00175    Dn_1 = goftest_2-> KolmogorovSmirnov2SamplesTest("t");
00176    Dn_2 = (*goftest_2)(ROOT::Math::GoFTest::kKS2s, "t");
00177    assert(Dn_1 == Dn_2);
00178    
00179    /* b) Returning the p-value for the Kolmogorov-Smirnov test statistic */
00180    pvalueKS_1 = goftest_2-> KolmogorovSmirnov2SamplesTest();
00181    pvalueKS_2 = (*goftest_2)(ROOT::Math::GoFTest::kKS2s);
00182    assert(pvalueKS_1 == pvalueKS_2);
00183 
00184 #ifdef TEST_ERROR_MESSAGE   
00185    /* Valid but incorrect calls for the 1-sample methods of the 2-samples constucted goftest_2 */
00186    A2 = (*goftest_2)(ROOT::Math::GoFTest::kAD, "t");     // Issues error message
00187    pvalueKS = (*goftest_2)(ROOT::Math::GoFTest::kKS);    // Issues error message
00188    assert(A2 == pvalueKS);
00189 #endif
00190    
00191    TPaveText* pt2 = new TPaveText(0.13, 0.6, 0.43, 0.8, "brNDC");
00192    sprintf(str1, "p-value for A-D 2-smps test: %f", pvalueAD_1);
00193    pt2->AddText(str1);
00194    pt2->SetFillColor(18);
00195    pt2->SetTextFont(20);
00196    pt2->SetTextColor(4);
00197    sprintf(str2, "p-value for K-S 2-smps test: %f", pvalueKS_1);
00198    pt2-> AddText(str2);
00199    pt2-> Draw();
00200    
00201       
00202    // ------------------------------------------------------------------------
00203    // C a s e  3 :  C r e a t e   L a n d a u  r a n d o m  s a m p l e
00204    // ------------------------------------------------------------------------
00205    
00206    UInt_t nEvents3 = 1000;
00207 
00208    Double_t* sample3 = new Double_t[nEvents3];
00209    for (UInt_t i = 0; i < nEvents3; ++i) { 
00210       Double_t data = r.Landau();
00211       sample3[i] = data;
00212    }
00213 
00214    // ------------------------------------------
00215    // C r e a t e   G o F T e s t  o b j e c t s 
00216    // ------------------------------------------
00217    
00218    /* Possible constructors for the user input distribution */
00219    /*-------------------------------------------------------*/
00220    
00221    /* a) User input PDF */
00222    ROOT::Math::Functor1D f(&TMath::Landau);
00223    double min = 3*TMath::MinElement(nEvents3, sample3);
00224    double max = 3*TMath::MaxElement(nEvents3, sample3);
00225    ROOT::Math::GoFTest* goftest_3a = new ROOT::Math::GoFTest(nEvents3, sample3, f,  ROOT::Math::GoFTest::kPDF, min,max);  // need to specify am interval
00226    /* b) User input CDF */
00227    ROOT::Math::Functor1D fI(&TMath::LandauI);
00228    ROOT::Math::GoFTest* goftest_3b = new ROOT::Math::GoFTest(nEvents3, sample3, fI, ROOT::Math::GoFTest::kCDF,min,max);
00229 
00230    
00231    /* Returning the p-value for the Anderson-Darling test statistic */
00232    pvalueAD_1 = goftest_3a-> AndersonDarlingTest(); // p-value is the default choice
00233    
00234    pvalueAD_2 = (*goftest_3b)(); // p-value and Anderson - Darling Test are the default choices
00235    
00236    /* Checking consistency between both tests */ 
00237    std::cout << " \n\nTEST with LANDAU distribution:\t";
00238    if (TMath::Abs(pvalueAD_1 - pvalueAD_2) > 1.E-1 * pvalueAD_2) { 
00239       std::cout << "FAILED " << std::endl;
00240       Error("goftest","Error in comparing testing using Landau and Landau CDF");
00241       std::cerr << " pvalues are " << pvalueAD_1 << "  " << pvalueAD_2 << std::endl;
00242    }
00243    else 
00244       std::cout << "OK ( pvalues = " << pvalueAD_2 << "  )" << std::endl;
00245 }