TGraphQQ.cxx

Go to the documentation of this file.
00001 // @(#)root/graf:$Id: TGraphQQ.cxx 31648 2009-12-08 12:59:44Z couet $
00002 // Author: Anna Kreshuk 18/11/2005
00003 
00004 /*************************************************************************
00005  * Copyright (C) 1995-2005, Rene Brun and Fons Rademakers.               *
00006  * All rights reserved.                                                  *
00007  *                                                                       *
00008  * For the licensing terms see $ROOTSYS/LICENSE.                         *
00009  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
00010  *************************************************************************/
00011 
00012 #include "TGraphQQ.h"
00013 #include "TAxis.h"
00014 #include "TF1.h"
00015 #include "TMath.h"
00016 #include "TVirtualPad.h"
00017 #include "TLine.h"
00018 
00019 ClassImp(TGraphQQ)
00020 
00021 //______________________________________________________________________________
00022 //
00023 // This class allows to draw quantile-quantile plots
00024 // 
00025 // Plots can be drawn for 2 datasets or for a dataset and a theoretical 
00026 // distribution function
00027 // 
00028 // 2 datasets:
00029 //   Quantile-quantile plots are used to determine whether 2 samples come from
00030 //   the same distribution. 
00031 //   A qq-plot draws the quantiles of one dataset against the quantile of the 
00032 //   the other. The quantiles of the dataset with fewer entries are on Y axis,
00033 //   with more entries - on X axis. 
00034 //   A straight line, going through 0.25 and 0.75 quantiles is also plotted 
00035 //   for reference. It represents a robust linear fit, not sensitive to the 
00036 //   extremes of the datasets.
00037 //   If the datasets come from the same distribution, points of the plot should 
00038 //   fall approximately on the 45 degrees line. If they have the same 
00039 //   distribution function, but location or scale different parameters, 
00040 //   they should still fall on the straight line, but not the 45 degrees one. 
00041 //   The greater their departure from the straight line, the more evidence there
00042 //   is, that the datasets come from different distributions.
00043 //   The advantage of qq-plot is that it not only shows that the underlying
00044 //   distributions are different, but, unlike the analytical methods, it also
00045 //   gives information on the nature of this difference: heavier tails, 
00046 //   different location/scale, different shape, etc.
00047 //      
00048 //   Some examples of qqplots of 2 datasets:
00049 //Begin_Html
00050 /*
00051 <img src="gif/qqplots.gif">
00052 */
00053 //End_Html
00054 //
00055 // 1 dataset: 
00056 //   Quantile-quantile plots are used to determine if the dataset comes from the 
00057 //   specified theoretical distribution, such as normal.
00058 //   A qq-plot draws quantiles of the dataset against quantiles of the specified
00059 //   theoretical distribution. 
00060 //   (NOTE, that density, not CDF should be specified)
00061 //   A straight line, going through 0.25 and 0.75 quantiles can also be plotted 
00062 //   for reference. It represents a robust linear fit, not sensitive to the 
00063 //   extremes of the dataset. 
00064 //   As in the 2 datasets case, departures from straight line indicate departures
00065 //   from the specified distribution.
00066 //
00067 //   " The correlation coefficient associated with the linear fit to the data 
00068 //     in the probability plot (qq plot in our case) is a measure of the 
00069 //     goodness of the fit. 
00070 //     Estimates of the location and scale parameters  of the distribution 
00071 //     are given by the intercept and slope. Probability plots can be generated 
00072 //     for several competing distributions to see which provides the best fit, 
00073 //     and the probability plot generating the highest correlation coefficient 
00074 //     is the best choice since it generates the straightest probability plot."
00075 //   From "Engineering statistic handbook", 
00076 //   http://www.itl.nist.gov/div898/handbook/eda/section3/probplot.htm
00077 //
00078 //   Example of a qq-plot of a dataset from N(3, 2) distribution and 
00079 //           TMath::Gaus(0, 1) theoretical function. Fitting parameters
00080 //           are estimates of the distribution mean and sigma.
00081 //
00082 //Begin_Html
00083 /*
00084 <img src="gif/qqnormal.gif">
00085 */
00086 //End_Html//
00087 //   
00088 //   
00089 // References:
00090 // http://www.itl.nist.gov/div898/handbook/eda/section3/qqplot.htm
00091 // http://www.itl.nist.gov/div898/handbook/eda/section3/probplot.htm
00092 //   
00093  
00094 
00095 
00096 //______________________________________________________________________________
00097 TGraphQQ::TGraphQQ()
00098 {
00099    //default constructor
00100    
00101    fF   = 0;
00102    fY0  = 0;
00103    fNy0 = 0;
00104    fXq1 = 0.;
00105    fXq2 = 0.;
00106    fYq1 = 0.;
00107    fYq2 = 0.;
00108 
00109 }
00110 
00111 
00112 //______________________________________________________________________________
00113 TGraphQQ::TGraphQQ(Int_t n, Double_t *x) 
00114    : TGraph(n)
00115 {
00116    //Creates a quantile-quantile plot of dataset x. 
00117    //Theoretical distribution function can be defined later by SetFunction method
00118 
00119    fNy0 = 0;
00120    fXq1 = 0.;
00121    fXq2 = 0.;
00122    fYq1 = 0.;
00123    fYq2 = 0.;
00124 
00125    Int_t *index = new Int_t[n];
00126    TMath::Sort(n, x, index, kFALSE);
00127    for (Int_t i=0; i<fNpoints; i++)
00128       fY[i] = x[index[i]];
00129    fF=0;
00130    fY0=0;
00131    delete [] index;
00132 } 
00133 
00134 //______________________________________________________________________________
00135 TGraphQQ::TGraphQQ(Int_t n, Double_t *x, TF1 *f)
00136    : TGraph(n)
00137 {
00138    //Creates a quantile-quantile plot of dataset x against function f
00139 
00140    fNy0 = 0;
00141 
00142    Int_t *index = new Int_t[n];
00143    TMath::Sort(n, x, index, kFALSE);
00144    for (Int_t i=0; i<fNpoints; i++)
00145       fY[i] = x[index[i]];
00146    delete [] index;
00147    fF = f;
00148    fY0=0;
00149    MakeFunctionQuantiles();
00150 } 
00151 
00152 
00153 //______________________________________________________________________________
00154 TGraphQQ::TGraphQQ(Int_t nx, Double_t *x, Int_t ny, Double_t *y)
00155 {
00156    //Creates a quantile-quantile plot of dataset x against dataset y
00157    //Parameters nx and ny are respective array sizes
00158 
00159    fNy0 = 0;
00160    fXq1 = 0.;
00161    fXq2 = 0.;
00162    fYq1 = 0.;
00163    fYq2 = 0.;
00164 
00165    nx<=ny ? fNpoints=nx : fNpoints=ny;
00166 
00167    if (!CtorAllocate()) return;
00168    fF=0;
00169    Int_t *index = new Int_t[TMath::Max(nx, ny)];
00170    TMath::Sort(nx, x, index, kFALSE);
00171    if (nx <=ny){
00172       for (Int_t i=0; i<fNpoints; i++)
00173          fY[i] = x[index[i]];
00174       TMath::Sort(ny, y, index, kFALSE);
00175       if (nx==ny){
00176          for (Int_t i=0; i<fNpoints; i++)
00177             fX[i] = y[index[i]];
00178          fY0 = 0;
00179          Quartiles();
00180       } else {
00181          fNy0 = ny;
00182          fY0 = new Double_t[ny];
00183          for (Int_t i=0; i<ny; i++)
00184             fY0[i] = y[i];
00185          MakeQuantiles();
00186       }
00187    } else {
00188       fNy0 = nx;
00189       fY0 = new Double_t[nx];
00190       for (Int_t i=0; i<nx; i++)
00191          fY0[i] = x[index[i]];
00192       TMath::Sort(ny, y, index, kFALSE);
00193       for (Int_t i=0; i<ny; i++)
00194          fY[i] = y[index[i]];
00195       MakeQuantiles();
00196    }
00197 
00198 
00199    delete [] index;
00200 }
00201 
00202 
00203 //______________________________________________________________________________
00204 TGraphQQ::~TGraphQQ()
00205 {
00206    //Destroys a TGraphQQ
00207 
00208    if (fY0)
00209       delete [] fY0;
00210    if (fF)
00211       fF = 0;
00212 }
00213 
00214 
00215 //______________________________________________________________________________
00216 void TGraphQQ::MakeFunctionQuantiles()
00217 {
00218    //Computes quantiles of theoretical distribution function
00219 
00220    if (!fF) return;
00221    TString s = fF->GetTitle();
00222    Double_t pk;
00223    if (s.Contains("TMath::Gaus") || s.Contains("gaus")){
00224       //use plotting positions optimal for normal distribution
00225       for (Int_t k=1; k<=fNpoints; k++){
00226          pk = (k-0.375)/(fNpoints+0.25);
00227          fX[k-1]=TMath::NormQuantile(pk);
00228       }
00229    } else {
00230       Double_t *prob = new Double_t[fNpoints];
00231       if (fNpoints > 10){
00232          for (Int_t k=1; k<=fNpoints; k++)
00233             prob[k-1] = (k-0.5)/fNpoints;
00234       } else {
00235          for (Int_t k=1; k<=fNpoints; k++)
00236             prob[k-1] = (k-0.375)/(fNpoints+0.25);
00237       }
00238       //fF->GetQuantiles(fNpoints, prob, fX);
00239       fF->GetQuantiles(fNpoints, fX, prob);
00240       delete [] prob;
00241    }
00242 
00243    Quartiles();
00244 }
00245 
00246 
00247 //______________________________________________________________________________
00248 
00249 void TGraphQQ::MakeQuantiles()
00250 {
00251    //When sample sizes are not equal, computes quantiles of the bigger sample
00252    //by linear interpolation
00253 
00254    if (!fY0) return;
00255 
00256    Double_t pi, pfrac;
00257    Int_t pint;
00258    for (Int_t i=0; i<fNpoints-1; i++){
00259       pi = (fNy0-1)*Double_t(i)/Double_t(fNpoints-1);
00260       pint = TMath::FloorNint(pi);
00261       pfrac = pi - pint;
00262       fX[i] = (1-pfrac)*fY0[pint]+pfrac*fY0[pint+1];
00263    }
00264    fX[fNpoints-1]=fY0[fNy0-1];
00265 
00266    Quartiles();
00267 }
00268 
00269 
00270 //______________________________________________________________________________
00271 void TGraphQQ::Quartiles()
00272 {
00273    // compute quartiles
00274    // a quartile is a 25 per cent or 75 per cent quantile
00275    
00276    Double_t prob[]={0.25, 0.75};
00277    Double_t x[2];
00278    Double_t y[2];
00279    TMath::Quantiles(fNpoints, 2, fY, y, prob, kTRUE);
00280    if (fY0)
00281       TMath::Quantiles(fNy0, 2, fY0, x, prob, kTRUE);
00282    else if (fF) {
00283       TString s = fF->GetTitle();
00284       if (s.Contains("TMath::Gaus") || s.Contains("gaus")){
00285          x[0] = TMath::NormQuantile(0.25);
00286          x[1] = TMath::NormQuantile(0.75);
00287       } else 
00288          fF->GetQuantiles(2, x, prob);
00289    }
00290    else 
00291       TMath::Quantiles(fNpoints, 2, fX, x, prob, kTRUE);
00292 
00293    fXq1=x[0]; fXq2=x[1]; fYq1=y[0]; fYq2=y[1];
00294 }
00295 
00296 
00297 //______________________________________________________________________________
00298 void TGraphQQ::SetFunction(TF1 *f)
00299 {
00300    //Sets the theoretical distribution function (density!) 
00301    //and computes its quantiles
00302 
00303    fF = f;
00304    MakeFunctionQuantiles();
00305 }

Generated on Tue Jul 5 14:14:27 2011 for ROOT_528-00b_version by  doxygen 1.5.1