00001 // @(#)root/graf:$Id: TGraphQQ.cxx 31648 2009-12-08 12:59:44Z couet $ 00002 // Author: Anna Kreshuk 18/11/2005 00003 00004 /************************************************************************* 00005 * Copyright (C) 1995-2005, Rene Brun and Fons Rademakers. * 00006 * All rights reserved. * 00007 * * 00008 * For the licensing terms see $ROOTSYS/LICENSE. * 00009 * For the list of contributors see $ROOTSYS/README/CREDITS. * 00010 *************************************************************************/ 00011 00012 #include "TGraphQQ.h" 00013 #include "TAxis.h" 00014 #include "TF1.h" 00015 #include "TMath.h" 00016 #include "TVirtualPad.h" 00017 #include "TLine.h" 00018 00019 ClassImp(TGraphQQ) 00020 00021 //______________________________________________________________________________ 00022 // 00023 // This class allows to draw quantile-quantile plots 00024 // 00025 // Plots can be drawn for 2 datasets or for a dataset and a theoretical 00026 // distribution function 00027 // 00028 // 2 datasets: 00029 // Quantile-quantile plots are used to determine whether 2 samples come from 00030 // the same distribution. 00031 // A qq-plot draws the quantiles of one dataset against the quantile of the 00032 // the other. The quantiles of the dataset with fewer entries are on Y axis, 00033 // with more entries - on X axis. 00034 // A straight line, going through 0.25 and 0.75 quantiles is also plotted 00035 // for reference. It represents a robust linear fit, not sensitive to the 00036 // extremes of the datasets. 00037 // If the datasets come from the same distribution, points of the plot should 00038 // fall approximately on the 45 degrees line. If they have the same 00039 // distribution function, but location or scale different parameters, 00040 // they should still fall on the straight line, but not the 45 degrees one. 00041 // The greater their departure from the straight line, the more evidence there 00042 // is, that the datasets come from different distributions. 00043 // The advantage of qq-plot is that it not only shows that the underlying 00044 // distributions are different, but, unlike the analytical methods, it also 00045 // gives information on the nature of this difference: heavier tails, 00046 // different location/scale, different shape, etc. 00047 // 00048 // Some examples of qqplots of 2 datasets: 00049 //Begin_Html 00050 /* 00051 <img src="gif/qqplots.gif"> 00052 */ 00053 //End_Html 00054 // 00055 // 1 dataset: 00056 // Quantile-quantile plots are used to determine if the dataset comes from the 00057 // specified theoretical distribution, such as normal. 00058 // A qq-plot draws quantiles of the dataset against quantiles of the specified 00059 // theoretical distribution. 00060 // (NOTE, that density, not CDF should be specified) 00061 // A straight line, going through 0.25 and 0.75 quantiles can also be plotted 00062 // for reference. It represents a robust linear fit, not sensitive to the 00063 // extremes of the dataset. 00064 // As in the 2 datasets case, departures from straight line indicate departures 00065 // from the specified distribution. 00066 // 00067 // " The correlation coefficient associated with the linear fit to the data 00068 // in the probability plot (qq plot in our case) is a measure of the 00069 // goodness of the fit. 00070 // Estimates of the location and scale parameters of the distribution 00071 // are given by the intercept and slope. Probability plots can be generated 00072 // for several competing distributions to see which provides the best fit, 00073 // and the probability plot generating the highest correlation coefficient 00074 // is the best choice since it generates the straightest probability plot." 00075 // From "Engineering statistic handbook", 00076 // http://www.itl.nist.gov/div898/handbook/eda/section3/probplot.htm 00077 // 00078 // Example of a qq-plot of a dataset from N(3, 2) distribution and 00079 // TMath::Gaus(0, 1) theoretical function. Fitting parameters 00080 // are estimates of the distribution mean and sigma. 00081 // 00082 //Begin_Html 00083 /* 00084 <img src="gif/qqnormal.gif"> 00085 */ 00086 //End_Html// 00087 // 00088 // 00089 // References: 00090 // http://www.itl.nist.gov/div898/handbook/eda/section3/qqplot.htm 00091 // http://www.itl.nist.gov/div898/handbook/eda/section3/probplot.htm 00092 // 00093 00094 00095 00096 //______________________________________________________________________________ 00097 TGraphQQ::TGraphQQ() 00098 { 00099 //default constructor 00100 00101 fF = 0; 00102 fY0 = 0; 00103 fNy0 = 0; 00104 fXq1 = 0.; 00105 fXq2 = 0.; 00106 fYq1 = 0.; 00107 fYq2 = 0.; 00108 00109 } 00110 00111 00112 //______________________________________________________________________________ 00113 TGraphQQ::TGraphQQ(Int_t n, Double_t *x) 00114 : TGraph(n) 00115 { 00116 //Creates a quantile-quantile plot of dataset x. 00117 //Theoretical distribution function can be defined later by SetFunction method 00118 00119 fNy0 = 0; 00120 fXq1 = 0.; 00121 fXq2 = 0.; 00122 fYq1 = 0.; 00123 fYq2 = 0.; 00124 00125 Int_t *index = new Int_t[n]; 00126 TMath::Sort(n, x, index, kFALSE); 00127 for (Int_t i=0; i<fNpoints; i++) 00128 fY[i] = x[index[i]]; 00129 fF=0; 00130 fY0=0; 00131 delete [] index; 00132 } 00133 00134 //______________________________________________________________________________ 00135 TGraphQQ::TGraphQQ(Int_t n, Double_t *x, TF1 *f) 00136 : TGraph(n) 00137 { 00138 //Creates a quantile-quantile plot of dataset x against function f 00139 00140 fNy0 = 0; 00141 00142 Int_t *index = new Int_t[n]; 00143 TMath::Sort(n, x, index, kFALSE); 00144 for (Int_t i=0; i<fNpoints; i++) 00145 fY[i] = x[index[i]]; 00146 delete [] index; 00147 fF = f; 00148 fY0=0; 00149 MakeFunctionQuantiles(); 00150 } 00151 00152 00153 //______________________________________________________________________________ 00154 TGraphQQ::TGraphQQ(Int_t nx, Double_t *x, Int_t ny, Double_t *y) 00155 { 00156 //Creates a quantile-quantile plot of dataset x against dataset y 00157 //Parameters nx and ny are respective array sizes 00158 00159 fNy0 = 0; 00160 fXq1 = 0.; 00161 fXq2 = 0.; 00162 fYq1 = 0.; 00163 fYq2 = 0.; 00164 00165 nx<=ny ? fNpoints=nx : fNpoints=ny; 00166 00167 if (!CtorAllocate()) return; 00168 fF=0; 00169 Int_t *index = new Int_t[TMath::Max(nx, ny)]; 00170 TMath::Sort(nx, x, index, kFALSE); 00171 if (nx <=ny){ 00172 for (Int_t i=0; i<fNpoints; i++) 00173 fY[i] = x[index[i]]; 00174 TMath::Sort(ny, y, index, kFALSE); 00175 if (nx==ny){ 00176 for (Int_t i=0; i<fNpoints; i++) 00177 fX[i] = y[index[i]]; 00178 fY0 = 0; 00179 Quartiles(); 00180 } else { 00181 fNy0 = ny; 00182 fY0 = new Double_t[ny]; 00183 for (Int_t i=0; i<ny; i++) 00184 fY0[i] = y[i]; 00185 MakeQuantiles(); 00186 } 00187 } else { 00188 fNy0 = nx; 00189 fY0 = new Double_t[nx]; 00190 for (Int_t i=0; i<nx; i++) 00191 fY0[i] = x[index[i]]; 00192 TMath::Sort(ny, y, index, kFALSE); 00193 for (Int_t i=0; i<ny; i++) 00194 fY[i] = y[index[i]]; 00195 MakeQuantiles(); 00196 } 00197 00198 00199 delete [] index; 00200 } 00201 00202 00203 //______________________________________________________________________________ 00204 TGraphQQ::~TGraphQQ() 00205 { 00206 //Destroys a TGraphQQ 00207 00208 if (fY0) 00209 delete [] fY0; 00210 if (fF) 00211 fF = 0; 00212 } 00213 00214 00215 //______________________________________________________________________________ 00216 void TGraphQQ::MakeFunctionQuantiles() 00217 { 00218 //Computes quantiles of theoretical distribution function 00219 00220 if (!fF) return; 00221 TString s = fF->GetTitle(); 00222 Double_t pk; 00223 if (s.Contains("TMath::Gaus") || s.Contains("gaus")){ 00224 //use plotting positions optimal for normal distribution 00225 for (Int_t k=1; k<=fNpoints; k++){ 00226 pk = (k-0.375)/(fNpoints+0.25); 00227 fX[k-1]=TMath::NormQuantile(pk); 00228 } 00229 } else { 00230 Double_t *prob = new Double_t[fNpoints]; 00231 if (fNpoints > 10){ 00232 for (Int_t k=1; k<=fNpoints; k++) 00233 prob[k-1] = (k-0.5)/fNpoints; 00234 } else { 00235 for (Int_t k=1; k<=fNpoints; k++) 00236 prob[k-1] = (k-0.375)/(fNpoints+0.25); 00237 } 00238 //fF->GetQuantiles(fNpoints, prob, fX); 00239 fF->GetQuantiles(fNpoints, fX, prob); 00240 delete [] prob; 00241 } 00242 00243 Quartiles(); 00244 } 00245 00246 00247 //______________________________________________________________________________ 00248 00249 void TGraphQQ::MakeQuantiles() 00250 { 00251 //When sample sizes are not equal, computes quantiles of the bigger sample 00252 //by linear interpolation 00253 00254 if (!fY0) return; 00255 00256 Double_t pi, pfrac; 00257 Int_t pint; 00258 for (Int_t i=0; i<fNpoints-1; i++){ 00259 pi = (fNy0-1)*Double_t(i)/Double_t(fNpoints-1); 00260 pint = TMath::FloorNint(pi); 00261 pfrac = pi - pint; 00262 fX[i] = (1-pfrac)*fY0[pint]+pfrac*fY0[pint+1]; 00263 } 00264 fX[fNpoints-1]=fY0[fNy0-1]; 00265 00266 Quartiles(); 00267 } 00268 00269 00270 //______________________________________________________________________________ 00271 void TGraphQQ::Quartiles() 00272 { 00273 // compute quartiles 00274 // a quartile is a 25 per cent or 75 per cent quantile 00275 00276 Double_t prob[]={0.25, 0.75}; 00277 Double_t x[2]; 00278 Double_t y[2]; 00279 TMath::Quantiles(fNpoints, 2, fY, y, prob, kTRUE); 00280 if (fY0) 00281 TMath::Quantiles(fNy0, 2, fY0, x, prob, kTRUE); 00282 else if (fF) { 00283 TString s = fF->GetTitle(); 00284 if (s.Contains("TMath::Gaus") || s.Contains("gaus")){ 00285 x[0] = TMath::NormQuantile(0.25); 00286 x[1] = TMath::NormQuantile(0.75); 00287 } else 00288 fF->GetQuantiles(2, x, prob); 00289 } 00290 else 00291 TMath::Quantiles(fNpoints, 2, fX, x, prob, kTRUE); 00292 00293 fXq1=x[0]; fXq2=x[1]; fYq1=y[0]; fYq2=y[1]; 00294 } 00295 00296 00297 //______________________________________________________________________________ 00298 void TGraphQQ::SetFunction(TF1 *f) 00299 { 00300 //Sets the theoretical distribution function (density!) 00301 //and computes its quantiles 00302 00303 fF = f; 00304 MakeFunctionQuantiles(); 00305 }