00001 ////////////////////////////////////////////////////////////////////////// 00002 // 00003 // 'DATA AND CATEGORIES' RooFit tutorial macro #403 00004 // 00005 // Using weights in unbinned datasets 00006 // 00007 // 00008 // 00009 // 07/2008 - Wouter Verkerke 00010 // 00011 ///////////////////////////////////////////////////////////////////////// 00012 00013 #ifndef __CINT__ 00014 #include "RooGlobalFunc.h" 00015 #endif 00016 #include "RooRealVar.h" 00017 #include "RooDataSet.h" 00018 #include "RooDataHist.h" 00019 #include "RooGaussian.h" 00020 #include "RooConstVar.h" 00021 #include "RooFormulaVar.h" 00022 #include "RooGenericPdf.h" 00023 #include "RooPolynomial.h" 00024 #include "RooChi2Var.h" 00025 #include "RooMinuit.h" 00026 #include "TCanvas.h" 00027 #include "TAxis.h" 00028 #include "RooPlot.h" 00029 #include "RooFitResult.h" 00030 using namespace RooFit ; 00031 00032 00033 void rf403_weightedevts() 00034 { 00035 // C r e a t e o b s e r v a b l e a n d u n w e i g h t e d d a t a s e t 00036 // ------------------------------------------------------------------------------- 00037 00038 // Declare observable 00039 RooRealVar x("x","x",-10,10) ; 00040 x.setBins(40) ; 00041 00042 // Construction a uniform pdf 00043 RooPolynomial p0("px","px",x) ; 00044 00045 // Sample 1000 events from pdf 00046 RooDataSet* data = p0.generate(x,1000) ; 00047 00048 00049 00050 // C a l c u l a t e w e i g h t a n d m a k e d a t a s e t w e i g h t e d 00051 // ----------------------------------------------------------------------------------- 00052 00053 // Construct formula to calculate (fake) weight for events 00054 RooFormulaVar wFunc("w","event weight","(x*x+10)",x) ; 00055 00056 // Add column with variable w to previously generated dataset 00057 RooRealVar* w = (RooRealVar*) data->addColumn(wFunc) ; 00058 00059 // Dataset d is now a dataset with two observable (x,w) with 1000 entries 00060 data->Print() ; 00061 00062 // Instruct dataset wdata in interpret w as event weight rather than as observable 00063 RooDataSet wdata(data->GetName(),data->GetTitle(),data,*data->get(),0,w->GetName()) ; 00064 00065 // Dataset d is now a dataset with one observable (x) with 1000 entries and a sum of weights of ~430K 00066 wdata.Print() ; 00067 00068 00069 00070 // U n b i n n e d M L f i t t o w e i g h t e d d a t a 00071 // --------------------------------------------------------------- 00072 00073 // Construction quadratic polynomial pdf for fitting 00074 RooRealVar a0("a0","a0",1) ; 00075 RooRealVar a1("a1","a1",0,-1,1) ; 00076 RooRealVar a2("a2","a2",1,0,10) ; 00077 RooPolynomial p2("p2","p2",x,RooArgList(a0,a1,a2),0) ; 00078 00079 // Fit quadratic polynomial to weighted data 00080 00081 // NOTE: A plain Maximum likelihood fit to weighted data does in general 00082 // NOT result in correct error estimates, unless individual 00083 // event weights represent Poisson statistics themselves. 00084 // 00085 // Fit with 'wrong' errors 00086 RooFitResult* r_ml_wgt = p2.fitTo(wdata,Save()) ; 00087 00088 // A first order correction to estimated parameter errors in an 00089 // (unbinned) ML fit can be obtained by calculating the 00090 // covariance matrix as 00091 // 00092 // V' = V C-1 V 00093 // 00094 // where V is the covariance matrix calculated from a fit 00095 // to -logL = - sum [ w_i log f(x_i) ] and C is the covariance 00096 // matrix calculated from -logL' = -sum [ w_i^2 log f(x_i) ] 00097 // (i.e. the weights are applied squared) 00098 // 00099 // A fit in this mode can be performed as follows: 00100 00101 RooFitResult* r_ml_wgt_corr = p2.fitTo(wdata,Save(),SumW2Error(kTRUE)) ; 00102 00103 00104 00105 // P l o t w e i g h e d d a t a a n d f i t r e s u l t 00106 // --------------------------------------------------------------- 00107 00108 // Construct plot frame 00109 RooPlot* frame = x.frame(Title("Unbinned ML fit, binned chi^2 fit to weighted data")) ; 00110 00111 // Plot data using sum-of-weights-squared error rather than Poisson errors 00112 wdata.plotOn(frame,DataError(RooAbsData::SumW2)) ; 00113 00114 // Overlay result of 2nd order polynomial fit to weighted data 00115 p2.plotOn(frame) ; 00116 00117 00118 00119 // M L F i t o f p d f t o e q u i v a l e n t u n w e i g h t e d d a t a s e t 00120 // ----------------------------------------------------------------------------------------- 00121 00122 // Construct a pdf with the same shape as p0 after weighting 00123 RooGenericPdf genPdf("genPdf","x*x+10",x) ; 00124 00125 // Sample a dataset with the same number of events as data 00126 RooDataSet* data2 = genPdf.generate(x,1000) ; 00127 00128 // Sample a dataset with the same number of weights as data 00129 RooDataSet* data3 = genPdf.generate(x,43000) ; 00130 00131 // Fit the 2nd order polynomial to both unweighted datasets and save the results for comparison 00132 RooFitResult* r_ml_unw10 = p2.fitTo(*data2,Save()) ; 00133 RooFitResult* r_ml_unw43 = p2.fitTo(*data3,Save()) ; 00134 00135 00136 // C h i 2 f i t o f p d f t o b i n n e d w e i g h t e d d a t a s e t 00137 // ------------------------------------------------------------------------------------ 00138 00139 // Construct binned clone of unbinned weighted dataset 00140 RooDataHist* binnedData = wdata.binnedClone() ; 00141 binnedData->Print("v") ; 00142 00143 // Perform chi2 fit to binned weighted dataset using sum-of-weights errors 00144 // 00145 // NB: Within the usual approximations of a chi2 fit, a chi2 fit to weighted 00146 // data using sum-of-weights-squared errors does give correct error 00147 // estimates 00148 RooChi2Var chi2("chi2","chi2",p2,*binnedData,DataError(RooAbsData::SumW2)) ; 00149 RooMinuit m(chi2) ; 00150 m.migrad() ; 00151 m.hesse() ; 00152 00153 // Plot chi^2 fit result on frame as well 00154 RooFitResult* r_chi2_wgt = m.save() ; 00155 p2.plotOn(frame,LineStyle(kDashed),LineColor(kRed)) ; 00156 00157 00158 00159 // C o m p a r e f i t r e s u l t s o f c h i 2 , M L f i t s t o ( u n ) w e i g h t e d d a t a 00160 // --------------------------------------------------------------------------------------------------------------- 00161 00162 // Note that ML fit on 1Kevt of weighted data is closer to result of ML fit on 43Kevt of unweighted data 00163 // than to 1Kevt of unweighted data, whereas the reference chi^2 fit with SumW2 error gives a result closer to 00164 // that of an unbinned ML fit to 1Kevt of unweighted data. 00165 00166 cout << "==> ML Fit results on 1K unweighted events" << endl ; 00167 r_ml_unw10->Print() ; 00168 cout << "==> ML Fit results on 43K unweighted events" << endl ; 00169 r_ml_unw43->Print() ; 00170 cout << "==> ML Fit results on 1K weighted events with a summed weight of 43K" << endl ; 00171 r_ml_wgt->Print() ; 00172 cout << "==> Corrected ML Fit results on 1K weighted events with a summed weight of 43K" << endl ; 00173 r_ml_wgt_corr->Print() ; 00174 cout << "==> Chi2 Fit results on 1K weighted events with a summed weight of 43K" << endl ; 00175 r_chi2_wgt->Print() ; 00176 00177 00178 new TCanvas("rf403_weightedevts","rf403_weightedevts",600,600) ; 00179 gPad->SetLeftMargin(0.15) ; frame->GetYaxis()->SetTitleOffset(1.8) ; frame->Draw() ; 00180 00181 00182 }