00001 ///////////////////////////////////////////////////////////////////////// 00002 // 00003 // 'Number Counting Example' RooStats tutorial macro #100 00004 // author: Kyle Cranmer 00005 // date Nov. 2008 00006 // 00007 // This tutorial shows an example of a combination of 00008 // two searches using number counting with background uncertainty. 00009 // 00010 // The macro uses a RooStats "factory" to construct a PDF 00011 // that represents the two number counting analyses with background 00012 // uncertainties. The uncertainties are taken into account by 00013 // considering a sideband measurement of a size that corresponds to the 00014 // background uncertainty. The problem has been studied in these references: 00015 // http://arxiv.org/abs/physics/0511028 00016 // http://arxiv.org/abs/physics/0702156 00017 // http://cdsweb.cern.ch/record/1099969?ln=en 00018 // 00019 // After using the factory to make the model, we use a RooStats 00020 // ProfileLikelihoodCalculator for a Hypothesis test and a confidence interval. 00021 // The calculator takes into account systematics by eliminating nuisance parameters 00022 // with the profile likelihood. This is equivalent to the method of MINOS. 00023 // 00024 ///////////////////////////////////////////////////////////////////////// 00025 00026 #ifndef __CINT__ 00027 #include "RooGlobalFunc.h" 00028 #endif 00029 #include "RooStats/ProfileLikelihoodCalculator.h" 00030 #include "RooStats/NumberCountingPdfFactory.h" 00031 #include "RooStats/ConfInterval.h" 00032 #include "RooStats/HypoTestResult.h" 00033 #include "RooStats/LikelihoodIntervalPlot.h" 00034 #include "RooRealVar.h" 00035 00036 // use this order for safety on library loading 00037 using namespace RooFit ; 00038 using namespace RooStats ; 00039 00040 00041 // declare three variations on the same tutorial 00042 void rs_numberCountingCombination_expected(); 00043 void rs_numberCountingCombination_observed(); 00044 void rs_numberCountingCombination_observedWithTau(); 00045 00046 //////////////////////////////////////////// 00047 // main driver to choose one 00048 void rs_numberCountingCombination(int flag=1) 00049 { 00050 if(flag==1) 00051 rs_numberCountingCombination_expected(); 00052 if(flag==2) 00053 rs_numberCountingCombination_observed(); 00054 if(flag==3) 00055 rs_numberCountingCombination_observedWithTau(); 00056 } 00057 00058 ///////////////////////////////////////////// 00059 void rs_numberCountingCombination_expected() 00060 { 00061 00062 ///////////////////////////////////////// 00063 // An example of a number counting combination with two channels. 00064 // We consider both hypothesis testing and the equivalent confidence interval. 00065 ///////////////////////////////////////// 00066 00067 00068 ///////////////////////////////////////// 00069 // The Model building stage 00070 ///////////////////////////////////////// 00071 00072 // Step 1, define arrays with signal & bkg expectations and background uncertainties 00073 Double_t s[2] = {20.,10.}; // expected signal 00074 Double_t b[2] = {100.,100.}; // expected background 00075 Double_t db[2] = {.0100,.0100}; // fractional background uncertainty 00076 00077 00078 // Step 2, use a RooStats factory to build a PDF for a 00079 // number counting combination and add it to the workspace. 00080 // We need to give the signal expectation to relate the masterSignal 00081 // to the signal contribution in the individual channels. 00082 // The model neglects correlations in background uncertainty, 00083 // but they could be added without much change to the example. 00084 NumberCountingPdfFactory f; 00085 RooWorkspace* wspace = new RooWorkspace(); 00086 f.AddModel(s,2,wspace,"TopLevelPdf", "masterSignal"); 00087 00088 // Step 3, use a RooStats factory to add datasets to the workspace. 00089 // Step 3a. 00090 // Add the expected data to the workspace 00091 f.AddExpData(s, b, db, 2, wspace, "ExpectedNumberCountingData"); 00092 00093 // see below for a printout of the workspace 00094 // wspace->Print(); //uncomment to see structure of workspace 00095 00096 ///////////////////////////////////////// 00097 // The Hypothesis testing stage: 00098 ///////////////////////////////////////// 00099 // Step 4, Define the null hypothesis for the calculator 00100 // Here you need to know the name of the variables corresponding to hypothesis. 00101 RooRealVar* mu = wspace->var("masterSignal"); 00102 RooArgSet* poi = new RooArgSet(*mu); 00103 RooArgSet* nullParams = new RooArgSet("nullParams"); 00104 nullParams->addClone(*mu); 00105 // here we explicitly set the value of the parameters for the null 00106 nullParams->setRealValue("masterSignal",0); 00107 00108 // Step 5, Create a calculator for doing the hypothesis test. 00109 // because this is a 00110 ProfileLikelihoodCalculator plc( *wspace->data("ExpectedNumberCountingData"), 00111 *wspace->pdf("TopLevelPdf"), *poi, 0.05, nullParams); 00112 00113 00114 // Step 6, Use the Calculator to get a HypoTestResult 00115 HypoTestResult* htr = plc.GetHypoTest(); 00116 assert(htr != 0); 00117 cout << "-------------------------------------------------" << endl; 00118 cout << "The p-value for the null is " << htr->NullPValue() << endl; 00119 cout << "Corresponding to a signifcance of " << htr->Significance() << endl; 00120 cout << "-------------------------------------------------\n\n" << endl; 00121 00122 /* expected case should return: 00123 ------------------------------------------------- 00124 The p-value for the null is 0.015294 00125 Corresponding to a signifcance of 2.16239 00126 ------------------------------------------------- 00127 */ 00128 00129 ////////////////////////////////////////// 00130 // Confidence Interval Stage 00131 00132 // Step 8, Here we re-use the ProfileLikelihoodCalculator to return a confidence interval. 00133 // We need to specify what are our parameters of interest 00134 RooArgSet* paramsOfInterest = nullParams; // they are the same as before in this case 00135 plc.SetParameters(*paramsOfInterest); 00136 LikelihoodInterval* lrint = (LikelihoodInterval*) plc.GetInterval(); // that was easy. 00137 lrint->SetConfidenceLevel(0.95); 00138 00139 // Step 9, make a plot of the likelihood ratio and the interval obtained 00140 //paramsOfInterest->setRealValue("masterSignal",1.); 00141 // find limits 00142 double lower = lrint->LowerLimit(*mu); 00143 double upper = lrint->UpperLimit(*mu); 00144 00145 LikelihoodIntervalPlot lrPlot(lrint); 00146 lrPlot.SetMaximum(3.); 00147 lrPlot.Draw(); 00148 00149 // Step 10a. Get upper and lower limits 00150 cout << "lower limit on master signal = " << lower << endl; 00151 cout << "upper limit on master signal = " << upper << endl; 00152 00153 00154 // Step 10b, Ask if masterSignal=0 is in the interval. 00155 // Note, this is equivalent to the question of a 2-sigma hypothesis test: 00156 // "is the parameter point masterSignal=0 inside the 95% confidence interval?" 00157 // Since the signficance of the Hypothesis test was > 2-sigma it should not be: 00158 // eg. we exclude masterSignal=0 at 95% confidence. 00159 paramsOfInterest->setRealValue("masterSignal",0.); 00160 cout << "-------------------------------------------------" << endl; 00161 std::cout << "Consider this parameter point:" << std::endl; 00162 paramsOfInterest->first()->Print(); 00163 if( lrint->IsInInterval(*paramsOfInterest) ) 00164 std::cout << "It IS in the interval." << std::endl; 00165 else 00166 std::cout << "It is NOT in the interval." << std::endl; 00167 cout << "-------------------------------------------------\n\n" << endl; 00168 00169 // Step 10c, We also ask about the parameter point masterSignal=2, which is inside the interval. 00170 paramsOfInterest->setRealValue("masterSignal",2.); 00171 cout << "-------------------------------------------------" << endl; 00172 std::cout << "Consider this parameter point:" << std::endl; 00173 paramsOfInterest->first()->Print(); 00174 if( lrint->IsInInterval(*paramsOfInterest) ) 00175 std::cout << "It IS in the interval." << std::endl; 00176 else 00177 std::cout << "It is NOT in the interval." << std::endl; 00178 cout << "-------------------------------------------------\n\n" << endl; 00179 00180 00181 delete lrint; 00182 delete htr; 00183 delete wspace; 00184 delete poi; 00185 delete nullParams; 00186 00187 00188 00189 /* 00190 // Here's an example of what is in the workspace 00191 // wspace->Print(); 00192 RooWorkspace(NumberCountingWS) Number Counting WS contents 00193 00194 variables 00195 --------- 00196 (x_0,masterSignal,expected_s_0,b_0,y_0,tau_0,x_1,expected_s_1,b_1,y_1,tau_1) 00197 00198 p.d.f.s 00199 ------- 00200 RooProdPdf::joint[ pdfs=(sigRegion_0,sideband_0,sigRegion_1,sideband_1) ] = 2.20148e-08 00201 RooPoisson::sigRegion_0[ x=x_0 mean=splusb_0 ] = 0.036393 00202 RooPoisson::sideband_0[ x=y_0 mean=bTau_0 ] = 0.00398939 00203 RooPoisson::sigRegion_1[ x=x_1 mean=splusb_1 ] = 0.0380088 00204 RooPoisson::sideband_1[ x=y_1 mean=bTau_1 ] = 0.00398939 00205 00206 functions 00207 -------- 00208 RooAddition::splusb_0[ set1=(s_0,b_0) set2=() ] = 120 00209 RooProduct::s_0[ compRSet=(masterSignal,expected_s_0) compCSet=() ] = 20 00210 RooProduct::bTau_0[ compRSet=(b_0,tau_0) compCSet=() ] = 10000 00211 RooAddition::splusb_1[ set1=(s_1,b_1) set2=() ] = 110 00212 RooProduct::s_1[ compRSet=(masterSignal,expected_s_1) compCSet=() ] = 10 00213 RooProduct::bTau_1[ compRSet=(b_1,tau_1) compCSet=() ] = 10000 00214 00215 datasets 00216 -------- 00217 RooDataSet::ExpectedNumberCountingData(x_0,y_0,x_1,y_1) 00218 00219 embedded precalculated expensive components 00220 ------------------------------------------- 00221 */ 00222 00223 } 00224 00225 00226 00227 void rs_numberCountingCombination_observed() 00228 { 00229 00230 ///////////////////////////////////////// 00231 // The same example with observed data in a main 00232 // measurement and an background-only auxiliary 00233 // measurement with a factor tau more background 00234 // than in the main measurement. 00235 00236 ///////////////////////////////////////// 00237 // The Model building stage 00238 ///////////////////////////////////////// 00239 00240 // Step 1, define arrays with signal & bkg expectations and background uncertainties 00241 // We still need the expectation to relate signal in different channels with the master signal 00242 Double_t s[2] = {20.,10.}; // expected signal 00243 00244 00245 // Step 2, use a RooStats factory to build a PDF for a 00246 // number counting combination and add it to the workspace. 00247 // We need to give the signal expectation to relate the masterSignal 00248 // to the signal contribution in the individual channels. 00249 // The model neglects correlations in background uncertainty, 00250 // but they could be added without much change to the example. 00251 NumberCountingPdfFactory f; 00252 RooWorkspace* wspace = new RooWorkspace(); 00253 f.AddModel(s,2,wspace,"TopLevelPdf", "masterSignal"); 00254 00255 // Step 3, use a RooStats factory to add datasets to the workspace. 00256 // Add the observed data to the workspace 00257 Double_t mainMeas[2] = {123.,117.}; // observed main measurement 00258 Double_t bkgMeas[2] = {111.23,98.76}; // observed background 00259 Double_t dbMeas[2] = {.011,.0095}; // observed fractional background uncertainty 00260 f.AddData(mainMeas, bkgMeas, dbMeas, 2, wspace,"ObservedNumberCountingData"); 00261 00262 // see below for a printout of the workspace 00263 // wspace->Print(); //uncomment to see structure of workspace 00264 00265 ///////////////////////////////////////// 00266 // The Hypothesis testing stage: 00267 ///////////////////////////////////////// 00268 // Step 4, Define the null hypothesis for the calculator 00269 // Here you need to know the name of the variables corresponding to hypothesis. 00270 RooRealVar* mu = wspace->var("masterSignal"); 00271 RooArgSet* poi = new RooArgSet(*mu); 00272 RooArgSet* nullParams = new RooArgSet("nullParams"); 00273 nullParams->addClone(*mu); 00274 // here we explicitly set the value of the parameters for the null 00275 nullParams->setRealValue("masterSignal",0); 00276 00277 // Step 5, Create a calculator for doing the hypothesis test. 00278 // because this is a 00279 ProfileLikelihoodCalculator plc( *wspace->data("ObservedNumberCountingData"), 00280 *wspace->pdf("TopLevelPdf"), *poi, 0.05, nullParams); 00281 00282 wspace->var("tau_0")->Print(); 00283 wspace->var("tau_1")->Print(); 00284 00285 // Step 7, Use the Calculator to get a HypoTestResult 00286 HypoTestResult* htr = plc.GetHypoTest(); 00287 cout << "-------------------------------------------------" << endl; 00288 cout << "The p-value for the null is " << htr->NullPValue() << endl; 00289 cout << "Corresponding to a signifcance of " << htr->Significance() << endl; 00290 cout << "-------------------------------------------------\n\n" << endl; 00291 00292 /* observed case should return: 00293 ------------------------------------------------- 00294 The p-value for the null is 0.0351669 00295 Corresponding to a signifcance of 1.80975 00296 ------------------------------------------------- 00297 */ 00298 00299 00300 ////////////////////////////////////////// 00301 // Confidence Interval Stage 00302 00303 // Step 8, Here we re-use the ProfileLikelihoodCalculator to return a confidence interval. 00304 // We need to specify what are our parameters of interest 00305 RooArgSet* paramsOfInterest = nullParams; // they are the same as before in this case 00306 plc.SetParameters(*paramsOfInterest); 00307 LikelihoodInterval* lrint = (LikelihoodInterval*) plc.GetInterval(); // that was easy. 00308 lrint->SetConfidenceLevel(0.95); 00309 00310 // Step 9c. Get upper and lower limits 00311 cout << "lower limit on master signal = " << lrint->LowerLimit(*mu ) << endl; 00312 cout << "upper limit on master signal = " << lrint->UpperLimit(*mu ) << endl; 00313 00314 delete lrint; 00315 delete htr; 00316 delete wspace; 00317 delete nullParams; 00318 delete poi; 00319 00320 00321 } 00322 00323 00324 void rs_numberCountingCombination_observedWithTau() 00325 { 00326 00327 ///////////////////////////////////////// 00328 // The same example with observed data in a main 00329 // measurement and an background-only auxiliary 00330 // measurement with a factor tau more background 00331 // than in the main measurement. 00332 00333 ///////////////////////////////////////// 00334 // The Model building stage 00335 ///////////////////////////////////////// 00336 00337 // Step 1, define arrays with signal & bkg expectations and background uncertainties 00338 // We still need the expectation to relate signal in different channels with the master signal 00339 Double_t s[2] = {20.,10.}; // expected signal 00340 00341 // Step 2, use a RooStats factory to build a PDF for a 00342 // number counting combination and add it to the workspace. 00343 // We need to give the signal expectation to relate the masterSignal 00344 // to the signal contribution in the individual channels. 00345 // The model neglects correlations in background uncertainty, 00346 // but they could be added without much change to the example. 00347 NumberCountingPdfFactory f; 00348 RooWorkspace* wspace = new RooWorkspace(); 00349 f.AddModel(s,2,wspace,"TopLevelPdf", "masterSignal"); 00350 00351 // Step 3, use a RooStats factory to add datasets to the workspace. 00352 // Add the observed data to the workspace in the on-off problem. 00353 Double_t mainMeas[2] = {123.,117.}; // observed main measurement 00354 Double_t sideband[2] = {11123.,9876.}; // observed sideband 00355 Double_t tau[2] = {100.,100.}; // ratio of bkg in sideband to bkg in main measurement, from experimental design. 00356 f.AddDataWithSideband(mainMeas, sideband, tau, 2, wspace,"ObservedNumberCountingDataWithSideband"); 00357 00358 // see below for a printout of the workspace 00359 // wspace->Print(); //uncomment to see structure of workspace 00360 00361 ///////////////////////////////////////// 00362 // The Hypothesis testing stage: 00363 ///////////////////////////////////////// 00364 // Step 4, Define the null hypothesis for the calculator 00365 // Here you need to know the name of the variables corresponding to hypothesis. 00366 RooRealVar* mu = wspace->var("masterSignal"); 00367 RooArgSet* poi = new RooArgSet(*mu); 00368 RooArgSet* nullParams = new RooArgSet("nullParams"); 00369 nullParams->addClone(*mu); 00370 // here we explicitly set the value of the parameters for the null 00371 nullParams->setRealValue("masterSignal",0); 00372 00373 // Step 5, Create a calculator for doing the hypothesis test. 00374 // because this is a 00375 ProfileLikelihoodCalculator plc( *wspace->data("ObservedNumberCountingDataWithSideband"), 00376 *wspace->pdf("TopLevelPdf"), *poi, 0.05, nullParams); 00377 00378 00379 00380 // Step 7, Use the Calculator to get a HypoTestResult 00381 HypoTestResult* htr = plc.GetHypoTest(); 00382 cout << "-------------------------------------------------" << endl; 00383 cout << "The p-value for the null is " << htr->NullPValue() << endl; 00384 cout << "Corresponding to a signifcance of " << htr->Significance() << endl; 00385 cout << "-------------------------------------------------\n\n" << endl; 00386 00387 /* observed case should return: 00388 ------------------------------------------------- 00389 The p-value for the null is 0.0352035 00390 Corresponding to a signifcance of 1.80928 00391 ------------------------------------------------- 00392 */ 00393 00394 00395 ////////////////////////////////////////// 00396 // Confidence Interval Stage 00397 00398 // Step 8, Here we re-use the ProfileLikelihoodCalculator to return a confidence interval. 00399 // We need to specify what are our parameters of interest 00400 RooArgSet* paramsOfInterest = nullParams; // they are the same as before in this case 00401 plc.SetParameters(*paramsOfInterest); 00402 LikelihoodInterval* lrint = (LikelihoodInterval*) plc.GetInterval(); // that was easy. 00403 lrint->SetConfidenceLevel(0.95); 00404 00405 00406 00407 // Step 9c. Get upper and lower limits 00408 cout << "lower limit on master signal = " << lrint->LowerLimit(*mu ) << endl; 00409 cout << "upper limit on master signal = " << lrint->UpperLimit(*mu ) << endl; 00410 00411 delete lrint; 00412 delete htr; 00413 delete wspace; 00414 delete nullParams; 00415 delete poi; 00416 00417 00418 }