00001 //+ example demostrating usage of HybridCalcultor 00002 /* 00003 HybridInstructional 00004 00005 Authors: Kyle Cranmer, Wouter Verkerke, and Sven Kreiss 00006 date May 2010 Part 1-3 00007 date Dec 2010 Part 4-6 00008 00009 A hypothesis testing example based on number counting 00010 with background uncertainty. 00011 00012 NOTE: This example must be run with the ACLIC (the + option ) due to the 00013 new class that is defined. 00014 00015 This example: 00016 - demonstrates the usage of the HybridCalcultor (Part 4-6) 00017 - demonstrates the numerical integration of RooFit (Part 2) 00018 - validates the RooStats against an example with a known analytic answer 00019 - demonstrates usage of different test statistics 00020 - explains subtle choices in the prior used for hybrid methods 00021 - demonstrates usage of different priors for the nuisance parameters 00022 - demonstrates usage of PROOF 00023 00024 The basic setup here is that a main measurement has observed x events with an 00025 expectation of s+b. One can choose an ad hoc prior for the uncertainty on b, 00026 or try to base it on an auxiliary measurement. In this case, the auxiliary 00027 measurement (aka control measurement, sideband) is another counting experiment 00028 with measurement y and expectation tau*b. With an 'original prior' on b, 00029 called \eta(b) then one can obtain a posterior from the auxiliary measurement 00030 \pi(b) = \eta(b) * Pois(y|tau*b). This is a principled choice for a prior 00031 on b in the main measurement of x, which can then be treated in a hybrid 00032 Bayesian/Frequentist way. Additionally, one can try to treat the two 00033 measurements simultaneously, which is detailed in Part 6 of the tutorial. 00034 00035 This tutorial is related to the FourBin.C tutorial in the modeling, but 00036 focuses on hypothesis testing instead of interval estimation. 00037 00038 More background on this 'prototype problem' can be found in the 00039 following papers: 00040 00041 Evaluation of three methods for calculating statistical significance 00042 when incorporating a systematic uncertainty into a test of the 00043 background-only hypothesis for a Poisson process 00044 Authors: Robert D. Cousins, James T. Linnemann, Jordan Tucker 00045 http://arxiv.org/abs/physics/0702156 00046 NIM A 595 (2008) 480--501 00047 00048 Statistical Challenges for Searches for New Physics at the LHC 00049 Authors: Kyle Cranmer 00050 http://arxiv.org/abs/physics/0511028 00051 00052 Measures of Significance in HEP and Astrophysics 00053 Authors: J. T. Linnemann 00054 http://arxiv.org/abs/physics/0312059 00055 */ 00056 00057 #include "RooGlobalFunc.h" 00058 #include "RooRealVar.h" 00059 #include "RooProdPdf.h" 00060 #include "RooWorkspace.h" 00061 #include "RooDataSet.h" 00062 #include "TCanvas.h" 00063 #include "TStopwatch.h" 00064 #include "TH1.h" 00065 #include "RooPlot.h" 00066 #include "RooMsgService.h" 00067 00068 #include "RooStats/NumberCountingUtils.h" 00069 00070 #include "RooStats/HybridCalculator.h" 00071 #include "RooStats/ToyMCSampler.h" 00072 #include "RooStats/HypoTestPlot.h" 00073 00074 #include "RooStats/ProfileLikelihoodTestStat.h" 00075 #include "RooStats/SimpleLikelihoodRatioTestStat.h" 00076 #include "RooStats/RatioOfProfiledLikelihoodsTestStat.h" 00077 #include "RooStats/MaxLikelihoodEstimateTestStat.h" 00078 00079 using namespace RooFit; 00080 using namespace RooStats; 00081 00082 ////////////////////////////////////////////////// 00083 // A New Test Statistic Class for this example. 00084 // It simply returns the sum of the values in a particular 00085 // column of a dataset. 00086 // You can ignore this class and focus on the macro below 00087 ////////////////////////////////////////////////// 00088 class BinCountTestStat : public TestStatistic { 00089 public: 00090 BinCountTestStat(void) : fColumnName("tmp") {} 00091 BinCountTestStat(string columnName) : fColumnName(columnName) {} 00092 00093 virtual Double_t Evaluate(RooAbsData& data, RooArgSet& /*nullPOI*/) { 00094 // This is the main method in the interface 00095 Double_t value = 0.0; 00096 for(int i=0; i < data.numEntries(); i++) { 00097 value += data.get(i)->getRealValue(fColumnName.c_str()); 00098 } 00099 return value; 00100 } 00101 virtual const TString GetVarName() const { return fColumnName; } 00102 00103 private: 00104 string fColumnName; 00105 00106 protected: 00107 ClassDef(BinCountTestStat,1) 00108 }; 00109 00110 ClassImp(BinCountTestStat) 00111 00112 ////////////////////////////////////////////////// 00113 // The Actual Tutorial Macro 00114 ////////////////////////////////////////////////// 00115 00116 void HybridInstructional() { 00117 00118 // This tutorial has 6 parts 00119 // Table of Contents 00120 // Setup 00121 // 1. Make the model for the 'prototype problem' 00122 // Special cases 00123 // 2. Use RooFit's direct integration to get p-value & significance 00124 // 3. Use RooStats analytic solution for this problem 00125 // RooStats HybridCalculator -- can be generalized 00126 // 4. RooStats ToyMC version of 2. & 3. 00127 // 5. RooStats ToyMC with an equivalent test statistic 00128 // 6. RooStats ToyMC with simultaneous control & main measurement 00129 00130 // It takes ~4 min without PROOF and ~2 min with PROOF on 4 cores. 00131 // Of course, everything looks nicer with more toys, which takes longer. 00132 00133 #ifdef __CINT__ 00134 cout << "DO NOT RUN WITH CINT: we are using a custom test statistic "; 00135 cout << "which requires that this tutorial must be compiled "; 00136 cout << "with ACLIC" << endl; 00137 return; 00138 #endif 00139 00140 00141 TStopwatch t; 00142 t.Start(); 00143 TCanvas *c = new TCanvas; 00144 c->Divide(2,2); 00145 00146 /////////////////////////////////////////////////////// 00147 // P A R T 1 : D I R E C T I N T E G R A T I O N 00148 ////////////////////////////////////////////////////// 00149 // Make model for prototype on/off problem 00150 // Pois(x | s+b) * Pois(y | tau b ) 00151 // for Z_Gamma, use uniform prior on b. 00152 RooWorkspace* w = new RooWorkspace("w"); 00153 w->factory("Poisson::px(x[150,0,500],sum::splusb(s[0,0,100],b[100,0,300]))"); 00154 w->factory("Poisson::py(y[100,0,500],prod::taub(tau[1.],b))"); 00155 w->factory("PROD::model(px,py)"); 00156 w->factory("Uniform::prior_b(b)"); 00157 00158 // We will control the output level in a few places to avoid 00159 // verbose progress messages. We start by keeping track 00160 // of the current threshold on messages. 00161 RooFit::MsgLevel msglevel = RooMsgService::instance().globalKillBelow(); 00162 00163 // Use PROOF-lite on multi-core machines 00164 ProofConfig* pc = NULL; 00165 // uncomment below if you want to use PROOF 00166 // pc = new ProofConfig(*w, 4, "workers=4", kFALSE); // machine with 4 cores 00167 // pc = new ProofConfig(*w, 2, "workers=2", kFALSE); // machine with 2 cores 00168 00169 /////////////////////////////////////////////////////// 00170 // P A R T 2 : D I R E C T I N T E G R A T I O N 00171 ////////////////////////////////////////////////////// 00172 // This is not the 'RooStats' way, but in this case the distribution 00173 // of the test statistic is simply x and can be calculated directly 00174 // from the PDF using RooFit's built-in integration. 00175 // Note, this does not generalize to situations in which the test statistic 00176 // depends on many events (rows in a dataset). 00177 00178 // construct the Bayesian-averaged model (eg. a projection pdf) 00179 // p'(x|s) = \int db p(x|s+b) * [ p(y|b) * prior(b) ] 00180 w->factory("PROJ::averagedModel(PROD::foo(px|b,py,prior_b),b)") ; 00181 00182 RooMsgService::instance().setGlobalKillBelow(RooFit::ERROR); // lower message level 00183 // plot it, red is averaged model, green is b known exactly, blue is s+b av model 00184 RooPlot* frame = w->var("x")->frame(Range(50,230)) ; 00185 w->pdf("averagedModel")->plotOn(frame,LineColor(kRed)) ; 00186 w->pdf("px")->plotOn(frame,LineColor(kGreen)) ; 00187 w->var("s")->setVal(50.); 00188 w->pdf("averagedModel")->plotOn(frame,LineColor(kBlue)) ; 00189 c->cd(1); 00190 frame->Draw() ; 00191 w->var("s")->setVal(0.); 00192 00193 // compare analytic calculation of Z_Bi 00194 // with the numerical RooFit implementation of Z_Gamma 00195 // for an example with x = 150, y = 100 00196 00197 // numeric RooFit Z_Gamma 00198 w->var("y")->setVal(100); 00199 w->var("x")->setVal(150); 00200 RooAbsReal* cdf = w->pdf("averagedModel")->createCdf(*w->var("x")); 00201 cdf->getVal(); // get ugly print messages out of the way 00202 cout << "-----------------------------------------"<<endl; 00203 cout << "Part 2" << endl; 00204 cout << "Hybrid p-value from direct integration = " << 1-cdf->getVal() << endl; 00205 cout << "Z_Gamma Significance = " << 00206 PValueToSignificance(1-cdf->getVal()) << endl; 00207 RooMsgService::instance().setGlobalKillBelow(msglevel); // set it back 00208 00209 ///////////////////////////////////////////////// 00210 // P A R T 3 : A N A L Y T I C R E S U L T 00211 ///////////////////////////////////////////////// 00212 // In this special case, the integrals are known analytically 00213 // and they are implemented in RooStats::NumberCountingUtils 00214 00215 // analytic Z_Bi 00216 double p_Bi = NumberCountingUtils::BinomialWithTauObsP(150, 100, 1); 00217 double Z_Bi = NumberCountingUtils::BinomialWithTauObsZ(150, 100, 1); 00218 cout << "-----------------------------------------"<<endl; 00219 cout << "Part 3" << endl; 00220 std::cout << "Z_Bi p-value (analytic): " << p_Bi << std::endl; 00221 std::cout << "Z_Bi significance (analytic): " << Z_Bi << std::endl; 00222 t.Stop(); t.Print(); t.Reset(); t.Start(); 00223 00224 //////////////////////////////////////////////////////////////// 00225 // P A R T 4 : U S I N G H Y B R I D C A L C U L A T O R 00226 //////////////////////////////////////////////////////////////// 00227 // Now we demonstrate the RooStats HybridCalculator. 00228 // 00229 // Like all RooStats calculators it needs the data and a ModelConfig 00230 // for the relevant hypotheses. Since we are doing hypothesis testing 00231 // we need a ModelConfig for the null (background only) and the alternate 00232 // (signal+background) hypotheses. We also need to specify the PDF, 00233 // the parameters of interest, and the observables. Furthermore, since 00234 // the parameter of interest is floating, we need to specify which values 00235 // of the parameter corresponds to the null and alternate (eg. s=0 and s=50) 00236 // 00237 // define some sets of variables obs={x} and poi={s} 00238 // note here, x is the only observable in the main measurement 00239 // and y is treated as a separate measurement, which is used 00240 // to produce the prior that will be used in this calculation 00241 // to randomize the nuisance parameters. 00242 w->defineSet("obs","x"); 00243 w->defineSet("poi","s"); 00244 00245 // create a toy dataset with the x=150 00246 RooDataSet *data = new RooDataSet("d", "d", *w->set("obs")); 00247 data->add(*w->set("obs")); 00248 00249 ////////////////////////////////////////////////////////// 00250 // Part 3a : Setup ModelConfigs 00251 // create the null (background-only) ModelConfig with s=0 00252 ModelConfig b_model("B_model", w); 00253 b_model.SetPdf(*w->pdf("px")); 00254 b_model.SetObservables(*w->set("obs")); 00255 b_model.SetParametersOfInterest(*w->set("poi")); 00256 w->var("s")->setVal(0.0); // important! 00257 b_model.SetSnapshot(*w->set("poi")); 00258 00259 // create the alternate (signal+background) ModelConfig with s=50 00260 ModelConfig sb_model("S+B_model", w); 00261 sb_model.SetPdf(*w->pdf("px")); 00262 sb_model.SetObservables(*w->set("obs")); 00263 sb_model.SetParametersOfInterest(*w->set("poi")); 00264 w->var("s")->setVal(50.0); // important! 00265 sb_model.SetSnapshot(*w->set("poi")); 00266 00267 00268 ////////////////////////////////////////////////////////// 00269 // Part 3b : Choose Test Statistic 00270 // To make an equivalent calculation we need to use x as the test 00271 // statistic. This is not a built-in test statistic in RooStats 00272 // so we define it above. The new class inherits from the 00273 // RooStats::TestStatistic interface, and simply returns the value 00274 // of x in the dataset. 00275 00276 BinCountTestStat binCount("x"); 00277 00278 ////////////////////////////////////////////////////////// 00279 // Part 3c : Define Prior used to randomize nuisance parameters 00280 // 00281 // The prior used for the hybrid calculator is the posterior 00282 // from the auxiliary measurement y. The model for the aux. 00283 // measurement is Pois(y|tau*b), thus the likleihood function 00284 // is proportional to (has the form of) a Gamma distribution. 00285 // if the 'original prior' \eta(b) is uniform, then from 00286 // Bayes's theorem we have the posterior: 00287 // \pi(b) = Pois(y|tau*b) * \eta(b) 00288 // If \eta(b) is flat, then we arrive at a Gamma distribution. 00289 // Since RooFit will normalize the PDF we can actually supply 00290 // py=Pois(y,tau*b) that will be equivalent to multiplying by a uniform. 00291 // 00292 // Alternatively, we could explicitly use a gamma distribution: 00293 // w->factory("Gamma::gamma(b,sum::temp(y,1),1,0)"); 00294 // 00295 // or we can use some other ad hoc prior that do not naturally 00296 // follow from the known form of the auxiliary measurement. 00297 // The common choice is the equivlaent Gaussian: 00298 w->factory("Gaussian::gauss_prior(b,y, expr::sqrty('sqrt(y)',y))"); 00299 // this corresponds to the "Z_N" calculation. 00300 // 00301 // or one could use the analogous log-normal prior 00302 w->factory("Lognormal::lognorm_prior(b,y, expr::kappa('1+1./sqrt(y)',y))"); 00303 // 00304 // Ideally, the HybridCalculator would be able to inspect the full 00305 // model Pois(x | s+b) * Pois(y | tau b ) and be given the original 00306 // prior \eta(b) to form \pi(b) = Pois(y|tau*b) * \eta(b). 00307 // This is not yet implemented because in the general case 00308 // it is not easy to identify the terms in the PDF that correspond 00309 // to the auxiliary measurement. So for now, it must be set 00310 // explicitly with: 00311 // - ForcePriorNuisanceNull() 00312 // - ForcePriorNuisanceAlt() 00313 // the name "ForcePriorNuisance" was chosen because we anticipate 00314 // this to be auto-detected, but will leave the option open 00315 // to force to a different prior for the nuisance parameters. 00316 00317 ////////////////////////////////////////////////////////// 00318 // Part 3d : Construct and configure the HybridCalculator 00319 00320 HybridCalculator hc1(*data, sb_model, b_model); 00321 ToyMCSampler *toymcs1 = (ToyMCSampler*)hc1.GetTestStatSampler(); 00322 toymcs1->SetNEventsPerToy(1); // because the model is in number counting form 00323 toymcs1->SetTestStatistic(&binCount); // set the test statistic 00324 hc1.SetToys(20000,1000); 00325 hc1.ForcePriorNuisanceAlt(*w->pdf("py")); 00326 hc1.ForcePriorNuisanceNull(*w->pdf("py")); 00327 // if you wanted to use the ad hoc Gaussian prior instead 00328 // hc1.ForcePriorNuisanceAlt(*w->pdf("gauss_prior")); 00329 // hc1.ForcePriorNuisanceNull(*w->pdf("gauss_prior")); 00330 // if you wanted to use the ad hoc log-normal prior instead 00331 // hc1.ForcePriorNuisanceAlt(*w->pdf("lognorm_prior")); 00332 // hc1.ForcePriorNuisanceNull(*w->pdf("lognorm_prior")); 00333 00334 // enable proof 00335 // NOTE: This test statistic is defined in this macro, and is not 00336 // working with PROOF currently. Luckily test stat is fast to evaluate. 00337 // if(pc) toymcs1->SetProofConfig(pc); 00338 00339 // these lines save current msg level and then kill any messages below ERROR 00340 RooMsgService::instance().setGlobalKillBelow(RooFit::ERROR); 00341 // Get the result 00342 HypoTestResult *r1 = hc1.GetHypoTest(); 00343 RooMsgService::instance().setGlobalKillBelow(msglevel); // set it back 00344 cout << "-----------------------------------------"<<endl; 00345 cout << "Part 4" << endl; 00346 r1->Print(); 00347 t.Stop(); t.Print(); t.Reset(); t.Start(); 00348 00349 c->cd(2); 00350 HypoTestPlot *p1 = new HypoTestPlot(*r1,30); // 30 bins, TS is discrete 00351 p1->Draw(); 00352 00353 //////////////////////////////////////////////////////////////////////////// 00354 // P A R T 5 : U S I N G H Y B R I D C A L C U L A T O R W I T H 00355 // A N A L T E R N A T I V E T E S T S T A T I S T I C 00356 ///////////////////////////////////////////////////////////////////////////// 00357 // 00358 // A likelihood ratio test statistics should be 1-to-1 with the count x 00359 // when the value of b is fixed in the likelihood. This is implemented 00360 // by the SimpleLikelihoodRatioTestStat 00361 00362 SimpleLikelihoodRatioTestStat slrts(*b_model.GetPdf(),*sb_model.GetPdf()); 00363 slrts.SetNullParameters(*b_model.GetSnapshot()); 00364 slrts.SetAltParameters(*sb_model.GetSnapshot()); 00365 00366 // HYBRID CALCULATOR 00367 HybridCalculator hc2(*data, sb_model, b_model); 00368 ToyMCSampler *toymcs2 = (ToyMCSampler*)hc2.GetTestStatSampler(); 00369 toymcs2->SetNEventsPerToy(1); 00370 toymcs2->SetTestStatistic(&slrts); 00371 hc2.SetToys(20000,1000); 00372 hc2.ForcePriorNuisanceAlt(*w->pdf("py")); 00373 hc2.ForcePriorNuisanceNull(*w->pdf("py")); 00374 // if you wanted to use the ad hoc Gaussian prior instead 00375 // hc2.ForcePriorNuisanceAlt(*w->pdf("gauss_prior")); 00376 // hc2.ForcePriorNuisanceNull(*w->pdf("gauss_prior")); 00377 // if you wanted to use the ad hoc log-normal prior instead 00378 // hc2.ForcePriorNuisanceAlt(*w->pdf("lognorm_prior")); 00379 // hc2.ForcePriorNuisanceNull(*w->pdf("lognorm_prior")); 00380 00381 // enable proof 00382 if(pc) toymcs2->SetProofConfig(pc); 00383 00384 // these lines save current msg level and then kill any messages below ERROR 00385 RooMsgService::instance().setGlobalKillBelow(RooFit::ERROR); 00386 // Get the result 00387 HypoTestResult *r2 = hc2.GetHypoTest(); 00388 cout << "-----------------------------------------"<<endl; 00389 cout << "Part 5" << endl; 00390 r2->Print(); 00391 t.Stop(); t.Print(); t.Reset(); t.Start(); 00392 RooMsgService::instance().setGlobalKillBelow(msglevel); 00393 00394 c->cd(3); 00395 HypoTestPlot *p2 = new HypoTestPlot(*r2,30); // 30 bins 00396 p2->Draw(); 00397 00398 //////////////////////////////////////////////////////////////////////////// 00399 // P A R T 6 : U S I N G H Y B R I D C A L C U L A T O R W I T H 00400 // A N A L T E R N A T I V E T E S T S T A T I S T I C 00401 // A N D S I M U L T A N E O U S M O D E L 00402 ///////////////////////////////////////////////////////////////////////////// 00403 // 00404 // If one wants to use a test statistic in which the nuisance parameters 00405 // are profiled (in one way or another), then the PDF must constrain b. 00406 // Otherwise any observation x can always be explained with s=0 and b=x/tau. 00407 // 00408 // In this case, one is really thinking about the problem in a 00409 // different way. They are considering x,y simultaneously. 00410 // and the PDF should be Pois(x | s+b) * Pois(y | tau b ) 00411 // and the set 'obs' should be {x,y}. 00412 00413 w->defineSet("obsXY","x,y"); 00414 00415 // create a toy dataset with the x=150, y=100 00416 w->var("x")->setVal(150.); 00417 w->var("y")->setVal(100.); 00418 RooDataSet *dataXY = new RooDataSet("dXY", "dXY", *w->set("obsXY")); 00419 dataXY->add(*w->set("obsXY")); 00420 00421 // now we need new model configs, with PDF="model" 00422 ModelConfig b_modelXY("B_modelXY", w); 00423 b_modelXY.SetPdf(*w->pdf("model")); // IMPORTANT 00424 b_modelXY.SetObservables(*w->set("obsXY")); 00425 b_modelXY.SetParametersOfInterest(*w->set("poi")); 00426 w->var("s")->setVal(0.0); // IMPORTANT 00427 b_modelXY.SetSnapshot(*w->set("poi")); 00428 00429 // create the alternate (signal+background) ModelConfig with s=50 00430 ModelConfig sb_modelXY("S+B_modelXY", w); 00431 sb_modelXY.SetPdf(*w->pdf("model")); // IMPORTANT 00432 sb_modelXY.SetObservables(*w->set("obsXY")); 00433 sb_modelXY.SetParametersOfInterest(*w->set("poi")); 00434 w->var("s")->setVal(50.0); // IMPORTANT 00435 sb_modelXY.SetSnapshot(*w->set("poi")); 00436 00437 // without this print, their can be a crash when using PROOF. Strange. 00438 // w->Print(); 00439 00440 // Test statistics like the profile likelihood ratio 00441 // (or the ratio of profiled likelihoods (Tevatron) or the MLE for s) 00442 // will now work, since the nuisance parameter b is constrained by y. 00443 // ratio of alt and null likelihoods with background yield profiled. 00444 // 00445 // NOTE: These are slower because they have to run fits for each toy 00446 00447 // Tevatron-style Ratio of profiled likelihoods 00448 // Q_Tev = -log L(s=0,\hat\hat{b})/L(s=50,\hat\hat{b}) 00449 RatioOfProfiledLikelihoodsTestStat 00450 ropl(*b_modelXY.GetPdf(), *sb_modelXY.GetPdf(), sb_modelXY.GetSnapshot()); 00451 ropl.SetSubtractMLE(false); 00452 00453 // profile likelihood where alternate is best fit value of signal yield 00454 // \lambda(0) = -log L(s=0,\hat\hat{b})/L(\hat{s},\hat{b}) 00455 ProfileLikelihoodTestStat profll(*b_modelXY.GetPdf()); 00456 00457 // just use the maximum likelihood estimate of signal yield 00458 // MLE = \hat{s} 00459 MaxLikelihoodEstimateTestStat mlets(*sb_modelXY.GetPdf(), *w->var("s")); 00460 00461 // However, it is less clear how to justify the prior used in randomizing 00462 // the nuisance parameters (since that is a property of the ensemble, 00463 // and y is a property of each toy pseudo experiment. In that case, 00464 // one probably wants to consider a different y0 which will be held 00465 // constant and the prior \pi(b) = Pois(y0 | tau b) * \eta(b). 00466 w->factory("y0[100]"); 00467 w->factory("Gamma::gamma_y0(b,sum::temp0(y0,1),1,0)"); 00468 w->factory("Gaussian::gauss_prior_y0(b,y0, expr::sqrty0('sqrt(y0)',y0))"); 00469 00470 00471 // HYBRID CALCULATOR 00472 HybridCalculator hc3(*dataXY, sb_modelXY, b_modelXY); 00473 ToyMCSampler *toymcs3 = (ToyMCSampler*)hc3.GetTestStatSampler(); 00474 toymcs3->SetNEventsPerToy(1); 00475 toymcs3->SetTestStatistic(&slrts); 00476 hc3.SetToys(30000,1000); 00477 hc3.ForcePriorNuisanceAlt(*w->pdf("gamma_y0")); 00478 hc3.ForcePriorNuisanceNull(*w->pdf("gamma_y0")); 00479 // if you wanted to use the ad hoc Gaussian prior instead 00480 // hc3.ForcePriorNuisanceAlt(*w->pdf("gauss_prior_y0")); 00481 // hc3.ForcePriorNuisanceNull(*w->pdf("gauss_prior_y0")); 00482 00483 // choose fit-based test statistic 00484 toymcs3->SetTestStatistic(&profll); 00485 //toymcs3->SetTestStatistic(&ropl); 00486 //toymcs3->SetTestStatistic(&mlets); 00487 00488 // enable proof 00489 if(pc) toymcs3->SetProofConfig(pc); 00490 00491 // these lines save current msg level and then kill any messages below ERROR 00492 RooMsgService::instance().setGlobalKillBelow(RooFit::ERROR); 00493 // Get the result 00494 HypoTestResult *r3 = hc3.GetHypoTest(); 00495 cout << "-----------------------------------------"<<endl; 00496 cout << "Part 6" << endl; 00497 r3->Print(); 00498 t.Stop(); t.Print(); t.Reset(); t.Start(); 00499 RooMsgService::instance().setGlobalKillBelow(msglevel); 00500 00501 c->cd(4); 00502 c->GetPad(4)->SetLogy(); 00503 HypoTestPlot *p3 = new HypoTestPlot(*r3,50); // 50 bins 00504 p3->Draw(); 00505 00506 c->SaveAs("zbi.pdf"); 00507 00508 00509 /////////////////////////////////////////////////////////// 00510 // OUTPUT W/O PROOF (2.66 GHz Intel Core i7) 00511 /////////////////////////////////////////////////////////// 00512 00513 /* 00514 ----------------------------------------- 00515 Part 2 00516 Hybrid p-value from direct integration = 0.00094165 00517 Z_Gamma Significance = 3.10804 00518 ----------------------------------------- 00519 Part 3 00520 Z_Bi p-value (analytic): 0.00094165 00521 Z_Bi significance (analytic): 3.10804 00522 Real time 0:00:00, CP time 0.610 00523 00524 ----------------------------------------- 00525 Part 4 00526 Results HybridCalculator_result: 00527 - Null p-value = 0.00115 +/- 0.000228984 00528 - Significance = 3.04848 sigma 00529 - Number of S+B toys: 1000 00530 - Number of B toys: 20000 00531 - Test statistic evaluated on data: 150 00532 - CL_b: 0.99885 +/- 0.000239654 00533 - CL_s+b: 0.476 +/- 0.0157932 00534 - CL_s: 0.476548 +/- 0.0158118 00535 Real time 0:00:07, CP time 7.620 00536 00537 ----------------------------------------- 00538 Part 5 00539 Results HybridCalculator_result: 00540 - Null p-value = 0.0009 +/- 0.000206057 00541 - Significance = 3.12139 sigma 00542 - Number of S+B toys: 1000 00543 - Number of B toys: 20000 00544 - Test statistic evaluated on data: 10.8198 00545 - CL_b: 0.9991 +/- 0.000212037 00546 - CL_s+b: 0.465 +/- 0.0157726 00547 - CL_s: 0.465419 +/- 0.0157871 00548 Real time 0:00:34, CP time 34.360 00549 00550 ----------------------------------------- 00551 Part 6 00552 Results HybridCalculator_result: 00553 - Null p-value = 0.000666667 +/- 0.000149021 00554 - Significance = 3.20871 sigma 00555 - Number of S+B toys: 1000 00556 - Number of B toys: 30000 00557 - Test statistic evaluated on data: 5.03388 00558 - CL_b: 0.999333 +/- 0.000149021 00559 - CL_s+b: 0.511 +/- 0.0158076 00560 - CL_s: 0.511341 +/- 0.0158183 00561 Real time 0:05:06, CP time 306.330 00562 00563 */ 00564 00565 00566 00567 /////////////////////////////////////////////////////////// 00568 // OUTPUT w/ PROOF (2.66 GHz Intel Core i7, 4 virtual cores) 00569 /////////////////////////////////////////////////////////// 00570 /* 00571 ----------------------------------------- 00572 Part 5 00573 Results HybridCalculator_result: 00574 - Null p-value = 0.00075 +/- 0.000173124 00575 - Significance = 3.17468 sigma 00576 - Number of S+B toys: 1000 00577 - Number of B toys: 20000 00578 - Test statistic evaluated on data: 10.8198 00579 - CL_b: 0.99925 +/- 0.000193577 00580 - CL_s+b: 0.454 +/- 0.0157443 00581 - CL_s: 0.454341 +/- 0.0157564 00582 Real time 0:00:16, CP time 0.990 00583 00584 ----------------------------------------- 00585 Part 6 00586 Results HybridCalculator_result: 00587 - Null p-value = 0.0007 +/- 0.000152699 00588 - Significance = 3.19465 sigma 00589 - Number of S+B toys: 1000 00590 - Number of B toys: 30000 00591 - Test statistic evaluated on data: 5.03388 00592 - CL_b: 0.9993 +/- 0.000152699 00593 - CL_s+b: 0.518 +/- 0.0158011 00594 - CL_s: 0.518363 +/- 0.0158124 00595 Real time 0:01:25, CP time 0.580 00596 00597 */ 00598 00599 ////////////////////////////////////////// 00600 // Comparison 00601 /////////////////////////////////////////// 00602 // LEPStatToolsForLHC 00603 // https://plone4.fnal.gov:4430/P0/phystat/packages/0703002 00604 // Uses Gaussian prior 00605 // CL_b = 6.218476e-04, Significance = 3.228665 sigma 00606 // 00607 ////////////////////////////////////////// 00608 // Comparison 00609 /////////////////////////////////////////// 00610 // Asymptotics 00611 // From the value of the profile likelihood ratio (5.0338) 00612 // The significance can be estimated using Wilks's theorem 00613 // significance = sqrt(2*profileLR) = 3.1729 sigma 00614 00615 00616 }