ROOT_528-00b_version: tutorials/roostats/HybridInstructional.C Source File

00001 //+  example demostrating usage of HybridCalcultor 
00002 /*
00003 HybridInstructional
00004 
00005 Authors: Kyle Cranmer, Wouter Verkerke, and Sven Kreiss
00006 date  May 2010 Part 1-3 
00007 date  Dec 2010 Part 4-6
00008 
00009 A hypothesis testing example based on number counting 
00010 with background uncertainty.
00011 
00012 NOTE: This example must be run with the ACLIC (the + option ) due to the 
00013 new class that is defined.
00014 
00015 This example:
00016  - demonstrates the usage of the HybridCalcultor (Part 4-6)
00017  - demonstrates the numerical integration of RooFit (Part 2)
00018  - validates the RooStats against an example with a known analytic answer
00019  - demonstrates usage of different test statistics
00020  - explains subtle choices in the prior used for hybrid methods
00021  - demonstrates usage of different priors for the nuisance parameters
00022  - demonstrates usage of PROOF
00023 
00024 The basic setup here is that a main measurement has observed x events with an 
00025 expectation of s+b.  One can choose an ad hoc prior for the uncertainty on b,
00026 or try to base it on an auxiliary measurement.  In this case, the auxiliary
00027 measurement (aka control measurement, sideband) is another counting experiment
00028 with measurement y and expectation tau*b.  With an 'original prior' on b, 
00029 called \eta(b) then one can obtain a posterior from the auxiliary measurement
00030 \pi(b) = \eta(b) * Pois(y|tau*b).  This is a principled choice for a prior
00031 on b in the main measurement of x, which can then be treated in a hybrid 
00032 Bayesian/Frequentist way.  Additionally, one can try to treat the two 
00033 measurements simultaneously, which is detailed in Part 6 of the tutorial.
00034 
00035 This tutorial is related to the FourBin.C tutorial in the modeling, but
00036 focuses on hypothesis testing instead of interval estimation.
00037 
00038 More background on this 'prototype problem' can be found in the 
00039 following papers:
00040 
00041 Evaluation of three methods for calculating statistical significance 
00042 when incorporating a systematic uncertainty into a test of the 
00043 background-only hypothesis for a Poisson process
00044 Authors: Robert D. Cousins, James T. Linnemann, Jordan Tucker
00045 http://arxiv.org/abs/physics/0702156
00046 NIM  A 595 (2008) 480--501
00047 
00048 Statistical Challenges for Searches for New Physics at the LHC
00049 Authors: Kyle Cranmer
00050 http://arxiv.org/abs/physics/0511028
00051 
00052  Measures of Significance in HEP and Astrophysics
00053  Authors: J. T. Linnemann
00054  http://arxiv.org/abs/physics/0312059
00055 */
00056 
00057 #include "RooGlobalFunc.h"
00058 #include "RooRealVar.h"
00059 #include "RooProdPdf.h"
00060 #include "RooWorkspace.h"
00061 #include "RooDataSet.h"
00062 #include "TCanvas.h"
00063 #include "TStopwatch.h"
00064 #include "TH1.h"
00065 #include "RooPlot.h"
00066 #include "RooMsgService.h"
00067 
00068 #include "RooStats/NumberCountingUtils.h"
00069 
00070 #include "RooStats/HybridCalculator.h"
00071 #include "RooStats/ToyMCSampler.h"
00072 #include "RooStats/HypoTestPlot.h"
00073 
00074 #include "RooStats/ProfileLikelihoodTestStat.h"
00075 #include "RooStats/SimpleLikelihoodRatioTestStat.h"
00076 #include "RooStats/RatioOfProfiledLikelihoodsTestStat.h"
00077 #include "RooStats/MaxLikelihoodEstimateTestStat.h"
00078 
00079 using namespace RooFit;
00080 using namespace RooStats;
00081 
00082 //////////////////////////////////////////////////
00083 // A New Test Statistic Class for this example.
00084 // It simply returns the sum of the values in a particular
00085 // column of a dataset. 
00086 // You can ignore this class and focus on the macro below
00087 //////////////////////////////////////////////////
00088 class BinCountTestStat : public TestStatistic {
00089 public:
00090   BinCountTestStat(void) : fColumnName("tmp") {}
00091   BinCountTestStat(string columnName) : fColumnName(columnName) {}
00092    
00093   virtual Double_t Evaluate(RooAbsData& data, RooArgSet& /*nullPOI*/) {
00094     // This is the main method in the interface
00095     Double_t value = 0.0;
00096     for(int i=0; i < data.numEntries(); i++) {
00097       value += data.get(i)->getRealValue(fColumnName.c_str());
00098     }
00099     return value;
00100       }
00101   virtual const TString GetVarName() const { return fColumnName; }
00102   
00103 private:
00104   string fColumnName;
00105 
00106 protected:
00107   ClassDef(BinCountTestStat,1)
00108 };
00109 
00110 ClassImp(BinCountTestStat)
00111 
00112 //////////////////////////////////////////////////
00113 // The Actual Tutorial Macro
00114 //////////////////////////////////////////////////
00115 
00116 void HybridInstructional() {
00117   
00118   // This tutorial has 6 parts
00119   // Table of Contents
00120   // Setup
00121   //   1. Make the model for the 'prototype problem'
00122   // Special cases
00123   //   2. Use RooFit's direct integration to get p-value & significance
00124   //   3. Use RooStats analytic solution for this problem 
00125   // RooStats HybridCalculator -- can be generalized
00126   //   4. RooStats ToyMC version of 2. & 3. 
00127   //   5. RooStats ToyMC with an equivalent test statistic
00128   //   6. RooStats ToyMC with simultaneous control & main measurement
00129 
00130   // It takes ~4 min without PROOF and ~2 min with PROOF on 4 cores.
00131   // Of course, everything looks nicer with more toys, which takes longer.
00132 
00133 #ifdef __CINT__
00134   cout << "DO NOT RUN WITH CINT: we are using a custom test statistic ";
00135   cout << "which requires that this tutorial must be compiled ";
00136   cout << "with ACLIC" << endl;
00137   return;
00138 #endif
00139 
00140 
00141   TStopwatch t;
00142   t.Start();
00143   TCanvas *c = new TCanvas;
00144   c->Divide(2,2);
00145 
00146   ///////////////////////////////////////////////////////
00147   // P A R T   1  :  D I R E C T   I N T E G R A T I O N 
00148   //////////////////////////////////////////////////////
00149   // Make model for prototype on/off problem
00150   // Pois(x | s+b) * Pois(y | tau b )
00151   // for Z_Gamma, use uniform prior on b.
00152   RooWorkspace* w = new RooWorkspace("w");
00153   w->factory("Poisson::px(x[150,0,500],sum::splusb(s[0,0,100],b[100,0,300]))");
00154   w->factory("Poisson::py(y[100,0,500],prod::taub(tau[1.],b))"); 
00155   w->factory("PROD::model(px,py)");
00156   w->factory("Uniform::prior_b(b)");
00157 
00158   // We will control the output level in a few places to avoid
00159   // verbose progress messages.  We start by keeping track
00160   // of the current threshold on messages.
00161   RooFit::MsgLevel msglevel = RooMsgService::instance().globalKillBelow();
00162 
00163   // Use PROOF-lite on multi-core machines
00164   ProofConfig* pc = NULL;
00165   // uncomment below if you want to use PROOF
00166   // pc = new ProofConfig(*w, 4, "workers=4", kFALSE); // machine with 4 cores
00167   // pc = new ProofConfig(*w, 2, "workers=2", kFALSE); // machine with 2 cores
00168 
00169   ///////////////////////////////////////////////////////
00170   // P A R T   2  :  D I R E C T   I N T E G R A T I O N 
00171   //////////////////////////////////////////////////////
00172   // This is not the 'RooStats' way, but in this case the distribution 
00173   // of the test statistic is simply x and can be calculated directly 
00174   // from the PDF using RooFit's built-in integration. 
00175   // Note, this does not generalize to situations in which the test statistic
00176   // depends on many events (rows in a dataset).
00177 
00178   // construct the Bayesian-averaged model (eg. a projection pdf)
00179   // p'(x|s) = \int db p(x|s+b) * [ p(y|b) * prior(b) ]
00180   w->factory("PROJ::averagedModel(PROD::foo(px|b,py,prior_b),b)") ;
00181 
00182   RooMsgService::instance().setGlobalKillBelow(RooFit::ERROR); // lower message level
00183   // plot it, red is averaged model, green is b known exactly, blue is s+b av model
00184   RooPlot* frame = w->var("x")->frame(Range(50,230)) ;
00185   w->pdf("averagedModel")->plotOn(frame,LineColor(kRed)) ;
00186   w->pdf("px")->plotOn(frame,LineColor(kGreen)) ;
00187   w->var("s")->setVal(50.);
00188   w->pdf("averagedModel")->plotOn(frame,LineColor(kBlue)) ;
00189   c->cd(1);
00190   frame->Draw() ;
00191   w->var("s")->setVal(0.);
00192 
00193   // compare analytic calculation of Z_Bi
00194   // with the numerical RooFit implementation of Z_Gamma
00195   // for an example with x = 150, y = 100
00196    
00197   // numeric RooFit Z_Gamma
00198   w->var("y")->setVal(100);
00199   w->var("x")->setVal(150);
00200   RooAbsReal* cdf = w->pdf("averagedModel")->createCdf(*w->var("x"));
00201   cdf->getVal(); // get ugly print messages out of the way
00202   cout << "-----------------------------------------"<<endl;
00203   cout << "Part 2" << endl;
00204   cout << "Hybrid p-value from direct integration = " << 1-cdf->getVal() << endl;
00205   cout << "Z_Gamma Significance  = " << 
00206     PValueToSignificance(1-cdf->getVal()) << endl;
00207   RooMsgService::instance().setGlobalKillBelow(msglevel); // set it back
00208 
00209   /////////////////////////////////////////////////
00210   // P A R T   3  :  A N A L Y T I C   R E S U L T
00211   /////////////////////////////////////////////////
00212   // In this special case, the integrals are known analytically 
00213   // and they are implemented in RooStats::NumberCountingUtils
00214 
00215   // analytic Z_Bi
00216   double p_Bi = NumberCountingUtils::BinomialWithTauObsP(150, 100, 1);
00217   double Z_Bi = NumberCountingUtils::BinomialWithTauObsZ(150, 100, 1);
00218   cout << "-----------------------------------------"<<endl;
00219   cout << "Part 3" << endl;
00220   std::cout << "Z_Bi p-value (analytic): " << p_Bi << std::endl;
00221   std::cout << "Z_Bi significance (analytic): " << Z_Bi << std::endl;
00222   t.Stop();  t.Print(); t.Reset(); t.Start();
00223 
00224   ////////////////////////////////////////////////////////////////
00225   // P A R T   4  :  U S I N G   H Y B R I D   C A L C U L A T O R
00226   ////////////////////////////////////////////////////////////////
00227   // Now we demonstrate the RooStats HybridCalculator.
00228   //
00229   // Like all RooStats calculators it needs the data and a ModelConfig
00230   // for the relevant hypotheses.  Since we are doing hypothesis testing
00231   // we need a ModelConfig for the null (background only) and the alternate 
00232   // (signal+background) hypotheses.  We also need to specify the PDF, 
00233   // the parameters of interest, and the observables.  Furthermore, since
00234   // the parameter of interest is floating, we need to specify which values
00235   // of the parameter corresponds to the null and alternate (eg. s=0 and s=50)
00236   //
00237   // define some sets of variables obs={x} and poi={s}
00238   // note here, x is the only observable in the main measurement
00239   // and y is treated as a separate measurement, which is used 
00240   // to produce the prior that will be used in this calculation
00241   // to randomize the nuisance parameters.  
00242   w->defineSet("obs","x");
00243   w->defineSet("poi","s");
00244 
00245   // create a toy dataset with the x=150
00246   RooDataSet *data = new RooDataSet("d", "d", *w->set("obs"));
00247   data->add(*w->set("obs"));
00248 
00249   //////////////////////////////////////////////////////////
00250   // Part 3a : Setup ModelConfigs
00251   // create the null (background-only) ModelConfig with s=0
00252   ModelConfig b_model("B_model", w);
00253   b_model.SetPdf(*w->pdf("px"));
00254   b_model.SetObservables(*w->set("obs"));
00255   b_model.SetParametersOfInterest(*w->set("poi"));
00256   w->var("s")->setVal(0.0);  // important!
00257   b_model.SetSnapshot(*w->set("poi"));
00258 
00259   // create the alternate (signal+background) ModelConfig with s=50
00260   ModelConfig sb_model("S+B_model", w);
00261   sb_model.SetPdf(*w->pdf("px"));
00262   sb_model.SetObservables(*w->set("obs"));
00263   sb_model.SetParametersOfInterest(*w->set("poi"));
00264   w->var("s")->setVal(50.0); // important!
00265   sb_model.SetSnapshot(*w->set("poi"));
00266 
00267 
00268   //////////////////////////////////////////////////////////
00269   // Part 3b : Choose Test Statistic
00270   // To make an equivalent calculation we need to use x as the test 
00271   // statistic.  This is not a built-in test statistic in RooStats
00272   // so we define it above.  The new class inherits from the 
00273   // RooStats::TestStatistic interface, and simply returns the value
00274   // of x in the dataset.
00275 
00276   BinCountTestStat binCount("x");
00277 
00278   //////////////////////////////////////////////////////////
00279   // Part 3c : Define Prior used to randomize nuisance parameters
00280   //
00281   // The prior used for the hybrid calculator is the posterior
00282   // from the auxiliary measurement y.  The model for the aux.
00283   // measurement is Pois(y|tau*b), thus the likleihood function
00284   // is proportional to (has the form of) a Gamma distribution.
00285   // if the 'original prior' \eta(b) is uniform, then from
00286   // Bayes's theorem we have the posterior:
00287   //  \pi(b) = Pois(y|tau*b) * \eta(b)
00288   // If \eta(b) is flat, then we arrive at a Gamma distribution.
00289   // Since RooFit will normalize the PDF we can actually supply
00290   // py=Pois(y,tau*b) that will be equivalent to multiplying by a uniform.
00291   // 
00292   // Alternatively, we could explicitly use a gamma distribution:
00293   // w->factory("Gamma::gamma(b,sum::temp(y,1),1,0)");
00294   // 
00295   // or we can use some other ad hoc prior that do not naturally 
00296   // follow from the known form of the auxiliary measurement.
00297   // The common choice is the equivlaent Gaussian:
00298   w->factory("Gaussian::gauss_prior(b,y, expr::sqrty('sqrt(y)',y))");
00299   // this corresponds to the "Z_N" calculation.
00300   //
00301   // or one could use the analogous log-normal prior
00302   w->factory("Lognormal::lognorm_prior(b,y, expr::kappa('1+1./sqrt(y)',y))");
00303   //
00304   // Ideally, the HybridCalculator would be able to inspect the full
00305   // model Pois(x | s+b) * Pois(y | tau b ) and be given the original
00306   // prior \eta(b) to form \pi(b) = Pois(y|tau*b) * \eta(b).
00307   // This is not yet implemented because in the general case
00308   // it is not easy to identify the terms in the PDF that correspond
00309   // to the auxiliary measurement.  So for now, it must be set 
00310   // explicitly with:
00311   //  - ForcePriorNuisanceNull()
00312   //  - ForcePriorNuisanceAlt()
00313   // the name "ForcePriorNuisance" was chosen because we anticipate
00314   // this to be auto-detected, but will leave the option open
00315   // to force to a different prior for the nuisance parameters.
00316 
00317   //////////////////////////////////////////////////////////
00318   // Part 3d : Construct and configure the HybridCalculator
00319 
00320   HybridCalculator hc1(*data, sb_model, b_model);
00321   ToyMCSampler *toymcs1 = (ToyMCSampler*)hc1.GetTestStatSampler();
00322   toymcs1->SetNEventsPerToy(1); // because the model is in number counting form
00323   toymcs1->SetTestStatistic(&binCount); // set the test statistic
00324   hc1.SetToys(20000,1000); 
00325   hc1.ForcePriorNuisanceAlt(*w->pdf("py"));
00326   hc1.ForcePriorNuisanceNull(*w->pdf("py"));
00327   // if you wanted to use the ad hoc Gaussian prior instead
00328   //  hc1.ForcePriorNuisanceAlt(*w->pdf("gauss_prior"));
00329   //  hc1.ForcePriorNuisanceNull(*w->pdf("gauss_prior"));
00330   // if you wanted to use the ad hoc log-normal prior instead
00331   //  hc1.ForcePriorNuisanceAlt(*w->pdf("lognorm_prior"));
00332   //  hc1.ForcePriorNuisanceNull(*w->pdf("lognorm_prior"));
00333 
00334   // enable proof
00335   // NOTE: This test statistic is defined in this macro, and is not 
00336   // working with PROOF currently.  Luckily test stat is fast to evaluate.
00337   //  if(pc) toymcs1->SetProofConfig(pc);     
00338 
00339   // these lines save current msg level and then kill any messages below ERROR
00340   RooMsgService::instance().setGlobalKillBelow(RooFit::ERROR);
00341   // Get the result
00342   HypoTestResult *r1 = hc1.GetHypoTest();
00343   RooMsgService::instance().setGlobalKillBelow(msglevel); // set it back
00344   cout << "-----------------------------------------"<<endl;
00345   cout << "Part 4" << endl;
00346   r1->Print();
00347   t.Stop();  t.Print(); t.Reset(); t.Start();
00348 
00349   c->cd(2);
00350   HypoTestPlot *p1 = new HypoTestPlot(*r1,30); // 30 bins, TS is discrete
00351   p1->Draw();
00352 
00353   ////////////////////////////////////////////////////////////////////////////
00354   // P A R T   5  :  U S I N G   H Y B R I D   C A L C U L A T O R   W I T H 
00355   //                 A N   A L T E R N A T I V E   T E S T   S T A T I S T I C
00356   /////////////////////////////////////////////////////////////////////////////
00357   // 
00358   // A likelihood ratio test statistics should be 1-to-1 with the count x
00359   // when the value of b is fixed in the likelihood.  This is implemented
00360   // by the SimpleLikelihoodRatioTestStat
00361 
00362   SimpleLikelihoodRatioTestStat slrts(*b_model.GetPdf(),*sb_model.GetPdf());
00363   slrts.SetNullParameters(*b_model.GetSnapshot());
00364   slrts.SetAltParameters(*sb_model.GetSnapshot());
00365 
00366   // HYBRID CALCULATOR
00367   HybridCalculator hc2(*data, sb_model, b_model);
00368   ToyMCSampler *toymcs2 = (ToyMCSampler*)hc2.GetTestStatSampler();
00369   toymcs2->SetNEventsPerToy(1);
00370   toymcs2->SetTestStatistic(&slrts);
00371   hc2.SetToys(20000,1000); 
00372   hc2.ForcePriorNuisanceAlt(*w->pdf("py"));
00373   hc2.ForcePriorNuisanceNull(*w->pdf("py"));
00374   // if you wanted to use the ad hoc Gaussian prior instead
00375   //  hc2.ForcePriorNuisanceAlt(*w->pdf("gauss_prior"));
00376   //  hc2.ForcePriorNuisanceNull(*w->pdf("gauss_prior"));
00377   // if you wanted to use the ad hoc log-normal prior instead
00378   //  hc2.ForcePriorNuisanceAlt(*w->pdf("lognorm_prior"));
00379   //  hc2.ForcePriorNuisanceNull(*w->pdf("lognorm_prior"));
00380 
00381   // enable proof
00382   if(pc) toymcs2->SetProofConfig(pc);     
00383 
00384   // these lines save current msg level and then kill any messages below ERROR
00385   RooMsgService::instance().setGlobalKillBelow(RooFit::ERROR);
00386   // Get the result
00387   HypoTestResult *r2 = hc2.GetHypoTest();
00388   cout << "-----------------------------------------"<<endl;
00389   cout << "Part 5" << endl;
00390   r2->Print();
00391   t.Stop();  t.Print(); t.Reset(); t.Start();
00392   RooMsgService::instance().setGlobalKillBelow(msglevel);
00393 
00394   c->cd(3);
00395   HypoTestPlot *p2 = new HypoTestPlot(*r2,30); // 30 bins
00396   p2->Draw();
00397 
00398   ////////////////////////////////////////////////////////////////////////////
00399   // P A R T   6  :  U S I N G   H Y B R I D   C A L C U L A T O R   W I T H 
00400   //                 A N   A L T E R N A T I V E   T E S T   S T A T I S T I C
00401   //                 A N D   S I M U L T A N E O U S   M O D E L
00402   /////////////////////////////////////////////////////////////////////////////
00403   // 
00404   // If one wants to use a test statistic in which the nuisance parameters
00405   // are profiled (in one way or another), then the PDF must constrain b.
00406   // Otherwise any observation x can always be explained with s=0 and b=x/tau.
00407   //
00408   // In this case, one is really thinking about the problem in a 
00409   // different way.  They are considering x,y simultaneously.
00410   // and the PDF should be Pois(x | s+b) * Pois(y | tau b )
00411   // and the set 'obs' should be {x,y}.
00412  
00413   w->defineSet("obsXY","x,y");
00414   
00415   // create a toy dataset with the x=150, y=100
00416   w->var("x")->setVal(150.);
00417   w->var("y")->setVal(100.);
00418   RooDataSet *dataXY = new RooDataSet("dXY", "dXY", *w->set("obsXY"));
00419   dataXY->add(*w->set("obsXY"));
00420 
00421   // now we need new model configs, with PDF="model"
00422   ModelConfig b_modelXY("B_modelXY", w);
00423   b_modelXY.SetPdf(*w->pdf("model")); // IMPORTANT
00424   b_modelXY.SetObservables(*w->set("obsXY"));
00425   b_modelXY.SetParametersOfInterest(*w->set("poi"));
00426   w->var("s")->setVal(0.0);  // IMPORTANT
00427   b_modelXY.SetSnapshot(*w->set("poi"));
00428 
00429   // create the alternate (signal+background) ModelConfig with s=50
00430   ModelConfig sb_modelXY("S+B_modelXY", w);
00431   sb_modelXY.SetPdf(*w->pdf("model"));  // IMPORTANT
00432   sb_modelXY.SetObservables(*w->set("obsXY"));
00433   sb_modelXY.SetParametersOfInterest(*w->set("poi"));
00434   w->var("s")->setVal(50.0); // IMPORTANT
00435   sb_modelXY.SetSnapshot(*w->set("poi"));
00436 
00437   // without this print, their can be a crash when using PROOF.  Strange.
00438   //  w->Print();
00439 
00440   // Test statistics like the profile likelihood ratio  
00441   // (or the ratio of profiled likelihoods (Tevatron) or the MLE for s)
00442   // will now work, since the nuisance parameter b is constrained by y.
00443   // ratio of alt and null likelihoods with background yield profiled.
00444   //
00445   // NOTE: These are slower because they have to run fits for each toy
00446 
00447   // Tevatron-style Ratio of profiled likelihoods 
00448   // Q_Tev = -log L(s=0,\hat\hat{b})/L(s=50,\hat\hat{b})
00449   RatioOfProfiledLikelihoodsTestStat 
00450     ropl(*b_modelXY.GetPdf(), *sb_modelXY.GetPdf(), sb_modelXY.GetSnapshot());
00451   ropl.SetSubtractMLE(false);
00452 
00453   // profile likelihood where alternate is best fit value of signal yield
00454   // \lambda(0) = -log L(s=0,\hat\hat{b})/L(\hat{s},\hat{b})
00455   ProfileLikelihoodTestStat profll(*b_modelXY.GetPdf());
00456 
00457   // just use the maximum likelihood estimate of signal yield
00458   // MLE = \hat{s}
00459   MaxLikelihoodEstimateTestStat mlets(*sb_modelXY.GetPdf(), *w->var("s"));
00460 
00461   // However, it is less clear how to justify the prior used in randomizing
00462   // the nuisance parameters (since that is a property of the ensemble,
00463   // and y is a property of each toy pseudo experiment.  In that case,
00464   // one probably wants to consider a different y0 which will be held
00465   // constant and the prior \pi(b) = Pois(y0 | tau b) * \eta(b).
00466   w->factory("y0[100]");
00467   w->factory("Gamma::gamma_y0(b,sum::temp0(y0,1),1,0)");
00468   w->factory("Gaussian::gauss_prior_y0(b,y0, expr::sqrty0('sqrt(y0)',y0))");
00469   
00470 
00471   // HYBRID CALCULATOR
00472   HybridCalculator hc3(*dataXY, sb_modelXY, b_modelXY);
00473   ToyMCSampler *toymcs3 = (ToyMCSampler*)hc3.GetTestStatSampler();
00474   toymcs3->SetNEventsPerToy(1);
00475   toymcs3->SetTestStatistic(&slrts);
00476   hc3.SetToys(30000,1000); 
00477   hc3.ForcePriorNuisanceAlt(*w->pdf("gamma_y0"));
00478   hc3.ForcePriorNuisanceNull(*w->pdf("gamma_y0"));
00479   // if you wanted to use the ad hoc Gaussian prior instead
00480   //  hc3.ForcePriorNuisanceAlt(*w->pdf("gauss_prior_y0"));
00481   //  hc3.ForcePriorNuisanceNull(*w->pdf("gauss_prior_y0"));
00482 
00483   // choose fit-based test statistic
00484   toymcs3->SetTestStatistic(&profll);
00485   //toymcs3->SetTestStatistic(&ropl);
00486   //toymcs3->SetTestStatistic(&mlets);
00487 
00488   // enable proof
00489   if(pc) toymcs3->SetProofConfig(pc);     
00490 
00491   // these lines save current msg level and then kill any messages below ERROR
00492   RooMsgService::instance().setGlobalKillBelow(RooFit::ERROR);
00493   // Get the result
00494   HypoTestResult *r3 = hc3.GetHypoTest();
00495   cout << "-----------------------------------------"<<endl;
00496   cout << "Part 6" << endl;
00497   r3->Print();
00498   t.Stop();  t.Print(); t.Reset(); t.Start();
00499   RooMsgService::instance().setGlobalKillBelow(msglevel);
00500 
00501   c->cd(4);
00502   c->GetPad(4)->SetLogy();
00503   HypoTestPlot *p3 = new HypoTestPlot(*r3,50); // 50 bins
00504   p3->Draw();
00505 
00506   c->SaveAs("zbi.pdf");
00507 
00508 
00509   ///////////////////////////////////////////////////////////
00510   // OUTPUT W/O PROOF (2.66 GHz Intel Core i7)
00511   ///////////////////////////////////////////////////////////
00512 
00513   /*
00514 -----------------------------------------
00515 Part 2
00516 Hybrid p-value from direct integration = 0.00094165
00517 Z_Gamma Significance  = 3.10804
00518 -----------------------------------------
00519 Part 3
00520 Z_Bi p-value (analytic): 0.00094165
00521 Z_Bi significance (analytic): 3.10804
00522 Real time 0:00:00, CP time 0.610
00523 
00524 -----------------------------------------
00525 Part 4
00526 Results HybridCalculator_result: 
00527  - Null p-value = 0.00115 +/- 0.000228984
00528  - Significance = 3.04848 sigma
00529  - Number of S+B toys: 1000
00530  - Number of B toys: 20000
00531  - Test statistic evaluated on data: 150
00532  - CL_b: 0.99885 +/- 0.000239654
00533  - CL_s+b: 0.476 +/- 0.0157932
00534  - CL_s: 0.476548 +/- 0.0158118
00535 Real time 0:00:07, CP time 7.620
00536 
00537 -----------------------------------------
00538 Part 5
00539 Results HybridCalculator_result: 
00540  - Null p-value = 0.0009 +/- 0.000206057
00541  - Significance = 3.12139 sigma
00542  - Number of S+B toys: 1000
00543  - Number of B toys: 20000
00544  - Test statistic evaluated on data: 10.8198
00545  - CL_b: 0.9991 +/- 0.000212037
00546  - CL_s+b: 0.465 +/- 0.0157726
00547  - CL_s: 0.465419 +/- 0.0157871
00548 Real time 0:00:34, CP time 34.360
00549 
00550 -----------------------------------------
00551 Part 6
00552 Results HybridCalculator_result: 
00553  - Null p-value = 0.000666667 +/- 0.000149021
00554  - Significance = 3.20871 sigma
00555  - Number of S+B toys: 1000
00556  - Number of B toys: 30000
00557  - Test statistic evaluated on data: 5.03388
00558  - CL_b: 0.999333 +/- 0.000149021
00559  - CL_s+b: 0.511 +/- 0.0158076
00560  - CL_s: 0.511341 +/- 0.0158183
00561 Real time 0:05:06, CP time 306.330
00562 
00563   */
00564 
00565 
00566 
00567   ///////////////////////////////////////////////////////////
00568   // OUTPUT w/ PROOF (2.66 GHz Intel Core i7, 4 virtual cores)
00569   ///////////////////////////////////////////////////////////
00570   /*
00571 -----------------------------------------
00572 Part 5
00573 Results HybridCalculator_result: 
00574  - Null p-value = 0.00075 +/- 0.000173124
00575  - Significance = 3.17468 sigma
00576  - Number of S+B toys: 1000
00577  - Number of B toys: 20000
00578  - Test statistic evaluated on data: 10.8198
00579  - CL_b: 0.99925 +/- 0.000193577
00580  - CL_s+b: 0.454 +/- 0.0157443
00581  - CL_s: 0.454341 +/- 0.0157564
00582 Real time 0:00:16, CP time 0.990
00583 
00584 -----------------------------------------
00585 Part 6
00586 Results HybridCalculator_result: 
00587  - Null p-value = 0.0007 +/- 0.000152699
00588  - Significance = 3.19465 sigma
00589  - Number of S+B toys: 1000
00590  - Number of B toys: 30000
00591  - Test statistic evaluated on data: 5.03388
00592  - CL_b: 0.9993 +/- 0.000152699
00593  - CL_s+b: 0.518 +/- 0.0158011
00594  - CL_s: 0.518363 +/- 0.0158124
00595 Real time 0:01:25, CP time 0.580
00596 
00597    */
00598 
00599   //////////////////////////////////////////
00600   // Comparison
00601   ///////////////////////////////////////////
00602   // LEPStatToolsForLHC
00603   // https://plone4.fnal.gov:4430/P0/phystat/packages/0703002
00604   // Uses Gaussian prior
00605   // CL_b = 6.218476e-04, Significance = 3.228665 sigma
00606   //
00607   //////////////////////////////////////////
00608   // Comparison
00609   ///////////////////////////////////////////
00610   // Asymptotics
00611   // From the value of the profile likelihood ratio (5.0338) 
00612   // The significance can be estimated using Wilks's theorem
00613   // significance = sqrt(2*profileLR) = 3.1729 sigma
00614 
00615 
00616 }