NeymanConstruction.cxx

Go to the documentation of this file.
00001 // @(#)root/roostats:$Id: NeymanConstruction.cxx 38078 2011-02-15 18:28:29Z cranmer $
00002 // Author: Kyle Cranmer   January 2009
00003 
00004 /*************************************************************************
00005  * Copyright (C) 1995-2008, Rene Brun and Fons Rademakers.               *
00006  * All rights reserved.                                                  *
00007  *                                                                       *
00008  * For the licensing terms see $ROOTSYS/LICENSE.                         *
00009  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
00010  *************************************************************************/
00011 
00012 //_________________________________________________
00013 /*
00014 BEGIN_HTML
00015 <p>
00016 NeymanConstruction is a concrete implementation of the NeymanConstruction interface that, as the name suggests,
00017 performs a NeymanConstruction.  
00018 It produces a RooStats::PointSetInterval, which is a concrete implementation of the ConfInterval interface.  
00019 </p>
00020 <p>
00021 The Neyman Construction is not a uniquely defined statistical technique, it requires that one specify an ordering rule 
00022 or ordering principle, which is usually incoded by choosing a specific test statistic and limits of integration 
00023 (corresponding to upper/lower/central limits).  As a result, this class must be configured with the corresponding
00024 information before it can produce an interval.  Common configurations, such as the Feldman-Cousins approach, can be 
00025 enforced by other light weight classes.
00026 </p>
00027 <p>
00028 The Neyman Construction considers every point in the parameter space independently, no assumptions are 
00029 made that the interval is connected or of a particular shape.  As a result, the PointSetInterval class is used to 
00030 represent the result.  The user indicate which points in the parameter space to perform the constrution by providing
00031 a PointSetInterval instance with the desired points.
00032 </p>
00033 <p>
00034 This class is fairly light weight, because the choice of parameter points to be considered is factorized and so is the 
00035 creation of the sampling distribution of the test statistic (which is done by a concrete class implementing the DistributionCreator interface).  As a result, this class basically just drives the construction by:
00036 <ul>
00037 <li> using a DistributionCreator to create the SamplingDistribution of a user-defined test statistic for each parameter point of interest,</li>
00038 <li>defining the acceptance region in the data by finding the thresholds on the test statistic such that the integral of the sampling distribution is of the appropriate size and consistent with the limits of integration (eg. upper/lower/central limits), </li>
00039 <li> and finally updating the PointSetInterval based on whether the value of the test statistic evaluated on the data are in the acceptance region.</li>
00040 </p>
00041 END_HTML
00042 */
00043 //
00044 
00045 #ifndef RooStats_NeymanConstruction
00046 #include "RooStats/NeymanConstruction.h"
00047 #endif
00048 
00049 #ifndef RooStats_RooStatsUtils
00050 #include "RooStats/RooStatsUtils.h"
00051 #endif
00052 
00053 #ifndef RooStats_PointSetInterval
00054 #include "RooStats/PointSetInterval.h"
00055 #endif
00056 
00057 #include "RooStats/SamplingDistribution.h"
00058 #include "RooStats/ToyMCSampler.h"
00059 #include "RooStats/ModelConfig.h"
00060 
00061 #include "RooMsgService.h"
00062 #include "RooGlobalFunc.h"
00063 
00064 #include "RooDataSet.h"
00065 #include "TFile.h"
00066 #include "TTree.h"
00067 #include "TMath.h"
00068 #include "TH1F.h"
00069 
00070 
00071 
00072 ClassImp(RooStats::NeymanConstruction) ;
00073 
00074 using namespace RooFit;
00075 using namespace RooStats;
00076 
00077 
00078 //_______________________________________________________
00079 NeymanConstruction::NeymanConstruction(RooAbsData& data, ModelConfig& model):
00080    fSize(0.05),
00081    fData(data),
00082    fModel(model),
00083    fTestStatSampler(0), 
00084    fPointsToTest(0),
00085    fLeftSideFraction(0), 
00086    fConfBelt(0),  // constructed with tree data 
00087    fAdaptiveSampling(false),
00088    fAdditionalNToysFactor(1.),
00089    fSaveBeltToFile(false),
00090    fCreateBelt(false)
00091 
00092 {
00093    // default constructor
00094 //   fWS = new RooWorkspace();
00095 //   fOwnsWorkspace = true;
00096 //   fDataName = "";
00097 //   fPdfName = "";
00098 }
00099 
00100 //_______________________________________________________
00101 NeymanConstruction::~NeymanConstruction() {
00102    // default constructor
00103   //  if(fOwnsWorkspace && fWS) delete fWS;
00104   //  if(fConfBelt) delete fConfBelt;
00105 }
00106 
00107 //_______________________________________________________
00108 PointSetInterval* NeymanConstruction::GetInterval() const {
00109   // Main interface to get a RooStats::ConfInterval.  
00110   // It constructs a RooStats::SetInterval.
00111 
00112 
00113   TFile* f=0;
00114   if(fSaveBeltToFile){
00115     oocoutI(f,Contents) << "NeymanConstruction saving ConfidenceBelt to file SamplingDistributions.root" << endl;
00116     f = new TFile("SamplingDistributions.root","recreate");
00117   }
00118   
00119   if(!&fData) {
00120     oocoutF((TObject*)0,Contents) << "Neyman Construction: data is not set, can't get interval" << endl;
00121     return 0;
00122   }
00123   Int_t npass = 0;
00124   RooArgSet* point; 
00125   
00126   // strange problems when using snapshots.  
00127   //  RooArgSet* fPOI = (RooArgSet*) fModel.GetParametersOfInterest()->snapshot();
00128   RooArgSet* fPOI = new RooArgSet(*fModel.GetParametersOfInterest());
00129 
00130   RooDataSet* pointsInInterval = new RooDataSet("pointsInInterval", 
00131                                                  "points in interval", 
00132                                                 *(fPointsToTest->get(0)) );
00133 
00134   // loop over points to test
00135   for(Int_t i=0; i<fPointsToTest->numEntries(); ++i){
00136      // get a parameter point from the list of points to test.
00137     point = (RooArgSet*) fPointsToTest->get(i);//->clone("temp");
00138     
00139     // set parameters of interest to current point
00140     *fPOI = *point;
00141 
00142     // set test stat sampler to use this point
00143     fTestStatSampler->SetParametersForTestStat(*fPOI);
00144 
00145      // get the value of the test statistic for this data set
00146     Double_t thisTestStatistic = fTestStatSampler->EvaluateTestStatistic(fData, *fPOI );
00147     /*
00148     cout << "NC CHECK: " << i << endl;
00149     point->Print();
00150     fPOI->Print("v");
00151     fData.Print();
00152     cout <<"thisTestStatistic = " << thisTestStatistic << endl;
00153     */
00154 
00155     // find the lower & upper thresholds on the test statistic that 
00156     // define the acceptance region in the data
00157 
00158     SamplingDistribution* samplingDist=0;
00159     Double_t sigma;
00160     Double_t upperEdgeOfAcceptance, upperEdgeMinusSigma, upperEdgePlusSigma;
00161     Double_t lowerEdgeOfAcceptance, lowerEdgeMinusSigma, lowerEdgePlusSigma;
00162     Int_t additionalMC=0;
00163 
00164     // the adaptive sampling algorithm wants at least one toy event to be outside
00165     // of the requested pvalue including the sampling variaton.  That leads to an equation
00166     // N-1 = (1-alpha)N + Z sqrt(N - (1-alpha)N) // for upper limit and
00167     // 1   = alpha N - Z sqrt(alpha N)  // for lower limit 
00168     // 
00169     // solving for N gives:
00170     // N = 1/alpha * [3/2 + sqrt(5)] for Z = 1 (which is used currently)
00171     // thus, a good guess for the first iteration of events is N=3.73/alpha~4/alpha
00172     // should replace alpha here by smaller tail probability: eg. alpha*Min(leftsideFrac, 1.-leftsideFrac)
00173     // totalMC will be incremented by 2 before first call, so initiated it at half the value
00174     Int_t totalMC = (Int_t) (2./fSize/TMath::Min(fLeftSideFraction,1.-fLeftSideFraction)); 
00175     if(fLeftSideFraction==0. || fLeftSideFraction ==1.){
00176       totalMC = (Int_t) (2./fSize); 
00177     }
00178     // use control
00179     Double_t tmc = Double_t(totalMC)*fAdditionalNToysFactor;
00180     totalMC = (Int_t) tmc; 
00181 
00182     ToyMCSampler* toyMCSampler = dynamic_cast<ToyMCSampler*>(fTestStatSampler);
00183     if(fAdaptiveSampling && toyMCSampler) {
00184       do{
00185         // this will be executed first, then while conditioned checked
00186         // as an exit condition for the loop.
00187 
00188         // the next line is where most of the time will be spent 
00189         // generating the sampling dist of the test statistic.
00190         additionalMC = 2*totalMC; // grow by a factor of two
00191         samplingDist = 
00192           toyMCSampler->AppendSamplingDistribution(*point, 
00193                                                    samplingDist, 
00194                                                    additionalMC); 
00195         totalMC=samplingDist->GetSize();
00196 
00197         //cout << "without sigma upper = " << 
00198         //samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) ) << endl;
00199 
00200         sigma = 1;
00201         upperEdgeOfAcceptance = 
00202           samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) , 
00203                                     sigma, upperEdgePlusSigma);
00204         sigma = -1;
00205         samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) , 
00206                                   sigma, upperEdgeMinusSigma);
00207         
00208         sigma = 1;
00209         lowerEdgeOfAcceptance = 
00210           samplingDist->InverseCDF( fLeftSideFraction * fSize , 
00211                                     sigma, lowerEdgePlusSigma);
00212         sigma = -1;
00213         samplingDist->InverseCDF( fLeftSideFraction * fSize , 
00214                                   sigma, lowerEdgeMinusSigma);
00215         
00216         ooccoutD(samplingDist,Eval) << "NeymanConstruction: "
00217              << "total MC = " << totalMC 
00218              << " this test stat = " << thisTestStatistic << endl
00219              << " upper edge -1sigma = " << upperEdgeMinusSigma
00220              << ", upperEdge = "<<upperEdgeOfAcceptance
00221              << ", upper edge +1sigma = " << upperEdgePlusSigma << endl
00222              << " lower edge -1sigma = " << lowerEdgeMinusSigma
00223              << ", lowerEdge = "<<lowerEdgeOfAcceptance
00224              << ", lower edge +1sigma = " << lowerEdgePlusSigma << endl;
00225       } while(( 
00226               (thisTestStatistic <= upperEdgeOfAcceptance &&
00227                thisTestStatistic > upperEdgeMinusSigma)
00228               || (thisTestStatistic >= upperEdgeOfAcceptance &&
00229                   thisTestStatistic < upperEdgePlusSigma)
00230               || (thisTestStatistic <= lowerEdgeOfAcceptance &&
00231                   thisTestStatistic > lowerEdgeMinusSigma)
00232               || (thisTestStatistic >= lowerEdgeOfAcceptance &&
00233                   thisTestStatistic < lowerEdgePlusSigma) 
00234                 ) && (totalMC < 100./fSize)
00235               ) ; // need ; here
00236     } else {
00237       // the next line is where most of the time will be spent 
00238       // generating the sampling dist of the test statistic.
00239       samplingDist = fTestStatSampler->GetSamplingDistribution(*point); 
00240       
00241       lowerEdgeOfAcceptance = 
00242         samplingDist->InverseCDF( fLeftSideFraction * fSize );
00243       upperEdgeOfAcceptance = 
00244         samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) );
00245     }
00246     
00247     // add acceptance region to ConfidenceBelt
00248     if(fConfBelt && fCreateBelt){
00249       //      cout << "conf belt set " << fConfBelt << endl;
00250       fConfBelt->AddAcceptanceRegion(*point, i,
00251                                      lowerEdgeOfAcceptance, 
00252                                      upperEdgeOfAcceptance);
00253     }
00254 
00255     // printout some debug info
00256     TIter      itr = point->createIterator();
00257     RooRealVar* myarg;
00258     ooccoutP(samplingDist,Eval) << "NeymanConstruction: Prog: "<< i+1<<"/"<<fPointsToTest->numEntries()
00259                                 << " total MC = " << samplingDist->GetSize()
00260                                 << " this test stat = " << thisTestStatistic << endl;
00261     ooccoutP(samplingDist,Eval) << " ";
00262     while ((myarg = (RooRealVar *)itr.Next())) { 
00263       ooccoutP(samplingDist,Eval) << myarg->GetName() << "=" << myarg->getVal() << " ";
00264     }
00265     ooccoutP(samplingDist,Eval) << "[" << lowerEdgeOfAcceptance << ", " 
00266                        << upperEdgeOfAcceptance << "] " << " in interval = " <<
00267       (thisTestStatistic >= lowerEdgeOfAcceptance && thisTestStatistic <= upperEdgeOfAcceptance) 
00268               << endl << endl;
00269 
00270     // Check if this data is in the acceptance region
00271     if(thisTestStatistic >= lowerEdgeOfAcceptance && thisTestStatistic <= upperEdgeOfAcceptance) {
00272       // if so, set this point to true
00273       //      fPointsToTest->add(*point, 1.);  // this behaves differently for Hist and DataSet
00274       pointsInInterval->add(*point);
00275       ++npass;
00276     }
00277 
00278     if(fSaveBeltToFile){
00279       //write to file
00280       samplingDist->Write();
00281       string tmpName = "hist_";
00282       tmpName+=samplingDist->GetName();
00283       TH1F* h = new TH1F(tmpName.c_str(),"",500,0.,5.);
00284       for(int ii=0; ii<samplingDist->GetSize(); ++ii){
00285         h->Fill(samplingDist->GetSamplingDistribution().at(ii) );
00286       }
00287       h->Write();
00288       delete h;
00289     }
00290 
00291     delete samplingDist;
00292     //    delete point; // from dataset
00293   }
00294   oocoutI(pointsInInterval,Eval) << npass << " points in interval" << endl;
00295 
00296   // create an interval based pointsInInterval
00297   PointSetInterval* interval 
00298     = new PointSetInterval("ClassicalConfidenceInterval", *pointsInInterval);
00299   
00300 
00301   if(fSaveBeltToFile){
00302     //   write belt to file
00303     fConfBelt->Write();
00304 
00305     f->Close();
00306   }
00307 
00308   delete f;
00309   //delete data;
00310   return interval;
00311 }
00312 
00313 

Generated on Tue Jul 5 15:14:36 2011 for ROOT_528-00b_version by  doxygen 1.5.1