ToyMCSampler.cxx

Go to the documentation of this file.
00001 // @(#)root/roostats:$Id: ToyMCSampler.cxx 37564 2010-12-13 13:47:23Z moneta $
00002 // Author: Sven Kreiss    June 2010
00003 // Author: Kyle Cranmer, Lorenzo Moneta, Gregory Schott, Wouter Verkerke
00004 /*************************************************************************
00005  * Copyright (C) 1995-2008, Rene Brun and Fons Rademakers.               *
00006  * All rights reserved.                                                  *
00007  *                                                                       *
00008  * For the licensing terms see $ROOTSYS/LICENSE.                         *
00009  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
00010  *************************************************************************/
00011 
00012 #include "RooStats/ToyMCSampler.h"
00013 
00014 #ifndef ROO_MSG_SERVICE
00015 #include "RooMsgService.h"
00016 #endif
00017 
00018 #ifndef ROO_DATA_HIST
00019 #include "RooDataHist.h"
00020 #endif
00021 
00022 #ifndef ROO_REAL_VAR
00023 #include "RooRealVar.h"
00024 #endif
00025 
00026 #include "TCanvas.h"
00027 #include "RooPlot.h"
00028 #include "RooRandom.h"
00029 
00030 #include "RooStudyManager.h"
00031 #include "RooStats/ToyMCStudy.h"
00032 
00033 #include "TMath.h"
00034 
00035 
00036 ClassImp(RooStats::ToyMCSampler)
00037 
00038 namespace RooStats {
00039 
00040 class NuisanceParametersSampler {
00041    // Helper for ToyMCSampler. Handles all of the nuisance parameter related
00042    // functions. Once instantiated, it gives a new nuisance parameter point
00043    // at each call to nextPoint(...).
00044 
00045    public:
00046       NuisanceParametersSampler(RooAbsPdf *prior=NULL, const RooArgSet *parameters=NULL, Int_t nToys=1000, Bool_t asimov=kFALSE) :
00047          fPrior(prior),
00048          fParams(parameters),
00049          fNToys(nToys),
00050          fExpected(asimov),
00051          fPoints(NULL),
00052          fIndex(0)
00053       {
00054          if(prior) Refresh();
00055       }
00056 
00057       void NextPoint(RooArgSet& nuisPoint, Double_t& weight) {
00058          // Assigns new nuisance parameter point to members of nuisPoint.
00059          // nuisPoint can be more objects than just the nuisance
00060          // parameters.
00061 
00062          // check whether to get new set of nuisanceParPoints
00063          if (fIndex >= fNToys) {
00064             Refresh();
00065             fIndex = 0;
00066          }
00067 
00068          // get value
00069          nuisPoint =  *fPoints->get(fIndex++);
00070          weight = fPoints->weight();
00071 
00072          // check whether result will have any influence
00073          if(fPoints->weight() == 0.0) {
00074             oocoutI((TObject*)NULL,Generation) << "Weight 0 encountered. Skipping." << endl;
00075             NextPoint(nuisPoint, weight);
00076          }
00077       }
00078 
00079 
00080    protected:
00081 
00082       void Refresh() {
00083          // Creates the initial set of nuisance parameter points. It also refills the
00084          // set with new parameter points if called repeatedly. This helps with
00085          // adaptive sampling as the required number of nuisance parameter points
00086          // might increase during the run.
00087 
00088          if (!fPrior || !fParams) return;
00089 
00090          if (fPoints) delete fPoints;
00091 
00092          if (fExpected) {
00093             // UNDER CONSTRUCTION
00094             oocoutI((TObject*)NULL,InputArguments) << "Using expected nuisance parameters." << endl;
00095 
00096             int nBins = fNToys;
00097 
00098             // From FeldmanCousins.cxx:
00099             // set nbins for the POI
00100             TIter it2 = fParams->createIterator();
00101             RooRealVar *myarg2;
00102             while ((myarg2 = dynamic_cast<RooRealVar*>(it2.Next()))) {
00103               myarg2->setBins(nBins);
00104             }
00105 
00106 
00107             fPoints = fPrior->generateBinned(
00108                *fParams,
00109                RooFit::ExpectedData(),
00110                RooFit::NumEvents(1) // for Asimov set, this is only a scale factor
00111             );
00112             if(fPoints->numEntries() != fNToys) {
00113                fNToys = fPoints->numEntries();
00114                oocoutI((TObject*)NULL,InputArguments) <<
00115                   "Adjusted number of toys to number of bins of nuisance parameters: " << fNToys << endl;
00116             }
00117 
00118 /*
00119             // check
00120             TCanvas *c1 = new TCanvas;
00121             RooPlot *p = dynamic_cast<RooRealVar*>(fParams->first())->frame();
00122             fPoints->plotOn(p);
00123             p->Draw();
00124             for(int x=0; x < fPoints->numEntries(); x++) {
00125                fPoints->get(x)->Print("v");
00126                cout << fPoints->weight() << endl;
00127             }
00128 */
00129 
00130          }else{
00131             oocoutI((TObject*)NULL,InputArguments) << "Using randomized nuisance parameters." << endl;
00132 
00133             fPoints = fPrior->generate(*fParams, fNToys);
00134          }
00135       }
00136 
00137 
00138    private:
00139       RooAbsPdf *fPrior;           // prior for nuisance parameters
00140       const RooArgSet *fParams;    // nuisance parameters
00141       Int_t fNToys;
00142       Bool_t fExpected;
00143 
00144       RooAbsData *fPoints;         // generated nuisance parameter points
00145       Int_t fIndex;                // current index in fPoints array
00146 };
00147 
00148 
00149 
00150 
00151 
00152 
00153 Bool_t ToyMCSampler::CheckConfig(void) {
00154    // only checks, no guessing/determination (do this in calculators,
00155    // e.g. using ModelConfig::GuessObsAndNuisance(...))
00156    bool goodConfig = true;
00157 
00158    if(!fTestStat) { ooccoutE((TObject*)NULL,InputArguments) << "Test statistic not set." << endl; goodConfig = false; }
00159    if(!fObservables) { ooccoutE((TObject*)NULL,InputArguments) << "Observables not set." << endl; goodConfig = false; }
00160    if(!fNullPOI) { ooccoutE((TObject*)NULL,InputArguments) << "Parameter values used to evaluate for test statistic  not set." << endl; goodConfig = false; }
00161    if(!fPdf) { ooccoutE((TObject*)NULL,InputArguments) << "Pdf not set." << endl; goodConfig = false; }
00162 
00163    return goodConfig;
00164 }
00165 
00166 
00167 
00168 SamplingDistribution* ToyMCSampler::GetSamplingDistribution(RooArgSet& paramPointIn) {
00169    // Use for serial and parallel runs.
00170 
00171    // ======= S I N G L E   R U N ? =======
00172    if(!fProofConfig)
00173       return GetSamplingDistributionSingleWorker(paramPointIn);
00174 
00175 
00176    // ======= P A R A L L E L   R U N =======
00177    CheckConfig();
00178 
00179    // turn adaptive sampling off if given
00180    if(fToysInTails) {
00181       fToysInTails = 0;
00182       oocoutW((TObject*)NULL, InputArguments)
00183          << "Adaptive sampling in ToyMCSampler is not supported for parallel runs."
00184          << endl;
00185    }
00186 
00187    // adjust number of toys on the slaves to keep the total number of toys constant
00188    Int_t totToys = fNToys;
00189    fNToys = (int)ceil((double)fNToys / (double)fProofConfig->GetNExperiments()); // round up
00190 
00191    // create the study instance for parallel processing
00192    ToyMCStudy toymcstudy;
00193    toymcstudy.SetToyMCSampler(*this);
00194    toymcstudy.SetParamPointOfInterest(paramPointIn);
00195 
00196    // temporary workspace for proof to avoid messing with TRef
00197    RooWorkspace w(fProofConfig->GetWorkspace());
00198    RooStudyManager studymanager(w, toymcstudy);
00199    studymanager.runProof(fProofConfig->GetNExperiments(), fProofConfig->GetHost(), fProofConfig->GetShowGui());
00200 
00201    SamplingDistribution *result = new SamplingDistribution(GetSamplingDistName().c_str(), GetSamplingDistName().c_str());
00202    toymcstudy.merge(*result);
00203 
00204    // reset the number of toys
00205    fNToys = totToys;
00206 
00207    return result;
00208 }
00209 
00210 SamplingDistribution* ToyMCSampler::GetSamplingDistributionSingleWorker(RooArgSet& paramPointIn) {
00211    // This is the main function for serial runs. It is called automatically
00212    // from inside GetSamplingDistribution when no ProofConfig is given.
00213    // You should not call this function yourself. This function should
00214    // be used by ToyMCStudy on the workers (ie. when you explicitly want
00215    // a serial run although ProofConfig is present).
00216 
00217    CheckConfig();
00218 
00219    std::vector<Double_t> testStatVec;
00220    std::vector<Double_t> testStatWeights;
00221 
00222    // important to cache the paramPoint b/c test statistic might 
00223    // modify it from event to event
00224    RooArgSet *paramPoint = (RooArgSet*) paramPointIn.snapshot();
00225    RooArgSet *allVars = fPdf->getVariables();
00226    RooArgSet *saveAll = (RooArgSet*) allVars->snapshot();
00227 
00228    // create nuisance parameter points
00229    NuisanceParametersSampler *np = NULL;
00230    if(fPriorNuisance && fNuisancePars)
00231       np = new NuisanceParametersSampler(fPriorNuisance, fNuisancePars, fNToys, fExpectedNuisancePar);
00232 
00233    // counts the number of toys in the limits set for adaptive sampling
00234    // (taking weights into account)
00235    Double_t toysInTails = 0.0;
00236 
00237 
00238    for (Int_t i = 0; i < fMaxToys; ++i) {
00239 
00240       // status update
00241       if ( i% 500 == 0 && i>0 ) {
00242          oocoutP((TObject*)0,Generation) << "generated toys: " << i << " / " << fNToys;
00243          if (fToysInTails) ooccoutP((TObject*)0,Generation) << " (tails: " << toysInTails << " / " << fToysInTails << ")" << std::endl;
00244          else ooccoutP((TObject*)0,Generation) << endl;
00245       }
00246 
00247       // create toy data and calculate value of test statistic
00248       Double_t value, weight;
00249       if (np) { // use nuisance parameters?
00250 
00251          // set variables to requested parameter point
00252          *allVars = *paramPoint;
00253 
00254          // get nuisance parameter point and weight
00255          np->NextPoint(*allVars, weight);
00256 
00257          // generate toy data for this parameter point
00258          RooAbsData* toydata = GenerateToyData(*allVars);
00259          // evaluate test statistic, that only depends on null POI
00260          value = fTestStat->Evaluate(*toydata, *fNullPOI);
00261 
00262          if(fImportanceDensity) {
00263             // Importance Sampling: adjust weight
00264             // Source: presentation by Michael Woodroofe
00265 
00266             // get the NLLs of the importance density and the pdf to sample
00267             *allVars = *fImportanceSnapshot;
00268             RooAbsReal *impNLL = fImportanceDensity->createNLL(*toydata, RooFit::Extended(kFALSE), RooFit::CloneData(kFALSE));
00269             double impNLLVal = impNLL->getVal();
00270             delete impNLL;
00271             *allVars = *paramPoint;
00272             RooAbsReal *pdfNLL = fPdf->createNLL(*toydata, RooFit::Extended(kFALSE), RooFit::CloneData(kFALSE));
00273             double pdfNLLVal = pdfNLL->getVal();
00274             delete pdfNLL;
00275 
00276             // L(pdf) / L(imp)  =  exp( NLL(imp) - NLL(pdf) )
00277             weight *= exp(impNLLVal - pdfNLLVal);
00278          }
00279 
00280          delete toydata;
00281 
00282       }else{
00283 
00284          // set variables to requested parameter point
00285          *allVars = *paramPoint;
00286          // generate toy data for this parameter point
00287          RooAbsData* toydata = GenerateToyData(*allVars);
00288          // evaluate test statistic, that only depends on null POI
00289          value = fTestStat->Evaluate(*toydata, *fNullPOI);
00290          weight = -1.;
00291          delete toydata;
00292       }
00293 
00294       // check for nan
00295       if(value != value) {
00296          oocoutW((TObject*)NULL, Generation) << "skip: " << value << ", " << weight << endl;
00297          continue;
00298       }
00299 
00300       // add results
00301       testStatVec.push_back(value);
00302       if(weight >= 0.) testStatWeights.push_back(weight);
00303 
00304       // adaptive sampling checks
00305       if (value <= fAdaptiveLowLimit  ||  value >= fAdaptiveHighLimit) {
00306          if(weight >= 0.) toysInTails += weight;
00307          else toysInTails += 1.;
00308       }
00309       if (toysInTails >= fToysInTails  &&  i+1 >= fNToys) break;
00310    }
00311 
00312    // clean up
00313    *allVars = *saveAll;
00314    delete saveAll;
00315    delete allVars;
00316    if(np) delete np;
00317 
00318    // return
00319    if (testStatWeights.size()) {
00320       return new SamplingDistribution(
00321          fSamplingDistName.c_str(),
00322          fSamplingDistName.c_str(),
00323          testStatVec,
00324          testStatWeights,
00325          fTestStat->GetVarName()
00326       );
00327    }
00328    return new SamplingDistribution(
00329       fSamplingDistName.c_str(),
00330       fSamplingDistName.c_str(),
00331       testStatVec,
00332       fTestStat->GetVarName()
00333    );
00334 }
00335 
00336 
00337 RooAbsData* ToyMCSampler::GenerateToyData(RooArgSet& /*nullPOI*/) const {
00338    // This method generates a toy data set for the given parameter point taking
00339    // global observables into account.
00340 
00341    RooArgSet observables(*fObservables);
00342    if(fGlobalObservables  &&  fGlobalObservables->getSize()) {
00343       observables.remove(*fGlobalObservables);
00344 
00345       // generate one set of global observables and assign it
00346       RooDataSet *one = fPdf->generate(*fGlobalObservables, 1);
00347       const RooArgSet *values = one->get();
00348       RooArgSet *allVars = fPdf->getVariables();
00349       *allVars = *values;
00350       delete allVars;
00351       delete values;
00352       delete one;
00353    }
00354 
00355 
00356    RooAbsData* data = NULL;
00357 
00358    if(!fImportanceDensity) {
00359       // no Importance Sampling
00360       data = Generate(*fPdf, observables);
00361    }else{
00362 
00363       // Importance Sampling
00364       RooArgSet* allVars = fPdf->getVariables();
00365       RooArgSet* allVars2 = fImportanceDensity->getVariables();
00366       allVars->add(*allVars2);
00367       const RooArgSet* saveVars = (const RooArgSet*)allVars->snapshot();
00368 
00369       // the number of events generated is either the given fNEvents or
00370       // in case this is not given, the expected number of events of
00371       // the pdf with a Poisson fluctuation
00372       int forceEvents = 0;
00373       if(fNEvents == 0) {
00374          forceEvents = (int)fPdf->expectedEvents(observables);
00375          forceEvents = RooRandom::randomGenerator()->Poisson(forceEvents);
00376       }
00377 
00378       // need to be careful here not to overwrite the current state of the
00379       // nuisance parameters, ie they must not be part of the snapshot
00380       if(fImportanceSnapshot) *allVars = *fImportanceSnapshot;
00381 
00382       // generate with the parameters configured in this class
00383       //   NULL => no protoData
00384       //   overwriteEvents => replaces fNEvents it would usually take
00385       data = Generate(*fImportanceDensity, observables, NULL, forceEvents);
00386 
00387       *allVars = *saveVars;
00388       delete allVars;
00389       delete allVars2;
00390       delete saveVars;
00391    }
00392 
00393    return data;
00394 }
00395 
00396 RooAbsData* ToyMCSampler::Generate(RooAbsPdf &pdf, RooArgSet &observables, const RooDataSet* protoData, int forceEvents) const {
00397    // This is the generate function to use in the context of the ToyMCSampler
00398    // instead of the standard RooAbsPdf::generate(...).
00399    // It takes into account whether the number of events is given explicitly
00400    // or whether it should use the expected number of events. It also takes
00401    // into account the option to generate a binned data set (ie RooDataHist).
00402 
00403    if(fProtoData) {
00404       protoData = fProtoData;
00405       forceEvents = protoData->numEntries();
00406    }
00407 
00408    RooAbsData *data = NULL;
00409    int events = forceEvents;
00410    if(events == 0) events = fNEvents;
00411 
00412    if(events == 0) {
00413       if( pdf.canBeExtended() && pdf.expectedEvents(observables) > 0) {
00414          if(fGenerateBinned) {
00415             if(protoData) data = pdf.generateBinned(observables, RooFit::Extended(), RooFit::ProtoData(*protoData, true, true));
00416             else          data = pdf.generateBinned(observables, RooFit::Extended());
00417          }else{
00418             if(protoData) data = pdf.generate      (observables, RooFit::Extended(), RooFit::ProtoData(*protoData, true, true));
00419             else          data = pdf.generate      (observables, RooFit::Extended());
00420          }
00421       }else{
00422          oocoutE((TObject*)0,InputArguments)
00423             << "ToyMCSampler: Error : pdf is not extended and number of events per toy is zero"
00424             << endl;
00425       }
00426    }else{
00427       if(fGenerateBinned) {
00428          if(protoData) data = pdf.generateBinned(observables, events, RooFit::ProtoData(*protoData, true, true));
00429          else          data = pdf.generateBinned(observables, events);
00430       }else{
00431          if(protoData) data = pdf.generate      (observables, events, RooFit::ProtoData(*protoData, true, true));
00432          else          data = pdf.generate      (observables, events);
00433       }
00434    }
00435 
00436    return data;
00437 }
00438 
00439 
00440 
00441 
00442 
00443 } // end namespace RooStats

Generated on Tue Jul 5 15:14:37 2011 for ROOT_528-00b_version by  doxygen 1.5.1