00001 // @(#)root/roostats:$Id: MCMCInterval.h 35821 2010-09-28 08:18:13Z moneta $ 00002 // Authors: Kevin Belasco 17/06/2009 00003 // Authors: Kyle Cranmer 17/06/2009 00004 /************************************************************************* 00005 * Copyright (C) 1995-2008, Rene Brun and Fons Rademakers. * 00006 * All rights reserved. * 00007 * * 00008 * For the licensing terms see $ROOTSYS/LICENSE. * 00009 * For the list of contributors see $ROOTSYS/README/CREDITS. * 00010 *************************************************************************/ 00011 00012 #ifndef RooStats_MCMCInterval 00013 #define RooStats_MCMCInterval 00014 00015 #ifndef ROOT_Rtypes 00016 #include "Rtypes.h" 00017 #endif 00018 00019 #ifndef ROOSTATS_ConfInterval 00020 #include "RooStats/ConfInterval.h" 00021 #endif 00022 #ifndef ROO_ARG_SET 00023 #include "RooArgSet.h" 00024 #endif 00025 #ifndef ROO_ARG_LIST 00026 #include "RooArgList.h" 00027 #endif 00028 #ifndef ROOSTATS_MarkovChain 00029 #include "RooStats/MarkovChain.h" 00030 #endif 00031 00032 class RooNDKeysPdf; 00033 class RooProduct; 00034 00035 00036 namespace RooStats { 00037 00038 class Heaviside; 00039 00040 00041 class MCMCInterval : public ConfInterval { 00042 00043 00044 public: 00045 00046 // default constructor 00047 explicit MCMCInterval(const char* name = 0); 00048 00049 // constructor from parameter of interest and Markov chain object 00050 MCMCInterval(const char* name, const RooArgSet& parameters, 00051 MarkovChain& chain); 00052 00053 enum {DEFAULT_NUM_BINS = 50}; 00054 enum IntervalType {kShortest, kTailFraction}; 00055 00056 virtual ~MCMCInterval(); 00057 00058 // determine whether this point is in the confidence interval 00059 virtual Bool_t IsInInterval(const RooArgSet& point) const; 00060 00061 // set the desired confidence level (see GetActualConfidenceLevel()) 00062 // Note: calling this function triggers the algorithm that determines 00063 // the interval, so call this after initializing all other aspects 00064 // of this IntervalCalculator 00065 // Also, calling this function again with a different confidence level 00066 // retriggers the calculation of the interval 00067 virtual void SetConfidenceLevel(Double_t cl); 00068 00069 // get the desired confidence level (see GetActualConfidenceLevel()) 00070 virtual Double_t ConfidenceLevel() const {return fConfidenceLevel;} 00071 00072 // return a set containing the parameters of this interval 00073 // the caller owns the returned RooArgSet* 00074 virtual RooArgSet* GetParameters() const; 00075 00076 // get the cutoff bin height for being considered in the 00077 // confidence interval 00078 virtual Double_t GetHistCutoff(); 00079 00080 // get the cutoff RooNDKeysPdf value for being considered in the 00081 // confidence interval 00082 virtual Double_t GetKeysPdfCutoff(); 00083 //virtual Double_t GetKeysPdfCutoff() { return fKeysCutoff; } 00084 00085 // get the actual value of the confidence level for this interval. 00086 virtual Double_t GetActualConfidenceLevel(); 00087 00088 // whether the specified confidence level is a floor for the actual 00089 // confidence level (strict), or a ceiling (not strict) 00090 virtual void SetHistStrict(Bool_t isHistStrict) 00091 { fIsHistStrict = isHistStrict; } 00092 00093 // check if parameters are correct. (dummy implementation to start) 00094 Bool_t CheckParameters(const RooArgSet& point) const; 00095 00096 // Set the parameters of interest for this interval 00097 // and change other internal data members accordingly 00098 virtual void SetParameters(const RooArgSet& parameters); 00099 00100 // Set the MarkovChain that this interval is based on 00101 virtual void SetChain(MarkovChain& chain) { fChain = &chain; } 00102 00103 // Set which parameters go on which axis. The first list element 00104 // goes on the x axis, second (if it exists) on y, third (if it 00105 // exists) on z, etc 00106 virtual void SetAxes(RooArgList& axes); 00107 00108 // return a list of RooRealVars representing the axes 00109 // you own the returned RooArgList 00110 virtual RooArgList* GetAxes() 00111 { 00112 RooArgList* axes = new RooArgList(); 00113 for (Int_t i = 0; i < fDimension; i++) 00114 axes->addClone(*fAxes[i]); 00115 return axes; 00116 } 00117 00118 // get the lowest value of param that is within the confidence interval 00119 virtual Double_t LowerLimit(RooRealVar& param); 00120 00121 // determine lower limit of the lower confidence interval 00122 virtual Double_t LowerLimitTailFraction(RooRealVar& param); 00123 00124 // get the lower limit of param in the shortest confidence interval 00125 // Note that this works better for some distributions (ones with exactly 00126 // one maximum) than others, and sometimes has little value. 00127 virtual Double_t LowerLimitShortest(RooRealVar& param); 00128 00129 // determine lower limit in the shortest interval by using keys pdf 00130 virtual Double_t LowerLimitByKeys(RooRealVar& param); 00131 00132 // determine lower limit using histogram 00133 virtual Double_t LowerLimitByHist(RooRealVar& param); 00134 00135 // determine lower limit using histogram 00136 virtual Double_t LowerLimitBySparseHist(RooRealVar& param); 00137 00138 // determine lower limit using histogram 00139 virtual Double_t LowerLimitByDataHist(RooRealVar& param); 00140 00141 // get the highest value of param that is within the confidence interval 00142 virtual Double_t UpperLimit(RooRealVar& param); 00143 00144 // determine upper limit of the lower confidence interval 00145 virtual Double_t UpperLimitTailFraction(RooRealVar& param); 00146 00147 // get the upper limit of param in the confidence interval 00148 // Note that this works better for some distributions (ones with exactly 00149 // one maximum) than others, and sometimes has little value. 00150 virtual Double_t UpperLimitShortest(RooRealVar& param); 00151 00152 // determine upper limit in the shortest interval by using keys pdf 00153 virtual Double_t UpperLimitByKeys(RooRealVar& param); 00154 00155 // determine upper limit using histogram 00156 virtual Double_t UpperLimitByHist(RooRealVar& param); 00157 00158 // determine upper limit using histogram 00159 virtual Double_t UpperLimitBySparseHist(RooRealVar& param); 00160 00161 // determine upper limit using histogram 00162 virtual Double_t UpperLimitByDataHist(RooRealVar& param); 00163 00164 // Determine the approximate maximum value of the Keys PDF 00165 Double_t GetKeysMax(); 00166 00167 // set the number of steps in the chain to discard as burn-in, 00168 // starting from the first 00169 virtual void SetNumBurnInSteps(Int_t numBurnInSteps) 00170 { fNumBurnInSteps = numBurnInSteps; } 00171 00172 // set whether to use kernel estimation to determine the interval 00173 virtual void SetUseKeys(Bool_t useKeys) { fUseKeys = useKeys; } 00174 00175 // set whether to use a sparse histogram. you MUST also call 00176 // SetUseKeys(kFALSE) to use a histogram. 00177 virtual void SetUseSparseHist(Bool_t useSparseHist) 00178 { fUseSparseHist = useSparseHist; } 00179 00180 // get whether we used kernel estimation to determine the interval 00181 virtual Bool_t GetUseKeys() { return fUseKeys; } 00182 00183 // get the number of steps in the chain to disard as burn-in, 00184 00185 // get the number of steps in the chain to disard as burn-in, 00186 // starting from the first 00187 virtual Int_t GetNumBurnInSteps() { return fNumBurnInSteps; } 00188 00189 // set the number of bins to use (same for all axes, for now) 00190 //virtual void SetNumBins(Int_t numBins); 00191 00192 // Get a clone of the histogram of the posterior 00193 virtual TH1* GetPosteriorHist(); 00194 00195 // Get a clone of the keys pdf of the posterior 00196 virtual RooNDKeysPdf* GetPosteriorKeysPdf(); 00197 00198 // Get a clone of the (keyspdf * heaviside) product of the posterior 00199 virtual RooProduct* GetPosteriorKeysProduct(); 00200 00201 // Get the number of parameters of interest in this interval 00202 virtual Int_t GetDimension() const { return fDimension; } 00203 00204 // Get the markov chain on which this interval is based 00205 // You do not own the returned MarkovChain* 00206 virtual const MarkovChain* GetChain() { return fChain; } 00207 00208 // Get a clone of the markov chain on which this interval is based 00209 // as a RooDataSet. You own the returned RooDataSet* 00210 virtual RooDataSet* GetChainAsDataSet(RooArgSet* whichVars = NULL) 00211 { return fChain->GetAsDataSet(whichVars); } 00212 00213 // Get the markov chain on which this interval is based 00214 // as a RooDataSet. You do not own the returned RooDataSet* 00215 virtual const RooDataSet* GetChainAsConstDataSet() 00216 { return fChain->GetAsConstDataSet(); } 00217 00218 // Get a clone of the markov chain on which this interval is based 00219 // as a RooDataHist. You own the returned RooDataHist* 00220 virtual RooDataHist* GetChainAsDataHist(RooArgSet* whichVars = NULL) 00221 { return fChain->GetAsDataHist(whichVars); } 00222 00223 // Get a clone of the markov chain on which this interval is based 00224 // as a THnSparse. You own the returned THnSparse* 00225 virtual THnSparse* GetChainAsSparseHist(RooArgSet* whichVars = NULL) 00226 { return fChain->GetAsSparseHist(whichVars); } 00227 00228 // Get a clone of the NLL variable from the markov chain 00229 virtual RooRealVar* GetNLLVar() const 00230 { return fChain->GetNLLVar(); } 00231 00232 // Get a clone of the weight variable from the markov chain 00233 virtual RooRealVar* GetWeightVar() const 00234 { return fChain->GetWeightVar(); } 00235 00236 // set the acceptable level or error for Keys interval determination 00237 virtual void SetEpsilon(Double_t epsilon) 00238 { 00239 if (epsilon < 0) 00240 coutE(InputArguments) << "MCMCInterval::SetEpsilon will not allow " 00241 << "negative epsilon value" << endl; 00242 else 00243 fEpsilon = epsilon; 00244 } 00245 00246 // Set the type of interval to find. This will only have an effect for 00247 // 1-D intervals. If is more than 1 parameter of interest, then a 00248 // "shortest" interval will always be used, since it generalizes directly 00249 // to N dimensions 00250 virtual void SetIntervalType(enum IntervalType intervalType) 00251 { fIntervalType = intervalType; } 00252 00253 // Return the type of this interval 00254 virtual enum IntervalType GetIntervalType() { return fIntervalType; } 00255 00256 // set the left-side tail fraction for a tail-fraction interval 00257 virtual void SetLeftSideTailFraction(Double_t a) { fLeftSideTF = a; } 00258 00259 // kbelasco: The inner-workings of the class really should not be exposed 00260 // like this in a comment, but it seems to be the only way to give 00261 // the user any control over this process, if he desires it 00262 // 00263 // Set the fraction delta such that 00264 // topCutoff (a) is considered == bottomCutoff (b) iff 00265 // (TMath::Abs(a - b) < TMath::Abs(fDelta * (a + b)/2)) 00266 // when determining the confidence interval by Keys 00267 virtual void SetDelta(Double_t delta) 00268 { 00269 if (delta < 0.) 00270 coutE(InputArguments) << "MCMCInterval::SetDelta will not allow " 00271 << "negative delta value" << endl; 00272 else 00273 fDelta = delta; 00274 } 00275 00276 private: 00277 inline Bool_t AcceptableConfLevel(Double_t confLevel); 00278 inline Bool_t WithinDeltaFraction(Double_t a, Double_t b); 00279 00280 protected: 00281 // data members 00282 RooArgSet fParameters; // parameters of interest for this interval 00283 MarkovChain* fChain; // the markov chain 00284 Double_t fConfidenceLevel; // Requested confidence level (eg. 0.95 for 95% CL) 00285 00286 RooDataHist* fDataHist; // the binned Markov Chain data 00287 THnSparse* fSparseHist; // the binned Markov Chain data 00288 Double_t fHistConfLevel; // the actual conf level determined by hist 00289 Double_t fHistCutoff; // cutoff bin size to be in interval 00290 00291 RooNDKeysPdf* fKeysPdf; // the kernel estimation pdf 00292 RooProduct* fProduct; // the (keysPdf * heaviside) product 00293 Heaviside* fHeaviside; // the Heaviside function 00294 RooDataHist* fKeysDataHist; // data hist representing product 00295 RooRealVar* fCutoffVar; // cutoff variable to use for integrating keys pdf 00296 Double_t fKeysConfLevel; // the actual conf level determined by keys 00297 Double_t fKeysCutoff; // cutoff keys pdf value to be in interval 00298 Double_t fFull; // Value of intergral of fProduct 00299 00300 Double_t fLeftSideTF; // left side tail-fraction for interval 00301 Double_t fTFConfLevel; // the actual conf level of tail-fraction interval 00302 vector<Int_t> fVector; // vector containing the Markov chain data 00303 Double_t fVecWeight; // sum of weights of all entries in fVector 00304 Double_t fTFLower; // lower limit of the tail-fraction interval 00305 Double_t fTFUpper; // upper limit of the tail-fraction interval 00306 00307 TH1* fHist; // the binned Markov Chain data 00308 00309 Bool_t fUseKeys; // whether to use kernel estimation 00310 Bool_t fUseSparseHist; // whether to use sparse hist (vs. RooDataHist) 00311 Bool_t fIsHistStrict; // whether the specified confidence level is a 00312 // floor for the actual confidence level (strict), 00313 // or a ceiling (not strict) for determination by 00314 // histogram 00315 Int_t fDimension; // number of variables 00316 Int_t fNumBurnInSteps; // number of steps to discard as burn in, starting 00317 // from the first 00318 // LM (not used) Double_t fIntervalSum; // sum of heights of bins in the interval 00319 RooRealVar** fAxes; // array of pointers to RooRealVars representing 00320 // the axes of the histogram 00321 // fAxes[0] represents x-axis, [1] y, [2] z, etc 00322 00323 Double_t fEpsilon; // acceptable error for Keys interval determination 00324 00325 Double_t fDelta; // topCutoff (a) considered == bottomCutoff (b) iff 00326 // (TMath::Abs(a - b) < TMath::Abs(fDelta * (a + b)/2)); 00327 // Theoretically, the Abs is not needed here, but 00328 // floating-point arithmetic does not always work 00329 // perfectly, and the Abs doesn't hurt 00330 enum IntervalType fIntervalType; 00331 00332 00333 // functions 00334 virtual void DetermineInterval(); 00335 virtual void DetermineShortestInterval(); 00336 virtual void DetermineTailFractionInterval(); 00337 virtual void DetermineByHist(); 00338 virtual void DetermineBySparseHist(); 00339 virtual void DetermineByDataHist(); 00340 virtual void DetermineByKeys(); 00341 virtual void CreateHist(); 00342 virtual void CreateSparseHist(); 00343 virtual void CreateDataHist(); 00344 virtual void CreateKeysPdf(); 00345 virtual void CreateKeysDataHist(); 00346 virtual void CreateVector(RooRealVar* param); 00347 inline virtual Double_t CalcConfLevel(Double_t cutoff, Double_t full); 00348 00349 ClassDef(MCMCInterval,1) // Concrete implementation of a ConfInterval based on MCMC calculation 00350 00351 }; 00352 } 00353 00354 #endif