MCMCInterval.h

Go to the documentation of this file.
00001 // @(#)root/roostats:$Id: MCMCInterval.h 35821 2010-09-28 08:18:13Z moneta $
00002 // Authors: Kevin Belasco        17/06/2009
00003 // Authors: Kyle Cranmer         17/06/2009
00004 /*************************************************************************
00005  * Copyright (C) 1995-2008, Rene Brun and Fons Rademakers.               *
00006  * All rights reserved.                                                  *
00007  *                                                                       *
00008  * For the licensing terms see $ROOTSYS/LICENSE.                         *
00009  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
00010  *************************************************************************/
00011 
00012 #ifndef RooStats_MCMCInterval
00013 #define RooStats_MCMCInterval
00014 
00015 #ifndef ROOT_Rtypes
00016 #include "Rtypes.h"
00017 #endif
00018 
00019 #ifndef ROOSTATS_ConfInterval
00020 #include "RooStats/ConfInterval.h"
00021 #endif
00022 #ifndef ROO_ARG_SET
00023 #include "RooArgSet.h"
00024 #endif
00025 #ifndef ROO_ARG_LIST
00026 #include "RooArgList.h"
00027 #endif
00028 #ifndef ROOSTATS_MarkovChain
00029 #include "RooStats/MarkovChain.h"
00030 #endif
00031 
00032 class RooNDKeysPdf;
00033 class RooProduct;
00034 
00035 
00036 namespace RooStats {
00037 
00038    class Heaviside;
00039 
00040 
00041    class MCMCInterval : public ConfInterval {
00042 
00043 
00044    public:
00045 
00046       // default constructor
00047       explicit MCMCInterval(const char* name = 0);
00048 
00049       // constructor from parameter of interest and Markov chain object
00050       MCMCInterval(const char* name, const RooArgSet& parameters,
00051                    MarkovChain& chain);
00052 
00053       enum {DEFAULT_NUM_BINS = 50};
00054       enum IntervalType {kShortest, kTailFraction};
00055 
00056       virtual ~MCMCInterval();
00057         
00058       // determine whether this point is in the confidence interval
00059       virtual Bool_t IsInInterval(const RooArgSet& point) const;
00060 
00061       // set the desired confidence level (see GetActualConfidenceLevel())
00062       // Note: calling this function triggers the algorithm that determines
00063       // the interval, so call this after initializing all other aspects
00064       // of this IntervalCalculator
00065       // Also, calling this function again with a different confidence level
00066       // retriggers the calculation of the interval
00067       virtual void SetConfidenceLevel(Double_t cl);
00068 
00069       // get the desired confidence level (see GetActualConfidenceLevel())
00070       virtual Double_t ConfidenceLevel() const {return fConfidenceLevel;}
00071  
00072       // return a set containing the parameters of this interval
00073       // the caller owns the returned RooArgSet*
00074       virtual RooArgSet* GetParameters() const;
00075 
00076       // get the cutoff bin height for being considered in the
00077       // confidence interval
00078       virtual Double_t GetHistCutoff();
00079 
00080       // get the cutoff RooNDKeysPdf value for being considered in the
00081       // confidence interval
00082       virtual Double_t GetKeysPdfCutoff();
00083       //virtual Double_t GetKeysPdfCutoff() { return fKeysCutoff; }
00084 
00085       // get the actual value of the confidence level for this interval.
00086       virtual Double_t GetActualConfidenceLevel();
00087 
00088       // whether the specified confidence level is a floor for the actual
00089       // confidence level (strict), or a ceiling (not strict)
00090       virtual void SetHistStrict(Bool_t isHistStrict)
00091       { fIsHistStrict = isHistStrict; }
00092 
00093       // check if parameters are correct. (dummy implementation to start)
00094       Bool_t CheckParameters(const RooArgSet& point) const;
00095 
00096       // Set the parameters of interest for this interval
00097       // and change other internal data members accordingly
00098       virtual void SetParameters(const RooArgSet& parameters);
00099 
00100       // Set the MarkovChain that this interval is based on
00101       virtual void SetChain(MarkovChain& chain) { fChain = &chain; }
00102 
00103       // Set which parameters go on which axis.  The first list element
00104       // goes on the x axis, second (if it exists) on y, third (if it
00105       // exists) on z, etc
00106       virtual void SetAxes(RooArgList& axes);
00107 
00108       // return a list of RooRealVars representing the axes
00109       // you own the returned RooArgList
00110       virtual RooArgList* GetAxes()
00111       {
00112          RooArgList* axes = new RooArgList();
00113          for (Int_t i = 0; i < fDimension; i++)
00114             axes->addClone(*fAxes[i]);
00115          return axes;
00116       }
00117 
00118       // get the lowest value of param that is within the confidence interval
00119       virtual Double_t LowerLimit(RooRealVar& param);
00120 
00121       // determine lower limit of the lower confidence interval
00122       virtual Double_t LowerLimitTailFraction(RooRealVar& param);
00123 
00124       // get the lower limit of param in the shortest confidence interval
00125       // Note that this works better for some distributions (ones with exactly
00126       // one maximum) than others, and sometimes has little value.
00127       virtual Double_t LowerLimitShortest(RooRealVar& param);
00128 
00129       // determine lower limit in the shortest interval by using keys pdf
00130       virtual Double_t LowerLimitByKeys(RooRealVar& param);
00131 
00132       // determine lower limit using histogram
00133       virtual Double_t LowerLimitByHist(RooRealVar& param);
00134 
00135       // determine lower limit using histogram
00136       virtual Double_t LowerLimitBySparseHist(RooRealVar& param);
00137 
00138       // determine lower limit using histogram
00139       virtual Double_t LowerLimitByDataHist(RooRealVar& param);
00140 
00141       // get the highest value of param that is within the confidence interval
00142       virtual Double_t UpperLimit(RooRealVar& param);
00143 
00144       // determine upper limit of the lower confidence interval
00145       virtual Double_t UpperLimitTailFraction(RooRealVar& param);
00146 
00147       // get the upper limit of param in the confidence interval
00148       // Note that this works better for some distributions (ones with exactly
00149       // one maximum) than others, and sometimes has little value.
00150       virtual Double_t UpperLimitShortest(RooRealVar& param);
00151 
00152       // determine upper limit in the shortest interval by using keys pdf
00153       virtual Double_t UpperLimitByKeys(RooRealVar& param);
00154 
00155       // determine upper limit using histogram
00156       virtual Double_t UpperLimitByHist(RooRealVar& param);
00157 
00158       // determine upper limit using histogram
00159       virtual Double_t UpperLimitBySparseHist(RooRealVar& param);
00160 
00161       // determine upper limit using histogram
00162       virtual Double_t UpperLimitByDataHist(RooRealVar& param);
00163 
00164       // Determine the approximate maximum value of the Keys PDF
00165       Double_t GetKeysMax();
00166 
00167       // set the number of steps in the chain to discard as burn-in,
00168       // starting from the first
00169       virtual void SetNumBurnInSteps(Int_t numBurnInSteps)
00170       { fNumBurnInSteps = numBurnInSteps; }
00171 
00172       // set whether to use kernel estimation to determine the interval
00173       virtual void SetUseKeys(Bool_t useKeys) { fUseKeys = useKeys; }
00174 
00175       // set whether to use a sparse histogram.  you MUST also call
00176       // SetUseKeys(kFALSE) to use a histogram.
00177       virtual void SetUseSparseHist(Bool_t useSparseHist)
00178       { fUseSparseHist = useSparseHist; }
00179 
00180       // get whether we used kernel estimation to determine the interval
00181       virtual Bool_t GetUseKeys() { return fUseKeys; }
00182 
00183       // get the number of steps in the chain to disard as burn-in,
00184 
00185       // get the number of steps in the chain to disard as burn-in,
00186       // starting from the first
00187       virtual Int_t GetNumBurnInSteps() { return fNumBurnInSteps; }
00188 
00189       // set the number of bins to use (same for all axes, for now)
00190       //virtual void SetNumBins(Int_t numBins);
00191 
00192       // Get a clone of the histogram of the posterior
00193       virtual TH1* GetPosteriorHist();
00194 
00195       // Get a clone of the keys pdf of the posterior
00196       virtual RooNDKeysPdf* GetPosteriorKeysPdf();
00197 
00198       // Get a clone of the (keyspdf * heaviside) product of the posterior
00199       virtual RooProduct* GetPosteriorKeysProduct();
00200 
00201       // Get the number of parameters of interest in this interval
00202       virtual Int_t GetDimension() const { return fDimension; }
00203 
00204       // Get the markov chain on which this interval is based
00205       // You do not own the returned MarkovChain*
00206       virtual const MarkovChain* GetChain() { return fChain; }
00207 
00208       // Get a clone of the markov chain on which this interval is based
00209       // as a RooDataSet.  You own the returned RooDataSet*
00210       virtual RooDataSet* GetChainAsDataSet(RooArgSet* whichVars = NULL)
00211       { return fChain->GetAsDataSet(whichVars); }
00212 
00213       // Get the markov chain on which this interval is based
00214       // as a RooDataSet.  You do not own the returned RooDataSet*
00215       virtual const RooDataSet* GetChainAsConstDataSet()
00216       { return fChain->GetAsConstDataSet(); }
00217 
00218       // Get a clone of the markov chain on which this interval is based
00219       // as a RooDataHist.  You own the returned RooDataHist*
00220       virtual RooDataHist* GetChainAsDataHist(RooArgSet* whichVars = NULL)
00221       { return fChain->GetAsDataHist(whichVars); }
00222 
00223       // Get a clone of the markov chain on which this interval is based
00224       // as a THnSparse.  You own the returned THnSparse*
00225       virtual THnSparse* GetChainAsSparseHist(RooArgSet* whichVars = NULL)
00226       { return fChain->GetAsSparseHist(whichVars); }
00227 
00228       // Get a clone of the NLL variable from the markov chain
00229       virtual RooRealVar* GetNLLVar() const
00230       { return fChain->GetNLLVar(); }
00231 
00232       // Get a clone of the weight variable from the markov chain
00233       virtual RooRealVar* GetWeightVar() const
00234       { return fChain->GetWeightVar(); }
00235 
00236       // set the acceptable level or error for Keys interval determination
00237       virtual void SetEpsilon(Double_t epsilon)
00238       {
00239          if (epsilon < 0)
00240             coutE(InputArguments) << "MCMCInterval::SetEpsilon will not allow "
00241                                   << "negative epsilon value" << endl;
00242          else
00243             fEpsilon = epsilon;
00244       }
00245 
00246       // Set the type of interval to find.  This will only have an effect for
00247       // 1-D intervals.  If is more than 1 parameter of interest, then a
00248       // "shortest" interval will always be used, since it generalizes directly
00249       // to N dimensions
00250       virtual void SetIntervalType(enum IntervalType intervalType)
00251       { fIntervalType = intervalType; }
00252 
00253       // Return the type of this interval
00254       virtual enum IntervalType GetIntervalType() { return fIntervalType; }
00255 
00256       // set the left-side tail fraction for a tail-fraction interval
00257       virtual void SetLeftSideTailFraction(Double_t a) { fLeftSideTF = a; }
00258 
00259       // kbelasco: The inner-workings of the class really should not be exposed
00260       // like this in a comment, but it seems to be the only way to give
00261       // the user any control over this process, if he desires it
00262       //
00263       // Set the fraction delta such that
00264       // topCutoff (a) is considered == bottomCutoff (b) iff
00265       // (TMath::Abs(a - b) < TMath::Abs(fDelta * (a + b)/2))
00266       // when determining the confidence interval by Keys
00267       virtual void SetDelta(Double_t delta)
00268       {
00269          if (delta < 0.)
00270             coutE(InputArguments) << "MCMCInterval::SetDelta will not allow "
00271                                   << "negative delta value" << endl;
00272          else
00273             fDelta = delta;
00274       }
00275 
00276    private:
00277       inline Bool_t AcceptableConfLevel(Double_t confLevel);
00278       inline Bool_t WithinDeltaFraction(Double_t a, Double_t b);
00279 
00280    protected:
00281       // data members
00282       RooArgSet  fParameters; // parameters of interest for this interval
00283       MarkovChain* fChain; // the markov chain
00284       Double_t fConfidenceLevel; // Requested confidence level (eg. 0.95 for 95% CL)
00285 
00286       RooDataHist* fDataHist; // the binned Markov Chain data
00287       THnSparse* fSparseHist; // the binned Markov Chain data
00288       Double_t fHistConfLevel; // the actual conf level determined by hist
00289       Double_t fHistCutoff; // cutoff bin size to be in interval
00290 
00291       RooNDKeysPdf* fKeysPdf; // the kernel estimation pdf
00292       RooProduct* fProduct; // the (keysPdf * heaviside) product
00293       Heaviside* fHeaviside; // the Heaviside function
00294       RooDataHist* fKeysDataHist; // data hist representing product
00295       RooRealVar* fCutoffVar; // cutoff variable to use for integrating keys pdf
00296       Double_t fKeysConfLevel; // the actual conf level determined by keys
00297       Double_t fKeysCutoff; // cutoff keys pdf value to be in interval
00298       Double_t fFull; // Value of intergral of fProduct
00299 
00300       Double_t fLeftSideTF; // left side tail-fraction for interval
00301       Double_t fTFConfLevel; // the actual conf level of tail-fraction interval
00302       vector<Int_t> fVector; // vector containing the Markov chain data
00303       Double_t fVecWeight; // sum of weights of all entries in fVector
00304       Double_t fTFLower;   // lower limit of the tail-fraction interval
00305       Double_t fTFUpper;   // upper limit of the tail-fraction interval
00306 
00307       TH1* fHist; // the binned Markov Chain data
00308 
00309       Bool_t fUseKeys; // whether to use kernel estimation
00310       Bool_t fUseSparseHist; // whether to use sparse hist (vs. RooDataHist)
00311       Bool_t fIsHistStrict; // whether the specified confidence level is a
00312                             // floor for the actual confidence level (strict),
00313                             // or a ceiling (not strict) for determination by
00314                             // histogram
00315       Int_t fDimension; // number of variables
00316       Int_t fNumBurnInSteps; // number of steps to discard as burn in, starting
00317                              // from the first
00318       // LM (not used) Double_t fIntervalSum; // sum of heights of bins in the interval
00319       RooRealVar** fAxes; // array of pointers to RooRealVars representing
00320                           // the axes of the histogram
00321                           // fAxes[0] represents x-axis, [1] y, [2] z, etc
00322 
00323       Double_t fEpsilon; // acceptable error for Keys interval determination
00324 
00325       Double_t fDelta; // topCutoff (a) considered == bottomCutoff (b) iff
00326                        // (TMath::Abs(a - b) < TMath::Abs(fDelta * (a + b)/2));
00327                        // Theoretically, the Abs is not needed here, but
00328                        // floating-point arithmetic does not always work
00329                        // perfectly, and the Abs doesn't hurt
00330       enum IntervalType fIntervalType;
00331 
00332 
00333       // functions
00334       virtual void DetermineInterval();
00335       virtual void DetermineShortestInterval();
00336       virtual void DetermineTailFractionInterval();
00337       virtual void DetermineByHist();
00338       virtual void DetermineBySparseHist();
00339       virtual void DetermineByDataHist();
00340       virtual void DetermineByKeys();
00341       virtual void CreateHist();
00342       virtual void CreateSparseHist();
00343       virtual void CreateDataHist();
00344       virtual void CreateKeysPdf();
00345       virtual void CreateKeysDataHist();
00346       virtual void CreateVector(RooRealVar* param);
00347       inline virtual Double_t CalcConfLevel(Double_t cutoff, Double_t full);
00348 
00349       ClassDef(MCMCInterval,1)  // Concrete implementation of a ConfInterval based on MCMC calculation
00350       
00351    };
00352 }
00353 
00354 #endif

Generated on Tue Jul 5 14:26:25 2011 for ROOT_528-00b_version by  doxygen 1.5.1