ROOT_528-00b_version: roofit/roostats/src/SamplingDistribution.cxx Source File

00001 // @(#)root/roostats:$Id: SamplingDistribution.cxx 38101 2011-02-16 16:52:00Z moneta $
00002 
00003 /*************************************************************************
00004  * Project: RooStats                                                     *
00005  * Package: RooFit/RooStats                                              *
00006  * Authors:                                                              *
00007  *   Kyle Cranmer, Lorenzo Moneta, Gregory Schott, Wouter Verkerke       *
00008  *************************************************************************
00009  * Copyright (C) 1995-2008, Rene Brun and Fons Rademakers.               *
00010  * All rights reserved.                                                  *
00011  *                                                                       *
00012  * For the licensing terms see $ROOTSYS/LICENSE.                         *
00013  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
00014  *************************************************************************/
00015 
00016 //____________________________________________________________________
00017 /*
00018 SamplingDistribution : 
00019 
00020 This class simply holds a sampling distribution of some test statistic.  
00021 The distribution can either be an empirical distribution (eg. the samples themselves) or
00022 a weighted set of points (eg. for the FFT method).
00023 The class supports merging.
00024 */
00025 
00026 #include "RooStats/SamplingDistribution.h"
00027 #include "RooNumber.h"
00028 #include "math.h"
00029 #include <algorithm>
00030 #include <iostream>
00031 using namespace std ;
00032 
00033 /// ClassImp for building the THtml documentation of the class 
00034 ClassImp(RooStats::SamplingDistribution)
00035 
00036 using namespace RooStats;
00037 
00038 //_______________________________________________________
00039 SamplingDistribution::SamplingDistribution( const char *name, const char *title,
00040                                             std::vector<Double_t>& samplingDist, const char * varName) :
00041   TNamed(name,title)
00042 {
00043   // SamplingDistribution constructor
00044   fSamplingDist = samplingDist;
00045   // need to check STL stuff here.  Will this = operator work as wanted, or do we need:
00046   //  std::copy(samplingDist.begin(), samplingDist.end(), fSamplingDist.begin());
00047 
00048   // WVE must fill sampleWeights vector here otherwise append behavior potentially undefined
00049   fSampleWeights.resize(fSamplingDist.size(),1.0) ;  
00050 
00051   fVarName = varName;
00052 }
00053 
00054 //_______________________________________________________
00055 SamplingDistribution::SamplingDistribution( const char *name, const char *title,
00056                                             std::vector<Double_t>& samplingDist, std::vector<Double_t>& sampleWeights, const char * varName) :
00057   TNamed(name,title)
00058 {
00059   // SamplingDistribution constructor
00060   fSamplingDist = samplingDist;
00061   fSampleWeights = sampleWeights;
00062   // need to check STL stuff here.  Will this = operator work as wanted, or do we need:
00063   //  std::copy(samplingDist.begin(), samplingDist.end(), fSamplingDist.begin());
00064 
00065   fVarName = varName;
00066 }
00067 
00068 //_______________________________________________________
00069 SamplingDistribution::SamplingDistribution( const char *name, const char *title, const char * varName) :
00070   TNamed(name,title)
00071 {
00072    // SamplingDistribution constructor (with name and title)
00073   fVarName = varName;
00074 }
00075 
00076 
00077 SamplingDistribution::SamplingDistribution(
00078    const char *name,
00079    const char *title,
00080    RooDataSet& dataSet,
00081    const char * varName
00082 ) : TNamed(name, title) {
00083    // Creates a SamplingDistribution from a RooDataSet for debugging
00084    // purposes; e.g. if you need a Gaussian type SamplingDistribution
00085    // you can generate it from a Gaussian pdf and use the resulting
00086    // RooDataSet with this constructor.
00087    //
00088    // The result is the projected distribution onto varName
00089    // marginalizing the other variables.
00090    //
00091    // If varName is not given, the first variable will be used.
00092    // This is useful mostly for RooDataSets with only one observable.
00093 
00094    fVarName = varName;
00095    if(fVarName.Length() == 0) {
00096       // no leak. none of these transfers ownership.
00097       fVarName = dataSet.get()->first()->GetName();
00098    }
00099 
00100    for(Int_t i=0; i < dataSet.numEntries(); i++) {
00101       fSamplingDist.push_back(dataSet.get(i)->getRealValue(fVarName));
00102       fSampleWeights.push_back(dataSet.weight());
00103    }
00104 }
00105 
00106 
00107 //_______________________________________________________
00108 SamplingDistribution::SamplingDistribution( ) :
00109   TNamed("SamplingDistribution_DefaultName","SamplingDistribution")
00110 {
00111    // SamplingDistribution default constructor
00112 }
00113 
00114 //_______________________________________________________
00115 SamplingDistribution::~SamplingDistribution()
00116 {
00117    // SamplingDistribution destructor
00118 
00119    fSamplingDist.clear();
00120    fSampleWeights.clear();
00121 }
00122 
00123 
00124 //_______________________________________________________
00125 void SamplingDistribution::Add(const SamplingDistribution* other)
00126 {
00127    // Merge SamplingDistributions (does nothing if NULL is given).
00128    // If variable name was not set before, it is copied from the added
00129    // SamplingDistribution.
00130 
00131    if(!other) return;
00132 
00133   std::vector<double> newSamplingDist = other->fSamplingDist;
00134   std::vector<double> newSampleWeights = other->fSampleWeights;
00135   // need to check STL stuff here.  Will this = operator work as wanted, or do we need:
00136   //  std::copy(samplingDist.begin(), samplingDist.end(), fSamplingDist.begin());
00137   // need to look into STL, do it the easy way for now
00138 
00139   // reserve memory
00140   fSamplingDist.reserve(fSamplingDist.size()+newSamplingDist.size());
00141   fSampleWeights.reserve(fSampleWeights.size()+newSampleWeights.size());
00142 
00143   // push back elements
00144   for(unsigned int i=0; i<newSamplingDist.size(); ++i){
00145     fSamplingDist.push_back(newSamplingDist[i]);
00146     fSampleWeights.push_back(newSampleWeights[i]);
00147   }
00148 
00149 
00150   if(GetVarName().Length() == 0  &&  other->GetVarName().Length() > 0)
00151      fVarName = other->GetVarName();
00152 
00153   if(strlen(GetName()) == 0  &&  strlen(other->GetName()) > 0)
00154      SetName(other->GetName());
00155   if(strlen(GetTitle()) == 0  &&  strlen(other->GetTitle()) > 0)
00156      SetTitle(other->GetTitle());
00157 
00158 }
00159 
00160 
00161 
00162 //_______________________________________________________
00163 Double_t SamplingDistribution::Integral(Double_t low, Double_t high, Bool_t normalize, Bool_t lowClosed, Bool_t highClosed) const
00164 {
00165    // Returns the integral in the open/closed/mixed interval. Default is [low,high) interval.
00166    // Normalization can be turned off.
00167 
00168    Double_t sum = 0;
00169    for(unsigned int i=0; i<fSamplingDist.size(); i++) {
00170       double value = fSamplingDist[i];
00171 
00172       if((lowClosed  ? value >= low  : value > low)  &&
00173          (highClosed ? value <= high : value < high))
00174       {
00175          sum += fSampleWeights[i];
00176       }
00177    }
00178 
00179    if(normalize) {
00180       Double_t norm = 0;
00181       for(unsigned int i=0; i<fSamplingDist.size(); i++) {
00182          norm += fSampleWeights[i];
00183       }
00184       sum /= norm;
00185    }
00186 
00187    return sum;
00188 }
00189 
00190 //_______________________________________________________
00191 Double_t SamplingDistribution::CDF(Double_t x) const {
00192    // returns the closed integral [-inf,x]
00193    return Integral(-RooNumber::infinity(), x, kTRUE, kTRUE, kTRUE);
00194 }
00195 
00196 
00197 
00198 //_______________________________________________________
00199 Double_t SamplingDistribution::InverseCDF(Double_t pvalue)
00200 {
00201    // returns the inverse of the cumulative distribution function
00202 
00203   Double_t dummy=0;
00204   return InverseCDF(pvalue,0,dummy);
00205 }
00206 //_______________________________________________________
00207 Double_t SamplingDistribution::InverseCDF(Double_t pvalue, 
00208                                           Double_t sigmaVariation, 
00209                                           Double_t& inverseWithVariation)
00210 {
00211    // returns the inverse of the cumulative distribution function, with variations depending on number of samples
00212 
00213   // will need to deal with weights, but for now:
00214   std::sort(fSamplingDist.begin(), fSamplingDist.end());
00215 
00216 
00217   // Acceptance regions are meant to be inclusive of (1-\alpha) of the probability
00218   // so the returned values of the CDF should make this easy.
00219   // in particular:
00220   //   if finding the critical value for a lower bound
00221   //     when p_i < p < p_j, one should return the value associated with i
00222   //     if i=0, then one should return -infinity
00223   //   if finding the critical value for an upper bound
00224   //     when p_i < p < p_j, one should return the value associated with j
00225   //     if i = size-1, then one should return +infinity
00226   //   use pvalue < 0.5 to indicate a lower bound is requested
00227   
00228   // casting will round down, eg. give i
00229   int nominal = (unsigned int) (pvalue*fSamplingDist.size());
00230 
00231   if(nominal <= 0) {
00232     inverseWithVariation = -1.*RooNumber::infinity();
00233     return -1.*RooNumber::infinity();
00234   }
00235   else if(nominal >= (Int_t)fSamplingDist.size()-1 ) {
00236     inverseWithVariation = RooNumber::infinity();
00237     return RooNumber::infinity();
00238   }
00239   else if(pvalue < 0.5){
00240     int delta = (int)(sigmaVariation*sqrt(1.0*nominal)); // note sqrt(small fraction)
00241     int variation = nominal+delta;
00242 
00243     if(variation>=(Int_t)fSamplingDist.size()-1)
00244       inverseWithVariation = RooNumber::infinity();
00245     else if(variation<=0)
00246       inverseWithVariation = -1.*RooNumber::infinity();
00247     else 
00248       inverseWithVariation =  fSamplingDist[ variation ];
00249 
00250     return fSamplingDist[nominal];
00251   }
00252   else if(pvalue >= 0.5){
00253     int delta = (int)(sigmaVariation*sqrt(1.0*fSamplingDist.size()- nominal)); // note sqrt(small fraction)
00254     int variation = nominal+delta;
00255 
00256 
00257     if(variation>=(Int_t)fSamplingDist.size()-1)
00258       inverseWithVariation = RooNumber::infinity();
00259 
00260     else if(variation<=0)
00261       inverseWithVariation = -1.*RooNumber::infinity();
00262     else 
00263       inverseWithVariation =  fSamplingDist[ variation+1 ];
00264 
00265 
00266     /*
00267       std::cout << "dgb SamplingDistribution::InverseCDF. variation = " << variation
00268       << " size = " << fSamplingDist.size()
00269       << " value = " << inverseWithVariation << std::endl;
00270     */
00271 
00272     return fSamplingDist[nominal+1];
00273   }
00274   else{
00275     std::cout << "problem in SamplingDistribution::InverseCDF" << std::endl;
00276   }
00277   inverseWithVariation = RooNumber::infinity();
00278   return RooNumber::infinity();
00279 
00280 }
00281 
00282 
00283 //_______________________________________________________
00284 Double_t SamplingDistribution::InverseCDFInterpolate(Double_t pvalue)
00285 {
00286    // returns the inverse of the cumulative distribution function
00287 
00288   // will need to deal with weights, but for now:
00289   std::sort(fSamplingDist.begin(), fSamplingDist.end());
00290 
00291   // casting will round down, eg. give i
00292   int nominal = (unsigned int) (pvalue*fSamplingDist.size());
00293 
00294   if(nominal <= 0) {
00295     return -1.*RooNumber::infinity();
00296   }
00297   if(nominal >= (Int_t)fSamplingDist.size()-1 ) {
00298     return RooNumber::infinity();
00299   }
00300   Double_t upperX = fSamplingDist[nominal+1];
00301   Double_t upperY = ((Double_t) (nominal+1))/fSamplingDist.size();
00302   Double_t lowerX =  fSamplingDist[nominal];
00303   Double_t lowerY = ((Double_t) nominal)/fSamplingDist.size();
00304   
00305   //  std::cout << upperX << " " << upperY << " " << lowerX << " " << lowerY << std::endl;
00306 
00307   return (upperX-lowerX)/(upperY-lowerY)*(pvalue-lowerY)+lowerX;
00308 
00309 }