ROOT_528-00b_version: roofit/roostats/src/HypoTestResult.cxx Source File

00001 // @(#)root/roostats:$Id: HypoTestResult.cxx 38101 2011-02-16 16:52:00Z moneta $
00002 // Author: Kyle Cranmer, Lorenzo Moneta, Gregory Schott, Wouter Verkerke, Sven Kreiss
00003 /*************************************************************************
00004  * Copyright (C) 1995-2008, Rene Brun and Fons Rademakers.               *
00005  * All rights reserved.                                                  *
00006  *                                                                       *
00007  * For the licensing terms see $ROOTSYS/LICENSE.                         *
00008  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
00009  *************************************************************************/
00010 
00011 /*****************************************************************************
00012  * Project: RooStats
00013  * Package: RooFit/RooStats  
00014  * @(#)root/roofit/roostats:$Id: HypoTestResult.cxx 38101 2011-02-16 16:52:00Z moneta $
00015  * Authors:                     
00016  *   Kyle Cranmer, Lorenzo Moneta, Gregory Schott, Wouter Verkerke, Sven Kreiss
00017  *
00018  *****************************************************************************/
00019 
00020 
00021 
00022 //_________________________________________________
00023 /*
00024 BEGIN_HTML
00025 <p>
00026 HypoTestResult is a base class for results from hypothesis tests.
00027 Any tool inheriting from HypoTestCalculator can return a HypoTestResult.
00028 As such, it stores a p-value for the null-hypothesis (eg. background-only) 
00029 and an alternate hypothesis (eg. signal+background).  
00030 The p-values can also be transformed into confidence levels (CLb, CLsplusb) in a trivial way.
00031 The ratio of the CLsplusb to CLb is often called CLs, and is considered useful, though it is 
00032 not a probability.
00033 Finally, the p-value of the null can be transformed into a number of equivalent Gaussian sigma using the 
00034 Significance method.
00035 END_HTML
00036 */
00037 //
00038 
00039 #include "RooStats/HypoTestResult.h"
00040 #include "RooAbsReal.h"
00041 
00042 #ifndef RooStats_RooStatsUtils
00043 #include "RooStats/RooStatsUtils.h"
00044 #endif
00045 
00046 #include <limits>
00047 #define NaN numeric_limits<float>::quiet_NaN()
00048 #define IsNaN(a) isnan(a)
00049 
00050 ClassImp(RooStats::HypoTestResult) ;
00051 
00052 using namespace RooStats;
00053 
00054 
00055 //____________________________________________________________________
00056 HypoTestResult::HypoTestResult(const char* name) : 
00057    TNamed(name,name),
00058    fNullPValue(NaN), fAlternatePValue(NaN),
00059    fTestStatisticData(NaN),
00060    fNullDistr(NULL), fAltDistr(NULL),
00061    fPValueIsRightTail(kTRUE)
00062 {
00063    // Default constructor
00064 }
00065 
00066 
00067 //____________________________________________________________________
00068 HypoTestResult::HypoTestResult(const char* name, Double_t nullp, Double_t altp) :
00069    TNamed(name,name),
00070    fNullPValue(nullp), fAlternatePValue(altp),
00071    fTestStatisticData(NaN),
00072    fNullDistr(NULL), fAltDistr(NULL),
00073    fPValueIsRightTail(kTRUE)
00074 {
00075    // Alternate constructor
00076 }
00077 
00078 
00079 //____________________________________________________________________
00080 HypoTestResult::~HypoTestResult()
00081 {
00082    // Destructor
00083 
00084 }
00085 
00086 
00087 void HypoTestResult::Append(const HypoTestResult* other) {
00088    // Add additional toy-MC experiments to the current results.
00089    // Use the data test statistics of the added object if it is not already
00090    // set (otherwise, ignore the new one).
00091 
00092    if(fNullDistr)
00093       fNullDistr->Add(other->GetNullDistribution());
00094    else
00095       fNullDistr = other->GetNullDistribution();
00096 
00097    if(fAltDistr)
00098       fAltDistr->Add(other->GetAltDistribution());
00099    else
00100       fAltDistr = other->GetAltDistribution();
00101 
00102    // if no data is present use the other HypoTestResult's data
00103    if(IsNaN(fTestStatisticData)) fTestStatisticData = other->GetTestStatisticData();
00104 
00105    UpdatePValue(fNullDistr, &fNullPValue, fPValueIsRightTail);
00106    UpdatePValue(fAltDistr, &fAlternatePValue, !fPValueIsRightTail);
00107 }
00108 
00109 
00110 //____________________________________________________________________
00111 void HypoTestResult::SetAltDistribution(SamplingDistribution *alt) {
00112    fAltDistr = alt;
00113    UpdatePValue(fAltDistr, &fAlternatePValue, !fPValueIsRightTail);
00114 }
00115 //____________________________________________________________________
00116 void HypoTestResult::SetNullDistribution(SamplingDistribution *null) {
00117    fNullDistr = null;
00118    UpdatePValue(fNullDistr, &fNullPValue, fPValueIsRightTail);
00119 }
00120 //____________________________________________________________________
00121 void HypoTestResult::SetTestStatisticData(const Double_t tsd) {
00122    fTestStatisticData = tsd;
00123 
00124    UpdatePValue(fNullDistr, &fNullPValue, fPValueIsRightTail);
00125    UpdatePValue(fAltDistr, &fAlternatePValue, !fPValueIsRightTail);
00126 }
00127 //____________________________________________________________________
00128 void HypoTestResult::SetPValueIsRightTail(Bool_t pr) {
00129    fPValueIsRightTail = pr;
00130 
00131    UpdatePValue(fNullDistr, &fNullPValue, fPValueIsRightTail);
00132    UpdatePValue(fAltDistr, &fAlternatePValue, !fPValueIsRightTail);
00133 }
00134 
00135 //____________________________________________________________________
00136 Bool_t HypoTestResult::HasTestStatisticData(void) const {
00137    return !IsNaN(fTestStatisticData);
00138 }
00139 
00140 Double_t HypoTestResult::NullPValueError() const {
00141    // compute error on Null pvalue 
00142    // Use binomial error and 
00143    if(!fNullDistr  ||  !HasTestStatisticData()) return 0.0;
00144 
00145    double squares = 0.0;
00146    //vector<Double_t> values = fNullDistr->GetSamplingDistribution();
00147    const vector<Double_t> & weights = fNullDistr->GetSampleWeights();
00148    size_t entries = weights.size();
00149 
00150    
00151    // compute sum of weight and sum of weight square 
00152    double sumw = 0.0;
00153    for(size_t i=0; i < entries; i++) {
00154       sumw += weights[i];
00155       squares += weights[i]*weights[i];
00156    }
00157    if (sumw == 0) return 0.0;
00158    double neff = sumw*sumw/squares; 
00159 
00160    return sqrt( NullPValue()*(1.- NullPValue() )/ neff );
00161 
00162    // // weights
00163    // for(size_t i=0; i < entries; i++) {
00164    //    if( (GetPValueIsRightTail()  &&  values[i] > fTestStatisticData) ||
00165    //        (!GetPValueIsRightTail()  &&  values[i] < fTestStatisticData)
00166    //    ) {
00167    //       squares += pow(weights[i], 2);
00168    //    }
00169    // }
00170    // squares /= entries;
00171 
00172    // //cout << "NullPValue Binomial error: " << TMath::Sqrt(NullPValue() * (1. - NullPValue()) / entries) << endl;
00173    // return sqrt( (squares - pow(NullPValue(),2)) / entries );
00174 }
00175 
00176 //____________________________________________________________________
00177 Double_t HypoTestResult::CLbError() const {
00178    // compute CLb error
00179    // Clb =  1 - NullPValue() 
00180    // must use opposite condition that routine above
00181 
00182    if(!fNullDistr  ||  !HasTestStatisticData()) return 0.0;
00183 
00184    double sumw = 0.0; 
00185    double squares = 0.0;
00186    //vector<Double_t> values = fNullDistr->GetSamplingDistribution();
00187    const vector<Double_t> & weights = fNullDistr->GetSampleWeights();
00188    size_t entries = weights.size();
00189 
00190    // // weights
00191    // for(size_t i=0; i < entries; i++) {
00192    //    if( (GetPValueIsRightTail()  &&  values[i] <= fTestStatisticData) ||
00193    //        (!GetPValueIsRightTail()  &&  values[i] >= fTestStatisticData)
00194    //    ) {
00195    //       squares += pow(weights[i], 2);
00196    //    }
00197    // }
00198    // squares /= entries;
00199 
00200    for(size_t i=0; i < entries; i++) {
00201       sumw += weights[i];
00202       squares += weights[i]*weights[i];
00203    }
00204    if (sumw == 0) return 0.0;
00205    double neff = sumw*sumw/squares; 
00206 
00207    return sqrt( CLb()*(1.-CLb() )/ neff );
00208 
00209 }
00210 
00211 //____________________________________________________________________
00212 Double_t HypoTestResult::CLsplusbError() const {
00213    if(!fAltDistr  ||  !HasTestStatisticData()) return 0.0;
00214 
00215    double squares = 0.0;
00216    //vector<Double_t> values = fAltDistr->GetSamplingDistribution();
00217    const vector<Double_t> & weights = fAltDistr->GetSampleWeights();
00218    size_t entries = weights.size();
00219 
00220 
00221    // // weights
00222    // for(size_t i=0; i < entries; i++) {
00223    //    if( (GetPValueIsRightTail()  &&  values[i] <= fTestStatisticData) ||
00224    //        (!GetPValueIsRightTail()  &&  values[i] >= fTestStatisticData)
00225    //    ) {
00226    //       squares += pow(weights[i], 2);
00227    //    }
00228    // }
00229    // squares /= entries;
00230 
00231    //cout << "CLs+b Binomial error: " << TMath::Sqrt(CLsplusb() * (1. - CLsplusb()) / entries) << endl;
00232    //return sqrt( (squares - pow(CLsplusb(),2)) / entries );
00233 
00234    double sumw = 0.0;
00235    for(size_t i=0; i < entries; i++) {
00236       sumw += weights[i];
00237       squares += weights[i]*weights[i];
00238    }
00239    if (sumw == 0) return 0.0;
00240    double neff = sumw*sumw/squares; 
00241 
00242    return sqrt( CLsplusb()*(1.-CLsplusb() )/ neff );
00243 
00244 }
00245 
00246 
00247 //____________________________________________________________________
00248 Double_t HypoTestResult::CLsError() const {
00249    // Returns an estimate of the error on CLs through combination of the
00250    // errors on CLb and CLsplusb:
00251    // BEGIN_LATEX
00252    // #sigma_{CL_s} = CL_s
00253    // #sqrt{#left( #frac{#sigma_{CL_{s+b}}}{CL_{s+b}} #right)^2 + #left( #frac{#sigma_{CL_{b}}}{CL_{b}} #right)^2}
00254    // END_LATEX
00255 
00256    if(!fAltDistr || !fNullDistr) return 0.0;
00257 
00258    // unsigned const int n_b = fNullDistr->GetSamplingDistribution().size();
00259    // unsigned const int n_sb = fAltDistr->GetSamplingDistribution().size();
00260 
00261    if (CLb() == 0 ) return numeric_limits<double>::infinity();
00262 
00263    double cl_b_err2 = pow(CLbError(),2);
00264    double cl_sb_err2 = pow(CLsplusbError(),2);
00265 
00266    return TMath::Sqrt(cl_sb_err2 + cl_b_err2 * pow(CLs(),2))/CLb();
00267 }
00268 
00269 
00270 
00271 // private
00272 //____________________________________________________________________
00273 void HypoTestResult::UpdatePValue(const SamplingDistribution* distr, Double_t *pvalue, Bool_t pIsRightTail) {
00274    // updates the pvalue if sufficient data is available
00275 
00276    if(IsNaN(fTestStatisticData)) return;
00277 
00278    /* Got to be careful for discrete distributions:
00279     * To get the right behaviour for limits, the p-value for the Null must not
00280     * include the value of fTestStatistic (ie the value is in the confidence level),
00281     * but for the Alt, it must be included.
00282     *
00283     * Technical note: to find out, whether we are doing the calc for Null or Alt,
00284     * we compare the pIsRightTail given to this function to fPValueIsRightTail which
00285     * always refers to the Null. Therefore, if they are equal it is the Null, if not
00286     * it is the Alt.
00287     */
00288    if(distr) {
00289       if(pIsRightTail) {
00290          *pvalue = distr->Integral(fTestStatisticData, RooNumber::infinity(), kTRUE,
00291             pIsRightTail == fPValueIsRightTail ? kFALSE : kTRUE,     // lowClosed
00292             kTRUE       // highClosed
00293          ); // [or( fTestStatistic, inf ]
00294       }else{
00295          *pvalue = distr->Integral(-RooNumber::infinity(), fTestStatisticData, kTRUE,
00296             kTRUE,      // lowClosed
00297             pIsRightTail == fPValueIsRightTail ? kFALSE : kTRUE      // highClosed
00298          ); // [ -inf, fTestStatistic )or]
00299       }
00300    }
00301 }
00302