SPlot.cxx

Go to the documentation of this file.
00001 // @(#)root/roostats:$Id: SPlot.cxx 36025 2010-10-01 16:16:41Z moneta $
00002 // Author: Kyle Cranmer   28/07/2008
00003 
00004 /*************************************************************************
00005  * Copyright (C) 1995-2008, Rene Brun and Fons Rademakers.               *
00006  * All rights reserved.                                                  *
00007  *                                                                       *
00008  * For the licensing terms see $ROOTSYS/LICENSE.                         *
00009  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
00010  *************************************************************************/
00011 
00012 /*****************************************************************************
00013  * Project: RooStats
00014  * Package: RooFit/RooStats  
00015  *
00016  * Authors:                     
00017  *   Original code from M. Pivk as part of MLFit package from BaBar.
00018  *   Modifications:
00019  *     Giacinto Piacquadio, Maurizio Pierini: modifications for new RooFit version
00020  *     George H. Lewis, Kyle Cranmer: generalized for weighted events
00021  * 
00022  * Porting to RooStats (with permission) by Kyle Cranmer, July 2008
00023  *
00024  *****************************************************************************/
00025 
00026 
00027 
00028 //_________________________________________________
00029 //BEGIN_HTML
00030 // This class calculates sWeights used to create an sPlot.  
00031 // The code is based on 
00032 // ``SPlot: A statistical tool to unfold data distributions,'' 
00033 //  Nucl. Instrum. Meth. A 555, 356 (2005) 
00034 //  [arXiv:physics/0402083].
00035 //
00036 // An SPlot gives us  the distribution of some variable, x in our 
00037 // data sample for a given species (eg. signal or background).  
00038 // The result is similar to a likelihood projection plot, but no cuts are made, 
00039 //  so every event contributes to the distribution.
00040 //
00041 // [Usage]
00042 // To use this class, you first must have a pdf that includes
00043 // yields for (possibly several) different species.
00044 // Create an instance of the class by supplying a data set,
00045 // the pdf, and a list of the yield variables.  The SPlot Class
00046 // will calculate SWeights and include these as columns in the RooDataSet.
00047 //END_HTML
00048 //
00049 
00050 #include <vector>
00051 #include <map>
00052 
00053 #include "RooStats/SPlot.h"
00054 #include "RooAbsPdf.h"
00055 #include "RooDataSet.h"
00056 #include "RooRealVar.h"
00057 #include "RooGlobalFunc.h"
00058 #include "TTree.h"
00059 #include "RooStats/RooStatsUtils.h" 
00060 
00061 
00062 #include "TMatrixD.h"
00063 
00064 
00065 ClassImp(RooStats::SPlot) ;
00066 
00067 using namespace RooStats;
00068 
00069 
00070 
00071 //__________________________________________________________________
00072 SPlot::~SPlot()
00073 {
00074   if(fSData)
00075     delete fSData;
00076 
00077 }
00078 
00079 //____________________________________________________________________
00080 SPlot::SPlot():
00081   TNamed()
00082 {
00083   // Default constructor
00084 
00085   RooArgList Args;
00086 
00087   fSWeightVars = Args;
00088 
00089   fSData = NULL;
00090 
00091 }
00092 
00093 //_________________________________________________________________
00094 SPlot::SPlot(const char* name, const char* title):
00095   TNamed(name, title)
00096 {
00097 
00098   RooArgList Args;
00099 
00100   fSWeightVars = Args;
00101 
00102   fSData = NULL;
00103 
00104 }
00105 
00106 //___________________________________________________________________
00107 SPlot::SPlot(const char* name, const char* title, const RooDataSet &data):
00108   TNamed(name, title)
00109 {
00110   //Constructor from a RooDataSet
00111   //No sWeighted variables are present
00112   
00113   RooArgList Args;
00114   
00115   fSWeightVars = Args;
00116 
00117   fSData = (RooDataSet*) &data;
00118 }
00119 
00120 
00121 //____________________________________________________________________
00122 SPlot::SPlot(const SPlot &other):
00123   TNamed(other)
00124 {
00125   // Copy Constructor from another SPlot
00126   
00127   RooArgList Args = (RooArgList) other.GetSWeightVars();
00128   
00129   fSWeightVars.addClone(Args);
00130 
00131   fSData = (RooDataSet*) other.GetSDataSet();
00132   
00133 }
00134 
00135 
00136 //______________________________________________________________________
00137 SPlot::SPlot(const char* name, const char* title, RooDataSet& data, RooAbsPdf* pdf, 
00138              const RooArgList &yieldsList, const RooArgSet &projDeps, 
00139              bool includeWeights, bool cloneData, const char* newName):
00140   TNamed(name, title)
00141 {
00142   if(cloneData == 1)
00143     fSData = (RooDataSet*) data.Clone(newName);
00144   else
00145     fSData = (RooDataSet*) &data;
00146 
00147   // Add check that yieldsList contains all RooRealVars
00148   TIterator* iter = yieldsList.createIterator() ;
00149   RooAbsArg* arg ;
00150   while((arg=(RooAbsArg*)iter->Next())) {
00151     if (!dynamic_cast<RooRealVar*>(arg)) {
00152       coutE(InputArguments) << "SPlot::SPlot(" << GetName() << ") input argument " 
00153                             << arg->GetName() << " is not of type RooRealVar " << endl ;
00154       throw string(Form("SPlot::SPlot(%s) input argument %s is not of type RooRealVar",GetName(),arg->GetName())) ;
00155     }
00156   }
00157   delete iter ;
00158 
00159   //Construct a new SPlot class,
00160   //calculate sWeights, and include them
00161   //in the RooDataSet of this class.
00162 
00163   this->AddSWeight(pdf, yieldsList, projDeps, includeWeights);
00164 }
00165 
00166 
00167 //___________________________________________________________________________
00168 RooDataSet* SPlot::SetSData(RooDataSet* data)
00169 {
00170   if(data)    {
00171     fSData = (RooDataSet*) data;
00172     return fSData;
00173   }  else
00174     return NULL;
00175 }
00176 
00177 //__________________________________________________________________________
00178 RooDataSet* SPlot::GetSDataSet() const
00179 {
00180   return fSData;
00181 }  
00182 
00183 //____________________________________________________________________________
00184 Double_t SPlot::GetSWeight(Int_t numEvent, const char* sVariable) const
00185 {
00186 
00187   if(numEvent > fSData->numEntries() )
00188     {
00189       coutE(InputArguments)  << "Invalid Entry Number" << endl;
00190       return -1;
00191     }
00192 
00193   if(numEvent < 0)
00194     {
00195       coutE(InputArguments)  << "Invalid Entry Number" << endl;
00196       return -1;    
00197     }
00198 
00199   Double_t totalYield = 0;
00200 
00201   std::string varname(sVariable);
00202   varname += "_sw";
00203 
00204 
00205   if(fSWeightVars.find(sVariable) )
00206     {
00207       RooArgSet Row(*fSData->get(numEvent));
00208       totalYield += Row.getRealValue(sVariable); 
00209       
00210       return totalYield;
00211     }
00212 
00213   if( fSWeightVars.find(varname.c_str())  )
00214     {
00215 
00216       RooArgSet Row(*fSData->get(numEvent));
00217       totalYield += Row.getRealValue(varname.c_str() ); 
00218       
00219       return totalYield;
00220     }
00221   
00222   else
00223     coutE(InputArguments) << "InputVariable not in list of sWeighted variables" << endl;
00224     
00225   return -1;
00226 }
00227 
00228 
00229 //____________________________________________________________________
00230 Double_t SPlot::GetSumOfEventSWeight(Int_t numEvent) const
00231 {
00232 
00233   //Sum the SWeights for a particular event.
00234   //This sum should equal the total weight of that event.
00235   //This method is intended to be used as a check.
00236 
00237 
00238   if(numEvent > fSData->numEntries() )
00239     {
00240       coutE(InputArguments)  << "Invalid Entry Number" << endl;
00241       return -1;
00242     }
00243 
00244   if(numEvent < 0)
00245     {
00246       coutE(InputArguments)  << "Invalid Entry Number" << endl;
00247       return -1;    
00248     }
00249 
00250   Int_t numSWeightVars = this->GetNumSWeightVars();
00251 
00252   Double_t eventSWeight = 0;
00253 
00254   RooArgSet Row(*fSData->get(numEvent));
00255   
00256   for (Int_t i = 0; i < numSWeightVars; i++) 
00257     eventSWeight += Row.getRealValue(fSWeightVars.at(i)->GetName() );
00258  
00259   return  eventSWeight;
00260 }
00261 
00262 
00263 //_________________________________________________________________
00264 Double_t SPlot::GetYieldFromSWeight(const char* sVariable) const
00265 {
00266 
00267 
00268   //Sum the SWeights for a particular specie over all events
00269   //This should equal the total (weighted) yield of that specie
00270   //This method is intended as a check.
00271 
00272   Double_t totalYield = 0;
00273 
00274   std::string varname(sVariable);
00275   varname += "_sw";
00276 
00277 
00278   if(fSWeightVars.find(sVariable) )
00279     {
00280       for(Int_t i=0; i < fSData->numEntries(); i++)
00281         {
00282           RooArgSet Row(*fSData->get(i));
00283           totalYield += Row.getRealValue(sVariable); 
00284         }
00285       
00286       return totalYield;
00287     }
00288 
00289   if( fSWeightVars.find(varname.c_str())  )
00290     {
00291       for(Int_t i=0; i < fSData->numEntries(); i++)
00292         {
00293           RooArgSet Row(*fSData->get(i));
00294           totalYield += Row.getRealValue(varname.c_str() ); 
00295         }
00296       
00297       return totalYield;
00298     }
00299   
00300   else
00301     coutE(InputArguments) << "InputVariable not in list of sWeighted variables" << endl;
00302     
00303   return -1;
00304 }
00305 
00306 
00307 //______________________________________________________________________
00308 RooArgList SPlot::GetSWeightVars() const
00309 {
00310    
00311   //Return a RooArgList containing the SWeights
00312 
00313   RooArgList Args = fSWeightVars;
00314    
00315   return  Args;
00316    
00317 }
00318 
00319 //__________________________________________________________________ 
00320 Int_t SPlot::GetNumSWeightVars() const
00321 {
00322   //Return the number of SWeights
00323   //In other words, return the number of
00324   //species that we are trying to extract.
00325   
00326   RooArgList Args = fSWeightVars;
00327   
00328   return Args.getSize();
00329 }
00330 
00331 
00332 //____________________________________________________________________
00333 void SPlot::AddSWeight( RooAbsPdf* pdf, const RooArgList &yieldsTmp, 
00334                         const RooArgSet &projDeps, bool includeWeights) 
00335 {  
00336 
00337   //
00338   // Method which adds the sWeights to the dataset.
00339   // Input is the PDF, a RooArgList of the yields (floating)
00340   // and a RooArgSet of the projDeps.
00341   //
00342   // The projDeps will not be normalized over when calculating the SWeights
00343   // and will be considered parameters, not observables.
00344   //
00345   // The SPlot will contain two new variables for each specie of name "varname":
00346   //
00347   // L_varname is the value of the pdf for the variable "varname" at values of this event
00348   // varname_sw is the value of the sWeight for the variable "varname" for this event
00349   
00350   // Find Parameters in the PDF to be considered fixed when calculating the SWeights
00351   // and be sure to NOT include the yields in that list
00352   //
00353 
00354 
00355   RooFit::MsgLevel currentLevel =  RooMsgService::instance().globalKillBelow();
00356   
00357   RooArgList* constParameters = (RooArgList*)pdf->getParameters(fSData) ;
00358   constParameters->remove(yieldsTmp, kTRUE, kTRUE);
00359   
00360 
00361   // Set these parameters constant and store them so they can later
00362   // be set to not constant
00363   std::vector<RooRealVar*> constVarHolder;  
00364 
00365   for(Int_t i = 0; i < constParameters->getSize(); i++) 
00366     {
00367       RooRealVar* varTemp = ( dynamic_cast<RooRealVar*>( constParameters->at(i) ) );
00368       if(varTemp &&  varTemp->isConstant() == 0 )
00369         {
00370           varTemp->setConstant();
00371           constVarHolder.push_back(varTemp);
00372         }
00373     }
00374   
00375   // Fit yields to the data with all other variables held constant
00376   // This is necessary because SPlot assumes the yields minimixe -Log(likelihood)
00377 
00378   pdf->fitTo(*fSData, RooFit::Extended(kTRUE), RooFit::SumW2Error(kTRUE), RooFit::PrintLevel(-1), RooFit::PrintEvalErrors(-1) );
00379 
00380   // Hold the value of the fitted yields
00381   std::vector<double> yieldsHolder;
00382   
00383   for(Int_t i = 0; i < yieldsTmp.getSize(); i++)   
00384     yieldsHolder.push_back( ((RooRealVar*) yieldsTmp.at(i))->getVal());
00385   
00386   Int_t nspec = yieldsTmp.getSize();
00387   RooArgList yields = *(RooArgList*)yieldsTmp.snapshot(kFALSE);
00388   
00389   if(currentLevel <= RooFit::DEBUG)
00390     {
00391       coutI(InputArguments) << "Printing Yields" << endl;
00392       yields.Print();
00393     }
00394   
00395   // The list of variables to normalize over when calculating PDF values.
00396 
00397   RooArgSet vars(*fSData->get() );
00398   vars.remove(projDeps, kTRUE, kTRUE);
00399   
00400   // Attach data set
00401 
00402   // const_cast<RooAbsPdf*>(pdf)->attachDataSet(*fSData);
00403 
00404   pdf->attachDataSet(*fSData);
00405 
00406   // first calculate the pdf values for all species and all events
00407   std::vector<RooRealVar*> yieldvars ;
00408   RooArgSet* parameters = pdf->getParameters(fSData) ;
00409 
00410   std::vector<Double_t> yieldvalues ;
00411   for (Int_t k = 0; k < nspec; ++k) 
00412     {
00413       RooRealVar* thisyield = dynamic_cast<RooRealVar*>(yields.at(k)) ;
00414       if (thisyield) { 
00415          RooRealVar* yieldinpdf = dynamic_cast<RooRealVar*>(parameters->find(thisyield->GetName() )) ;
00416 
00417          if (yieldinpdf) { 
00418             coutI(InputArguments)<< "yield in pdf: " << yieldinpdf->GetName() << " " << thisyield->getVal() << endl;
00419       
00420             yieldvars.push_back(yieldinpdf) ;
00421             yieldvalues.push_back(thisyield->getVal()) ;
00422          }
00423       }
00424     }
00425   
00426   Int_t numevents = fSData->numEntries() ;
00427   
00428   std::vector<std::vector<Double_t> > pdfvalues(numevents,std::vector<Double_t>(nspec,0)) ; 
00429 
00430       
00431   // set all yield to zero
00432   for(Int_t m=0; m<nspec; ++m) yieldvars[m]->setVal(0) ;
00433    
00434 
00435   // For every event and for every specie,
00436   // calculate the value of the component pdf for that specie
00437   // by setting the yield of that specie to 1
00438   // and all others to 0.  Evaluate the pdf for each event
00439   // and store the values.
00440 
00441   for (Int_t ievt = 0; ievt <numevents; ievt++) 
00442     {
00443       //   if (ievt % 100 == 0) 
00444       //  coutP(Eval)  << ".";
00445 
00446 
00447       //FIX THIS PART, EVALUATION PROGRESS!!
00448 
00449       RooStats::SetParameters(fSData->get(ievt), pdf->getVariables()); 
00450 
00451       //   RooArgSet row(*fSData->get(ievt));
00452        
00453       for(Int_t k = 0; k < nspec; ++k) 
00454         {
00455           //Check that range of yields is at least (0,1), and fix otherwise
00456           if(yieldvars[k]->getMin() > 0) 
00457             {
00458               coutW(InputArguments)  << "Minimum Range for " << yieldvars[k]->GetName() << " must be 0.  ";
00459               coutW(InputArguments)  << "Setting min range to 0" << std::endl;
00460               yieldvars[k]->setMin(0);
00461             }
00462 
00463           if(yieldvars[k]->getMax() < 1) 
00464             {
00465               coutW(InputArguments)  << "Maximum Range for " << yieldvars[k]->GetName() << " must be 1.  ";
00466               coutW(InputArguments)  << "Setting max range to 1" << std::endl;
00467               yieldvars[k]->setMax(1);
00468             }
00469 
00470           // set this yield to 1
00471           yieldvars[k]->setVal( 1 ) ;
00472           // evaluate the pdf    
00473           Double_t f_k = pdf->getVal(&vars) ;
00474           pdfvalues[ievt][k] = f_k ;
00475           if( !(f_k>1 || f_k<1) ) 
00476             coutW(InputArguments) << "Strange pdf value: " << ievt << " " << k << " " << f_k << std::endl ;
00477           yieldvars[k]->setVal( 0 ) ;
00478         }
00479     }
00480    
00481   // check that the likelihood normalization is fine
00482   std::vector<Double_t> norm(nspec,0) ;
00483   for (Int_t ievt = 0; ievt <numevents ; ievt++) 
00484     {
00485       Double_t dnorm(0) ;
00486       for(Int_t k=0; k<nspec; ++k) dnorm += yieldvalues[k] * pdfvalues[ievt][k] ;
00487       for(Int_t j=0; j<nspec; ++j) norm[j] += pdfvalues[ievt][j]/dnorm ;
00488     }
00489    
00490   coutI(Contents) << "likelihood norms: "  ;
00491 
00492   for(Int_t k=0; k<nspec; ++k)  coutI(Contents) << norm[k] << " " ;
00493   coutI(Contents) << std::endl ;
00494    
00495   // Make a TMatrixD to hold the covariance matrix.
00496   TMatrixD covInv(nspec, nspec);
00497   for (Int_t i = 0; i < nspec; i++) for (Int_t j = 0; j < nspec; j++) covInv(i,j) = 0;
00498    
00499   coutI(Contents) << "Calculating covariance matrix";
00500 
00501 
00502   // Calculate the inverse covariance matrix, using weights
00503   for (Int_t ievt = 0; ievt < numevents; ++ievt) 
00504     {
00505       
00506       fSData->get(ievt) ;
00507      
00508       // Calculate contribution to the inverse of the covariance
00509       // matrix. See BAD 509 V2 eqn. 15
00510 
00511       // Sum for the denominator
00512       Double_t dsum(0);
00513       for(Int_t k = 0; k < nspec; ++k) 
00514         dsum += pdfvalues[ievt][k] * yieldvalues[k] ;
00515        
00516       for(Int_t n=0; n<nspec; ++n)
00517         for(Int_t j=0; j<nspec; ++j) 
00518           {
00519             if(includeWeights == kTRUE)
00520               covInv(n,j) +=  fSData->weight()*pdfvalues[ievt][n]*pdfvalues[ievt][j]/(dsum*dsum) ;
00521             else 
00522               covInv(n,j) +=  pdfvalues[ievt][n]*pdfvalues[ievt][j]/(dsum*dsum) ;
00523           }
00524 
00525       //ADDED WEIGHT ABOVE
00526 
00527     }
00528    
00529   // Covariance inverse should now be computed!
00530    
00531   // Invert to get the covariance matrix
00532   if (covInv.Determinant() <=0) 
00533     {
00534       coutE(Eval) << "SPlot Error: covariance matrix is singular; I can't invert it!" << std::endl;
00535       covInv.Print();
00536       return;
00537     }
00538    
00539   TMatrixD covMatrix(TMatrixD::kInverted,covInv);
00540    
00541   //check cov normalization
00542   if(currentLevel <= RooFit::DEBUG)
00543     {
00544       coutI(Eval) << "Checking Likelihood normalization:  " << std::endl;
00545       coutI(Eval) << "Yield of specie  Sum of Row in Matrix   Norm" << std::endl; 
00546       for(Int_t k=0; k<nspec; ++k) 
00547         {
00548           Double_t covnorm(0) ;
00549           for(Int_t m=0; m<nspec; ++m) covnorm += covInv[k][m]*yieldvalues[m] ;
00550           Double_t sumrow(0) ;
00551           for(Int_t m = 0; m < nspec; ++m) sumrow += covMatrix[k][m] ;
00552           coutI(Eval)  << yieldvalues[k] << " " << sumrow << " " << covnorm << endl ;
00553         }
00554     }
00555   
00556   // calculate for each event the sWeight (BAD 509 V2 eq. 21)
00557   coutI(Eval) << "Calculating sWeight" << std::endl;
00558   std::vector<RooRealVar*> sweightvec ;
00559   std::vector<RooRealVar*> pdfvec ;  
00560   RooArgSet sweightset ;
00561    
00562   // Create and label the variables
00563   // used to store the SWeights
00564 
00565   fSWeightVars.Clear();
00566 
00567   for(Int_t k=0; k<nspec; ++k) 
00568     {
00569        std::string wname = std::string(yieldvars[k]->GetName()) + "_sw";
00570        RooRealVar* var = new RooRealVar(wname.c_str(),wname.c_str(),0) ;
00571        sweightvec.push_back( var) ;
00572        sweightset.add(*var) ;
00573        fSWeightVars.add(*var);
00574     
00575        wname = "L_" + std::string(yieldvars[k]->GetName());
00576        var = new RooRealVar(wname.c_str(),wname.c_str(),0) ;
00577        pdfvec.push_back( var) ;
00578        sweightset.add(*var) ;
00579     }
00580 
00581   // Create and fill a RooDataSet
00582   // with the SWeights
00583  
00584   RooDataSet* sWeightData = new RooDataSet("dataset", "dataset with sWeights", sweightset);
00585   
00586   for(Int_t ievt = 0; ievt < numevents; ++ievt) 
00587     {
00588 
00589       fSData->get(ievt) ;
00590        
00591       // sum for denominator
00592       Double_t dsum(0);
00593       for(Int_t k = 0; k < nspec; ++k)   dsum +=  pdfvalues[ievt][k] * yieldvalues[k] ;
00594       // covariance weighted pdf for each specief
00595       for(Int_t n=0; n<nspec; ++n) 
00596         {
00597           Double_t nsum(0) ;
00598           for(Int_t j=0; j<nspec; ++j) nsum += covMatrix(n,j) * pdfvalues[ievt][j] ;     
00599            
00600 
00601           //Add the sWeights here!!
00602           //Include weights,
00603           //ie events weights are absorbed into sWeight
00604 
00605 
00606           if(includeWeights == kTRUE) sweightvec[n]->setVal(fSData->weight() * nsum/dsum) ;
00607           else  sweightvec[n]->setVal( nsum/dsum) ;
00608 
00609           pdfvec[n]->setVal( pdfvalues[ievt][n] ) ;
00610 
00611           if( !(fabs(nsum/dsum)>=0 ) ) 
00612             {
00613               coutE(Contents) << "error: " << nsum/dsum << endl ;
00614               return;
00615             }
00616         }
00617       
00618       sWeightData->add(sweightset) ;
00619     }
00620 
00621     
00622   // Add the SWeights to the original data set
00623 
00624   fSData->merge(sWeightData);
00625 
00626   //Restore yield values
00627 
00628   for(Int_t i = 0; i < yieldsTmp.getSize(); i++) 
00629     ((RooRealVar*) yieldsTmp.at(i))->setVal(yieldsHolder.at(i));
00630    
00631   //Make any variables that were forced to constant no longer constant
00632 
00633   for(Int_t i=0; i < (Int_t) constVarHolder.size(); i++) 
00634     constVarHolder.at(i)->setConstant(kFALSE);
00635 
00636   return;
00637 
00638 }

Generated on Tue Jul 5 15:14:37 2011 for ROOT_528-00b_version by  doxygen 1.5.1