RooAbsTestStatistic.cxx

Go to the documentation of this file.
00001 /*****************************************************************************
00002  * Project: RooFit                                                           *
00003  * Package: RooFitCore                                                       *
00004  * @(#)root/roofitcore:$Id: RooAbsTestStatistic.cxx 37380 2010-12-07 21:26:55Z wouter $
00005  * Authors:                                                                  *
00006  *   WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu       *
00007  *   DK, David Kirkby,    UC Irvine,         dkirkby@uci.edu                 *
00008  *                                                                           *
00009  * Copyright (c) 2000-2005, Regents of the University of California          *
00010  *                          and Stanford University. All rights reserved.    *
00011  *                                                                           *
00012  * Redistribution and use in source and binary forms,                        *
00013  * with or without modification, are permitted according to the terms        *
00014  * listed in LICENSE (http://roofit.sourceforge.net/license.txt)             *
00015  *****************************************************************************/
00016 
00017 //////////////////////////////////////////////////////////////////////////////
00018 //
00019 // BEGIN_HTML
00020 // RooAbsTestStatistic is the abstract base class for all test
00021 // statistics. Test statistics that evaluate the PDF at each data
00022 // point should inherit from the RooAbsOptTestStatistic class which
00023 // implements several generic optimizations that can be done for such
00024 // quantities.
00025 //
00026 // This test statistic base class organizes calculation of test
00027 // statistic values for RooSimultaneous PDF as a combination of test
00028 // statistic values for the PDF components of the simultaneous PDF and
00029 // organizes multi-processor parallel calculation of test statistic
00030 // values. For the latter, the test statistic value is calculated in
00031 // partitions in parallel executing processes and a posteriori
00032 // combined in the main thread.
00033 // END_HTML
00034 //
00035 
00036 
00037 #include "RooFit.h"
00038 #include "Riostream.h"
00039 
00040 #include "RooAbsTestStatistic.h"
00041 #include "RooAbsPdf.h"
00042 #include "RooSimultaneous.h"
00043 #include "RooAbsData.h"
00044 #include "RooArgSet.h"
00045 #include "RooRealVar.h"
00046 #include "RooNLLVar.h"
00047 #include "RooRealMPFE.h"
00048 #include "RooErrorHandler.h"
00049 #include "RooMsgService.h"
00050 
00051 #include <string>
00052 
00053 ClassImp(RooAbsTestStatistic)
00054 ;
00055 
00056 
00057 //_____________________________________________________________________________
00058 RooAbsTestStatistic::RooAbsTestStatistic()
00059 {
00060   // Default constructor
00061   _func = 0 ;
00062   _data = 0 ;
00063   _projDeps = 0 ;
00064   _init = kFALSE ;
00065   _gofArray = 0 ;
00066   _mpfeArray = 0 ;
00067   _projDeps = 0 ;
00068 }
00069 
00070 
00071 
00072 //_____________________________________________________________________________
00073 RooAbsTestStatistic::RooAbsTestStatistic(const char *name, const char *title, RooAbsReal& real, RooAbsData& data,
00074                                          const RooArgSet& projDeps, const char* rangeName, const char* addCoefRangeName,
00075                                          Int_t nCPU, Bool_t interleave, Bool_t verbose, Bool_t splitCutRange) :
00076   RooAbsReal(name,title),
00077   _paramSet("paramSet","Set of parameters",this),
00078   _func(&real),
00079   _data(&data),
00080   _projDeps((RooArgSet*)projDeps.Clone()),
00081   _rangeName(rangeName?rangeName:""),
00082   _addCoefRangeName(addCoefRangeName?addCoefRangeName:""),
00083   _splitRange(splitCutRange),
00084   _simCount(1),
00085   _verbose(verbose),
00086   _nGof(0),
00087   _gofArray(0),
00088   _nCPU(nCPU),
00089   _mpfeArray(0),
00090   _mpinterl(interleave)
00091 {
00092   // Constructor taking function (real), a dataset (data), a set of projected observables (projSet). If
00093   // rangeName is not null, only events in the dataset inside the range will be used in the test
00094   // statistic calculation. If addCoefRangeName is not null, all RooAddPdf component of 'real' will be
00095   // instructed to fix their fraction definitions to the given named range. If nCPU is greater than
00096   // 1 the test statistic calculation will be paralellized over multiple processes. By default the data
00097   // is split with 'bulk' partitioning (each process calculates a contigious block of fraction 1/nCPU
00098   // of the data). For binned data this approach may be suboptimal as the number of bins with >0 entries
00099   // in each processing block many vary greatly thereby distributing the workload rather unevenly.
00100   // If interleave is set to true, the interleave partitioning strategy is used where each partition
00101   // i takes all bins for which (ibin % ncpu == i) which is more likely to result in an even workload.
00102   // If splitCutRange is true, a different rangeName constructed as rangeName_{catName} will be used
00103   // as range definition for each index state of a RooSimultaneous
00104 
00105 
00106   // Register all parameters as servers
00107   RooArgSet* params = real.getParameters(&data) ;
00108   _paramSet.add(*params) ;
00109   delete params ;
00110 
00111   if (_nCPU>1) {
00112 
00113     _gofOpMode = MPMaster ;
00114 
00115   } else {
00116 
00117     // Determine if RooAbsReal is a RooSimultaneous
00118     Bool_t simMode = dynamic_cast<RooSimultaneous*>(&real)?kTRUE:kFALSE ;
00119 
00120     if (simMode) {
00121       _gofOpMode = SimMaster ;
00122     } else {
00123       _gofOpMode = Slave ;
00124     }
00125   }
00126 
00127   _setNum = 0 ;
00128   _numSets = 1 ;
00129   _init = kFALSE ;
00130   _nEvents = data.numEntries() ;
00131 }
00132 
00133 
00134 
00135 //_____________________________________________________________________________
00136 RooAbsTestStatistic::RooAbsTestStatistic(const RooAbsTestStatistic& other, const char* name) :
00137   RooAbsReal(other,name),
00138   _paramSet("paramSet","Set of parameters",this),
00139   _func(other._func),
00140   _data(other._data),
00141   _projDeps((RooArgSet*)other._projDeps->Clone()),
00142   _rangeName(other._rangeName),
00143   _addCoefRangeName(other._addCoefRangeName),
00144   _splitRange(other._splitRange),
00145   _simCount(1),
00146   _verbose(other._verbose),
00147   _nGof(0),
00148   _gofArray(0),
00149   _nCPU(other._nCPU),
00150   _mpfeArray(0),
00151   _mpinterl(other._mpinterl)
00152 {
00153   // Copy constructor
00154 
00155   // Our parameters are those of original
00156   _paramSet.add(other._paramSet) ;
00157 
00158   if (_nCPU>1) {
00159 
00160     _gofOpMode = MPMaster ;
00161 
00162   } else {
00163 
00164     // Determine if RooAbsReal is a RooSimultaneous
00165     Bool_t simMode = dynamic_cast<RooSimultaneous*>(_func)?kTRUE:kFALSE ;
00166 
00167     if (simMode) {
00168       _gofOpMode = SimMaster ;
00169     } else {
00170       _gofOpMode = Slave ;
00171     }
00172   }
00173 
00174   _setNum = 0 ;
00175   _numSets = 1 ;
00176   _init = kFALSE ;
00177   _nEvents = _data->numEntries() ;
00178 
00179 
00180 }
00181 
00182 
00183 
00184 //_____________________________________________________________________________
00185 RooAbsTestStatistic::~RooAbsTestStatistic()
00186 {
00187   // Destructor
00188 
00189   if (_gofOpMode==MPMaster && _init) {
00190     Int_t i ;
00191     for (i=0 ; i<_nCPU ; i++) {
00192       delete _mpfeArray[i] ;
00193     }
00194     delete[] _mpfeArray ;
00195   }
00196 
00197   if (_gofOpMode==SimMaster && _init) {
00198     Int_t i ;
00199     for (i=0 ; i<_nGof ; i++) {
00200       delete _gofArray[i] ;
00201     }
00202     delete[] _gofArray ;
00203   }
00204 
00205   if (_projDeps) {
00206     delete _projDeps ;
00207   }
00208 }
00209 
00210 
00211 
00212 //_____________________________________________________________________________
00213 Double_t RooAbsTestStatistic::evaluate() const
00214 {
00215   // Calculates and return value of test statistic. If the test statistic
00216   // is calculated from on a RooSimultaneous, the test statistic calculation
00217   // is performed separately on each simultaneous p.d.f component and associated
00218   // data and then combined. If the test statistic calculation is parallelized
00219   // partitions are calculated in nCPU processes and a posteriori combined.
00220 
00221   // One-time Initialization
00222   if (!_init) {
00223     const_cast<RooAbsTestStatistic*>(this)->initialize() ;
00224   }
00225 
00226   if (_gofOpMode==SimMaster) {
00227 
00228     // Evaluate array of owned GOF objects
00229     Double_t ret = combinedValue((RooAbsReal**)_gofArray,_nGof) ;
00230 
00231     // Only apply global normalization if SimMaster doesn't have MP master
00232     if (numSets()==1) {
00233 //       cout << "RooAbsTestStatistic::evaluate(" << GetName() << ") A dividing ret= " << ret << " by globalNorm of " << globalNormalization() << endl ;
00234       ret /= globalNormalization() ;
00235     }
00236 
00237     return ret ;
00238 
00239   } else if (_gofOpMode==MPMaster) {
00240 
00241     // Start calculations in parallel
00242     Int_t i ;
00243     for (i=0 ; i<_nCPU ; i++) {
00244       _mpfeArray[i]->calculate() ;
00245     }
00246     Double_t ret = combinedValue((RooAbsReal**)_mpfeArray,_nCPU)/globalNormalization() ;
00247     return ret ;
00248 
00249   } else {
00250 
00251     // Evaluate as straight FUNC
00252     Int_t nFirst, nLast, nStep ;
00253     if (_mpinterl) {
00254       nFirst = _setNum ;
00255       nLast  = _nEvents ;
00256       nStep  = _numSets ;
00257     } else {
00258       nFirst = _nEvents * _setNum / _numSets ;
00259       nLast  = _nEvents * (_setNum+1) / _numSets ;
00260       nStep  = 1 ;
00261     }
00262 
00263 
00264     //cout << "nCPU = " << _nCPU << (_mpinterl?"INTERLEAVE":"BULK") << " nFirst = " << nFirst << " nLast = " << nLast << " nStep = " << nStep << endl ;
00265 
00266     Double_t ret =  evaluatePartition(nFirst,nLast,nStep) ;
00267     if (numSets()==1) {
00268 //       cout << "RooAbsTestStatistic::evaluate(" << GetName() << ") B dividing ret= " << ret << " by globalNorm of " << globalNormalization() << endl ;
00269       ret /= globalNormalization() ;
00270     }
00271 
00272     return ret ;
00273 
00274   }
00275 }
00276 
00277 
00278 
00279 //_____________________________________________________________________________
00280 Bool_t RooAbsTestStatistic::initialize()
00281 {
00282   // One-time initialization of the test statistic. Setup
00283   // infrastructure for simultaneous p.d.f processing and/or
00284   // parallelized processing if requested
00285 
00286   if (_init) return kFALSE ;
00287 
00288   if (_gofOpMode==MPMaster) {
00289     initMPMode(_func,_data,_projDeps,_rangeName.size()?_rangeName.c_str():0,_addCoefRangeName.size()?_addCoefRangeName.c_str():0) ;
00290   } else if (_gofOpMode==SimMaster) {
00291     initSimMode((RooSimultaneous*)_func,_data,_projDeps,_rangeName.size()?_rangeName.c_str():0,_addCoefRangeName.size()?_addCoefRangeName.c_str():0) ;
00292   }
00293   _init = kTRUE ;
00294   return kFALSE ;
00295 }
00296 
00297 
00298 
00299 //_____________________________________________________________________________
00300 Bool_t RooAbsTestStatistic::redirectServersHook(const RooAbsCollection& newServerList, Bool_t mustReplaceAll, Bool_t nameChange, Bool_t)
00301 {
00302   // Forward server redirect calls to component test statistics
00303 
00304   if (_gofOpMode==SimMaster && _gofArray) {
00305     // Forward to slaves
00306     Int_t i ;
00307     for (i=0 ; i<_nGof ; i++) {
00308       if (_gofArray[i]) {
00309         _gofArray[i]->recursiveRedirectServers(newServerList,mustReplaceAll,nameChange) ;
00310       }
00311     }
00312   } else if (_gofOpMode==MPMaster && _mpfeArray) {
00313 
00314     // Forward to slaves
00315     Int_t i ;
00316     for (i=0 ; i<_nCPU ; i++) {
00317       if (_mpfeArray[i]) {
00318         _mpfeArray[i]->recursiveRedirectServers(newServerList,mustReplaceAll,nameChange) ;
00319 //      cout << "redirecting servers on " << _mpfeArray[i]->GetName() << endl ;
00320       }
00321     }
00322 
00323   }
00324   return kFALSE ;
00325 }
00326 
00327 
00328 
00329 //_____________________________________________________________________________
00330 void RooAbsTestStatistic::printCompactTreeHook(ostream& os, const char* indent)
00331 {
00332   // Add extra information on component test statistics when printing
00333   // itself as part of a tree structure
00334 
00335   if (_gofOpMode==SimMaster) {
00336     // Forward to slaves
00337     Int_t i ;
00338     os << indent << "RooAbsTestStatistic begin GOF contents" << endl ;
00339     for (i=0 ; i<_nGof ; i++) {
00340       if (_gofArray[i]) {
00341         TString indent2(indent) ;
00342         indent2 += Form("[%d] ",i) ;
00343         _gofArray[i]->printCompactTreeHook(os,indent2) ;
00344       }
00345     }
00346     os << indent << "RooAbsTestStatistic end GOF contents" << endl ;
00347   } else if (_gofOpMode==MPMaster) {
00348     // WVE implement this
00349   }
00350 }
00351 
00352 
00353 
00354 //_____________________________________________________________________________
00355 void RooAbsTestStatistic::constOptimizeTestStatistic(ConstOpCode opcode)
00356 {
00357   // Forward constant term optimization management calls to component
00358   // test statistics
00359   Int_t i ;
00360   initialize() ;
00361   if (_gofOpMode==SimMaster) {
00362     // Forward to slaves
00363     for (i=0 ; i<_nGof ; i++) {
00364       if (_gofArray[i]) _gofArray[i]->constOptimizeTestStatistic(opcode) ;
00365     }
00366   } else if (_gofOpMode==MPMaster) {
00367     for (i=0 ; i<_nCPU ; i++) {
00368       _mpfeArray[i]->constOptimizeTestStatistic(opcode) ;
00369     }
00370   }
00371 }
00372 
00373 
00374 
00375 //_____________________________________________________________________________
00376 void RooAbsTestStatistic::setMPSet(Int_t inSetNum, Int_t inNumSets)
00377 {
00378   // Set MultiProcessor set number identification of this instance
00379 
00380   _setNum = inSetNum ; _numSets = inNumSets ;
00381   if (_gofOpMode==SimMaster) {
00382     // Forward to slaves
00383     initialize() ;
00384     Int_t i ;
00385     for (i=0 ; i<_nGof ; i++) {
00386       if (_gofArray[i]) _gofArray[i]->setMPSet(inSetNum,inNumSets) ;
00387     }
00388   }
00389 }
00390 
00391 
00392 
00393 //_____________________________________________________________________________
00394 void RooAbsTestStatistic::initMPMode(RooAbsReal* real, RooAbsData* data, const RooArgSet* projDeps, const char* rangeName, const char* addCoefRangeName)
00395 {
00396   // Initialize multi-processor calculation mode. Create component test statistics in separate
00397   // processed that are connected to this process through a RooAbsRealMPFE front-end class.
00398 
00399   Int_t i ;
00400   _mpfeArray = new pRooRealMPFE[_nCPU] ;
00401 
00402   // Create proto-goodness-of-fit
00403   RooAbsTestStatistic* gof = create(GetName(),GetTitle(),*real,*data,*projDeps,rangeName,addCoefRangeName,1,_mpinterl,_verbose,_splitRange) ;
00404   gof->recursiveRedirectServers(_paramSet) ;
00405 
00406   for (i=0 ; i<_nCPU ; i++) {
00407 
00408     gof->setMPSet(i,_nCPU) ;
00409     gof->SetName(Form("%s_GOF%d",GetName(),i)) ;
00410     gof->SetTitle(Form("%s_GOF%d",GetTitle(),i)) ;
00411 
00412     Bool_t doInline = (i==_nCPU-1) ;
00413     if (!doInline) coutI(Eval) << "RooAbsTestStatistic::initMPMode: starting remote server process #" << i << endl ;
00414     _mpfeArray[i] = new RooRealMPFE(Form("%s_%lx_MPFE%d",GetName(),(ULong_t)this,i),Form("%s_%lx_MPFE%d",GetTitle(),(ULong_t)this,i),*gof,doInline) ;
00415     _mpfeArray[i]->initialize() ;
00416   }
00417   //cout << "initMPMode --- done" << endl ;
00418   return ;
00419 }
00420 
00421 
00422 
00423 //_____________________________________________________________________________
00424 void RooAbsTestStatistic::initSimMode(RooSimultaneous* simpdf, RooAbsData* data,
00425                                       const RooArgSet* projDeps, const char* rangeName, const char* addCoefRangeName)
00426 {
00427   // Initialize simultaneous p.d.f processing mode. Strip simultaneous
00428   // p.d.f into individual components, split dataset in subset
00429   // matching each component and create component test statistics for
00430   // each of them.
00431 
00432 
00433   RooAbsCategoryLValue& simCat = (RooAbsCategoryLValue&) simpdf->indexCat() ;
00434 
00435 
00436   TString simCatName(simCat.GetName()) ;
00437   TList* dsetList = const_cast<RooAbsData*>(data)->split(simCat,processEmptyDataSets()) ;
00438   if (!dsetList) {
00439     coutE(Fitting) << "RooAbsTestStatistic::initSimMode(" << GetName() << ") ERROR: index category of simultaneous pdf is missing in dataset, aborting" << endl ;
00440     throw std::string("RooAbsTestStatistic::initSimMode() ERROR, index category of simultaneous pdf is missing in dataset, aborting") ;
00441     RooErrorHandler::softAbort() ;
00442   }
00443 
00444   // Count number of used states
00445   Int_t n(0) ;
00446   _nGof = 0 ;
00447   RooCatType* type ;
00448   TIterator* catIter = simCat.typeIterator() ;
00449   while((type=(RooCatType*)catIter->Next())){
00450 
00451     // Retrieve the PDF for this simCat state
00452     RooAbsPdf* pdf =  simpdf->getPdf(type->GetName()) ;
00453     RooAbsData* dset = (RooAbsData*) dsetList->FindObject(type->GetName()) ;
00454 
00455     if (pdf && dset && (dset->sumEntries()!=0. || processEmptyDataSets() )) {
00456       _nGof++ ;
00457     }
00458   }
00459 
00460   // Allocate arrays
00461   _gofArray = new pRooAbsTestStatistic[_nGof] ;
00462 
00463   // Create array of regular fit contexts, containing subset of data and single fitCat PDF
00464   catIter->Reset() ;
00465   while((type=(RooCatType*)catIter->Next())){
00466 
00467     // Retrieve the PDF for this simCat state
00468     RooAbsPdf* pdf =  simpdf->getPdf(type->GetName()) ;
00469     RooAbsData* dset = (RooAbsData*) dsetList->FindObject(type->GetName()) ;
00470 
00471     if (pdf && dset && (dset->sumEntries()!=0. || processEmptyDataSets())) {
00472       coutI(Fitting) << "RooAbsTestStatistic::initSimMode: creating slave calculator #" << n << " for state " << type->GetName()
00473                      << " (" << dset->numEntries() << " dataset entries)" << endl ;
00474 
00475       if (_splitRange && rangeName) {
00476         _gofArray[n] = create(type->GetName(),type->GetName(),*pdf,*dset,*projDeps,
00477                               Form("%s_%s",rangeName,type->GetName()),addCoefRangeName,_nCPU*(_mpinterl?-1:1),_mpinterl,_verbose,_splitRange) ;
00478       } else {
00479         _gofArray[n] = create(type->GetName(),type->GetName(),*pdf,*dset,*projDeps,
00480                               rangeName,addCoefRangeName,_nCPU,_mpinterl,_verbose,_splitRange) ;
00481       }
00482       _gofArray[n]->setSimCount(_nGof) ;
00483 
00484       // Servers may have been redirected between instantiation and (deferred) initialization
00485       _gofArray[n]->recursiveRedirectServers(_paramSet) ;
00486       n++ ;
00487     } else {
00488       if ((!dset || (dset->sumEntries()==0. && !processEmptyDataSets()) ) && pdf) {
00489         if (_verbose) {
00490           coutI(Fitting) << "RooAbsTestStatistic::initSimMode: state " << type->GetName()
00491                          << " has no data entries, no slave calculator created" << endl ;
00492         }
00493       }
00494     }
00495   }
00496 
00497   dsetList->Delete() ;
00498   delete dsetList ;
00499   delete catIter ;
00500 }
00501 
00502 
00503 
00504 

Generated on Tue Jul 5 15:05:56 2011 for ROOT_528-00b_version by  doxygen 1.5.1