RooNDKeysPdf.cxx

Go to the documentation of this file.
00001 /*****************************************************************************
00002  * Project: RooFit                                                           *
00003  * Package: RooFitModels                                                     *
00004  *    File: $Id: RooNDKeysPdf.cxx 31258 2009-11-17 22:41:06Z wouter $
00005  * Authors:                                                                  *
00006  *   Max Baak, CERN, mbaak@cern.ch *
00007  *                                                                           *
00008  * Redistribution and use in source and binary forms,                        *
00009  * with or without modification, are permitted according to the terms        *
00010  * listed in LICENSE (http://roofit.sourceforge.net/license.txt)             *
00011  *****************************************************************************/
00012 #include <iostream>
00013 #include <algorithm>
00014 #include <string>
00015 
00016 #include "TMath.h"
00017 #include "TMatrixDSymEigen.h"
00018 #include "RooNDKeysPdf.h"
00019 #include "RooAbsReal.h"
00020 #include "RooRealVar.h"
00021 #include "RooRandom.h"
00022 #include "RooDataSet.h"
00023 #include "RooHist.h"
00024 #include "RooMsgService.h"
00025 
00026 //////////////////////////////////////////////////////////////////////////////
00027 //
00028 // BEGIN_HTML
00029 // Generic N-dimensional implementation of a kernel estimation p.d.f. This p.d.f. models the distribution
00030 // of an arbitrary input dataset as a superposition of Gaussian kernels, one for each data point,
00031 // each contributing 1/N to the total integral of the p.d.f.
00032 // <p>
00033 // If the 'adaptive mode' is enabled, the width of the Gaussian is adaptively calculated from the
00034 // local density of events, i.e. narrow for regions with high event density to preserve details and
00035 // wide for regions with log event density to promote smoothness. The details of the general algorithm
00036 // are described in the following paper: 
00037 // <p>
00038 // Cranmer KS, Kernel Estimation in High-Energy Physics.  
00039 //             Computer Physics Communications 136:198-207,2001 - e-Print Archive: hep ex/0011057
00040 // <p>
00041 // For multi-dimensional datasets, the kernels are modeled by multidimensional Gaussians. The kernels are 
00042 // constructed such that they reflect the correlation coefficients between the observables
00043 // in the input dataset.
00044 // END_HTML
00045 //
00046 
00047 using namespace std;
00048 
00049 ClassImp(RooNDKeysPdf)
00050 
00051 
00052 
00053 //_____________________________________________________________________________
00054 RooNDKeysPdf::RooNDKeysPdf(const char *name, const char *title,
00055                            const RooArgList& varList, RooDataSet& data,
00056                            TString options, Double_t rho, Double_t nSigma, Bool_t rotate) : 
00057   RooAbsPdf(name,title),
00058   _varList("varList","List of variables",this),
00059   _data(data),
00060   _options(options),
00061   _widthFactor(rho),
00062   _nSigma(nSigma),
00063   _weights(&_weights0),
00064   _rotate(rotate)
00065 {
00066   // Construct N-dimensional kernel estimation p.d.f. in observables 'varList'
00067   // from dataset 'data'. Options can be 
00068   //
00069   //   'a' = Use adaptive kernels (width varies with local event density)
00070   //   'm' = Mirror data points over observable boundaries. Improves modeling
00071   //         behavior at edges for distributions that are not close to zero
00072   //         at edge
00073   //   'd' = Debug flag
00074   //   'v' = Verbose flag
00075   // 
00076   // The parameter rho (default = 1) provides an overall scale factor that can
00077   // be applied to the bandwith calculated for each kernel. The nSigma parameter
00078   // determines the size of the box that is used to search for contributing kernels
00079   // around a given point in observable space. The nSigma parameters is used
00080   // in case of non-adaptive bandwidths and for the 1st non-adaptive pass for
00081   // the calculation of adaptive keys p.d.f.s.
00082   //
00083   // The optional weight arguments allows to specify an observable or function
00084   // expression in observables that specifies the weight of each event.
00085 
00086   // Constructor
00087   _varItr    = _varList.createIterator() ;
00088 
00089   TIterator* varItr = varList.createIterator() ;
00090   RooAbsArg* var ;
00091   for (Int_t i=0; (var = (RooAbsArg*)varItr->Next()); ++i) {
00092     if (!dynamic_cast<RooAbsReal*>(var)) {
00093       coutE(InputArguments) << "RooNDKeysPdf::ctor(" << GetName() << ") ERROR: variable " << var->GetName() 
00094                             << " is not of type RooAbsReal" << endl ;
00095       assert(0) ;
00096     }
00097     _varList.add(*var) ;
00098     _varName.push_back(var->GetName());
00099   }
00100   delete varItr ;
00101 
00102   createPdf();
00103 }
00104 
00105 
00106 
00107 //_____________________________________________________________________________
00108 RooNDKeysPdf::RooNDKeysPdf(const char *name, const char *title,
00109                            RooAbsReal& x, RooDataSet& data,
00110                            Mirror mirror, Double_t rho, Double_t nSigma, Bool_t rotate) : 
00111   RooAbsPdf(name,title),
00112   _varList("varList","List of variables",this),
00113   _data(data),
00114   _options("a"),
00115   _widthFactor(rho),
00116   _nSigma(nSigma), 
00117   _weights(&_weights0),
00118   _rotate(rotate)
00119 { 
00120   // Backward compatibility constructor for (1-dim) RooKeysPdf. If you are a new user,
00121   // please use the first constructor form.
00122 
00123   _varItr = _varList.createIterator() ;
00124   
00125   _varList.add(x) ;
00126   _varName.push_back(x.GetName());
00127 
00128   if (mirror!=NoMirror) {
00129     if (mirror!=MirrorBoth) 
00130       coutW(InputArguments) << "RooNDKeysPdf::RooNDKeysPdf() : Warning : asymmetric mirror(s) no longer supported." << endl;
00131     _options="m";
00132   }
00133 
00134   createPdf();
00135 }
00136 
00137 
00138 
00139 //_____________________________________________________________________________
00140 RooNDKeysPdf::RooNDKeysPdf(const char *name, const char *title, RooAbsReal& x, RooAbsReal & y,
00141                            RooDataSet& data, TString options, Double_t rho, Double_t nSigma, Bool_t rotate) : 
00142   RooAbsPdf(name,title),
00143   _varList("varList","List of variables",this),
00144   _data(data),
00145   _options(options),
00146   _widthFactor(rho),
00147   _nSigma(nSigma),
00148   _weights(&_weights0),
00149   _rotate(rotate)
00150 { 
00151   // Backward compatibility constructor for Roo2DKeysPdf. If you are a new user,
00152   // please use the first constructor form.
00153 
00154   _varItr = _varList.createIterator() ;
00155 
00156   _varList.add(RooArgSet(x,y)) ;
00157   _varName.push_back(x.GetName());
00158   _varName.push_back(y.GetName());
00159 
00160   createPdf();
00161 }
00162 
00163 
00164 
00165 //_____________________________________________________________________________
00166 RooNDKeysPdf::RooNDKeysPdf(const RooNDKeysPdf& other, const char* name) :
00167   RooAbsPdf(other,name), 
00168   _varList("varList",this,other._varList),
00169   _data(other._data),
00170   _options(other._options),
00171   _widthFactor(other._widthFactor),
00172   _nSigma(other._nSigma),
00173   _weights(&_weights0),
00174   _rotate(other._rotate)
00175 {
00176   // Constructor
00177   _varItr      = _varList.createIterator() ;
00178 
00179   _fixedShape  = other._fixedShape;
00180   _mirror      = other._mirror;
00181   _debug       = other._debug;
00182   _verbose     = other._verbose;
00183   _sqrt2pi     = other._sqrt2pi;
00184   _nDim        = other._nDim;
00185   _nEvents     = other._nEvents;
00186   _nEventsM    = other._nEventsM;
00187   _nEventsW    = other._nEventsW;
00188   _d           = other._d;
00189   _n           = other._n;
00190   _dataPts     = other._dataPts;
00191   _dataPtsR    = other._dataPtsR;
00192   _weights0    = other._weights0;
00193   _weights1    = other._weights1;
00194   if (_options.Contains("a")) { _weights = &_weights1; }
00195   //_sortIdcs    = other._sortIdcs;
00196   _sortTVIdcs  = other._sortTVIdcs;
00197   _varName     = other._varName;
00198   _rho         = other._rho;
00199   _x           = other._x;
00200   _x0          = other._x0 ;
00201   _x1          = other._x1 ;
00202   _x2          = other._x2 ;
00203   _xDatLo      = other._xDatLo;
00204   _xDatHi      = other._xDatHi;
00205   _xDatLo3s    = other._xDatLo3s;
00206   _xDatHi3s    = other._xDatHi3s;
00207   _mean        = other._mean;
00208   _sigma       = other._sigma;
00209 
00210   // BoxInfo
00211   _netFluxZ    = other._netFluxZ;
00212   _nEventsBW   = other._nEventsBW;
00213   _nEventsBMSW = other._nEventsBMSW;
00214   _xVarLo      = other._xVarLo;
00215   _xVarHi      = other._xVarHi;
00216   _xVarLoM3s   = other._xVarLoM3s;
00217   _xVarLoP3s   = other._xVarLoP3s;
00218   _xVarHiM3s   = other._xVarHiM3s;
00219   _xVarHiP3s   = other._xVarHiP3s;
00220   _bpsIdcs     = other._bpsIdcs;
00221   _sIdcs       = other._sIdcs;
00222   _bIdcs       = other._bIdcs;
00223   _bmsIdcs     = other._bmsIdcs;
00224 
00225   _rangeBoxInfo= other._rangeBoxInfo ;
00226   _fullBoxInfo = other._fullBoxInfo ;
00227 
00228   _idx         = other._idx;
00229   _minWeight   = other._minWeight;
00230   _maxWeight   = other._maxWeight;
00231   _wMap        = other._wMap;
00232 
00233   _covMat      = new TMatrixDSym(*other._covMat);
00234   _corrMat     = new TMatrixDSym(*other._corrMat);
00235   _rotMat      = new TMatrixD(*other._rotMat);
00236   _sigmaR      = new TVectorD(*other._sigmaR);
00237   _dx          = new TVectorD(*other._dx);
00238   _sigmaAvgR   = other._sigmaAvgR;
00239 }
00240 
00241 
00242 
00243 //_____________________________________________________________________________
00244 RooNDKeysPdf::~RooNDKeysPdf() 
00245 {
00246   if (_varItr)    delete _varItr;
00247   if (_covMat)    delete _covMat;
00248   if (_corrMat)   delete _corrMat;
00249   if (_rotMat)    delete _rotMat;
00250   if (_sigmaR)    delete _sigmaR;
00251   if (_dx)        delete _dx;
00252 
00253   // delete all the boxinfos map
00254   while ( !_rangeBoxInfo.empty() ) {
00255     map<pair<string,int>,BoxInfo*>::iterator iter = _rangeBoxInfo.begin();
00256     BoxInfo* box= (*iter).second;
00257     _rangeBoxInfo.erase(iter);
00258     delete box;
00259   }
00260 
00261   _dataPts.clear();
00262   _dataPtsR.clear();
00263   _weights0.clear();
00264   _weights1.clear();
00265   //_sortIdcs.clear();
00266   _sortTVIdcs.clear();
00267 }
00268 
00269 
00270 void
00271 
00272 //_____________________________________________________________________________
00273 RooNDKeysPdf::createPdf(Bool_t firstCall) const
00274 {
00275   // evaluation order of constructor.
00276 
00277   if (firstCall) {
00278     // set options
00279     setOptions();
00280     // initialization
00281     initialize();
00282   }
00283 
00284   // copy dataset, calculate sigma_i's, determine min and max event weight
00285   loadDataSet(firstCall);
00286   // mirror dataset around dataset boundaries -- does not depend on event weights
00287   if (_mirror) mirrorDataSet();
00288   // store indices and weights of events with high enough weights
00289   loadWeightSet();
00290 
00291   // store indices of events in variable boundaries and box shell.
00292 //calculateShell(&_fullBoxInfo);
00293   // calculate normalization needed in analyticalIntegral()
00294 //calculatePreNorm(&_fullBoxInfo);
00295 
00296   // lookup table for determining which events contribute to a certain coordinate
00297   sortDataIndices();
00298 
00299   // determine static and/or adaptive bandwidth
00300   calculateBandWidth();
00301 }
00302 
00303 
00304 void 
00305 
00306 //_____________________________________________________________________________
00307 RooNDKeysPdf::setOptions() const
00308 {
00309   // set the configuration
00310 
00311   _options.ToLower(); 
00312 
00313   if( _options.Contains("a") ) { _weights = &_weights1; }
00314   else                         { _weights = &_weights0; }
00315   if( _options.Contains("m") )   _mirror = true;
00316   else                           _mirror = false;
00317   if( _options.Contains("d") )   _debug  = true;
00318   else                           _debug  = false;
00319   if( _options.Contains("v") ) { _debug = true;  _verbose = true; } 
00320   else                         { _debug = false; _verbose = false; }
00321 
00322   cxcoutD(InputArguments) << "RooNDKeysPdf::setOptions()    options = " << _options 
00323                         << "\n\tbandWidthType    = " << _options.Contains("a")    
00324                         << "\n\tmirror           = " << _mirror
00325                         << "\n\tdebug            = " << _debug            
00326                         << "\n\tverbose          = " << _verbose  
00327                         << endl;
00328 
00329   if (_nSigma<2.0) {
00330     coutW(InputArguments) << "RooNDKeysPdf::setOptions() : Warning : nSigma = " << _nSigma << " < 2.0. "
00331                           << "Calculated normalization could be too large." 
00332                           << endl;
00333   }
00334 }
00335 
00336 
00337 void
00338 
00339 //_____________________________________________________________________________
00340 RooNDKeysPdf::initialize() const
00341 {
00342   // initialization
00343   _sqrt2pi   = sqrt(2.0*TMath::Pi()) ;
00344   _nDim      = _varList.getSize();
00345   _nEvents   = (Int_t)_data.numEntries();
00346   _nEventsM  = _nEvents;
00347   _fixedShape= kFALSE;
00348 
00349   if(_nDim==0) {
00350     coutE(InputArguments) << "ERROR:  RooNDKeysPdf::initialize() : The observable list is empty. "
00351                           << "Unable to begin generating the PDF." << endl;
00352     assert (_nDim!=0);
00353   }
00354 
00355   if(_nEvents==0) {
00356     coutE(InputArguments) << "ERROR:  RooNDKeysPdf::initialize() : The input data set is empty. "
00357                           << "Unable to begin generating the PDF." << endl;
00358     assert (_nEvents!=0);
00359   }
00360 
00361   _d         = static_cast<Double_t>(_nDim);
00362 
00363   vector<Double_t> dummy(_nDim,0.);
00364   _dataPts.resize(_nEvents,dummy);
00365   _weights0.resize(_nEvents,dummy);
00366   //_sortIdcs.resize(_nDim);
00367   _sortTVIdcs.resize(_nDim);
00368 
00369   _rho.resize(_nDim,_widthFactor);
00370 
00371   _x.resize(_nDim,0.);
00372   _x0.resize(_nDim,0.);
00373   _x1.resize(_nDim,0.);
00374   _x2.resize(_nDim,0.);
00375 
00376   _mean.resize(_nDim,0.);
00377   _sigma.resize(_nDim,0.);
00378 
00379   _xDatLo.resize(_nDim,0.);
00380   _xDatHi.resize(_nDim,0.);
00381   _xDatLo3s.resize(_nDim,0.);
00382   _xDatHi3s.resize(_nDim,0.);
00383 
00384   boxInfoInit(&_fullBoxInfo,0,0xFFFF);
00385 
00386   _minWeight=0;
00387   _maxWeight=0;
00388   _wMap.clear();
00389 
00390   _covMat = 0;
00391   _corrMat= 0;
00392   _rotMat = 0;
00393   _sigmaR = 0;
00394   _dx = new TVectorD(_nDim); _dx->Zero();
00395   _dataPtsR.resize(_nEvents,*_dx);
00396 
00397   _varItr->Reset() ;
00398   RooRealVar* var ;
00399   for(Int_t j=0; (var=(RooRealVar*)_varItr->Next()); ++j) {
00400     _xDatLo[j] = var->getMin();
00401     _xDatHi[j] = var->getMax();
00402   }
00403 }
00404 
00405 
00406 void
00407 
00408 //_____________________________________________________________________________
00409 RooNDKeysPdf::loadDataSet(Bool_t firstCall) const
00410 {
00411   // copy the dataset and calculate some useful variables
00412 
00413   // first some initialization
00414   _nEventsW=0.;
00415   TMatrixD mat(_nDim,_nDim); 
00416   if (!_covMat)  _covMat = new TMatrixDSym(_nDim);    
00417   if (!_corrMat) _corrMat= new TMatrixDSym(_nDim);    
00418   if (!_rotMat)  _rotMat = new TMatrixD(_nDim,_nDim); 
00419   if (!_sigmaR)  _sigmaR = new TVectorD(_nDim);       
00420   mat.Zero();
00421   _covMat->Zero();
00422   _corrMat->Zero();
00423   _rotMat->Zero();
00424   _sigmaR->Zero();
00425 
00426   const RooArgSet* values= _data.get();  
00427   vector<RooRealVar*> dVars(_nDim);
00428   for  (Int_t j=0; j<_nDim; j++) {
00429     dVars[j] = (RooRealVar*)values->find(_varName[j].c_str());
00430     _x0[j]=_x1[j]=_x2[j]=0.;
00431   }
00432 
00433   _idx.clear();
00434   for (Int_t i=0; i<_nEvents; i++) {
00435     _data.get(i); // fills dVars
00436     _idx.push_back(i);
00437     vector<Double_t>& point  = _dataPts[i];
00438     TVectorD& pointV = _dataPtsR[i];
00439 
00440     Double_t myweight = _data.weight(); // default is one?
00441     if ( TMath::Abs(myweight)>_maxWeight ) { _maxWeight = TMath::Abs(myweight); }
00442     _nEventsW += myweight;
00443 
00444     for (Int_t j=0; j<_nDim; j++) {
00445       for (Int_t k=0; k<_nDim; k++) 
00446         mat(j,k) += dVars[j]->getVal() * dVars[k]->getVal() * myweight;
00447 
00448       // only need to do once
00449       if (firstCall) 
00450         point[j] = pointV[j] = dVars[j]->getVal();
00451 
00452       _x0[j] += 1. * myweight; 
00453       _x1[j] += point[j] * myweight ; 
00454       _x2[j] += point[j] * point[j] * myweight ;
00455 
00456       // only need to do once
00457       if (firstCall) {
00458         if (point[j]<_xDatLo[j]) { _xDatLo[j]=point[j]; }
00459         if (point[j]>_xDatHi[j]) { _xDatHi[j]=point[j]; }
00460       }
00461     }
00462   }
00463 
00464   _n = TMath::Power(4./(_nEventsW*(_d+2.)), 1./(_d+4.)) ; 
00465   // = (4/[n(dim(R) + 2)])^1/(dim(R)+4); dim(R) = 2
00466   _minWeight = (0.5 - TMath::Erf(_nSigma/sqrt(2.))/2.) * _maxWeight;
00467 
00468   for (Int_t j=0; j<_nDim; j++) {
00469     _mean[j]  = _x1[j]/_x0[j];
00470     _sigma[j] = sqrt(_x2[j]/_x0[j]-_mean[j]*_mean[j]);
00471   }
00472  
00473   for (Int_t j=0; j<_nDim; j++) {
00474     for (Int_t k=0; k<_nDim; k++) 
00475       (*_covMat)(j,k) = mat(j,k)/_x0[j] - _mean[j]*_mean[k] ;
00476   }
00477 
00478   // find decorrelation matrix and eigenvalues (R)
00479   TMatrixDSymEigen evCalculator(*_covMat);
00480   *_rotMat = evCalculator.GetEigenVectors();
00481   *_rotMat = _rotMat->T(); // transpose
00482   *_sigmaR = evCalculator.GetEigenValues();
00483   for (Int_t j=0; j<_nDim; j++) { (*_sigmaR)[j] = sqrt((*_sigmaR)[j]); }
00484 
00485   for (Int_t j=0; j<_nDim; j++) {
00486     for (Int_t k=0; k<_nDim; k++) 
00487       (*_corrMat)(j,k) = (*_covMat)(j,k)/(_sigma[j]*_sigma[k]) ;
00488   }
00489 
00490   if (_verbose) {
00491     //_covMat->Print();
00492     _rotMat->Print();
00493     _corrMat->Print();
00494     _sigmaR->Print();
00495   }
00496 
00497   if (!_rotate) {
00498     _rotMat->Print();
00499     _sigmaR->Print();
00500     TMatrixD haar(_nDim,_nDim);
00501     TMatrixD unit(TMatrixD::kUnit,haar);
00502     *_rotMat = unit;
00503     for (Int_t j=0; j<_nDim; j++) { (*_sigmaR)[j] = _sigma[j]; }
00504     _rotMat->Print();
00505     _sigmaR->Print();
00506   }
00507 
00508   _sigmaAvgR=1.;
00509   for (Int_t j=0; j<_nDim; j++) { _sigmaAvgR *= (*_sigmaR)[j]; }
00510   _sigmaAvgR = TMath::Power(_sigmaAvgR, 1./_d) ;
00511 
00512   for (Int_t i=0; i<_nEvents; i++) {
00513     TVectorD& pointR = _dataPtsR[i];
00514     pointR *= *_rotMat;
00515   }
00516   
00517   coutI(Contents) << "RooNDKeysPdf::loadDataSet(" << this << ")" 
00518                   << "\n Number of events in dataset: " << _nEvents 
00519                   << "\n Weighted number of events in dataset: " << _nEventsW 
00520                   << endl; 
00521 }
00522 
00523 
00524 void
00525 
00526 //_____________________________________________________________________________
00527 RooNDKeysPdf::mirrorDataSet() const
00528 {
00529   // determine mirror dataset.
00530   // mirror points are added around the physical boundaries of the dataset
00531   // Two steps:
00532   // 1. For each entry, determine if it should be mirrored (the mirror configuration).
00533   // 2. For each mirror configuration, make the mirror points.
00534 
00535   for (Int_t j=0; j<_nDim; j++) {
00536     _xDatLo3s[j] = _xDatLo[j] + _nSigma * (_rho[j] * _n * _sigma[j]);
00537     _xDatHi3s[j] = _xDatHi[j] - _nSigma * (_rho[j] * _n * _sigma[j]);
00538   }
00539 
00540   vector<Double_t> dummy(_nDim,0.);
00541   
00542   // 1.
00543   for (Int_t i=0; i<_nEvents; i++) {    
00544     vector<Double_t>& x = _dataPts[i];
00545     
00546     Int_t size = 1;
00547     vector<vector<Double_t> > mpoints(size,dummy);
00548     vector<vector<Int_t> > mjdcs(size);
00549     
00550     // create all mirror configurations for event i
00551     for (Int_t j=0; j<_nDim; j++) {
00552       
00553       vector<Int_t>& mjdxK = mjdcs[0];
00554       vector<Double_t>& mpointK = mpoints[0];
00555       
00556       // single mirror *at physical boundaries*
00557       if ((x[j]>_xDatLo[j] && x[j]<_xDatLo3s[j]) && x[j]<(_xDatLo[j]+_xDatHi[j])/2.) { 
00558         mpointK[j] = 2.*_xDatLo[j]-x[j]; 
00559         mjdxK.push_back(j);
00560       } else if ((x[j]<_xDatHi[j] && x[j]>_xDatHi3s[j]) && x[j]>(_xDatLo[j]+_xDatHi[j])/2.) { 
00561         mpointK[j] = 2.*_xDatHi[j]-x[j]; 
00562         mjdxK.push_back(j);
00563       }
00564     }
00565     
00566     vector<Int_t>& mjdx0 = mjdcs[0];
00567     // no mirror point(s) for this event
00568     if (size==1 && mjdx0.size()==0) continue;
00569     
00570     // 2.
00571     // generate all mirror points for event i
00572     vector<Int_t>& mjdx = mjdcs[0];
00573     vector<Double_t>& mpoint = mpoints[0];
00574     
00575     // number of mirror points for this mirror configuration
00576     Int_t eMir = 1 << mjdx.size(); 
00577     vector<vector<Double_t> > epoints(eMir,x);
00578     
00579     for (Int_t m=0; m<Int_t(mjdx.size()); m++) {
00580       Int_t size1 = 1 << m;
00581       Int_t size2 = 1 << (m+1);
00582       // copy all previous mirror points
00583       for (Int_t l=size1; l<size2; ++l) { 
00584         epoints[l] = epoints[l-size1];
00585         // fill high mirror points
00586         vector<Double_t>& epoint = epoints[l];
00587         epoint[mjdx[Int_t(mjdx.size()-1)-m]] = mpoint[mjdx[Int_t(mjdx.size()-1)-m]];
00588       }
00589     }
00590     
00591     // remove duplicate mirror points
00592     // note that: first epoint == x
00593     epoints.erase(epoints.begin());
00594 
00595     // add mirror points of event i to total dataset
00596     TVectorD pointR(_nDim); 
00597 
00598     for (Int_t m=0; m<Int_t(epoints.size()); m++) {
00599       _idx.push_back(i);
00600       _dataPts.push_back(epoints[m]);
00601       //_weights0.push_back(_weights0[i]);
00602       for (Int_t j=0; j<_nDim; j++) { pointR[j] = (epoints[m])[j]; }
00603       pointR *= *_rotMat;
00604       _dataPtsR.push_back(pointR);
00605     }
00606     
00607     epoints.clear();
00608     mpoints.clear();
00609     mjdcs.clear();
00610   } // end of event loop
00611 
00612   _nEventsM = Int_t(_dataPts.size());
00613 }
00614 
00615 
00616 void
00617 //_____________________________________________________________________________
00618 RooNDKeysPdf::loadWeightSet() const
00619 {
00620   _wMap.clear();
00621 
00622   for (Int_t i=0; i<_nEventsM; i++) {
00623     _data.get(_idx[i]);
00624     Double_t myweight = _data.weight();
00625     //if ( TMath::Abs(myweight)>_minWeight ) { 
00626       _wMap[i] = myweight; 
00627     //}
00628   }
00629 
00630   coutI(Contents) << "RooNDKeysPdf::loadWeightSet(" << this << ") : Number of weighted events : " << _wMap.size() << endl;
00631 }
00632 
00633 
00634 void
00635 //_____________________________________________________________________________
00636 RooNDKeysPdf::calculateShell(BoxInfo* bi) const 
00637 {
00638   // determine points in +/- nSigma shell around the box determined by the variable
00639   // ranges. These points are needed in the normalization, to determine probability
00640   // leakage in and out of the box.
00641 
00642   for (Int_t j=0; j<_nDim; j++) {
00643     if (bi->xVarLo[j]==_xDatLo[j] && bi->xVarHi[j]==_xDatHi[j]) { 
00644       bi->netFluxZ = bi->netFluxZ && kTRUE; 
00645     } else { bi->netFluxZ = kFALSE; }
00646 
00647     bi->xVarLoM3s[j] = bi->xVarLo[j] - _nSigma * (_rho[j] * _n * _sigma[j]);
00648     bi->xVarLoP3s[j] = bi->xVarLo[j] + _nSigma * (_rho[j] * _n * _sigma[j]);
00649     bi->xVarHiM3s[j] = bi->xVarHi[j] - _nSigma * (_rho[j] * _n * _sigma[j]);
00650     bi->xVarHiP3s[j] = bi->xVarHi[j] + _nSigma * (_rho[j] * _n * _sigma[j]);
00651   }
00652 
00653   map<Int_t,Double_t>::iterator wMapItr = _wMap.begin();
00654 
00655   //for (Int_t i=0; i<_nEventsM; i++) {    
00656   for (; wMapItr!=_wMap.end(); ++wMapItr) {
00657     Int_t i = (*wMapItr).first;
00658 
00659     const vector<Double_t>& x = _dataPts[i];
00660     Bool_t inVarRange(kTRUE);
00661     Bool_t inVarRangePlusShell(kTRUE);
00662 
00663     for (Int_t j=0; j<_nDim; j++) {
00664 
00665       if (x[j]>bi->xVarLo[j] && x[j]<bi->xVarHi[j]) { 
00666         inVarRange = inVarRange && kTRUE; 
00667       } else { inVarRange = inVarRange && kFALSE; }    
00668 
00669       if (x[j]>bi->xVarLoM3s[j] && x[j]<bi->xVarHiP3s[j]) { 
00670         inVarRangePlusShell = inVarRangePlusShell && kTRUE;
00671       } else { inVarRangePlusShell = inVarRangePlusShell && kFALSE; }
00672     }
00673 
00674     // event in range?
00675     if (inVarRange) { 
00676       bi->bIdcs.push_back(i);
00677     }
00678 
00679     // event in shell?
00680     if (inVarRangePlusShell) { 
00681       bi->bpsIdcs[i] = kTRUE;
00682       Bool_t inShell(kFALSE);      
00683       for (Int_t j=0; j<_nDim; j++) {
00684         if ((x[j]>bi->xVarLoM3s[j] && x[j]<bi->xVarLoP3s[j]) && x[j]<(bi->xVarLo[j]+bi->xVarHi[j])/2.) { 
00685           inShell = kTRUE;
00686         } else if ((x[j]>bi->xVarHiM3s[j] && x[j]<bi->xVarHiP3s[j]) && x[j]>(bi->xVarLo[j]+bi->xVarHi[j])/2.) { 
00687           inShell = kTRUE;
00688         }
00689       }
00690       if (inShell) bi->sIdcs.push_back(i); // needed for normalization
00691       else { 
00692         bi->bmsIdcs.push_back(i);          // idem
00693       }
00694     }
00695   }
00696 
00697   coutI(Contents) << "RooNDKeysPdf::calculateShell() : " 
00698                   << "\n Events in shell " << bi->sIdcs.size() 
00699                   << "\n Events in box " << bi->bIdcs.size() 
00700                   << "\n Events in box and shell " << bi->bpsIdcs.size() 
00701                   << endl;
00702 }
00703 
00704 
00705 void
00706 //_____________________________________________________________________________
00707 RooNDKeysPdf::calculatePreNorm(BoxInfo* bi) const
00708 {
00709   //bi->nEventsBMSW=0.;
00710   //bi->nEventsBW=0.;
00711 
00712   // box minus shell
00713   for (Int_t i=0; i<Int_t(bi->bmsIdcs.size()); i++) 
00714     bi->nEventsBMSW += _wMap[bi->bmsIdcs[i]];
00715 
00716   // box
00717   for (Int_t i=0; i<Int_t(bi->bIdcs.size()); i++) 
00718     bi->nEventsBW += _wMap[bi->bIdcs[i]];
00719 
00720   cxcoutD(Eval) << "RooNDKeysPdf::calculatePreNorm() : " 
00721               << "\n nEventsBMSW " << bi->nEventsBMSW 
00722               << "\n nEventsBW " << bi->nEventsBW 
00723               << endl;
00724 }
00725 
00726 
00727 void
00728 //_____________________________________________________________________________
00729 RooNDKeysPdf::sortDataIndices(BoxInfo* bi) const
00730 {
00731   // sort entries, as needed for loopRange()
00732 
00733   itVec itrVecR;
00734   vector<TVectorD>::iterator dpRItr = _dataPtsR.begin();
00735   for (Int_t i=0; dpRItr!=_dataPtsR.end(); ++dpRItr, ++i) {
00736     if (bi) {
00737       if (bi->bpsIdcs.find(i)!=bi->bpsIdcs.end()) 
00738       //if (_wMap.find(i)!=_wMap.end()) 
00739         itrVecR.push_back(itPair(i,dpRItr));
00740     } else itrVecR.push_back(itPair(i,dpRItr));
00741   }
00742 
00743   for (Int_t j=0; j<_nDim; j++) { 
00744     _sortTVIdcs[j].clear();
00745     sort(itrVecR.begin(),itrVecR.end(),SorterTV_L2H(j));
00746     _sortTVIdcs[j] = itrVecR;
00747   }
00748 
00749   for (Int_t j=0; j<_nDim; j++) { 
00750     cxcoutD(Eval) << "RooNDKeysPdf::sortDataIndices() : Number of sorted events : " << _sortTVIdcs[j].size() << endl; 
00751   }
00752 }
00753 
00754 
00755 void
00756 //_____________________________________________________________________________
00757 RooNDKeysPdf::calculateBandWidth() const
00758 {
00759   cxcoutD(Eval) << "RooNDKeysPdf::calculateBandWidth()" << endl; 
00760 
00761   // non-adaptive bandwidth 
00762   // (default, and needed to calculate adaptive bandwidth)
00763 
00764   if(!_options.Contains("a")) {
00765       cxcoutD(Eval) << "RooNDKeysPdf::calculateBandWidth() Using static bandwidth." << endl;
00766   }    
00767   
00768   for (Int_t i=0; i<_nEvents; i++) {
00769     vector<Double_t>& weight = _weights0[i];
00770     for (Int_t j=0; j<_nDim; j++) { weight[j] = _rho[j] * _n * (*_sigmaR)[j] ; }
00771   }
00772 
00773   // adaptive width
00774   if(_options.Contains("a")) {
00775     cxcoutD(Eval) << "RooNDKeysPdf::calculateBandWidth() Using adaptive bandwidth." << endl;
00776       
00777     vector<Double_t> dummy(_nDim,0.);
00778     _weights1.resize(_nEvents,dummy);
00779 
00780     for(Int_t i=0; i<_nEvents; ++i) {
00781 
00782       vector<Double_t>& x = _dataPts[i];
00783       Double_t f =  TMath::Power( gauss(x,_weights0)/_nEventsW , -1./(2.*_d) ) ;
00784 
00785       vector<Double_t>& weight = _weights1[i];
00786       for (Int_t j=0; j<_nDim; j++) {
00787         Double_t norm = (_rho[j]*_n*(*_sigmaR)[j]) / sqrt(_sigmaAvgR) ; 
00788         weight[j] = norm * f / sqrt(12.) ;  //  note additional factor of sqrt(12) compared with HEP-EX/0011057
00789       }
00790     }
00791     _weights = &_weights1;
00792   } 
00793 }
00794 
00795 
00796 Double_t
00797 //_____________________________________________________________________________
00798 RooNDKeysPdf::gauss(vector<Double_t>& x, vector<vector<Double_t> >& weights) const 
00799 {
00800   // loop over all closest point to x, as determined by loopRange()
00801 
00802   if(_nEvents==0) return 0.;
00803 
00804   Double_t z=0.;
00805   map<Int_t,Bool_t> ibMap;
00806   ibMap.clear();
00807 
00808   // determine loop range for event x
00809   loopRange(x,ibMap);
00810 
00811   map<Int_t,Bool_t>::iterator ibMapItr = ibMap.begin();
00812 
00813   for (; ibMapItr!=ibMap.end(); ++ibMapItr) {
00814     Int_t i = (*ibMapItr).first;
00815 
00816     Double_t g(1.);
00817 
00818     if(i>=(Int_t)_idx.size()) {continue;} //---> 1.myline 
00819 
00820     const vector<Double_t>& point  = _dataPts[i];
00821     const vector<Double_t>& weight = weights[_idx[i]];
00822 
00823     for (Int_t j=0; j<_nDim; j++) { 
00824       (*_dx)[j] = x[j]-point[j]; 
00825     }
00826 
00827     if (_nDim>1) {
00828       *_dx *= *_rotMat; // rotate to decorrelated frame!
00829     }
00830 
00831     for (Int_t j=0; j<_nDim; j++) {
00832       Double_t r = (*_dx)[j];  //x[j] - point[j];
00833       Double_t c = 1./(2.*weight[j]*weight[j]);
00834 
00835       g *= exp( -c*r*r );
00836       g *= 1./(_sqrt2pi*weight[j]);
00837     }
00838     z += (g*_wMap[_idx[i]]);
00839   }
00840   return z;
00841 }
00842 
00843 
00844 void
00845 //_____________________________________________________________________________
00846 RooNDKeysPdf::loopRange(vector<Double_t>& x, map<Int_t,Bool_t>& ibMap) const
00847 {
00848   // determine closest points to x, to loop over in evaluate()
00849 
00850   TVectorD xRm(_nDim);
00851   TVectorD xRp(_nDim);
00852 
00853   for (Int_t j=0; j<_nDim; j++) { xRm[j] = xRp[j] = x[j]; }
00854 
00855   xRm *= *_rotMat;
00856   xRp *= *_rotMat;
00857   for (Int_t j=0; j<_nDim; j++) {
00858     xRm[j] -= _nSigma * (_rho[j] * _n * (*_sigmaR)[j]);
00859     xRp[j] += _nSigma * (_rho[j] * _n * (*_sigmaR)[j]);
00860   }
00861 
00862   vector<TVectorD> xvecRm(1,xRm);
00863   vector<TVectorD> xvecRp(1,xRp);
00864 
00865   map<Int_t,Bool_t> ibMapRT;
00866 
00867   for (Int_t j=0; j<_nDim; j++) {
00868     ibMap.clear();
00869     itVec::iterator lo = lower_bound(_sortTVIdcs[j].begin(), _sortTVIdcs[j].end(),
00870                                      itPair(0,xvecRm.begin()), SorterTV_L2H(j));
00871     itVec::iterator hi = upper_bound(_sortTVIdcs[j].begin(), _sortTVIdcs[j].end(),
00872                                      itPair(0,xvecRp.begin()), SorterTV_L2H(j));
00873     itVec::iterator it=lo;
00874     if (j==0) {
00875       if (_nDim==1) { for (it=lo; it!=hi; ++it) ibMap[(*it).first] = kTRUE; }
00876       else { for (it=lo; it!=hi; ++it) ibMapRT[(*it).first] = kTRUE; }
00877       continue;
00878     }
00879 
00880     for (it=lo; it!=hi; ++it) 
00881       if (ibMapRT.find((*it).first)!=ibMapRT.end()) { ibMap[(*it).first] = kTRUE; }
00882 
00883     ibMapRT.clear();
00884     if (j!=_nDim-1) { ibMapRT = ibMap; }
00885   }
00886 }
00887 
00888 
00889 void
00890 //_____________________________________________________________________________
00891 RooNDKeysPdf::boxInfoInit(BoxInfo* bi, const char* rangeName, Int_t /*code*/) const
00892 {
00893   vector<Bool_t> doInt(_nDim,kTRUE);
00894 
00895   bi->filled = kFALSE;
00896 
00897   bi->xVarLo.resize(_nDim,0.);
00898   bi->xVarHi.resize(_nDim,0.);
00899   bi->xVarLoM3s.resize(_nDim,0.);
00900   bi->xVarLoP3s.resize(_nDim,0.);
00901   bi->xVarHiM3s.resize(_nDim,0.);
00902   bi->xVarHiP3s.resize(_nDim,0.);
00903 
00904   bi->netFluxZ = kTRUE;
00905   bi->bpsIdcs.clear();
00906   bi->bIdcs.clear();
00907   bi->sIdcs.clear();
00908   bi->bmsIdcs.clear();
00909 
00910   bi->nEventsBMSW=0.;
00911   bi->nEventsBW=0.;
00912 
00913   _varItr->Reset() ;
00914   RooRealVar* var ;
00915   for(Int_t j=0; (var=(RooRealVar*)_varItr->Next()); ++j) {
00916     if (doInt[j]) {
00917       bi->xVarLo[j] = var->getMin(rangeName);
00918       bi->xVarHi[j] = var->getMax(rangeName);
00919     } else {
00920       bi->xVarLo[j] = var->getVal() ;
00921       bi->xVarHi[j] = var->getVal() ;
00922     }
00923   }
00924 }
00925 
00926 
00927 Double_t 
00928 //_____________________________________________________________________________
00929 RooNDKeysPdf::evaluate() const 
00930 {
00931   _varItr->Reset() ;
00932   RooAbsReal* var ;
00933   const RooArgSet* nset = _varList.nset() ;
00934   for(Int_t j=0; (var=(RooAbsReal*)_varItr->Next()); ++j) {    
00935     _x[j] = var->getVal(nset);
00936   }
00937 
00938   Double_t val = gauss(_x,*_weights);
00939 
00940   if (val>=1E-20)
00941     return val ;
00942   else 
00943     return (1E-20) ;
00944 }
00945 
00946 
00947 Int_t 
00948 //_____________________________________________________________________________
00949 RooNDKeysPdf::getAnalyticalIntegral(RooArgSet& allVars, RooArgSet& analVars, const char* rangeName) const
00950 {
00951 
00952   if (rangeName) return 0 ;
00953 
00954   Int_t code=0;
00955   if (matchArgs(allVars,analVars,RooArgSet(_varList))) { code=1; }
00956   
00957   return code;
00958 
00959 }
00960 
00961 
00962 Double_t 
00963 //_____________________________________________________________________________
00964 RooNDKeysPdf::analyticalIntegral(Int_t code, const char* rangeName) const
00965 {
00966   cxcoutD(Eval) << "Calling RooNDKeysPdf::analyticalIntegral(" << GetName() << ") with code " << code 
00967               << " and rangeName " << (rangeName?rangeName:"<none>") << endl;
00968 
00969   // determine which observables need to be integrated over ...
00970   Int_t nComb = 1 << (_nDim); 
00971   assert(code>=1 && code<nComb) ;
00972 
00973   vector<Bool_t> doInt(_nDim,kTRUE);
00974 
00975   // get BoxInfo
00976   BoxInfo* bi(0);
00977 
00978   if (rangeName) {
00979     string rangeNameStr(rangeName) ;
00980     bi = _rangeBoxInfo[make_pair(rangeNameStr,code)] ;
00981     if (!bi) {
00982       bi = new BoxInfo ;
00983       _rangeBoxInfo[make_pair(rangeNameStr,code)] = bi ;      
00984       boxInfoInit(bi,rangeName,code);
00985     }
00986   } else bi= &_fullBoxInfo ;
00987 
00988   // have boundaries changed?
00989   Bool_t newBounds(kFALSE);
00990   _varItr->Reset() ;
00991   RooRealVar* var ;
00992   for(Int_t j=0; (var=(RooRealVar*)_varItr->Next()); ++j) {
00993     if ((var->getMin(rangeName)-bi->xVarLo[j]!=0) ||
00994         (var->getMax(rangeName)-bi->xVarHi[j]!=0)) {
00995       newBounds = kTRUE;
00996     }
00997   }
00998 
00999   // reset
01000   if (newBounds) {
01001     cxcoutD(Eval) << "RooNDKeysPdf::analyticalIntegral() : Found new boundaries ... " << (rangeName?rangeName:"<none>") << endl;
01002     boxInfoInit(bi,rangeName,code);    
01003   }
01004 
01005   // recalculates netFluxZero and nEventsIR
01006   if (!bi->filled || newBounds) {
01007     // Fill box info with contents
01008     calculateShell(bi);
01009     calculatePreNorm(bi);
01010     bi->filled = kTRUE;
01011     sortDataIndices(bi);    
01012   }
01013 
01014   // first guess
01015   Double_t norm=bi->nEventsBW;
01016 
01017   if (_mirror && bi->netFluxZ) {
01018     // KEYS expression is self-normalized
01019     cxcoutD(Eval) << "RooNDKeysPdf::analyticalIntegral() : Using mirrored normalization : " << bi->nEventsBW << endl;
01020     return bi->nEventsBW;
01021   } 
01022   // calculate leakage in and out of variable range box
01023   else 
01024   {
01025     norm = bi->nEventsBMSW; 
01026     if (norm<0.) norm=0.;
01027     
01028     for (Int_t i=0; i<Int_t(bi->sIdcs.size()); ++i) {      
01029       Double_t prob=1.;
01030       const vector<Double_t>& x = _dataPts[bi->sIdcs[i]];
01031       const vector<Double_t>& weight = (*_weights)[_idx[bi->sIdcs[i]]];
01032       
01033       vector<Double_t> chi(_nDim,100.);
01034       
01035       for (Int_t j=0; j<_nDim; j++) {
01036         if(!doInt[j]) continue;
01037 
01038         if ((x[j]>bi->xVarLoM3s[j] && x[j]<bi->xVarLoP3s[j]) && x[j]<(bi->xVarLo[j]+bi->xVarHi[j])/2.) 
01039           chi[j] = (x[j]-bi->xVarLo[j])/weight[j];
01040         else if ((x[j]>bi->xVarHiM3s[j] && x[j]<bi->xVarHiP3s[j]) && x[j]>(bi->xVarLo[j]+bi->xVarHi[j])/2.) 
01041           chi[j] = (bi->xVarHi[j]-x[j])/weight[j];
01042 
01043         if (chi[j]>0) // inVarRange
01044           prob *= (0.5 + TMath::Erf(fabs(chi[j])/sqrt(2.))/2.);
01045         else // outside Var range
01046           prob *= (0.5 - TMath::Erf(fabs(chi[j])/sqrt(2.))/2.);
01047       }
01048 
01049       norm += prob * _wMap[_idx[bi->sIdcs[i]]];    
01050     } 
01051     
01052     cxcoutD(Eval) << "RooNDKeysPdf::analyticalIntegral() : Final normalization : " << norm << " " << bi->nEventsBW << endl;
01053     return norm;
01054   }
01055 }
01056 
01057 

Generated on Tue Jul 5 14:55:21 2011 for ROOT_528-00b_version by  doxygen 1.5.1