ROOT_528-00b_version: roofit/roofit/src/RooKeysPdf.cxx Source File

00001 /*****************************************************************************
00002  * Project: RooFit                                                           *
00003  * Package: RooFitModels                                                     *
00004  * @(#)root/roofit:$Id: RooKeysPdf.cxx 37505 2010-12-10 13:46:32Z wouter $
00005  * Authors:                                                                  *
00006  *   GR, Gerhard Raven,   UC San Diego,        raven@slac.stanford.edu       *
00007  *   DK, David Kirkby,    UC Irvine,         dkirkby@uci.edu                 *
00008  *   WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu       *
00009  *                                                                           *
00010  * Copyright (c) 2000-2005, Regents of the University of California          *
00011  *                          and Stanford University. All rights reserved.    *
00012  *                                                                           *
00013  * Redistribution and use in source and binary forms,                        *
00014  * with or without modification, are permitted according to the terms        *
00015  * listed in LICENSE (http://roofit.sourceforge.net/license.txt)             *
00016  *****************************************************************************/
00017 #include "RooFit.h"
00018 
00019 #include <math.h>
00020 #include "Riostream.h"
00021 #include "TMath.h"
00022 
00023 #include "RooKeysPdf.h"
00024 #include "RooAbsReal.h"
00025 #include "RooRealVar.h"
00026 #include "RooRandom.h"
00027 #include "RooDataSet.h"
00028 
00029 ClassImp(RooKeysPdf)
00030 
00031 
00032 //////////////////////////////////////////////////////////////////////////////
00033 //
00034 // BEGIN_HTML
00035 // Class RooKeysPdf implements a one-dimensional kernel estimation p.d.f which model the distribution
00036 // of an arbitrary input dataset as a superposition of Gaussian kernels, one for each data point,
00037 // each contributing 1/N to the total integral of the p.d.f.
00038 // <p>
00039 // If the 'adaptive mode' is enabled, the width of the Gaussian is adaptively calculated from the
00040 // local density of events, i.e. narrow for regions with high event density to preserve details and
00041 // wide for regions with log event density to promote smoothness. The details of the general algorithm
00042 // are described in the following paper: 
00043 // <p>
00044 // Cranmer KS, Kernel Estimation in High-Energy Physics.  
00045 //             Computer Physics Communications 136:198-207,2001 - e-Print Archive: hep ex/0011057
00046 // <p>
00047 // END_HTML
00048 //
00049 
00050 
00051 //_____________________________________________________________________________
00052   RooKeysPdf::RooKeysPdf() : _nEvents(0), _dataPts(0), _dataWgts(0), _weights(0), _sumWgt(0),
00053                              _mirrorLeft(kFALSE), _mirrorRight(kFALSE), 
00054                              _asymLeft(kFALSE), _asymRight(kFALSE)
00055 { 
00056   // coverity[UNINIT_CTOR]
00057 }
00058 
00059 
00060 //_____________________________________________________________________________
00061 RooKeysPdf::RooKeysPdf(const char *name, const char *title,
00062                        RooAbsReal& x, RooDataSet& data,
00063                        Mirror mirror, Double_t rho) :
00064   RooAbsPdf(name,title),
00065   _x("x","Dependent",this,x),
00066   _nEvents(0),
00067   _dataPts(0),
00068   _dataWgts(0),
00069   _weights(0),
00070   _mirrorLeft(mirror==MirrorLeft || mirror==MirrorBoth || mirror==MirrorLeftAsymRight),
00071   _mirrorRight(mirror==MirrorRight || mirror==MirrorBoth || mirror==MirrorAsymLeftRight),
00072   _asymLeft(mirror==MirrorAsymLeft || mirror==MirrorAsymLeftRight || mirror==MirrorAsymBoth),
00073   _asymRight(mirror==MirrorAsymRight || mirror==MirrorLeftAsymRight || mirror==MirrorAsymBoth),
00074   _rho(rho)
00075 {
00076   // cache stuff about x
00077   snprintf(_varName, 128,"%s", x.GetName());
00078   RooRealVar real= (RooRealVar&)(_x.arg());
00079   _lo = real.getMin();
00080   _hi = real.getMax();
00081   _binWidth = (_hi-_lo)/(_nPoints-1);
00082 
00083   // form the lookup table
00084   LoadDataSet(data);
00085 }
00086 
00087 
00088 
00089 //_____________________________________________________________________________
00090 RooKeysPdf::RooKeysPdf(const RooKeysPdf& other, const char* name):
00091   RooAbsPdf(other,name), _x("x",this,other._x), _nEvents(other._nEvents),
00092   _dataPts(0), _dataWgts(0), _weights(0), _sumWgt(0),
00093   _mirrorLeft( other._mirrorLeft ), _mirrorRight( other._mirrorRight ),
00094   _asymLeft(other._asymLeft), _asymRight(other._asymRight),
00095   _rho( other._rho ) {
00096 
00097   // cache stuff about x
00098   snprintf(_varName, 128, "%s", other._varName );
00099   _lo = other._lo;
00100   _hi = other._hi;
00101   _binWidth = other._binWidth;
00102 
00103   // copy over data and weights... not necessary, commented out for speed
00104 //    _dataPts = new Double_t[_nEvents];
00105 //    _weights = new Double_t[_nEvents];  
00106 //    for (Int_t i= 0; i<_nEvents; i++) {
00107 //      _dataPts[i]= other._dataPts[i];
00108 //      _weights[i]= other._weights[i];
00109 //    }
00110 
00111   // copy over the lookup table
00112   for (Int_t i= 0; i<_nPoints+1; i++)
00113     _lookupTable[i]= other._lookupTable[i];
00114   
00115 }
00116 
00117 
00118 //_____________________________________________________________________________
00119 RooKeysPdf::~RooKeysPdf() {
00120   delete[] _dataPts;
00121   delete[] _dataWgts;
00122   delete[] _weights;
00123 }
00124 
00125 
00126 void
00127 
00128 //_____________________________________________________________________________
00129 RooKeysPdf::LoadDataSet( RooDataSet& data) {
00130   delete[] _dataPts;
00131   delete[] _dataWgts;
00132   delete[] _weights;
00133 
00134   // make new arrays for data and weights to fill
00135   _nEvents= (Int_t)data.numEntries();
00136   if (_mirrorLeft) _nEvents += data.numEntries();
00137   if (_mirrorRight) _nEvents += data.numEntries();
00138 
00139   _dataPts  = new Double_t[_nEvents];
00140   _dataWgts = new Double_t[_nEvents];
00141   _weights  = new Double_t[_nEvents];
00142   _sumWgt = 0 ;
00143 
00144   Double_t x0(0);
00145   Double_t x1(0);
00146   Double_t x2(0);
00147 
00148   Int_t i, idata=0;
00149   for (i=0; i<data.numEntries(); i++) {
00150     const RooArgSet *values= data.get(i);
00151     RooRealVar real= (RooRealVar&)(values->operator[](_varName));
00152 
00153     _dataPts[idata]= real.getVal();
00154     _dataWgts[idata] = data.weight() ;
00155     x0 += _dataWgts[idata] ; x1+=_dataWgts[idata]*_dataPts[idata]; x2+=_dataWgts[idata]*_dataPts[idata]*_dataPts[idata];
00156     idata++;
00157     _sumWgt+= data.weight() ;
00158 
00159     if (_mirrorLeft) {
00160       _dataPts[idata]= 2*_lo - real.getVal();
00161       _dataWgts[idata]= data.weight() ;
00162       _sumWgt+= data.weight() ;
00163       idata++;
00164     }
00165 
00166     if (_mirrorRight) {
00167       _dataPts[idata]  = 2*_hi - real.getVal();
00168       _dataWgts[idata] = data.weight() ;
00169       _sumWgt+= data.weight() ;
00170       idata++;
00171     }
00172   }
00173 
00174   Double_t meanv=x1/x0;
00175   Double_t sigmav=sqrt(x2/x0-meanv*meanv);
00176   Double_t h=TMath::Power(Double_t(4)/Double_t(3),0.2)*TMath::Power(_nEvents,-0.2)*_rho;
00177   Double_t hmin=h*sigmav*sqrt(2.)/10;
00178   Double_t norm=h*sqrt(sigmav)/(2.0*sqrt(3.0));
00179 
00180   _weights=new Double_t[_nEvents];
00181   for(Int_t j=0;j<_nEvents;++j) {
00182     _weights[j]=norm/sqrt(g(_dataPts[j],h*sigmav));
00183     if (_weights[j]<hmin) _weights[j]=hmin;
00184   }
00185   
00186   for (i=0;i<_nPoints+1;++i) 
00187     _lookupTable[i]=evaluateFull( _lo+Double_t(i)*_binWidth );
00188 
00189   
00190 }
00191 
00192 
00193 
00194 //_____________________________________________________________________________
00195 Double_t RooKeysPdf::evaluate() const {
00196   Int_t i = (Int_t)floor((Double_t(_x)-_lo)/_binWidth);
00197   if (i<0) {
00198     cerr << "got point below lower bound:"
00199          << Double_t(_x) << " < " << _lo
00200          << " -- performing linear extrapolation..." << endl;
00201     i=0;
00202   }
00203   if (i>_nPoints-1) {
00204     cerr << "got point above upper bound:"
00205          << Double_t(_x) << " > " << _hi
00206          << " -- performing linear extrapolation..." << endl;
00207     i=_nPoints-1;
00208   }
00209   Double_t dx = (Double_t(_x)-(_lo+i*_binWidth))/_binWidth;
00210   
00211   // for now do simple linear interpolation.
00212   // one day replace by splines...
00213   return (_lookupTable[i]+dx*(_lookupTable[i+1]-_lookupTable[i]));
00214 }
00215 
00216 
00217 //_____________________________________________________________________________
00218 Double_t RooKeysPdf::evaluateFull( Double_t x ) const {
00219   Double_t y=0;
00220 
00221   for (Int_t i=0;i<_nEvents;++i) {
00222     Double_t chi=(x-_dataPts[i])/_weights[i];
00223     y+=_dataWgts[i]*exp(-0.5*chi*chi)/_weights[i];
00224 
00225     // if mirroring the distribution across either edge of
00226     // the range ("Boundary Kernels"), pick up the additional
00227     // contributions
00228 //      if (_mirrorLeft) {
00229 //        chi=(x-(2*_lo-_dataPts[i]))/_weights[i];
00230 //        y+=exp(-0.5*chi*chi)/_weights[i];
00231 //      }
00232     if (_asymLeft) {
00233       chi=(x-(2*_lo-_dataPts[i]))/_weights[i];
00234       y-=_dataWgts[i]*exp(-0.5*chi*chi)/_weights[i];
00235     }
00236 //      if (_mirrorRight) {
00237 //        chi=(x-(2*_hi-_dataPts[i]))/_weights[i];
00238 //        y+=exp(-0.5*chi*chi)/_weights[i];
00239 //      }
00240     if (_asymRight) {
00241       chi=(x-(2*_hi-_dataPts[i]))/_weights[i];
00242       y-=_dataWgts[i]*exp(-0.5*chi*chi)/_weights[i];
00243     }
00244   }
00245   
00246   static const Double_t sqrt2pi(sqrt(2*TMath::Pi()));  
00247   return y/(sqrt2pi*_sumWgt);
00248 }
00249 
00250 
00251 //_____________________________________________________________________________
00252 Double_t RooKeysPdf::g(Double_t x,Double_t sigmav) const {
00253   
00254   Double_t c=Double_t(1)/(2*sigmav*sigmav);
00255 
00256   Double_t y=0;
00257   for (Int_t i=0;i<_nEvents;++i) {
00258     Double_t r=x-_dataPts[i];
00259     y+=exp(-c*r*r);
00260   }
00261   
00262   static const Double_t sqrt2pi(sqrt(2*TMath::Pi()));  
00263   return y/(sigmav*sqrt2pi*_nEvents);
00264 }