KDEKernel.cxx

Go to the documentation of this file.
00001 // @(#)root/tmva $Id: KDEKernel.cxx 29195 2009-06-24 10:39:49Z brun $ 
00002 // Author: Asen Christov
00003 
00004 /**********************************************************************************
00005  * Project: TMVA - a Root-integrated toolkit for multivariate Data analysis       *
00006  * Package: TMVA                                                                  *
00007  * Class  : TMVA::KDEKernel                                                       *
00008  * Web    : http://tmva.sourceforge.net                                           *
00009  *                                                                                *
00010  * Description:                                                                   *
00011  *      Implementation (see header for description)                               *
00012  *                                                                                *
00013  * Authors (alphabetical):                                                        *
00014  *      Asen Christov   <christov@physik.uni-freiburg.de> - Freiburg U., Germany  *
00015  *                                                                                *
00016  * Copyright (c) 2005:                                                            *
00017  *      CERN, Switzerland                                                         * 
00018  *      MPI-K Heidelberg, Germany                                                 * 
00019  *      Freiburg U., Germany                                                      * 
00020  *                                                                                *
00021  * Redistribution and use in source and binary forms, with or without             *
00022  * modification, are permitted according to the terms listed in LICENSE           *
00023  * (http://tmva.sourceforge.net/LICENSE)                                          *
00024  **********************************************************************************/
00025 
00026 #include "TH1.h"
00027 #include "TH1F.h"
00028 #include "TF1.h"
00029 
00030 #if ROOT_VERSION_CODE >= 364802
00031 #ifndef ROOT_TMathBase
00032 #include "TMathBase.h"
00033 #endif
00034 #else
00035 #ifndef ROOT_TMath
00036 #include "TMath.h"
00037 #endif
00038 #endif
00039 
00040 #include "TMVA/KDEKernel.h"
00041 #include "TMVA/MsgLogger.h"
00042 
00043 ClassImp(TMVA::KDEKernel)
00044 
00045 //_______________________________________________________________________
00046 TMVA::KDEKernel::KDEKernel( EKernelIter kiter, const TH1 *hist, Float_t lower_edge, Float_t upper_edge,
00047                             EKernelBorder kborder, Float_t FineFactor )
00048    : fSigma( 1. ),
00049      fIter ( kiter ),
00050      fLowerEdge (lower_edge ),
00051      fUpperEdge (upper_edge),
00052      fFineFactor ( FineFactor ),
00053      fKernel_integ ( 0 ),
00054      fKDEborder ( kborder ),
00055      fLogger( new MsgLogger("KDEKernel") )
00056 {
00057    // constructor
00058    // sanity check
00059    if (hist == NULL) {
00060       Log() << kFATAL << "Called without valid histogram pointer (hist)!" << Endl;
00061    } 
00062 
00063    fHist          = (TH1F*)hist->Clone();
00064    fFirstIterHist = (TH1F*)hist->Clone();
00065    fFirstIterHist->Reset(); // now it is empty but with the proper binning
00066    fSigmaHist     = (TH1F*)hist->Clone();
00067    fSigmaHist->Reset(); // now fSigmaHist is empty but with the proper binning
00068    
00069    fHiddenIteration=false;
00070 }
00071 
00072 //_______________________________________________________________________
00073 TMVA::KDEKernel::~KDEKernel()
00074 {
00075    // destructor
00076    if (fHist           != NULL) delete fHist;
00077    if (fFirstIterHist  != NULL) delete fFirstIterHist;
00078    if (fSigmaHist      != NULL) delete fSigmaHist;
00079    if (fKernel_integ   != NULL) delete fKernel_integ;
00080    delete fLogger;
00081 }
00082 
00083 //_______________________________________________________________________
00084 Double_t GaussIntegral(Double_t *x, Double_t *par)
00085 {
00086    // when using Gaussian as Kernel function this is faster way to calculate the integrals
00087    if ( (par[1]<=0) || (x[0]>x[1])) return -1.;
00088   
00089    Float_t xs1=(x[0]-par[0])/par[1];
00090    Float_t xs2=(x[1]-par[0])/par[1];
00091   
00092    if (xs1==0) {
00093       if (xs2==0) return 0.;
00094       if (xs2>0 ) return 0.5*TMath::Erf(xs2);
00095    }
00096    if (xs2==0) return 0.5*TMath::Erf(TMath::Abs(xs1));
00097    if (xs1>0) return 0.5*(TMath::Erf(xs2)-TMath::Erf(xs1));
00098    if (xs1<0) {
00099       if (xs2>0 ) return 0.5*(TMath::Erf(xs2)+TMath::Erf(TMath::Abs(xs1)));
00100       else return 0.5*(TMath::Erf(TMath::Abs(xs1))-TMath::Erf(TMath::Abs(xs2)));
00101    }
00102    return -1.;
00103 }
00104 
00105 //_______________________________________________________________________
00106 void TMVA::KDEKernel::SetKernelType( EKernelType ktype )
00107 {
00108    // fIter == 1 ---> nonadaptive KDE
00109    // fIter == 2 ---> adaptive KDE
00110  
00111    if (ktype == kGauss) {
00112 
00113       // i.e. gauss kernel
00114       //
00115       // this is going to be done for both (nonadaptive KDE and adaptive KDE)
00116       // for nonadaptive KDE this is the only = final thing to do
00117       // for adaptive KDE this is going to be used in the first (hidden) iteration
00118       fKernel_integ = new TF1("GaussIntegral",GaussIntegral,fLowerEdge,fUpperEdge,4); 
00119       fSigma = ( TMath::Sqrt(2.0)
00120                  *TMath::Power(4./3., 0.2)
00121                  *fHist->GetRMS()
00122                  *TMath::Power(fHist->Integral(), -0.2) ); 
00123       // this formula for sigma is valid for Gaussian Kernel function (nonadaptive KDE).
00124       // formula found in:
00125       // Multivariate Density Estimation, Theory, Practice and Visualization D. W. SCOTT, 1992 New York, Wiley
00126       if (fSigma <= 0 ) {
00127          Log() << kFATAL << "<SetKernelType> KDE sigma has invalid value ( <=0 ) !" << Endl;
00128       }
00129    }
00130 
00131    if (fIter == kAdaptiveKDE) {
00132 
00133       // this is done only for adaptive KDE      
00134       
00135       // fill a temporary histo using nonadaptive KDE
00136       // this histo is identical with the final output when using only nonadaptive KDE
00137       fHiddenIteration=true;
00138       
00139       Float_t histoLowEdge=fHist->GetBinLowEdge(1);
00140       Float_t histoUpperEdge=fHist->GetBinLowEdge(fHist->GetNbinsX()+1);
00141 
00142       for (Int_t i=1;i<fHist->GetNbinsX();i++) {
00143          // loop over the bins of the original histo
00144          for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
00145             // loop over the bins of the PDF histo and fill it
00146             fFirstIterHist->AddBinContent(j,fHist->GetBinContent(i)*
00147                                     this->GetBinKernelIntegral(fFirstIterHist->GetBinLowEdge(j),
00148                                                                fFirstIterHist->GetBinLowEdge(j+1),
00149                                                                fHist->GetBinCenter(i),
00150                                                                i)
00151                                     );
00152          }
00153          if (fKDEborder == 3) { // mirror the saples and fill them again
00154          // in order to save time do the mirroring only for the first (the lowwer) 1/5 of the histo to the left; 
00155          // and the last (the higher) 1/5 of the histo to the right.
00156          // the middle part of the histo, which is not mirrored, has no influence on the border effects anyway ...
00157             if (i < fHist->GetNbinsX()/5  ) {  // the first (the lowwer) 1/5 of the histo
00158                for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
00159                // loop over the bins of the PDF histo and fill it
00160                fFirstIterHist->AddBinContent(j,fHist->GetBinContent(i)*
00161                                        this->GetBinKernelIntegral(fFirstIterHist->GetBinLowEdge(j),
00162                                                                fFirstIterHist->GetBinLowEdge(j+1),
00163                                                                2*histoLowEdge-fHist->GetBinCenter(i), // mirroring to the left
00164                                                                i)
00165                                       );
00166                }
00167             }
00168             if (i > 4*fHist->GetNbinsX()/5) { // the last (the higher) 1/5 of the histo
00169                for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
00170                // loop over the bins of the PDF histo and fill it
00171                fFirstIterHist->AddBinContent(j,fHist->GetBinContent(i)*
00172                                        this->GetBinKernelIntegral(fFirstIterHist->GetBinLowEdge(j),
00173                                                                fFirstIterHist->GetBinLowEdge(j+1),
00174                                                                2*histoUpperEdge-fHist->GetBinCenter(i), // mirroring to the right
00175                                                                i)
00176                                       );
00177                }            
00178             }
00179          }
00180       }
00181       
00182       fFirstIterHist->SetEntries(fHist->GetEntries()); //set the number of entries to be the same as the original histo
00183 
00184       // do "function like" integration = sum of (bin_width*bin_content):
00185       Float_t integ=0;
00186       for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) 
00187          integ+=fFirstIterHist->GetBinContent(j)*fFirstIterHist->GetBinWidth(j);
00188       fFirstIterHist->Scale(1./integ);
00189       
00190       fHiddenIteration=false;
00191 
00192       // OK, now we have the first iteration, 
00193       // next: calculate the Sigmas (Widths) for the second (adaptive) iteration 
00194       // based on the output of the first iteration 
00195       // these Sigmas will be stored in histo called fSigmaHist
00196       for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
00197          // loop over the bins of the PDF histo and fill fSigmaHist
00198          if (fSigma*TMath::Sqrt(1.0/fFirstIterHist->GetBinContent(j)) <= 0 ) {
00199             Log() << kFATAL << "<SetKernelType> KDE sigma has invalid value ( <=0 ) !" << Endl;
00200          }
00201          
00202          fSigmaHist->SetBinContent(j,fFineFactor*fSigma/TMath::Sqrt(fFirstIterHist->GetBinContent(j)));
00203       }
00204    }
00205 
00206    if (fKernel_integ ==0 ) {
00207       Log() << kFATAL << "KDE kernel not correctly initialized!" << Endl;
00208    }
00209 }
00210 
00211 //_______________________________________________________________________
00212 Float_t TMVA::KDEKernel::GetBinKernelIntegral( Float_t lowr, Float_t highr, Float_t mean, Int_t binnum )
00213 {
00214    // calculates the integral of the Kernel
00215    if ((fIter == kNonadaptiveKDE) || fHiddenIteration  ) 
00216       fKernel_integ->SetParameters(mean,fSigma); // non adaptive KDE
00217    else if ((fIter == kAdaptiveKDE) && !fHiddenIteration ) 
00218       fKernel_integ->SetParameters(mean,fSigmaHist->GetBinContent(binnum)); // adaptive KDE
00219 
00220    if ( fKDEborder == 2 ) {  // renormalization of the kernel function
00221       Float_t renormFactor=1.0/fKernel_integ->Eval(fLowerEdge,fUpperEdge);
00222       return (renormFactor*fKernel_integ->Eval(lowr,highr));
00223    }
00224                                    
00225    // the RenormFactor takes care aboud the "border" effects, i.e. sets the 
00226    // integral to one inside the histogram borders
00227    return (fKernel_integ->Eval(lowr,highr));  
00228 }
00229 

Generated on Tue Jul 5 15:25:00 2011 for ROOT_528-00b_version by  doxygen 1.5.1