ROOT_528-00b_version: tmva/src/LDA.cxx Source File

00001 // $Id: LDA.cxx 29195 2009-06-24 10:39:49Z brun $
00002 /**********************************************************************************
00003  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
00004  * Package: TMVA                                                                  *
00005  * Class  : LDA                                                                   *
00006  * Web    : http://tmva.sourceforge.net                                           *
00007  *                                                                                *
00008  * Description:                                                                   *
00009  *      Local LDA method used by MethodKNN to compute MVA value.                  *
00010  *      This is experimental code under development. This class computes          *
00011  *      parameters of signal and background PDFs using Gaussian aproximation.     *
00012  *                                                                                *
00013  * Author:                                                                        *
00014  *      John Alison John.Alison@cern.ch - University of Pennsylvania, USA         *
00015  *                                                                                *
00016  * Copyright (c) 2007:                                                            *
00017  *      CERN, Switzerland                                                         * 
00018  *      MPI-K Heidelberg, Germany                                                 * 
00019  *      University of Pennsylvania, USA                                           *
00020  *                                                                                *
00021  * Redistribution and use in source and binary forms, with or without             *
00022  * modification, are permitted according to the terms listed in LICENSE           *
00023  * (http://tmva.sourceforge.net/LICENSE)                                          *
00024  **********************************************************************************/
00025 
00026 // Local
00027 #include "TMVA/LDA.h"
00028 
00029 // C/C++
00030 #include <iostream>
00031 
00032 #ifndef ROOT_TDecompSVD
00033 #include "TDecompSVD.h"       
00034 #endif
00035 #ifndef ROOT_TMatrixF
00036 #include "TMatrixF.h"       
00037 #endif
00038 #ifndef ROOT_TMath
00039 #include "TMath.h"       
00040 #endif
00041 
00042 #ifndef ROOT_TMVA_Types
00043 #include "TMVA/Types.h"       
00044 #endif
00045 #ifndef ROOT_TMVA_MsgLogger
00046 #include "TMVA/MsgLogger.h"       
00047 #endif
00048 
00049 //_______________________________________________________________________
00050 TMVA::LDA::LDA( Float_t tolerence, Bool_t debug ) 
00051    : fTolerence(tolerence),
00052      fNumParams(0),
00053      fSigma(0),
00054      fSigmaInverse(0),
00055      fDebug(debug),
00056      fLogger( new MsgLogger("LDA", (debug?kINFO:kDEBUG)) )
00057 {
00058    // constructor
00059 }
00060 
00061 //_______________________________________________________________________
00062 TMVA::LDA::~LDA()
00063 {
00064    // destructor
00065    delete fLogger;
00066 }
00067 
00068 //_______________________________________________________________________
00069 void TMVA::LDA::Initialize(const LDAEvents& inputSignalEvents, const LDAEvents& inputBackgroundEvents)
00070 {
00071    // Create LDA matrix using local events found by knn method
00072    Log() << kDEBUG << "There are: " << inputSignalEvents.size() << " input signal events " << Endl;
00073    Log() << kDEBUG << "There are: " << inputBackgroundEvents.size() << " input background events " << Endl;
00074 
00075    fNumParams = inputSignalEvents[0].size();
00076   
00077    UInt_t numSignalEvents = inputSignalEvents.size();
00078    UInt_t numBackEvents  = inputBackgroundEvents.size();
00079    UInt_t numTotalEvents = numSignalEvents + numBackEvents;
00080    fEventFraction[0] = (Float_t)numBackEvents/numTotalEvents;
00081    fEventFraction[1] = (Float_t)numSignalEvents/numTotalEvents;
00082    UInt_t K = 2;
00083 
00084    // get the mean of the signal and background for each parameter
00085    std::vector<Float_t> m_muSignal (fNumParams,0.0);
00086    std::vector<Float_t> m_muBackground (fNumParams,0.0);
00087    for (UInt_t param=0; param < fNumParams; ++param) {
00088       for (UInt_t eventNumber=0; eventNumber < numSignalEvents; ++eventNumber)
00089          m_muSignal[param] += inputSignalEvents[eventNumber][param];
00090       for (UInt_t eventNumber=0; eventNumber < numBackEvents; ++eventNumber)
00091          m_muBackground[param] += inputBackgroundEvents[eventNumber][param]/numBackEvents;
00092       m_muSignal[param] /= numSignalEvents;
00093       m_muBackground[param] /= numBackEvents;
00094    }
00095    fMu[0] = m_muBackground;
00096    fMu[1] = m_muSignal;
00097 
00098    if (fDebug) {
00099       Log() << kDEBUG << "the signal means" << Endl;
00100       for (UInt_t param=0; param < fNumParams; ++param)
00101          Log() << kDEBUG << m_muSignal[param] << Endl;
00102       Log() << kDEBUG << "the background means" << Endl;
00103       for (UInt_t param=0; param < inputBackgroundEvents[0].size(); ++param)
00104          Log() << kDEBUG << m_muBackground[param] << Endl;
00105    }
00106   
00107    // sigma is a sum of two symmetric matrices, one for the background and one for signal
00108    // get the matricies seperately (def not be the best way to do it!)
00109   
00110    // the signal, background, and total matrix
00111    TMatrixF sigmaSignal(fNumParams, fNumParams);
00112    TMatrixF sigmaBack(fNumParams, fNumParams);
00113    if (fSigma!=0) delete fSigma;
00114    fSigma = new TMatrixF(fNumParams, fNumParams);
00115    for (UInt_t row=0; row < fNumParams; ++row) {
00116       for (UInt_t col=0; col < fNumParams; ++col) {
00117          sigmaSignal[row][col] = 0;
00118          sigmaBack[row][col] = 0;
00119          (*fSigma)[row][col] = 0;
00120       }
00121    }
00122 
00123    for (UInt_t eventNumber=0; eventNumber < numSignalEvents; ++eventNumber) {
00124       for (UInt_t row=0; row < fNumParams; ++row) {
00125          for (UInt_t col=0; col < fNumParams; ++col) {
00126             sigmaSignal[row][col] += (inputSignalEvents[eventNumber][row] - m_muSignal[row]) * (inputSignalEvents[eventNumber][col] - m_muSignal[col] );
00127          }
00128       }
00129    }
00130   
00131    for (UInt_t eventNumber=0; eventNumber < numBackEvents; ++eventNumber) {
00132       for (UInt_t row=0; row < fNumParams; ++row) {
00133          for (UInt_t col=0; col < fNumParams; ++col) {
00134             sigmaBack[row][col] += (inputBackgroundEvents[eventNumber][row] - m_muBackground[row]) * (inputBackgroundEvents[eventNumber][col] - m_muBackground[col] );
00135          }
00136       }
00137    }   
00138 
00139    // the total matrix 
00140    *fSigma = sigmaBack + sigmaSignal;
00141    *fSigma *= 1.0/(numTotalEvents - K);
00142   
00143    if (fDebug) {
00144       Log() << "after filling sigmaSignal" <<Endl;
00145       sigmaSignal.Print();
00146       Log() << "after filling sigmaBack" <<Endl;
00147       sigmaBack.Print();
00148       Log() << "after filling total Sigma" <<Endl;
00149       fSigma->Print();
00150    }
00151 
00152    TDecompSVD solutionSVD = TDecompSVD( *fSigma );
00153    TMatrixF   decomposed  = TMatrixF( fNumParams, fNumParams );
00154    TMatrixF diag  ( fNumParams, fNumParams );
00155    TMatrixF uTrans( fNumParams, fNumParams );
00156    TMatrixF vTrans( fNumParams, fNumParams );
00157    if (solutionSVD.Decompose()) {
00158       for (UInt_t i = 0; i< fNumParams; ++i) {
00159          if (solutionSVD.GetSig()[i] > fTolerence)
00160             diag(i,i) = solutionSVD.GetSig()[i];
00161          else
00162             diag(i,i) = fTolerence;
00163       }
00164 
00165       if (fDebug) {
00166          Log() << "the diagonal" <<Endl;
00167          diag.Print();
00168       }
00169 
00170       decomposed = solutionSVD.GetV();
00171       decomposed *= diag;
00172       decomposed *= solutionSVD.GetU();
00173     
00174       if (fDebug) {
00175          Log() << "the decomposition " <<Endl;
00176          decomposed.Print();
00177       }
00178     
00179       *fSigmaInverse = uTrans.Transpose(solutionSVD.GetU());
00180       *fSigmaInverse /= diag;
00181       *fSigmaInverse *= vTrans.Transpose(solutionSVD.GetV());
00182 
00183       if (fDebug) {
00184          Log() << "the SigmaInverse " <<Endl;
00185          fSigmaInverse->Print();
00186         
00187          Log() << "the real " <<Endl;
00188          fSigma->Invert();
00189          fSigma->Print();
00190       
00191          Bool_t problem = false;
00192          for (UInt_t i =0; i< fNumParams; ++i) {
00193             for (UInt_t j =0; j< fNumParams; ++j) {
00194                if (TMath::Abs((Float_t)(*fSigma)(i,j) - (Float_t)(*fSigmaInverse)(i,j)) > 0.01) {
00195                   Log() << "problem, i= "<< i << " j= " << j << Endl; 
00196                   Log() << "Sigma(i,j)= "<< (*fSigma)(i,j) << " SigmaInverse(i,j)= " << (*fSigmaInverse)(i,j) <<Endl; 
00197                   Log() << "The difference is : " << TMath::Abs((Float_t)(*fSigma)(i,j) - (Float_t)(*fSigmaInverse)(i,j)) <<Endl;
00198                   problem = true;
00199                }
00200             }
00201          }
00202          if (problem) Log() << kWARNING << "Problem with the inversion!" << Endl;
00203          
00204       }    
00205    }
00206 }
00207 
00208 //_______________________________________________________________________
00209 Float_t TMVA::LDA::FSub(const std::vector<Float_t>& x, Int_t k)
00210 {
00211    //
00212    // Probability value using Gaussian approximation
00213    //
00214    Float_t prefactor  = 1.0/(TMath::TwoPi()*TMath::Sqrt(fSigma->Determinant()));
00215    std::vector<Float_t> m_transPoseTimesSigmaInverse;
00216   
00217    for (UInt_t j=0; j < fNumParams; ++j) {
00218       Float_t m_temp = 0;
00219       for (UInt_t i=0; i < fNumParams; ++i) {
00220          m_temp += (x[i] - fMu[k][i]) * (*fSigmaInverse)(j,i);
00221       }
00222       m_transPoseTimesSigmaInverse.push_back(m_temp);
00223    }
00224   
00225    Float_t exponent = 0.0;
00226    for (UInt_t i=0; i< fNumParams; ++i) {
00227       exponent += m_transPoseTimesSigmaInverse[i]*(x[i] - fMu[k][i]);
00228    }
00229   
00230    exponent *= -0.5;
00231 
00232    return prefactor*TMath::Exp( exponent );
00233 }
00234 
00235 //_______________________________________________________________________
00236 Float_t TMVA::LDA::GetProb(const std::vector<Float_t>& x, Int_t k)
00237 {
00238    //
00239    // Signal probability with Gaussian approximation
00240    //
00241    Float_t m_numerator = FSub(x,k)*fEventFraction[k];
00242    Float_t m_denominator = FSub(x,0)*fEventFraction[0]+FSub(x,1)*fEventFraction[1];
00243 
00244    return m_numerator/m_denominator;
00245 }
00246 
00247 //_______________________________________________________________________
00248 Float_t TMVA::LDA::GetLogLikelihood( const std::vector<Float_t>& x, Int_t k )
00249 {
00250    //
00251    // Log likelihood function with Gaussian approximation
00252    //
00253    return TMath::Log( FSub(x,k)/FSub(x,!k) ) + TMath::Log( fEventFraction[k]/fEventFraction[!k] );
00254 }