00001 // @(#)root/tmva $Id: SeparationBase.cxx 37986 2011-02-04 21:42:15Z pcanal $ 00002 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss 00003 00004 /********************************************************************************** 00005 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * 00006 * Package: TMVA * 00007 * Class : SeparationBase * 00008 * Web : http://tmva.sourceforge.net * 00009 * * 00010 * Description: An interface to different separation critiera useded in various * 00011 * training algorithms, as there are: * 00012 * * 00013 * There are two things: the Separation Index, and the Separation Gain * 00014 * Separation Index: * 00015 * Measure of the "purity" of a sample. If all elements (events) in the * 00016 * sample belong to the same class (e.g. signal or backgr), than the * 00017 * separation index is 0 (meaning 100% purity (or 0% purity as it is * 00018 * symmetric. The index becomes maximal, for perfectly mixed samples * 00019 * eg. purity=50% , N_signal = N_bkg * 00020 * * 00021 * Separation Gain: * 00022 * the measure of how the quality of separation of the sample increases * 00023 * by splitting the sample e.g. into a "left-node" and a "right-node" * 00024 * (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right) * 00025 * this is then the quality crition which is optimized for when trying * 00026 * to increase the information in the system (making the best selection * 00027 * * 00028 * Authors (alphabetical): * 00029 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland * 00030 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany * 00031 * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada * 00032 * * 00033 * Copyright (c) 2005: * 00034 * CERN, Switzerland * 00035 * U. of Victoria, Canada * 00036 * Heidelberg U., Germany * 00037 * * 00038 * Redistribution and use in source and binary forms, with or without * 00039 * modification, are permitted according to the terms listed in LICENSE * 00040 * (http://ttmva.sourceforge.net/LICENSE) * 00041 **********************************************************************************/ 00042 00043 #include "TMVA/SeparationBase.h" 00044 00045 ClassImp(TMVA::SeparationBase) 00046 00047 #include <limits> 00048 #include "TMath.h" 00049 00050 00051 TMVA::SeparationBase::SeparationBase() : 00052 fName(""), 00053 fPrecisionCut(TMath::Sqrt(std::numeric_limits<double>::epsilon())) 00054 { 00055 // default constructor 00056 } 00057 00058 //copy constructor 00059 TMVA::SeparationBase::SeparationBase( const SeparationBase& s ) : 00060 fName(s.fName), 00061 fPrecisionCut(TMath::Sqrt(std::numeric_limits<double>::epsilon())) 00062 { 00063 // copy constructor 00064 } 00065 00066 //_______________________________________________________________________ 00067 Double_t TMVA::SeparationBase::GetSeparationGain(const Double_t &nSelS, const Double_t& nSelB, 00068 const Double_t& nTotS, const Double_t& nTotB) 00069 { 00070 // Separation Gain: 00071 // the measure of how the quality of separation of the sample increases 00072 // by splitting the sample e.g. into a "left-node" and a "right-node" 00073 // (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right) 00074 // this is then the quality crition which is optimized for when trying 00075 // to increase the information in the system (making the best selection 00076 00077 if ( (nTotS-nSelS)==nSelS && (nTotB-nSelB)==nSelB) return 0.; 00078 00079 Double_t parentIndex = (nTotS+nTotB) *this->GetSeparationIndex(nTotS,nTotB); 00080 00081 Double_t leftIndex = ( ((nTotS - nSelS) + (nTotB - nSelB)) 00082 * this->GetSeparationIndex(nTotS-nSelS,nTotB-nSelB) ); 00083 Double_t rightIndex = (nSelS+nSelB) * this->GetSeparationIndex(nSelS,nSelB); 00084 00085 Double_t diff = parentIndex - leftIndex - rightIndex; 00086 // if (!fInit){ 00087 fPrecisionCut = (TMath::Sqrt(std::numeric_limits<double>::epsilon())); 00088 // fInit = kTRUE; 00089 // } 00090 if(diff/parentIndex<fPrecisionCut ) return 0; 00091 00092 return diff; 00093 } 00094 00095