00001 // @(#)root/tmva $Id: GiniIndex.cxx 33928 2010-06-15 16:19:31Z stelzer $ 00002 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss 00003 00004 /********************************************************************************** 00005 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * 00006 * Package: TMVA * 00007 * Class : TMVA::GiniIndex * 00008 * Web : http://tmva.sourceforge.net * 00009 * * 00010 * Description: Implementation of the GiniIndex as separation criterion * 00011 * Large Gini Indices (maximum 0.5) mean , that the sample is well * 00012 * mixed (same amount of signal and bkg) * 00013 * bkg. Small Indices mean, well separated. * 00014 * general defniniton: * 00015 * Gini(Sample M) = 1 - (c(1)/N)^2 - (c(2)/N)^2 .... - (c(k)/N)^2 * 00016 * Where: M is a smaple of whatever N elements (events) * 00017 * that belong to K different classes * 00018 * c(k) is the number of elements that belong to class k * 00019 * for just Signal and Background classes this boils down to: * 00020 * Gini(Sample) = 2s*b/(s+b)^2 * 00021 * * 00022 * Authors (alphabetical): * 00023 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland * 00024 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany * 00025 * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada * 00026 * * 00027 * Copyright (c) 2005: * 00028 * CERN, Switzerland * 00029 * U. of Victoria, Canada * 00030 * Heidelberg U., Germany * 00031 * * 00032 * Redistribution and use in source and binary forms, with or without * 00033 * modification, are permitted according to the terms listed in LICENSE * 00034 * (http://tmva.sourceforge.net/LICENSE) * 00035 **********************************************************************************/ 00036 00037 //_______________________________________________________________________ 00038 // 00039 // Implementation of the GiniIndex as separation criterion 00040 // 00041 //_______________________________________________________________________ 00042 00043 #include "TMVA/GiniIndex.h" 00044 00045 ClassImp(TMVA::GiniIndex) 00046 00047 //_______________________________________________________________________ 00048 Double_t TMVA::GiniIndex::GetSeparationIndex( const Double_t &s, const Double_t &b ) 00049 { 00050 // Gini(Sample M) = 1 - (c(1)/N)^2 - (c(2)/N)^2 .... - (c(k)/N)^2 00051 // Where: M is a smaple of whatever N elements (events) 00052 // that belong to K different classes 00053 // c(k) is the number of elements that belong to class k 00054 // for just Signal and Background classes this boils down to: 00055 // Gini(Sample) = 2s*b/(s+b)^2 ( = 2 * purity * (1-purity) ) 00056 // 00057 // !! what we use here is 2*Gini.. as for the later use the factor 00058 // 2 is irrelevant and hence I'd like to save this calculation 00059 00060 if (s+b <= 0) return 0; 00061 if (s<=0 || b <=0) return 0; 00062 else return s*b/(s+b)/(s+b); 00063 } 00064 00065