00001 // @(#)root/tmva $Id: SeparationBase.h 37986 2011-02-04 21:42:15Z pcanal $ 00002 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss 00003 00004 /********************************************************************************** 00005 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * 00006 * Package: TMVA * 00007 * Class : SeparationBase * 00008 * Web : http://tmva.sourceforge.net * 00009 * * 00010 * Description: An interface to different separation criteria used in various * 00011 * training algorithms, as there are: * 00012 * Gini-Index, Cross Entropy, Misclassification Error, etc. * 00013 * * 00014 * There are two things: the Separation Index, and the Separation Gain * 00015 * Separation Index: * 00016 * Measure of the "purity" of a sample. If all elements (events) in the * 00017 * sample belong to the same class (e.g. signal or background), then the * 00018 * separation index is 0 (meaning 100% purity, or 0% purity, as it is * 00019 * symmetric). The index becomes maximal for perfectly mixed samples, * 00020 * e.g. purity=50%, N_signal = N_bkg * 00021 * * 00022 * Separation Gain: * 00023 * the measure of how the quality of separation of the sample increases * 00024 * by splitting the sample e.g. into a "left-node" and a "right-node" * 00025 * (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right) * 00026 * this is then the quality criterion which is optimized for when trying * 00027 * to increase the information in the system (making the best selection) * 00028 * * 00029 * * 00030 * Authors (alphabetical): * 00031 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland * 00032 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany * 00033 * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada * 00034 * * 00035 * Copyright (c) 2005: * 00036 * CERN, Switzerland * 00037 * U. 
of Victoria, Canada * 00038 * Heidelberg U., Germany * 00039 * * 00040 * Redistribution and use in source and binary forms, with or without * 00041 * modification, are permitted according to the terms listed in LICENSE * 00042 * (http://tmva.sourceforge.net/LICENSE) * 00043 **********************************************************************************/ 00044 00045 #ifndef ROOT_TMVA_SeparationBase 00046 #define ROOT_TMVA_SeparationBase 00047 00048 ////////////////////////////////////////////////////////////////////////// 00049 // // 00050 // SeparationBase // 00051 // // 00052 // An interface to calculate the "SeparationGain" for different // 00053 // separation criteria used in various training algorithms // 00054 // // 00055 // There are two things: the Separation Index, and the Separation Gain // 00056 // Separation Index: // 00057 // Measure of the "purity" of a sample. If all elements (events) in the // 00058 // sample belong to the same class (e.g. signal or background), then the // 00059 // separation index is 0 (meaning 100% purity, or 0% purity, as it is // 00060 // symmetric). The index becomes maximal for perfectly mixed samples, // 00061 // e.g. purity=50%, N_signal = N_bkg // 00062 // // 00063 // Separation Gain: // 00064 // the measure of how the quality of separation of the sample increases // 00065 // by splitting the sample e.g. 
into a "left-node" and a "right-node" // 00066 // (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right) // 00067 // this is then the quality crition which is optimized for when trying // 00068 // to increase the information in the system (making the best selection // 00069 // // 00070 ////////////////////////////////////////////////////////////////////////// 00071 00072 #ifndef ROOT_Rtypes 00073 #include "Rtypes.h" 00074 #endif 00075 00076 #ifndef ROOT_TString 00077 #include "TString.h" 00078 #endif 00079 00080 #ifndef ROOT_TMath 00081 #include "TMath.h" 00082 #endif 00083 00084 #include <limits> 00085 00086 namespace TMVA { 00087 00088 class SeparationBase { 00089 00090 public: 00091 00092 // default constructor 00093 SeparationBase(); 00094 00095 //copy constructor 00096 SeparationBase( const SeparationBase& s ); 00097 00098 // destructor 00099 virtual ~SeparationBase(){} 00100 00101 // Return the gain in separation of the original sample is splitted in two sub-samples 00102 // (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right) 00103 Double_t GetSeparationGain( const Double_t& nSelS, const Double_t& nSelB, 00104 const Double_t& nTotS, const Double_t& nTotB ); 00105 00106 // Return the separation index (a measure for "purity" of the sample") 00107 virtual Double_t GetSeparationIndex( const Double_t &s, const Double_t &b ) = 0; 00108 00109 // Return the name of the concrete Index implementation 00110 const TString& GetName() { return fName; } 00111 00112 protected: 00113 00114 TString fName; // name of the concrete Separation Index impementation 00115 00116 Double_t fPrecisionCut; 00117 00118 ClassDef(SeparationBase,0) // Interface to different separation critiera used in training algorithms 00119 }; 00120 00121 00122 } // namespace TMVA 00123 00124 #endif