SeparationBase.h

Go to the documentation of this file.
00001 // @(#)root/tmva $Id: SeparationBase.h 37986 2011-02-04 21:42:15Z pcanal $
00002 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss 
00003 
00004 /**********************************************************************************
00005  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
00006  * Package: TMVA                                                                  *
00007  * Class  : SeparationBase                                                        *
00008  * Web    : http://tmva.sourceforge.net                                           *
00009  *                                                                                *
00010  * Description: An interface to different separation criteria used in various     *
00011  *              training algorithms, as there are:                                *
00012  *              Gini-Index, Cross Entropy, Misclassification Error, etc.          *
00013  *                                                                                *
00014  *          There are two things: the Separation Index, and the Separation Gain   *
00015  *          Separation Index:                                                     *
00016  *          Measure of the "purity" of a sample. If all elements (events) in the  *
00017  *          sample belong to the same class (e.g. signal or backgr), then the     *
00018  *          separation index is 0 (meaning 100% purity, or 0% purity, as it is    *
00019  *          symmetric). The index becomes maximal for perfectly mixed samples,    *
00020  *          e.g. purity=50%, N_signal = N_bkg                                     *
00021  *                                                                                *
00022  *          Separation Gain:                                                      *
00023  *          the measure of how the quality of separation of the sample increases  *
00024  *          by splitting the sample e.g. into a "left-node" and a "right-node"    *
00025  *          (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right)  *
00026  *          this is then the quality criterion that is optimized when trying to   *
00027  *          increase the information in the system (making the best selection)    *
00028  *                                                                                *
00029  *                                                                                *
00030  * Authors (alphabetical):                                                        *
00031  *      Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland              *
00032  *      Helge Voss      <Helge.Voss@cern.ch>     - MPI-K Heidelberg, Germany      *
00033  *      Kai Voss        <Kai.Voss@cern.ch>       - U. of Victoria, Canada         *
00034  *                                                                                *
00035  * Copyright (c) 2005:                                                            *
00036  *      CERN, Switzerland                                                         * 
00037  *      U. of Victoria, Canada                                                    * 
00038  *      Heidelberg U., Germany                                                    * 
00039  *                                                                                *
00040  * Redistribution and use in source and binary forms, with or without             *
00041  * modification, are permitted according to the terms listed in LICENSE           *
00042  * (http://tmva.sourceforge.net/LICENSE)                                          *
00043  **********************************************************************************/
00044 
00045 #ifndef ROOT_TMVA_SeparationBase
00046 #define ROOT_TMVA_SeparationBase
00047 
00048 //////////////////////////////////////////////////////////////////////////
00049 //                                                                      //
00050 // SeparationBase                                                       //
00051 //                                                                      //
00052 // An interface to calculate the "SeparationGain" for different         //
00053 // separation criteria used in various training algorithms              //
00054 //                                                                      //
00055 // There are two things: the Separation Index, and the Separation Gain  //
00056 // Separation Index:                                                    //
00057 // Measure of the "purity" of a sample. If all elements (events) in the //
00058 // sample belong to the same class (e.g. signal or backgr), then the    //
00059 // separation index is 0 (meaning 100% purity, or 0% purity, as it is   //
00060 // symmetric). The index becomes maximal for perfectly mixed samples,   //
00061 // e.g. purity=50%, N_signal = N_bkg                                    //
00062 //                                                                      //
00063 // Separation Gain:                                                     //
00064 // the measure of how the quality of separation of the sample increases //
00065 // by splitting the sample e.g. into a "left-node" and a "right-node"   //
00066 // (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right) //
00067 // this is then the quality criterion that is optimized when trying to  //
00068 // increase the information in the system (making the best selection)   //
00069 //                                                                      //
00070 //////////////////////////////////////////////////////////////////////////
00071 
00072 #ifndef ROOT_Rtypes
00073 #include "Rtypes.h"
00074 #endif
00075 
00076 #ifndef ROOT_TString
00077 #include "TString.h"
00078 #endif
00079 
00080 #ifndef ROOT_TMath
00081 #include "TMath.h"
00082 #endif
00083 
00084 #include <limits>
00085 
00086 namespace TMVA {
00087 
00088    class SeparationBase {
00089       // Abstract base class: GetSeparationIndex() is pure virtual and must be supplied by a derived class.
00090    public:
00091 
00092       // default constructor (defined outside this header)
00093       SeparationBase();
00094 
00095       // copy constructor (defined outside this header)
00096       SeparationBase( const SeparationBase& s );
00097 
00098       // destructor: virtual with an empty body
00099       virtual ~SeparationBase(){}
00100       // NOTE(review): nSelS/nSelB and nTotS/nTotB are presumably the selected and total signal (S) / background (B) counts - confirm in the implementation file
00101       // Return the gain in separation when the original sample is split into two sub-samples:
00102       // (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right)
00103       Double_t GetSeparationGain( const Double_t& nSelS, const Double_t& nSelB,
00104                                   const Double_t& nTotS, const Double_t& nTotB );
00105 
00106       // Return the separation index (a measure of the "purity" of the sample); pure virtual
00107       virtual Double_t GetSeparationIndex( const Double_t &s, const Double_t &b ) = 0;
00108 
00109       // Return the name of the concrete Index implementation (returns a reference to fName; not declared const)
00110       const TString& GetName() { return fName; }
00111 
00112    protected:
00113 
00114       TString fName;  // name of the concrete Separation Index impementation
00115       // NOTE(review): the purpose of fPrecisionCut is not visible in this header - confirm its use in the implementation file
00116       Double_t fPrecisionCut;
00117 
00118       ClassDef(SeparationBase,0) // Interface to different separation critiera used in training algorithms
00119    };
00120 
00121 
00122 } // namespace TMVA
00123 
00124 #endif

Generated on Tue Jul 5 14:27:38 2011 for ROOT_528-00b_version by  doxygen 1.5.1