00001 // @(#)root/tmva $Id: RegressionVariance.h 29122 2009-06-22 06:51:30Z brun $ 00002 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss 00003 00004 /********************************************************************************** 00005 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * 00006 * Package: TMVA * 00007 * Class : RegressionVariance * 00008 * Web : http://tmva.sourceforge.net * 00009 * * 00010 * Description: Calculate the separation critiera useded in regression * 00011 * * 00012 * There are two things: the Separation Index, and the Separation Gain * 00013 * Separation Index: * 00014 * Measure of the "Variance" of a sample. * 00015 * * 00016 * Separation Gain: * 00017 * the measure of how the quality of separation of the sample increases * 00018 * by splitting the sample e.g. into a "left-node" and a "right-node" * 00019 * (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right) * 00020 * this is then the quality crition which is optimized for when trying * 00021 * to increase the information in the system (making the best selection * 00022 * * 00023 * * 00024 * Authors (alphabetical): * 00025 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany * 00026 * * 00027 * Copyright (c) 2005: * 00028 * CERN, Switzerland * 00029 * U. of Victoria, Canada * 00030 * Heidelberg U., Germany * 00031 * * 00032 * Redistribution and use in source and binary forms, with or without * 00033 * modification, are permitted according to the terms listed in LICENSE * 00034 * (http://tmva.sourceforge.net/LICENSE) * 00035 **********************************************************************************/ 00036 00037 #ifndef ROOT_TMVA_RegressionVariance 00038 #define ROOT_TMVA_RegressionVariance 00039 00040 ////////////////////////////////////////////////////////////////////////// 00041 // // 00042 // RegressionVariance // 00043 // // 00044 // Calculate the "SeparationGain" for Regression analysis // 00045 // separation critiera used in various training algorithms // 00046 // // 00047 // There are two things: the Separation Index, and the Separation Gain // 00048 // Separation Index: // 00049 // Measure of the "Variance" of a sample. // 00050 // // 00051 // Separation Gain: // 00052 // the measure of how the quality of separation of the sample increases // 00053 // by splitting the sample e.g. into a "left-node" and a "right-node" // 00054 // (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right) // 00055 // this is then the quality crition which is optimized for when trying // 00056 // to increase the information in the system (making the best selection // 00057 // // 00058 ////////////////////////////////////////////////////////////////////////// 00059 00060 #ifndef ROOT_Rtypes 00061 #include "Rtypes.h" 00062 #endif 00063 00064 #ifndef ROOT_TString 00065 #include "TString.h" 00066 #endif 00067 00068 namespace TMVA { 00069 00070 class RegressionVariance { 00071 00072 public: 00073 00074 //default constructor 00075 RegressionVariance(){fName = "Variance for Regression";} 00076 00077 //copy constructor 00078 RegressionVariance( const RegressionVariance& s ): fName ( s.fName ) {} 00079 00080 // destructor 00081 virtual ~RegressionVariance(){} 00082 00083 // Return the gain in separation of the original sample is splitted in two sub-samples 00084 // (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right) 00085 Double_t GetSeparationGain( const Double_t &nLeft, const Double_t &targetLeft, const Double_t &target2Left, 00086 const Double_t &nTot, const Double_t &targetTot, const Double_t &target2Tot ); 00087 00088 // Return the separation index (a measure for "purity" of the sample") 00089 virtual Double_t GetSeparationIndex( const Double_t &n, const Double_t &target, const Double_t &target2 ); 00090 00091 // Return the name of the concrete Index implementation 00092 TString GetName() { return fName; } 00093 00094 protected: 00095 00096 TString fName; // name of the concrete Separation Index impementation 00097 00098 ClassDef(RegressionVariance,0) // Interface to different separation critiera used in training algorithms 00099 }; 00100 00101 00102 } // namespace TMVA 00103 00104 #endif