00001 // @(#)root/tmva $Id: RegressionVariance.cxx 37513 2010-12-10 14:29:06Z stelzer $ 00002 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss 00003 00004 /********************************************************************************** 00005 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * 00006 * Package: TMVA * 00007 * Class : RegressionVariance * 00008 * Web : http://tmva.sourceforge.net * 00009 * * 00010 * Description: Calculate the separation critiera useded in regression * 00011 * * 00012 * There are two things: the Separation Index, and the Separation Gain * 00013 * Separation Index: * 00014 * Measure of the "Variance" of a sample. * 00015 * * 00016 * Separation Gain: * 00017 * the measure of how the quality of separation of the sample increases * 00018 * by splitting the sample e.g. into a "left-node" and a "right-node" * 00019 * (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right) * 00020 * this is then the quality crition which is optimized for when trying * 00021 * to increase the information in the system (making the best selection * 00022 * * 00023 * * 00024 * Authors (alphabetical): * 00025 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany * 00026 * * 00027 * Copyright (c) 2005: * 00028 * CERN, Switzerland * 00029 * U. of Victoria, Canada * 00030 * Heidelberg U., Germany * 00031 * * 00032 * Redistribution and use in source and binary forms, with or without * 00033 * modification, are permitted according to the terms listed in LICENSE * 00034 * (http://ttmva.sourceforge.net/LICENSE) * 00035 **********************************************************************************/ 00036 #include <iostream> 00037 #include "TMath.h" 00038 #include "TMVA/RegressionVariance.h" 00039 00040 ClassImp(TMVA::RegressionVariance) 00041 00042 //_______________________________________________________________________ 00043 Double_t TMVA::RegressionVariance::GetSeparationGain(const Double_t &nLeft, 00044 const Double_t& targetLeft , const Double_t& target2Left , 00045 const Double_t &nTot, 00046 const Double_t& targetTot , const Double_t& target2Tot) 00047 { 00048 // Separation Gain: 00049 // the measure of how the quality of separation of the sample increases 00050 // by splitting the sample e.g. into a "left-node" and a "right-node" 00051 // (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right) 00052 // this is then the quality crition which is optimized for when trying 00053 // to increase the information in the system 00054 // for the Regression: as the "Gain is maximised", the RMS (sqrt(variance)) 00055 // which is used as a "separation" index should be as small as possible. 00056 // the "figure of merit" here has to be -(rms left+rms-right) or 1/rms... 00057 00058 00059 if ( nTot==nLeft || nLeft==0 ) return 0.; 00060 00061 Double_t parentIndex = nTot * this->GetSeparationIndex(nTot,targetTot,target2Tot); 00062 Double_t leftIndex = ( (nTot - nLeft) * this->GetSeparationIndex(nTot-nLeft,targetTot-targetLeft,target2Tot-target2Left) ); 00063 Double_t rightIndex = nLeft * this->GetSeparationIndex(nLeft,targetLeft,target2Left); 00064 00065 // return 1/ (leftIndex + rightIndex); 00066 return (parentIndex - leftIndex - rightIndex)/(parentIndex); 00067 } 00068 00069 //_______________________________________________________________________ 00070 Double_t TMVA::RegressionVariance::GetSeparationIndex(const Double_t& n, 00071 const Double_t& target , const Double_t& target2) 00072 { 00073 // Separation Index: a simple Variance 00074 00075 // return TMath::Sqrt(( target2 - target*target/n) / n); 00076 return ( target2 - target*target/n) / n; 00077 00078 } 00079 00080 00081