MethodDT.cxx

00001 // @(#)root/tmva $Id: MethodDT.cxx 36966 2010-11-26 09:50:13Z evt $
00002 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss
00003 
00004 /**********************************************************************************
00005  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
00006  * Package: TMVA                                                                  *
00007  * Class  : MethodDT (DT = Decision Trees)                                         *
00008  * Web    : http://tmva.sourceforge.net                                           *
00009  *                                                                                *
00010  * Description:                                                                   *
00011  *      Analysis of Boosted Decision Trees                                        *
00012  *                                                                                *
00013  * Authors (alphabetical):                                                        *
00014  *      Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland              *
00015  *      Helge Voss      <Helge.Voss@cern.ch>     - MPI-K Heidelberg, Germany      *
00016  *      Or Cohen        <orcohenor@gmail.com>    - Weizmann Inst., Israel         *
00017  *                                                                                *
00018  * Copyright (c) 2005:                                                            *
00019  *      CERN, Switzerland                                                         *
00020  *      MPI-K Heidelberg, Germany                                                 *
00021  *                                                                                *
00022  * Redistribution and use in source and binary forms, with or without             *
00023  * modification, are permitted according to the terms listed in LICENSE           *
00024  * (http://tmva.sourceforge.net/LICENSE)                                          *
00025  **********************************************************************************/
00026 
00027 //_______________________________________________________________________
00028 //
00029 // Analysis of Boosted Decision Trees
00030 //
00031 // Boosted decision trees have been successfully used in High Energy
00032 // Physics analysis for example by the MiniBooNE experiment
00033 // (Yang-Roe-Zhu, physics/0508045). In Boosted Decision Trees, the
00034 // selection is done on a majority vote on the result of several decision
00035 // trees, which are all derived from the same training sample by
00036 // supplying different event weights during the training.
00037 //
00038 // Decision trees:
00039 //
00040 // successive decision nodes are used to categorize the
00041 // events out of the sample as either signal or background. Each node
00042 // uses only a single discriminating variable to decide if the event is
00043 // signal-like ("goes right") or background-like ("goes left"). This
00044 // forms a tree-like structure with "baskets" at the end (leaf nodes),
00045 // and an event is classified as either signal or background according to
00046 // whether the basket where it ends up has been classified signal or
00047 // background during the training. Training of a decision tree is the
00048 // process to define the "cut criteria" for each node. The training
00049 // starts with the root node. Here one takes the full training event
00050 // sample and selects the variable and corresponding cut value that gives
00051 // the best separation between signal and background at this stage. Using
00052 // this cut criterion, the sample is then divided into two subsamples, a
00053 // signal-like (right) and a background-like (left) sample. Two new nodes
00054 // are then created for each of the two sub-samples and they are
00055 // constructed using the same mechanism as described for the root
00056 // node. The division is stopped once a certain node has reached either a
00057 // minimum number of events, or a minimum or maximum signal purity. These
00058 // leaf nodes are then called "signal" or "background" if they contain
00059 // more signal or background events, respectively, from the training sample.
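//
// As a rough illustration only (the actual training is implemented in
// TMVA::DecisionTree::BuildTree, used further below), the recursive node
// training could be sketched as follows; Node, Sample, FindBestSplit and
// the stopping thresholds are hypothetical names:
//
//    void Grow(Node* node, const Sample& events) {
//       if (events.size() < nEventsMin || Purity(events) > maxPurity
//                                      || Purity(events) < minPurity) {
//          node->MakeLeaf(Purity(events));          // leaf: signal or background
//          return;
//       }
//       Split s = FindBestSplit(events);            // best variable and cut value
//       node->SetCut(s.variable, s.cutValue);
//       Grow(node->AddLeft(),  events.Failing(s));  // background-like subsample
//       Grow(node->AddRight(), events.Passing(s));  // signal-like subsample
//    }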
00060 //
00061 // Boosting:
00062 //
00063 // the idea behind boosting is that signal events from the training
00064 // sample that end up in a background node (and vice versa) are given a
00065 // larger weight than events that are in the correct leaf node. This
00066 // results in a re-weighted training event sample, with which a new
00067 // decision tree can be developed. The boosting can be applied several
00068 // times (typically 100-500 times) and one ends up with a set of decision
00069 // trees (a forest).
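//
// A minimal sketch of one AdaBoost-style reweighting step (hypothetical
// helper names, for illustration only; in TMVA the boosting itself is
// handled by MethodBoost rather than by this class):
//
//    double err = WeightedMisclassificationRate(tree, events); // expected in (0, 0.5)
//    double boostWeight = (1.0 - err) / err;
//    for (Event& ev : events)
//       if (IsMisclassified(tree, ev)) ev.weight *= boostWeight; // promote hard events
//    RenormaliseWeights(events);   // keep the total sample weight constant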
00070 //
00071 // Bagging:
00072 //
00073 // In this particular variant of the Boosted Decision Trees the boosting
00074 // is not done on the basis of previous training results, but by a simple
00075 // stochastic re-sampling of the initial training event sample.
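//
// A bagging step can be sketched as drawing a random multiplicity for each
// event instead of an error-driven weight (illustration only, with a
// hypothetical event loop):
//
//    TRandom3 rng(seed);                            // a different seed per tree
//    for (Event& ev : events)
//       ev.weight = ev.originalWeight * rng.Poisson(1.0);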
00076 //
00077 // Analysis:
00078 //
00079 // applying an individual decision tree to a test event results in a
00080 // classification of the event as either signal or background. For the
00081 // boosted decision tree selection, an event is successively subjected to
00082 // the whole set of decision trees and depending on how often it is
00083 // classified as signal, a "likelihood" estimator is constructed for the
00084 // event being signal or background. The value of this estimator is the
00085 // one which is then used to select the events from an event sample, and
00086 // the cut value on this estimator defines the efficiency and purity of
00087 // the selection.
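//
// The resulting estimator can be sketched as the fraction of trees in the
// forest that classify the test event as signal (illustration only):
//
//    double nSignalLike = 0;
//    for (const DecisionTree* tree : forest)
//       if (tree->CheckEvent(event) > nodePurityLimit) nSignalLike += 1.0;
//    double estimator = nSignalLike / forest.size(); // cut on this value to select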
00088 //
00089 //_______________________________________________________________________
00090 
00091 #include <algorithm>
00092 #include "Riostream.h"
00093 #include "TRandom3.h"
00094 #include "TMath.h"
00095 #include "TObjString.h"
00096 
00097 #include "TMVA/ClassifierFactory.h"
00098 #include "TMVA/MethodDT.h"
00099 #include "TMVA/Tools.h"
00100 #include "TMVA/Timer.h"
00101 #include "TMVA/Ranking.h"
00102 #include "TMVA/SdivSqrtSplusB.h"
00103 #include "TMVA/BinarySearchTree.h"
00104 #include "TMVA/SeparationBase.h"
00105 #include "TMVA/GiniIndex.h"
00106 #include "TMVA/CrossEntropy.h"
00107 #include "TMVA/MisClassificationError.h"
00108 #include "TMVA/MethodBoost.h"
00109 #include "TMVA/CCPruner.h"
00110 
00111 using std::vector;
00112 
00113 REGISTER_METHOD(DT)
00114 
00115 ClassImp(TMVA::MethodDT)
00116 
00117 //_______________________________________________________________________
00118 TMVA::MethodDT::MethodDT( const TString& jobName,
00119                           const TString& methodTitle,
00120                           DataSetInfo& theData,
00121                           const TString& theOption,
00122                           TDirectory* theTargetDir ) :
00123    TMVA::MethodBase( jobName, Types::kDT, methodTitle, theData, theOption, theTargetDir )
00124    , fTree(0)
00125    , fNodeMinEvents(0)
00126    , fNCuts(0)
00127    , fUseYesNoLeaf(kFALSE)
00128    , fNodePurityLimit(0)
00129    , fNNodesMax(0)
00130    , fMaxDepth(0)
00131    , fErrorFraction(0)
00132    , fPruneStrength(0)
00133    , fPruneMethod(DecisionTree::kNoPruning)
00134    , fAutomatic(kFALSE)
00135    , fRandomisedTrees(kFALSE)
00136    , fUseNvars(0)
00137    , fPruneBeforeBoost(kFALSE)
00138    , fDeltaPruneStrength(0)
00139 {
00140    // the standard constructor for just an ordinary "decision tree"
00141 }
00142 
00143 //_______________________________________________________________________
00144 TMVA::MethodDT::MethodDT( DataSetInfo& dsi,
00145                           const TString& theWeightFile,
00146                           TDirectory* theTargetDir ) :
00147    TMVA::MethodBase( Types::kDT, dsi, theWeightFile, theTargetDir )
00148    , fTree(0)
00149    , fNodeMinEvents(0)
00150    , fNCuts(0)
00151    , fUseYesNoLeaf(kFALSE)
00152    , fNodePurityLimit(0)
00153    , fNNodesMax(0)
00154    , fMaxDepth(0)
00155    , fErrorFraction(0)
00156    , fPruneStrength(0)
00157    , fPruneMethod(DecisionTree::kNoPruning)
00158    , fAutomatic(kFALSE)
00159    , fRandomisedTrees(kFALSE)
00160    , fUseNvars(0)
00161    , fPruneBeforeBoost(kFALSE)
00162    , fDeltaPruneStrength(0)
00163 {
00164    // constructor from Reader
00165 }
00166 
00167 //_______________________________________________________________________
00168 Bool_t TMVA::MethodDT::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ )
00169 {
00170    // DT can handle classification with 2 classes
00171    if( type == Types::kClassification && numberClasses == 2 ) return kTRUE;
00172    return kFALSE;
00173 }
00174 
00175 
00176 //_______________________________________________________________________
00177 void TMVA::MethodDT::DeclareOptions()
00178 {
00179    // define the options (their key words) that can be set in the option string
00180    // UseRandomisedTrees  choose at each node splitting a random set of variables 
00181    // UseNvars         use UseNvars variables in randomised trees
00182    // SeparationType   the separation criterion applied in the node splitting
00183    //                  known: GiniIndex
00184    //                         MisClassificationError
00185    //                         CrossEntropy
00186    //                         SDivSqrtSPlusB
00187    // nEventsMin:      the minimum number of events in a node (leaf criteria, stop splitting)
00188    // nCuts:           the number of steps in the optimisation of the cut for a node (if < 0, then
00189    //                  step size is determined by the events)
00190    // UseYesNoLeaf     decide if the classification is done simply by the node type, or the S/B
00191    //                  (from the training) in the leaf node
00192    // NodePurityLimit  the minimum purity to classify a node as a signal node (used in pruning and boosting to determine
00193    //                  misclassification error rate)
00194    // PruneMethod      The Pruning method: 
00195    //                  known: NoPruning  // switch off pruning completely
00196    //                         ExpectedError
00197    //                         CostComplexity 
00198    // PruneStrength    a parameter to adjust the amount of pruning. Should be large enough such that overtraining is avoided
00199 
00200    DeclareOptionRef(fRandomisedTrees,"UseRandomisedTrees","Choose at each node splitting a random set of variables and *bagging*");
00201    DeclareOptionRef(fUseNvars,"UseNvars","Number of variables used if randomised Tree option is chosen");
00202    DeclareOptionRef(fUseYesNoLeaf=kTRUE, "UseYesNoLeaf", 
00203                     "Use Sig or Bkg node type or the ratio S/B as classification in the leaf node");
00204    DeclareOptionRef(fNodePurityLimit=0.5, "NodePurityLimit", "In boosting/pruning, nodes with purity > NodePurityLimit are signal; background otherwise.");
00205    DeclareOptionRef(fPruneBeforeBoost=kFALSE, "PruneBeforeBoost", 
00206                     "Whether to perform the prune process right after the training or after the boosting");
00207    DeclareOptionRef(fSepTypeS="GiniIndex", "SeparationType", "Separation criterion for node splitting");
00208    AddPreDefVal(TString("MisClassificationError"));
00209    AddPreDefVal(TString("GiniIndex"));
00210    AddPreDefVal(TString("CrossEntropy"));
00211    AddPreDefVal(TString("SDivSqrtSPlusB"));
00212    DeclareOptionRef(fNodeMinEvents, "nEventsMin", "Minimum number of events in a leaf node (default: max(20, N_train/(Nvar^2)/10) ) ");
00213    DeclareOptionRef(fNCuts, "nCuts", "Number of steps during node cut optimisation");
00214    DeclareOptionRef(fPruneStrength, "PruneStrength", "Pruning strength (negative value == automatic adjustment)");
00215    DeclareOptionRef(fPruneMethodS, "PruneMethod", "Pruning method: NoPruning (switched off), ExpectedError or CostComplexity");
00216    
00217    AddPreDefVal(TString("NoPruning"));
00218    AddPreDefVal(TString("ExpectedError"));
00219    AddPreDefVal(TString("CostComplexity"));
00220 
00221    DeclareOptionRef(fNNodesMax=100000,"NNodesMax","Max number of nodes in tree");
00222    if (DoRegression()) {
00223       DeclareOptionRef(fMaxDepth=50,"MaxDepth","Max depth of the decision tree allowed");
00224    }else{
00225       DeclareOptionRef(fMaxDepth=3,"MaxDepth","Max depth of the decision tree allowed");
00226    }
00227 }
00228 
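// Example (illustration only): the options declared above are typically set
// through the booking option string, e.g. with a TMVA::Factory; the method
// title and the particular option values below are hypothetical:
//
//    factory->BookMethod( TMVA::Types::kDT, "DT",
//                         "SeparationType=GiniIndex:nCuts=20:nEventsMin=20:"
//                         "PruneMethod=CostComplexity:PruneStrength=-1:MaxDepth=3" );
//
// When used through MethodBoost, the boosting options (e.g. the number of
// boost iterations) are passed in the same option string.
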
00229 //_______________________________________________________________________
00230 void TMVA::MethodDT::ProcessOptions() 
00231 {
00232    // the option string is decoded, for available options see "DeclareOptions"
00233    fSepTypeS.ToLower();
00234    if      (fSepTypeS == "misclassificationerror") fSepType = new MisClassificationError();
00235    else if (fSepTypeS == "giniindex")              fSepType = new GiniIndex();
00236    else if (fSepTypeS == "crossentropy")           fSepType = new CrossEntropy();
00237    else if (fSepTypeS == "sdivsqrtsplusb")         fSepType = new SdivSqrtSplusB();
00238    else {
00239       Log() << kINFO << GetOptions() << Endl;
00240       Log() << kFATAL << "<ProcessOptions> unknown Separation Index option called" << Endl;
00241    }     
00242 
00243    //   std::cout << "fSeptypes " << fSepTypeS << "  fseptype " << fSepType << std::endl;
00244 
00245    fPruneMethodS.ToLower();
00246    if      (fPruneMethodS == "expectederror" )   fPruneMethod = DecisionTree::kExpectedErrorPruning;
00247    else if (fPruneMethodS == "costcomplexity" )  fPruneMethod = DecisionTree::kCostComplexityPruning;
00248    else if (fPruneMethodS == "nopruning" )       fPruneMethod = DecisionTree::kNoPruning;
00249    else {
00250       Log() << kINFO << GetOptions() << Endl;
00251       Log() << kFATAL << "<ProcessOptions> unknown PruneMethod option called" << Endl;
00252    }
00253 
00254    if (fPruneStrength < 0) fAutomatic = kTRUE;
00255    else fAutomatic = kFALSE;
00256    if (fAutomatic && fPruneMethod==DecisionTree::kExpectedErrorPruning){
00257       Log() << kFATAL 
00258             <<  "Sorry, automatic pruning strength determination is not implemented yet for ExpectedErrorPruning" << Endl;
00259    }
00260 
00261 
00262    if (this->Data()->HasNegativeEventWeights()){
00263       Log() << kINFO << " You are using a Monte Carlo sample that also contains negative event weights. "
00264               << "That should in principle be fine as long as on average you end up with "
00265               << "something positive. For this you have to make sure that the minimal number "
00266               << "of (unweighted) events demanded for a tree node (currently you use: nEventsMin="
00267               <<fNodeMinEvents<<", you can set this via the BDT option string when booking the "
00268               << "classifier) is large enough to allow for reasonable averaging!!! "
00269               << " If this does not help.. maybe you want to try the option: NoNegWeightsInTraining  "
00270               << "which ignores events with negative weight in the training. " << Endl
00271               << Endl << "Note: You'll get a WARNING message during the training if that should ever happen" << Endl;
00272    }
00273    
00274    if (fRandomisedTrees){
00275       Log() << kINFO << " Randomised trees should use *bagging* as *boost* method. Did you set this in the *MethodBoost* ? . Here I can enforce only the *no pruning*" << Endl;
00276       fPruneMethod = DecisionTree::kNoPruning;
00277       //      fBoostType   = "Bagging";
00278    }
00279 
00280 }
00281 
00282 //_______________________________________________________________________
00283 void TMVA::MethodDT::Init( void )
00284 {
00285    // common initialisation with defaults for the DT-Method
00286    fNodeMinEvents  = TMath::Max( 20, int( Data()->GetNTrainingEvents() / (10*GetNvar()*GetNvar())) );
00287    fNCuts          = 20; 
00288    fPruneMethod    = DecisionTree::kNoPruning;
00289    fPruneStrength  = 5;     // a negative value would mean automatic determination of the prune strength using a validation sample
00290    fDeltaPruneStrength=0.1;
00291    fRandomisedTrees= kFALSE;
00292    fUseNvars       = GetNvar();
00293 
00294    // reference cut value to distinguish signal-like from background-like events
00295    SetSignalReferenceCut( 0 );
00296    if (fAnalysisType == Types::kClassification || fAnalysisType == Types::kMulticlass ) {
00297       fMaxDepth        = 3;
00298    }else {
00299       fMaxDepth = 50;
00300    }
00301 }
00302 
00303 //_______________________________________________________________________
00304 TMVA::MethodDT::~MethodDT( void )
00305 {
00306    //destructor
00307    delete fTree;
00308 }
00309 
00310 //_______________________________________________________________________
00311 void TMVA::MethodDT::Train( void )
00312 {
00313    TMVA::DecisionTreeNode::fgIsTraining=true;
00314    fTree = new DecisionTree( fSepType, fNodeMinEvents, fNCuts, 0, 
00315                              fRandomisedTrees, fUseNvars, fNNodesMax, fMaxDepth,0 );
00316    if (fRandomisedTrees) Log()<<kWARNING<<" randomised trees do not work yet in this framework,"
00317                                 << " as I do not know how to give each tree a new random seed; for now they"
00318                                 << " will all be the same, and that is not good " << Endl;
00319    fTree->SetAnalysisType( GetAnalysisType() );
00320 
00321    fTree->BuildTree(GetEventCollection(Types::kTraining));
00322    TMVA::DecisionTreeNode::fgIsTraining=false;
00323 }
00324 
00325 //_______________________________________________________________________
00326 Bool_t TMVA::MethodDT::MonitorBoost( MethodBoost* booster )
00327 {
00328    Int_t methodIndex = booster->GetMethodIndex();
00329    if (booster->GetBoostStage() == Types::kBoostProcBegin)
00330       {
00331          booster->AddMonitoringHist(new TH1I("NodesBeforePruning","nodes before pruning",booster->GetBoostNum(),0,booster->GetBoostNum()));
00332          booster->AddMonitoringHist(new TH1I("NodesAfterPruning","nodes after pruning",booster->GetBoostNum(),0,booster->GetBoostNum()));
00333          booster->AddMonitoringHist(new TH1D("PruneStrength","prune  strength",booster->GetBoostNum(),0,booster->GetBoostNum()));
00334       }
00335 
00336    if (booster->GetBoostStage() == Types::kBeforeTraining)
00337       {
00338          if (methodIndex == 0)
00339             {
00340                booster->GetMonitoringHist(2)->SetXTitle("#tree");
00341                booster->GetMonitoringHist(2)->SetYTitle("PruneStrength");
00342                //dividing the data set for pruning where strength is calculated automatically
00343                if (fAutomatic)
00344                   {
00345                      Data()->DivideTrainingSet(2);
00346                      Data()->MoveTrainingBlock(1,Types::kValidation,kTRUE);
00347                   }
00348             }
00349       }
00350    else if (booster->GetBoostStage() == Types::kBeforeBoosting)
00351       booster->GetMonitoringHist(0)->SetBinContent(booster->GetBoostNum()+1,fTree->GetNNodes());
00352 
00353    if (booster->GetBoostStage() == ((fPruneBeforeBoost)?Types::kBeforeBoosting:Types::kBoostValidation)
00354        && !(fPruneMethod == DecisionTree::kNoPruning)) {
00355       
00356       if (methodIndex==0 && fPruneBeforeBoost == kFALSE)
00357          Log() << kINFO << "Pruning "<< booster->GetBoostNum() << " Decision Trees ... patience please" << Endl;
00358          
00359       //reading the previous value
00360       if (fAutomatic && methodIndex > 0) {
00361          MethodDT* mdt = dynamic_cast<MethodDT*>(booster->GetPreviousMethod());
00362          if(mdt)
00363             fPruneStrength = mdt->GetPruneStrength();
00364       }
00365 
00366       booster->GetMonitoringHist(0)->SetBinContent(methodIndex+1,fTree->GetNNodes());
00367       booster->GetMonitoringHist(2)->SetBinContent(methodIndex+1,PruneTree(methodIndex));
00368       booster->GetMonitoringHist(1)->SetBinContent(methodIndex+1,fTree->GetNNodes());
00369    } // no pruning is performed
00370    else if (booster->GetBoostStage() != Types::kBoostProcEnd)
00371       return kFALSE;
00372 
00373    //finishing the pruning process, printing out everything
00374    if (booster->GetBoostStage() == Types::kBoostProcEnd)
00375       {
00376          if (fPruneMethod == DecisionTree::kNoPruning) {
00377             Log() << kINFO << "<Train> average number of nodes (w/o pruning) : "
00378                     <<  booster->GetMonitoringHist(0)->GetMean() << Endl;
00379          }
00380          else
00381             {
00382                Log() << kINFO << "<Train> average number of nodes before/after pruning : " 
00383                        << booster->GetMonitoringHist(0)->GetMean() << " / " 
00384                        << booster->GetMonitoringHist(1)->GetMean()
00385                        << Endl;
00386             }
00387       }
00388 
00389    return kTRUE;
00390 }
00391 
00392 
00393 //_______________________________________________________________________
00394 Double_t TMVA::MethodDT::PruneTree(const Int_t methodIndex)
00395 {
00396    if (fAutomatic && fPruneMethod == DecisionTree::kCostComplexityPruning) { // automatic cost complexity pruning
00397       CCPruner* pruneTool = new CCPruner(fTree, this->Data() , fSepType);
00398       pruneTool->Optimize();
00399       std::vector<DecisionTreeNode*> nodes = pruneTool->GetOptimalPruneSequence();
00400       fPruneStrength = pruneTool->GetOptimalPruneStrength();
00401       for(UInt_t i = 0; i < nodes.size(); i++) 
00402          fTree->PruneNode(nodes[i]);
00403       delete pruneTool;
00404    } 
00405    else if (fAutomatic &&  fPruneMethod != DecisionTree::kCostComplexityPruning){
00406       Int_t bla; 
00407       bla = methodIndex; //make the compiler quiet
00408       /*
00409       Double_t alpha = 0;
00410       Double_t delta = fDeltaPruneStrength;
00411       
00412       DecisionTree*  dcopy;
00413       vector<Double_t> q;
00414       multimap<Double_t,Double_t> quality;
00415       Int_t nnodes=fTree->GetNNodes();
00416 
00417       // find the maximum prune strength that still leaves some nodes 
00418       Bool_t forceStop = kFALSE;
00419       Int_t troubleCount=0, previousNnodes=nnodes;
00420 
00421 
00422       nnodes=fTree->GetNNodes();
00423       while (nnodes > 3 && !forceStop) {
00424          dcopy = new DecisionTree(*fTree);
00425          dcopy->SetPruneStrength(alpha+=delta);
00426          dcopy->PruneTree();
00427          q.push_back(TestTreeQuality(dcopy));
00428          quality.insert(pair<const Double_t,Double_t>(q.back(),alpha));
00429          nnodes=dcopy->GetNNodes();
00430          if (previousNnodes == nnodes) troubleCount++;
00431          else { 
00432             troubleCount=0; // reset counter
00433             if (nnodes < previousNnodes / 2 ) fDeltaPruneStrength /= 2.;
00434          }
00435          previousNnodes = nnodes;
00436          if (troubleCount > 20) {
00437             if (methodIndex == 0 && fPruneStrength <=0) {//maybe you need larger stepsize ??
00438                fDeltaPruneStrength *= 5;
00439                Log() << kINFO << "<PruneTree> trouble determining optimal prune strength"
00440                        << " for Tree " << methodIndex
00441                        << " --> first try to increase the step size"
00442                        << " currently PruneStrength= " << alpha 
00443                        << " stepsize " << fDeltaPruneStrength << " " << Endl;
00444                troubleCount = 0;   // try again
00445                fPruneStrength = 1; // if it was for the first time.. 
00446             } else if (methodIndex == 0 && fPruneStrength <=2) {//maybe you need much larger stepsize ??
00447                fDeltaPruneStrength *= 5;
00448                Log() << kINFO << "<PruneTree> trouble determining optimal prune strength"
00449                        << " for Tree " << methodIndex
00450                        << " -->  try to increase the step size even more.. "
00451                        << " if that still didn't work, TRY IT BY HAND"  
00452                        << " currently PruneStrength= " << alpha 
00453                        << " stepsize " << fDeltaPruneStrength << " " << Endl;
00454                troubleCount = 0;   // try again
00455                fPruneStrength = 3; // if it was for the first time.. 
00456             } else {
00457                forceStop=kTRUE;
00458                Log() << kINFO << "<PruneTree> trouble determining optimal prune strength"
00459                        << " for Tree " << methodIndex << " at tested prune strength: " << alpha << " --> abort forced, use same strength as for previous tree:"
00460                        << fPruneStrength << Endl;
00461             }
00462          }
00463          if (fgDebugLevel==1) Log() << kINFO << "Pruned with ("<<alpha
00464                                       << ") give quality: " << q.back()
00465                                       << " and #nodes: " << nnodes  
00466                                       << Endl;
00467          delete dcopy;
00468       }
00469       if (!forceStop) {
00470          multimap<Double_t,Double_t>::reverse_iterator it=quality.rend();
00471          it++;
00472          fPruneStrength = it->second;
00473          // adjust the step size for the next tree.. think that 20 steps are sort of
00474          // fine enough.. could become a tunable option later..
00475          fDeltaPruneStrength *= Double_t(q.size())/20.;
00476       }
00477 
00478       fTree->SetPruneStrength(fPruneStrength);
00479       fTree->PruneTree();
00480       */
00481    } 
00482    else {
00483       fTree->SetPruneStrength(fPruneStrength);
00484       fTree->PruneTree();
00485    }
00486    return fPruneStrength;
00487 }
00488 
00489 //_______________________________________________________________________
00490 Double_t TMVA::MethodDT::TestTreeQuality( DecisionTree *dt )
00491 {
00492    Data()->SetCurrentType(Types::kValidation);
00493    // test the tree quality in terms of misclassification
00494    Double_t SumCorrect=0,SumWrong=0;
00495    for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++)
00496       {
00497          Event * ev = Data()->GetEvent(ievt);
00498          if ((dt->CheckEvent(*ev) > dt->GetNodePurityLimit() ) == DataInfo().IsSignal(ev)) SumCorrect+=ev->GetWeight();
00499          else SumWrong+=ev->GetWeight();
00500       }
00501    Data()->SetCurrentType(Types::kTraining);
00502    return  SumCorrect / (SumCorrect + SumWrong);
00503 }
00504 
00505 //_______________________________________________________________________
00506 void TMVA::MethodDT::AddWeightsXMLTo( void* parent ) const 
00507 {
00508    fTree->AddXMLTo(parent);
00509    //Log() << kFATAL << "Please implement writing of weights as XML" << Endl;
00510 }
00511 
00512 //_______________________________________________________________________
00513 void TMVA::MethodDT::ReadWeightsFromXML( void* wghtnode)
00514 {
00515    if(fTree)
00516       delete fTree;
00517    fTree = new DecisionTree();
00518    fTree->ReadXML(wghtnode,GetTrainingTMVAVersionCode());
00519 }
00520 
00521 //_______________________________________________________________________
00522 void  TMVA::MethodDT::ReadWeightsFromStream( istream& istr )
00523 {
00524    delete fTree;
00525    fTree = new DecisionTree();
00526    fTree->Read(istr);
00527 }
00528 
00529 //_______________________________________________________________________
00530 Double_t TMVA::MethodDT::GetMvaValue( Double_t* err, Double_t* errUpper )
00531 {
00532    // returns MVA value
00533 
00534    // cannot determine error
00535    NoErrorCalc(err, errUpper);
00536 
00537    return fTree->CheckEvent(*GetEvent(),fUseYesNoLeaf);
00538 }
00539 
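// Example (illustration only): in an application, the per-event MVA value
// computed above is usually obtained through a TMVA::Reader; the variable
// names and weight-file path below are hypothetical:
//
//    TMVA::Reader reader;
//    Float_t var1, var2;
//    reader.AddVariable( "var1", &var1 );
//    reader.AddVariable( "var2", &var2 );
//    reader.BookMVA( "DT", "weights/TMVAClassification_DT.weights.xml" );
//    // ... fill var1 and var2 for the current event, then:
//    Double_t mvaValue = reader.EvaluateMVA( "DT" );
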
00540 //_______________________________________________________________________
00541 void TMVA::MethodDT::GetHelpMessage() const
00542 {
00543 
00544 }
00545 //_______________________________________________________________________
00546 const TMVA::Ranking* TMVA::MethodDT::CreateRanking()
00547 {
00548    return 0;
00549 }
