MethodBDT.cxx

00001 // @(#)root/tmva $Id: MethodBDT.cxx 37986 2011-02-04 21:42:15Z pcanal $
00002 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss
00003 
00004 /**********************************************************************************
00005  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
00006  * Package: TMVA                                                                  *
00007  * Class  : MethodBDT (BDT = Boosted Decision Trees)                              *
00008  * Web    : http://tmva.sourceforge.net                                           *
00009  *                                                                                *
00010  * Description:                                                                   *
00011  *      Analysis of Boosted Decision Trees                                        *
00012  *                                                                                *
00013  * Authors (alphabetical):                                                        *
00014  *      Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland              *
00015  *      Helge Voss      <Helge.Voss@cern.ch>     - MPI-K Heidelberg, Germany      *
00016  *      Kai Voss        <Kai.Voss@cern.ch>       - U. of Victoria, Canada         *
00017  *      Doug Schouten   <dschoute@sfu.ca>        - Simon Fraser U., Canada        *
00018  *      Jan Therhaag    <jan.therhaag@cern.ch>   - U. of Bonn, Germany            *
00019  *                                                                                *
00020  * Copyright (c) 2005:                                                            *
00021  *      CERN, Switzerland                                                         *
00022  *      U. of Victoria, Canada                                                    *
00023  *      MPI-K Heidelberg, Germany                                                 *
00024  *                                                                                *
00025  * Redistribution and use in source and binary forms, with or without             *
00026  * modification, are permitted according to the terms listed in LICENSE           *
00027  * (http://tmva.sourceforge.net/LICENSE)                                          *
00028  **********************************************************************************/
00029 
00030 //_______________________________________________________________________
00031 //
00032 // Analysis of Boosted Decision Trees
00033 //
00034 // Boosted decision trees have been successfully used in High Energy
00035 // Physics analyses, for example by the MiniBooNE experiment
00036 // (Yang-Roe-Zhu, physics/0508045). In Boosted Decision Trees, the
00037 // selection is based on a majority vote over the results of several decision
00038 // trees, which are all derived from the same training sample by
00039 // supplying different event weights during the training.
00040 //
00041 // Decision trees:
00042 //
00043 // Successive decision nodes are used to categorize the
00044 // events out of the sample as either signal or background. Each node
00045 // uses only a single discriminating variable to decide if the event is
00046 // signal-like ("goes right") or background-like ("goes left"). This
00047 // forms a tree-like structure with "baskets" at the end (leaf nodes),
00048 // and an event is classified as either signal or background according to
00049 // whether the basket where it ends up has been classified signal or
00050 // background during the training. Training of a decision tree is the
00051 // process of defining the "cut criteria" for each node. The training
00052 // starts with the root node. Here one takes the full training event
00053 // sample and selects the variable and corresponding cut value that gives
00054 // the best separation between signal and background at this stage. Using
00055 // this cut criterion, the sample is then divided into two subsamples, a
00056 // signal-like (right) and a background-like (left) sample. Two new nodes
00057 // are then created for each of the two sub-samples and they are
00058 // constructed using the same mechanism as described for the root
00059 // node. The division is stopped once a certain node has reached either a
00060 // minimum number of events, or a minimum or maximum signal purity. These
00061 // leaf nodes are then called "signal" or "background" if they contain
00062 // more signal or more background events, respectively, from the training sample.
00063 //
00064 // Boosting:
00065 //
00066 // The idea behind adaptive boosting (AdaBoost) is that signal events
00067 // from the training sample that end up in a background node
00068 // (and vice versa) are given a larger weight than events that are in
00069 // the correct leaf node. This results in a re-weighted training event
00070 // sample, with which a new decision tree can then be developed.
00071 // The boosting can be applied several times (typically 100-500 times)
00072 // and one ends up with a set of decision trees (a forest).
00073 // Gradient boosting works more like a function expansion approach, where
00074 // each tree corresponds to a summand. The parameters for each summand (tree)
00075 // are determined by the minimization of an error function (binomial log-
00076 // likelihood for classification and Huber loss for regression).
00077 // A greedy algorithm is used, which means that only one tree is modified
00078 // at a time, while the other trees stay fixed.
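//
// As an illustration only (added by the editor, not part of the original source), the
// additive model built by gradient boosting can be sketched as follows; F, L and Tree_m
// are generic placeholders and do not refer to actual TMVA symbols:
//
//    F_0(x) = constant starting model (e.g. the weighted median of the regression targets)
//    for m = 1 ... NTrees:
//       r_i  = - dL(y_i, F(x_i)) / dF(x_i)      // pseudo-residuals of the loss L
//       fit Tree_m to the pseudo-residuals r_i
//       F(x) = F(x) + shrinkage * Tree_m(x)     // "Shrinkage" option = learning rate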
00079 //
00080 // Bagging:
00081 //
00082 // In this particular variant of the Boosted Decision Trees the boosting
00083 // is not done on the basis of previous training results, but by a simple
00084 // stochastic re-sampling of the initial training event sample.
00085 //
00086 // Random Trees:
00087 // Similar to the "Random Forests" of Leo Breiman and Adele Cutler, this
00088 // variant uses the bagging algorithm and, in addition, bases the determination of the
00089 // best node split during the training on a random subset of variables only,
00090 // which is chosen individually for each split.
00091 //
00092 // Analysis:
00093 //
00094 // Applying an individual decision tree to a test event results in a
00095 // classification of the event as either signal or background. For the
00096 // boosted decision tree selection, an event is successively subjected to
00097 // the whole set of decision trees and depending on how often it is
00098 // classified as signal, a "likelihood" estimator is constructed for the
00099 // event being signal or background. The value of this estimator is the
00100 // one which is then used to select the events from an event sample, and
00101 // the cut value on this estimator defines the efficiency and purity of
00102 // the selection.
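//
// A minimal sketch (added for illustration, not part of the original source) of how such a
// weighted "vote" over the forest could be evaluated for a single event; it uses the same
// ingredients (tree->CheckEvent and the boost weights) as the code below, but the actual
// TMVA implementation may differ in detail:
//
//    Double_t mva = 0, norm = 0;
//    for (UInt_t i = 0; i < fForest.size(); i++) {
//       mva  += fBoostWeights[i] * fForest[i]->CheckEvent(ev, fUseYesNoLeaf);
//       norm += fBoostWeights[i];
//    }
//    mva /= norm;   // larger values = more signal-like; a cut on mva defines the selection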
00103 //
00104 //_______________________________________________________________________
00105 
00106 #include <algorithm>
00107 
00108 #include <math.h>
00109 #include <fstream>
00110 
00111 #include "Riostream.h"
00112 #include "TRandom3.h"
00114 #include "TMath.h"
00115 #include "TObjString.h"
00116 
00117 #include "TMVA/ClassifierFactory.h"
00118 #include "TMVA/MethodBDT.h"
00119 #include "TMVA/Tools.h"
00120 #include "TMVA/Timer.h"
00121 #include "TMVA/Ranking.h"
00122 #include "TMVA/SdivSqrtSplusB.h"
00123 #include "TMVA/BinarySearchTree.h"
00124 #include "TMVA/SeparationBase.h"
00125 #include "TMVA/GiniIndex.h"
00126 #include "TMVA/GiniIndexWithLaplace.h"
00127 #include "TMVA/CrossEntropy.h"
00128 #include "TMVA/MisClassificationError.h"
00129 #include "TMVA/Results.h"
00130 #include "TMVA/ResultsMulticlass.h"
00131 #include "TMVA/Interval.h"
00132 
00133 using std::vector;
00134 
00135 REGISTER_METHOD(BDT)
00136 
00137 ClassImp(TMVA::MethodBDT)
00138 
00139 const Int_t TMVA::MethodBDT::fgDebugLevel = 0;
00140 
00141 //_______________________________________________________________________
00142 TMVA::MethodBDT::MethodBDT( const TString& jobName,
00143                             const TString& methodTitle,
00144                             DataSetInfo& theData,
00145                             const TString& theOption,
00146                             TDirectory* theTargetDir ) :
00147    TMVA::MethodBase( jobName, Types::kBDT, methodTitle, theData, theOption, theTargetDir )
00148    , fNTrees(0)
00149    , fRenormByClass(0)        // don't use this initialisation, only here to make Coverity happy. Is set in DeclareOptions()
00150    , fAdaBoostBeta(0)
00151    , fTransitionPoint(0)
00152    , fShrinkage(0)
00153    , fBaggedGradBoost(kFALSE)
00154    , fSampleFraction(0)
00155    , fSumOfWeights(0)
00156    , fNodeMinEvents(0)
00157    , fNCuts(0)
00158    , fUseFisherCuts(0)        // don't use this initialisation, only here to make Coverity happy. Is set in DeclareOptions()
00159    , fMinLinCorrForFisher(.8) // don't use this initialisation, only here to make Coverity happy. Is set in DeclareOptions()
00160    , fUseExclusiveVars(0)     // don't use this initialisation, only here to make Coverity happy. Is set in DeclareOptions()
00161    , fUseYesNoLeaf(kFALSE)
00162    , fNodePurityLimit(0)
00163    , fUseWeightedTrees(kFALSE)
00164    , fNNodesMax(0)
00165    , fMaxDepth(0)
00166    , fPruneMethod(DecisionTree::kNoPruning)
00167    , fPruneStrength(0)
00168    , fPruneBeforeBoost(kFALSE)
00169    , fFValidationEvents(0)
00170    , fAutomatic(kFALSE)
00171    , fRandomisedTrees(kFALSE)
00172    , fUseNvars(0)
00173    , fUsePoissonNvars(0)  // don't use this initialisation, only here to make  Coverity happy. Is set in Init()
00174    , fUseNTrainEvents(0)
00175    , fSampleSizeFraction(0)
00176    , fNoNegWeightsInTraining(kFALSE)
00177    , fITree(0)
00178    , fBoostWeight(0)
00179    , fErrorFraction(0)
00180 {
00181    // the standard constructor for the "boosted decision trees"
00182    fMonitorNtuple = NULL;
00183    fSepType = NULL;
00184 }
00185 
00186 //_______________________________________________________________________
00187 TMVA::MethodBDT::MethodBDT( DataSetInfo& theData,
00188                             const TString& theWeightFile,
00189                             TDirectory* theTargetDir )
00190    : TMVA::MethodBase( Types::kBDT, theData, theWeightFile, theTargetDir )
00191    , fNTrees(0)
00192    , fRenormByClass(0)        // don't use this initialisation, only here to make Coverity happy. Is set in DeclareOptions()
00193    , fAdaBoostBeta(0)
00194    , fTransitionPoint(0)
00195    , fShrinkage(0)
00196    , fBaggedGradBoost(kFALSE)
00197    , fSampleFraction(0)
00198    , fSumOfWeights(0)
00199    , fNodeMinEvents(0)
00200    , fNCuts(0)
00201    , fUseFisherCuts(0)        // don't use this initialisation, only here to make Coverity happy. Is set in DeclareOptions()
00202    , fMinLinCorrForFisher(.8) // don't use this initialisation, only here to make Coverity happy. Is set in DeclareOptions()
00203    , fUseExclusiveVars(0)     // don't use this initialisation, only here to make Coverity happy. Is set in DeclareOptions()
00204    , fUseYesNoLeaf(kFALSE)
00205    , fNodePurityLimit(0)
00206    , fUseWeightedTrees(kFALSE)
00207    , fNNodesMax(0)
00208    , fMaxDepth(0)
00209    , fPruneMethod(DecisionTree::kNoPruning)
00210    , fPruneStrength(0)
00211    , fPruneBeforeBoost(kFALSE)
00212    , fFValidationEvents(0)
00213    , fAutomatic(kFALSE)
00214    , fRandomisedTrees(kFALSE)
00215    , fUseNvars(0)
00216    , fUsePoissonNvars(0)  // don't use this initialisation, only here to make  Coverity happy. Is set in Init()
00217    , fUseNTrainEvents(0)
00218    , fSampleSizeFraction(0)
00219    , fNoNegWeightsInTraining(kFALSE)
00220    , fITree(0)
00221    , fBoostWeight(0)
00222    , fErrorFraction(0)
00223 {
00224    fMonitorNtuple = NULL;
00225    fSepType = NULL;
00226    // constructor for calculating BDT-MVA using previously generated decision trees
00227    // the results of the previous training (the decision trees) are read in via the
00228    // weight file. Make sure that the variables correspond to the ones used in
00229    // creating the "weight"-file
00230 }
00231 
00232 //_______________________________________________________________________
00233 Bool_t TMVA::MethodBDT::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets )
00234 {
00235    // BDT can handle classification with multiple classes and regression with one regression-target
00236    if (type == Types::kClassification && numberClasses == 2) return kTRUE;
00237    if (type == Types::kMulticlass ) return kTRUE;
00238    if( type == Types::kRegression && numberTargets == 1 ) return kTRUE;
00239    return kFALSE;
00240 }
00241 
00242 //_______________________________________________________________________
00243 void TMVA::MethodBDT::DeclareOptions()
00244 {
00245    // define the options (their key words) that can be set in the option string
00246    // known options:
00247    // nTrees        number of trees in the forest to be created
00248    // BoostType     the boosting type for the trees in the forest (AdaBoost etc.)
00249    //                  known: AdaBoost
00250    //                         AdaBoostR2 (Adaboost for regression)
00251    //                         Bagging
00252    //                         GradBoost
00253    // AdaBoostBeta     the boosting parameter, beta, for AdaBoost
00254    // UseRandomisedTrees  choose at each node splitting a random set of variables
00255    // UseNvars         use UseNvars variables in randomised trees
00256    // UsePoissonNvars  use UseNvars not as a fixed number but as the mean of a Poisson distribution
00257    // UseNTrainEvents  number of training events used in randomised (and bagged) trees
00258    // SeparationType   the separation criterion applied in the node splitting
00259    //                  known: GiniIndex
00260    //                         MisClassificationError
00261    //                         CrossEntropy
00262    //                         SDivSqrtSPlusB
00263    // nEventsMin:      the minimum number of events in a node (leaf criteria, stop splitting)
00264    // nCuts:           the number of steps in the optimisation of the cut for a node (if < 0, then
00265    //                  step size is determined by the events)
00266    // UseFisherCuts:   use multivariate splits using the Fisher criterion
00267    // UseYesNoLeaf     decide if the classification is done simply by the node type, or the S/B
00268    //                  (from the training) in the leaf node
00269    // NodePurityLimit  the minimum purity to classify a node as a signal node (used in pruning and boosting to determine
00270    //                  misclassification error rate)
00271    // UseWeightedTrees use average classification from the trees, or have the individual trees
00272    //                  in the forest weighted (e.g. by log(boostweight) from AdaBoost)
00273    // PruneMethod      The Pruning method:
00274    //                  known: NoPruning  // switch off pruning completely
00275    //                         ExpectedError
00276    //                         CostComplexity
00277    // PruneStrength    a parameter to adjust the amount of pruning. Should be large enough such that overtraining is avoided.
00278    // PruneBeforeBoost flag to prune the tree before applying boosting algorithm
00279    // PruningValFraction   fraction of events used for optimizing pruning (only if PruneStrength < 0, i.e. automatic pruning)
00280    // IgnoreNegWeightsInTraining  Ignore negative weight events in the training.
00281    // NNodesMax        maximum number of nodes allowed in the tree; once it is reached, splitting stops.
00282    // MaxDepth         maximum depth of the decision tree allowed before further splitting is stopped
00283 
00284    DeclareOptionRef(fNTrees, "NTrees", "Number of trees in the forest");
00285    DeclareOptionRef(fRenormByClass=kFALSE,"RenormByClass","individually re-normalize each event class to the original size after boosting");
00286    DeclareOptionRef(fBoostType, "BoostType", "Boosting type for the trees in the forest");
00287    AddPreDefVal(TString("AdaBoost"));
00288    AddPreDefVal(TString("Bagging"));
00289    AddPreDefVal(TString("RegBoost"));
00290    AddPreDefVal(TString("AdaBoostR2"));
00291    AddPreDefVal(TString("Grad"));
00292    if (DoRegression()) {
00293       fBoostType = "AdaBoostR2";
00294    }else{
00295       fBoostType = "AdaBoost";
00296    }
00297    DeclareOptionRef(fAdaBoostR2Loss="Quadratic", "AdaBoostR2Loss", "Type of Loss function in AdaBoostR2 (Linear, Quadratic or Exponential)");
00298    AddPreDefVal(TString("Linear"));
00299    AddPreDefVal(TString("Quadratic"));
00300    AddPreDefVal(TString("Exponential"));
00301 
00302    DeclareOptionRef(fBaggedGradBoost=kFALSE, "UseBaggedGrad","Use only a random subsample of all events for growing the trees in each iteration. (Only valid for GradBoost)");
00303    DeclareOptionRef(fSampleFraction=0.6, "GradBaggingFraction","Defines the fraction of events to be used in each iteration when UseBaggedGrad=kTRUE. (Only valid for GradBoost)");
00304    DeclareOptionRef(fShrinkage=1.0, "Shrinkage", "Learning rate for GradBoost algorithm");
00305    DeclareOptionRef(fAdaBoostBeta=1.0, "AdaBoostBeta", "Parameter for AdaBoost algorithm");
00306    DeclareOptionRef(fRandomisedTrees,"UseRandomisedTrees","Choose at each node splitting a random set of variables");
00307    DeclareOptionRef(fUseNvars,"UseNvars","Number of variables used if randomised tree option is chosen");
00308    DeclareOptionRef(fUsePoissonNvars,"UsePoissonNvars", "use *UseNvars* not as a fixed number but as the mean of a Poisson distribution in each split");
00309    DeclareOptionRef(fUseNTrainEvents,"UseNTrainEvents","number of randomly picked training events used in randomised (and bagged) trees");
00310 
00311    DeclareOptionRef(fUseWeightedTrees=kTRUE, "UseWeightedTrees",
00312                     "Use weighted trees or simple average in classification from the forest");
00313    DeclareOptionRef(fUseYesNoLeaf=kTRUE, "UseYesNoLeaf",
00314                     "Use Sig or Bkg categories, or the purity=S/(S+B) as classification of the leaf node");
00315    if (DoRegression()) {
00316       fUseYesNoLeaf = kFALSE;
00317    }
00318 
00319 
00320    DeclareOptionRef(fNodePurityLimit=0.5, "NodePurityLimit", "In boosting/pruning, nodes with purity > NodePurityLimit are signal; background otherwise.");
00321    DeclareOptionRef(fSepTypeS, "SeparationType", "Separation criterion for node splitting");
00322    AddPreDefVal(TString("CrossEntropy"));
00323    AddPreDefVal(TString("GiniIndex"));
00324    AddPreDefVal(TString("GiniIndexWithLaplace"));
00325    AddPreDefVal(TString("MisClassificationError"));
00326    AddPreDefVal(TString("SDivSqrtSPlusB"));
00327    AddPreDefVal(TString("RegressionVariance"));
00328    if (DoRegression()) {
00329       fSepTypeS = "RegressionVariance";
00330    }else{
00331       fSepTypeS = "GiniIndex";
00332    }
00333    DeclareOptionRef(fNodeMinEvents, "nEventsMin", "Minimum number of events required in a leaf node (default Classification: max(40, N_train/(Nvar^2)/10) ) Regression: 10");
00334    DeclareOptionRef(fNCuts, "nCuts", "Number of steps during node cut optimisation");
00335    DeclareOptionRef(fUseFisherCuts=kFALSE, "UseFisherCuts", "use multivariate splits using the Fisher criterion");
00336    DeclareOptionRef(fMinLinCorrForFisher=.8,"MinLinCorrForFisher", "the minimum linear correlation between two variables demanded for use in the Fisher criterion in node splitting");
00337    DeclareOptionRef(fUseExclusiveVars=kFALSE,"UseExclusiveVars","variables already used in the Fisher criterion are no longer analysed individually for node splitting");
00338 
00339    DeclareOptionRef(fPruneStrength, "PruneStrength", "Pruning strength");
00340    DeclareOptionRef(fPruneMethodS, "PruneMethod", "Method used for pruning (removal) of statistically insignificant branches");
00341    AddPreDefVal(TString("NoPruning"));
00342    AddPreDefVal(TString("ExpectedError"));
00343    AddPreDefVal(TString("CostComplexity"));
00344    DeclareOptionRef(fPruneBeforeBoost=kFALSE, "PruneBeforeBoost", "Flag to prune the tree before applying boosting algorithm");
00345    DeclareOptionRef(fFValidationEvents=0.5, "PruningValFraction", "Fraction of events to use for optimizing automatic pruning.");
00346    DeclareOptionRef(fNNodesMax=100000,"NNodesMax","Max number of nodes in tree");
00347    if (DoRegression()) {
00348       DeclareOptionRef(fMaxDepth=50,"MaxDepth","Max depth of the decision tree allowed");
00349    }else{
00350       DeclareOptionRef(fMaxDepth=3,"MaxDepth","Max depth of the decision tree allowed");
00351    }
00352 }
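
// Illustrative usage sketch (added by the editor, not part of the original source): a typical
// way to book this method from a ROOT macro, combining a subset of the options declared above
// into a ':'-separated option string. File and job names are placeholders.
//
//    TFile* outputFile = TFile::Open("TMVA.root", "RECREATE");
//    TMVA::Factory* factory = new TMVA::Factory("TMVAClassification", outputFile, "");
//    // ... AddVariable / AddSignalTree / AddBackgroundTree / PrepareTrainingAndTestTree ...
//    factory->BookMethod(TMVA::Types::kBDT, "BDT",
//                        "NTrees=400:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=1.0:"
//                        "SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning");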
00353 
00354 void TMVA::MethodBDT::DeclareCompatibilityOptions() {
00355    MethodBase::DeclareCompatibilityOptions();
00356    DeclareOptionRef(fSampleSizeFraction=1.0,"SampleSizeFraction","Relative size of bagged event sample to original size of the data sample" );
00357    DeclareOptionRef(fNoNegWeightsInTraining,"NoNegWeightsInTraining","Ignore negative event weights in the training process" );
00358 }
00359 
00360 
00361 
00362 
00363 //_______________________________________________________________________
00364 void TMVA::MethodBDT::ProcessOptions()
00365 {
00366    // the option string is decoded, for available options see "DeclareOptions"
00367 
00368    fSepTypeS.ToLower();
00369    if      (fSepTypeS == "misclassificationerror") fSepType = new MisClassificationError();
00370    else if (fSepTypeS == "giniindex")              fSepType = new GiniIndex();
00371    else if (fSepTypeS == "giniindexwithlaplace")   fSepType = new GiniIndexWithLaplace();
00372    else if (fSepTypeS == "crossentropy")           fSepType = new CrossEntropy();
00373    else if (fSepTypeS == "sdivsqrtsplusb")         fSepType = new SdivSqrtSplusB();
00374    else if (fSepTypeS == "regressionvariance")     fSepType = NULL;
00375    else {
00376       Log() << kINFO << GetOptions() << Endl;
00377       Log() << kFATAL << "<ProcessOptions> unknown Separation Index option " << fSepTypeS << " called" << Endl;
00378    }
00379 
00380    fPruneMethodS.ToLower();
00381    if      (fPruneMethodS == "expectederror")  fPruneMethod = DecisionTree::kExpectedErrorPruning;
00382    else if (fPruneMethodS == "costcomplexity") fPruneMethod = DecisionTree::kCostComplexityPruning;
00383    else if (fPruneMethodS == "nopruning")      fPruneMethod = DecisionTree::kNoPruning;
00384    else {
00385       Log() << kINFO << GetOptions() << Endl;
00386       Log() << kFATAL << "<ProcessOptions> unknown PruneMethod " << fPruneMethodS << " option called" << Endl;
00387    }
00388    if (fPruneStrength < 0 && (fPruneMethod != DecisionTree::kNoPruning) && fBoostType!="Grad") fAutomatic = kTRUE;
00389    else fAutomatic = kFALSE;
00390    if (fAutomatic && fPruneMethod==DecisionTree::kExpectedErrorPruning){
00391       Log() << kFATAL 
00392             <<  "Sorry autmoatic pruning strength determination is not implemented yet for ExpectedErrorPruning" << Endl;
00393    }
00394    fAdaBoostR2Loss.ToLower();
00395    
00396    if (fBoostType!="Grad") fBaggedGradBoost=kFALSE;
00397    else fPruneMethod = DecisionTree::kNoPruning;
00398    if (fFValidationEvents < 0.0) fFValidationEvents = 0.0;
00399    if (fAutomatic && fFValidationEvents > 0.5) {
00400       Log() << kWARNING << "You have chosen to use more than half of your training sample "
00401             << "to optimize the automatic pruning algorithm. This is probably wasteful "
00402             << "and your overall results will be degraded. Are you sure you want this?"
00403             << Endl;
00404    }
00405 
00406 
00407    if (this->Data()->HasNegativeEventWeights()){
00408       Log() << kINFO << " You are using a Monte Carlo sample that also has negative weights. "
00409             << "That should in principle be fine as long as on average you end up with "
00410             << "something positive. For this you have to make sure that the minimal number "
00411             << "of (un-weighted) events demanded for a tree node (currently you use: nEventsMin="
00412             <<fNodeMinEvents<<", you can set this via the BDT option string when booking the "
00413             << "classifier) is large enough to allow for reasonable averaging!!! "
00414             << " If this does not help.. maybe you want to try the option: IgnoreNegWeightsInTraining  "
00415             << "which ignores events with negative weight in the training. " << Endl
00416             << Endl << "Note: You'll get a WARNING message during the training if that should ever happen" << Endl;
00417    }
00418 
00419    if (DoRegression()) {
00420       if (fUseYesNoLeaf && !IsConstructedFromWeightFile()){
00421          Log() << kWARNING << "Regression Trees do not work with fUseYesNoLeaf=TRUE --> I will set it to FALSE" << Endl;
00422          fUseYesNoLeaf = kFALSE;
00423       }
00424 
00425       if (fSepType != NULL){
00426          Log() << kWARNING << "Regression Trees do not work with a Separation type other than <RegressionVariance> --> I will use <RegressionVariance> instead" << Endl;
00427          fSepType = NULL;
00428       }
00429    }
00430    if (fRandomisedTrees){
00431       Log() << kINFO << " Randomised trees use no pruning" << Endl;
00432       fPruneMethod = DecisionTree::kNoPruning;
00433       //      fBoostType   = "Bagging";
00434    }
00435 
00436    //    if (2*fNodeMinEvents >  Data()->GetNTrainingEvents()) {
00437    //       Log() << kFATAL << "you've demanded a minimum number of events in a leaf node " 
00438    //             << " that is larger than 1/2 the total number of events in the training sample."
00439    //             << " Hence I cannot make any split at all... this will not work!" << Endl;
00440    //    }
00441    
00442    if (fNTrees==0){
00443      Log() << kERROR << " Zero Decision Trees demanded... that does not work !! "
00444            << " I set it to 1 .. just so that the program does not crash"
00445            << Endl;
00446      fNTrees = 1;
00447    }
00448 
00449 }
00450 //_______________________________________________________________________
00451 void TMVA::MethodBDT::Init( void )
00452 {
00453    // common initialisation with defaults for the BDT-Method
00454       
00455    fNTrees         = 400;
00456    if (fAnalysisType == Types::kClassification || fAnalysisType == Types::kMulticlass ) {
00457       fMaxDepth        = 3;
00458       fBoostType      = "AdaBoost";
00459       if(DataInfo().GetNClasses()!=0) //workaround for multiclass application
00460          fNodeMinEvents  = TMath::Max( Int_t(40), Int_t( Data()->GetNTrainingEvents() / (10*GetNvar()*GetNvar())) );
00461    }else {
00462       fMaxDepth = 50;
00463       fBoostType      = "AdaBoostR2";
00464       fAdaBoostR2Loss = "Quadratic";
00465       if(DataInfo().GetNClasses()!=0) //workaround for multiclass application
00466          fNodeMinEvents  = 10;
00467    }
00468 
00469    fNCuts          = 20;
00470    fPruneMethodS   = "NoPruning";
00471    fPruneMethod    = DecisionTree::kNoPruning;
00472    fPruneStrength  = 0;
00473    fAutomatic      = kFALSE;
00474    fFValidationEvents = 0.5;
00475    fRandomisedTrees = kFALSE;
00476    //   fUseNvars        =  (GetNvar()>12) ? UInt_t(GetNvar()/8) : TMath::Max(UInt_t(2),UInt_t(GetNvar()/3));
00477    fUseNvars        =  UInt_t(TMath::Sqrt(GetNvar())+0.6);
00478    fUsePoissonNvars = kTRUE;
00479    if(DataInfo().GetNClasses()!=0) //workaround for multiclass application
00480       fUseNTrainEvents = Data()->GetNTrainingEvents();
00481    fNNodesMax       = 1000000;
00482    fShrinkage       = 1.0;
00483    fSumOfWeights    = 0.0;
00484 
00485    // reference cut value to distinguish signal-like from background-like events
00486    SetSignalReferenceCut( 0 );
00487 
00488 }
00489 
00490 
00491 //_______________________________________________________________________
00492 void TMVA::MethodBDT::Reset( void )
00493 {
00494    // reset the method, as if it had just been instantiated (forget all training etc.)
00495    
00496    // I keep the BDT EventSample and its Validation sample (eventually they should all
00497    // disappear and the DataSet samples should be used directly)
00498    
00499    // remove all the trees 
00500    for (UInt_t i=0; i<fForest.size();           i++) delete fForest[i];
00501    fForest.clear();
00502 
00503    fBoostWeights.clear();
00504    if (fMonitorNtuple) fMonitorNtuple->Delete(); fMonitorNtuple=NULL;
00505    fVariableImportance.clear();
00506    fResiduals.clear();
00507    // now done in "InitEventSample" which is called in "Train"
00508    // reset all previously stored/accumulated BOOST weights in the event sample
00509    //for (UInt_t iev=0; iev<fEventSample.size(); iev++) fEventSample[iev]->SetBoostWeight(1.);
00510    if (Data()) Data()->DeleteResults(GetMethodName(), Types::kTraining, GetAnalysisType());
00511    Log() << kDEBUG << " successfully(?) reset the method " << Endl;
00512 }
00513 
00514 
00515 //_______________________________________________________________________
00516 TMVA::MethodBDT::~MethodBDT( void )
00517 {
00518    //destructor
00519    for (UInt_t i=0; i<fEventSample.size();      i++) delete fEventSample[i];
00520    for (UInt_t i=0; i<fValidationSample.size(); i++) delete fValidationSample[i];
00521    for (UInt_t i=0; i<fForest.size();           i++) delete fForest[i];
00522 }
00523 
00524 //_______________________________________________________________________
00525 void TMVA::MethodBDT::InitEventSample( void )
00526 {
00527    // Write all Events from the Tree into a vector of Events that can be
00528    // more easily manipulated. This method should never be called without an
00529    // existing trainingTree, as the vector of events is built from the ROOT training tree
00530    if (!HasTrainingTree()) Log() << kFATAL << "<Init> Data().TrainingTree() is zero pointer" << Endl;
00531 
00532    if (fEventSample.size() > 0) { // do not re-initialise the event sample, just set all boostweights to 1. as if it were untouched
00533       // reset all previously stored/accumulated BOOST weights in the event sample
00534       for (UInt_t iev=0; iev<fEventSample.size(); iev++) fEventSample[iev]->SetBoostWeight(1.);
00535    } else {
00536 
00537       UInt_t nevents = Data()->GetNTrainingEvents();
00538       Bool_t first=kTRUE;
00539       
00540       for (UInt_t ievt=0; ievt<nevents; ievt++) {
00541          
00542          Event* event = new Event( *GetTrainingEvent(ievt) );
00543          
00544          if (!IgnoreEventsWithNegWeightsInTraining() || event->GetWeight() > 0) {
00545             if (first && event->GetWeight() < 0) {
00546                first = kFALSE;
00547                Log() << kWARNING << "Events with negative event weights are USED during "
00548                      << "the BDT training. This might cause problems with small node sizes " 
00549                      << "or with the boosting. Please remove negative events from training "
00550                      << "using the option *IgnoreEventsWithNegWeightsInTraining* in case you "
00551                      << "observe problems with the boosting"
00552                      << Endl;
00553             }
00554             // if fAutomatic == true you need a validation sample to optimize pruning
00555             if (fAutomatic) {
00556                Double_t modulo = 1.0/(fFValidationEvents);
00557                Int_t   imodulo = static_cast<Int_t>( fmod(modulo,1.0) > 0.5 ? ceil(modulo) : floor(modulo) );
00558                if (ievt % imodulo == 0) fValidationSample.push_back( event );
00559                else                     fEventSample.push_back( event );
00560             }
00561             else {
00562                fEventSample.push_back(event);
00563             }
00564          } else {
00565             delete event;
00566          }
00567          if (fAutomatic) {
00568             Log() << kINFO << "<InitEventSample> Internally I use " << fEventSample.size()
00569                   << " for Training  and " << fValidationSample.size()
00570                   << " for Pruning Validation (" << ((Float_t)fValidationSample.size())/((Float_t)fEventSample.size()+fValidationSample.size())*100.0
00571                   << "% of training used for validation)" << Endl;
00572          }
00573       }
00574    }
00575 }
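
// Worked example (added for clarification): with the default PruningValFraction=0.5 the code
// above computes modulo = 1/0.5 = 2, hence imodulo = 2 and every second event (ievt % 2 == 0)
// is put into fValidationSample while the rest fills fEventSample; with PruningValFraction=0.2
// one obtains imodulo = 5, i.e. every fifth event is kept for pruning validation.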
00576 
00577 //_______________________________________________________________________
00578 std::map<TString,Double_t>  TMVA::MethodBDT::OptimizeTuningParameters(TString fomType, TString fitType)
00579 {
00580    // call the Optimizer with the set of parameters and ranges that
00581    // are meant to be tuned.
00582 
00583    // fill all the tuning parameters that should be optimized into a map:
00584    std::map<TString,TMVA::Interval> tuneParameters;
00585    std::map<TString,Double_t> tunedParameters;
00586 
00587    // note: the 3rd parameter in the interval is the "number of bins", NOT the stepsize !!
00588    //       the actual VALUES (at least for the scan, presumably also in the GA) are always
00589    //       read from the middle of the bins. Hence the choice of intervals, e.g. for
00590    //       MaxDepth, is made such that the bin centres are nice integer values!!!
00591 
00592    // find some reasonable ranges for the optimisation of NodeMinEvents:
00593    
00594    Int_t N  = Int_t( Data()->GetNEvtSigTrain()) ;            
00595    Int_t min  = TMath::Max( 20,    ( ( N/10000 - (N/10000)%10)  ) );
00596    Int_t max  = TMath::Max( min*10, TMath::Min( 10000, ( ( N/10    - (N/10)   %100) ) ) );
00597 
00598    tuneParameters.insert(std::pair<TString,Interval>("MaxDepth",       Interval(1,10,10)));    // stepsize 1
00599    tuneParameters.insert(std::pair<TString,Interval>("NodeMinEvents",  Interval(min,max,10))); // 
00600    tuneParameters.insert(std::pair<TString,Interval>("NTrees",         Interval(50,1000,20))); //  stepsize 50
00601    // tuneParameters.insert(std::pair<TString,Interval>("NodePurityLimit",Interval(.4,.6,3)));   // stepsize .1
00602    // tuneParameters.insert(std::pair<TString,Interval>("AdaBoostBeta",   Interval(.5,1.50,10)));   //== stepsize .1
00603 
00604    Log() << kINFO << "Automatic optimisation of tuning parameters in BDT uses:" << Endl;
00605    
00606    std::map<TString,TMVA::Interval>::iterator it;
00607    
00608    for (it=tuneParameters.begin(); it!=tuneParameters.end();it++) {
00609       Log() << kINFO << it->first 
00610             << " in range from: " << it->second.GetMin()
00611             << " to: " << it->second.GetMax()
00612             << " in : " << it->second.GetNbins()  << " steps"
00613             << Endl;
00614    }
00615    Log() << kINFO << " using the options: " << fomType << " and " << fitType << Endl;
00616    OptimizeConfigParameters optimize(this, tuneParameters, fomType, fitType);
00617    tunedParameters=optimize.optimize();
00618 
00619    return tunedParameters;
00620 
00621 }
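
// Illustrative usage sketch (added by the editor, not part of the original source): how the
// two methods above and below could be combined once a MethodBDT pointer "bdt" is available.
// The figure-of-merit and fit-type strings are example values and must correspond to what
// OptimizeConfigParameters actually supports.
//
//    std::map<TString, Double_t> tuned = bdt->OptimizeTuningParameters("ROCIntegral", "Scan");
//    bdt->SetTuneParameters(tuned);   // applies the tuned MaxDepth, NodeMinEvents, NTrees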
00622 
00623 //_______________________________________________________________________
00624 void TMVA::MethodBDT::SetTuneParameters(std::map<TString,Double_t> tuneParameters)
00625 {
00626    // set the tuning parameters according to the argument
00627 
00628    std::map<TString,Double_t>::iterator it;
00629    for(it=tuneParameters.begin(); it!= tuneParameters.end(); it++){
00630       if (it->first ==  "MaxDepth"       ) SetMaxDepth        ((Int_t)it->second);
00631       if (it->first ==  "NodeMinEvents"  ) SetNodeMinEvents   ((Int_t)it->second);
00632       if (it->first ==  "NTrees"         ) SetNTrees          ((Int_t)it->second);
00633       if (it->first ==  "NodePurityLimit") SetNodePurityLimit (it->second);
00634       if (it->first ==  "AdaBoostBeta"   ) SetAdaBoostBeta    (it->second);
00635    }
00636    
00637 }
00638 
00639 //_______________________________________________________________________
00640 void TMVA::MethodBDT::Train()
00641 {
00642    // BDT training
00643    TMVA::DecisionTreeNode::fgIsTraining=true;
00644 
00645    // fill the STL Vector with the event sample
00646    // (needs to be done here and cannot be done in "init" as the options need to be 
00647    // known). 
00648    InitEventSample();
00649 
00650    if (fNTrees==0){
00651      Log() << kERROR << " Zero Decision Trees demanded... that does not work !! "
00652            << " I set it to 1 .. just so that the program does not crash"
00653            << Endl;
00654      fNTrees = 1;
00655    }
00656 
00657    // HHV (this has been here for a long time, but it is not really clear why we cannot handle
00658    // normalized variables in BDTs... todo)
00659    if (IsNormalised()) Log() << kFATAL << "\"Normalise\" option cannot be used with BDT; "
00660                              << "please remove the option from the configuration string, or "
00661                              << "use \"!Normalise\""
00662                              << Endl;
00663 
00664    Log() << kINFO << "Training "<< fNTrees << " Decision Trees ... patience please" << Endl;
00665 
00666    Log() << kDEBUG << "Training with maximal depth = " <<fMaxDepth 
00667          << ", NodeMinEvents=" << fNodeMinEvents
00668          << ", NTrees="<<fNTrees
00669          << ", NodePurityLimit="<<fNodePurityLimit
00670          << ", AdaBoostBeta="<<fAdaBoostBeta
00671          << Endl;
00672 
00673    // weights applied in boosting
00674    Int_t nBins;
00675    Double_t xMin,xMax;
00676    TString hname = "AdaBoost weight distribution";
00677 
00678    nBins= 100;
00679    xMin = 0;
00680    xMax = 30;
00681 
00682    if (DoRegression()) {
00683       nBins= 100;
00684       xMin = 0;
00685       xMax = 1;
00686       hname="Boost event weights distribution";
00687    }
00688 
00689    // book monitoring histograms (for AdaBoost only)   
00690 
00691    TH1* h = new TH1F("BoostWeight",hname,nBins,xMin,xMax);
00692    TH1* nodesBeforePruningVsTree = new TH1I("NodesBeforePruning","nodes before pruning",fNTrees,0,fNTrees);
00693    TH1* nodesAfterPruningVsTree = new TH1I("NodesAfterPruning","nodes after pruning",fNTrees,0,fNTrees);
00694 
00695    if(!DoMulticlass()){
00696       Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());
00697 
00698       h->SetXTitle("boost weight");
00699       results->Store(h, "BoostWeights");
00700       
00701       // weights applied in boosting vs tree number
00702       h = new TH1F("BoostWeightVsTree","Boost weights vs tree",fNTrees,0,fNTrees);
00703       h->SetXTitle("#tree");
00704       h->SetYTitle("boost weight");
00705       results->Store(h, "BoostWeightsVsTree");
00706       
00707       // error fraction vs tree number
00708       h = new TH1F("ErrFractHist","error fraction vs tree number",fNTrees,0,fNTrees);
00709       h->SetXTitle("#tree");
00710       h->SetYTitle("error fraction");
00711       results->Store(h, "ErrorFrac");
00712       
00713       // nNodesBeforePruning vs tree number
00714       nodesBeforePruningVsTree->SetXTitle("#tree");
00715       nodesBeforePruningVsTree->SetYTitle("#tree nodes");
00716       results->Store(nodesBeforePruningVsTree);
00717       
00718       // nNodesAfterPruning vs tree number
00719       nodesAfterPruningVsTree->SetXTitle("#tree");
00720       nodesAfterPruningVsTree->SetYTitle("#tree nodes");
00721       results->Store(nodesAfterPruningVsTree);
00722 
00723    }
00724    
00725    fMonitorNtuple= new TTree("MonitorNtuple","BDT variables");
00726    fMonitorNtuple->Branch("iTree",&fITree,"iTree/I");
00727    fMonitorNtuple->Branch("boostWeight",&fBoostWeight,"boostWeight/D");
00728    fMonitorNtuple->Branch("errorFraction",&fErrorFraction,"errorFraction/D");
00729 
00730    Timer timer( fNTrees, GetName() );
00731    Int_t nNodesBeforePruningCount = 0;
00732    Int_t nNodesAfterPruningCount = 0;
00733 
00734    Int_t nNodesBeforePruning = 0;
00735    Int_t nNodesAfterPruning = 0;
00736 
00737    if(fBoostType=="Grad"){
00738       InitGradBoost(fEventSample);
00739    }
00740 
00741    for (int itree=0; itree<fNTrees; itree++) {
00742       timer.DrawProgressBar( itree );
00743       if(DoMulticlass()){
00744          if (fBoostType!="Grad"){
00745             Log() << kFATAL << "Multiclass is currently only supported by gradient boost. "
00746                   << "Please change boost option accordingly (GradBoost)."
00747                   << Endl;
00748          }
00749          UInt_t nClasses = DataInfo().GetNClasses();
00750          for (UInt_t i=0;i<nClasses;i++){
00751             fForest.push_back( new DecisionTree( fSepType, fNodeMinEvents, fNCuts, i,
00752                                                  fRandomisedTrees, fUseNvars, fUsePoissonNvars, fNNodesMax, fMaxDepth,
00753                                                  itree*nClasses+i, fNodePurityLimit, itree*nClasses+i));
00754             if (fUseFisherCuts) {
00755                fForest.back()->SetUseFisherCuts();
00756                fForest.back()->SetMinLinCorrForFisher(fMinLinCorrForFisher); 
00757                fForest.back()->SetUseExclusiveVars(fUseExclusiveVars); 
00758             }
00759             // the minimum linear correlation between two variables demanded for use in the Fisher criterion in node splitting
00760 
00761             if (fBaggedGradBoost){
00762                nNodesBeforePruning = fForest.back()->BuildTree(fSubSample);
00763                fBoostWeights.push_back(this->Boost(fSubSample, fForest.back(), itree, i));
00764             }
00765             else{
00766                nNodesBeforePruning = fForest.back()->BuildTree(fEventSample);  
00767                fBoostWeights.push_back(this->Boost(fEventSample, fForest.back(), itree, i));
00768             }
00769          }
00770       }
00771       else{
00772          
00773          fForest.push_back( new DecisionTree( fSepType, fNodeMinEvents, fNCuts, 0,
00774                                               fRandomisedTrees, fUseNvars, fUsePoissonNvars, fNNodesMax, fMaxDepth,
00775                                               itree, fNodePurityLimit, itree));
00776          if (fUseFisherCuts) {
00777             fForest.back()->SetUseFisherCuts();
00778             fForest.back()->SetMinLinCorrForFisher(fMinLinCorrForFisher); 
00779             fForest.back()->SetUseExclusiveVars(fUseExclusiveVars); 
00780          }
00781          if (fBaggedGradBoost) nNodesBeforePruning = fForest.back()->BuildTree(fSubSample);
00782          else                  nNodesBeforePruning = fForest.back()->BuildTree(fEventSample);
00783          
00784          if (fBoostType!="Grad")
00785             if (fUseYesNoLeaf && !DoRegression() ){ // remove leaf nodes where both daughter nodes are of same type
00786                nNodesBeforePruning = fForest.back()->CleanTree();
00787             }
00788          nNodesBeforePruningCount += nNodesBeforePruning;
00789          nodesBeforePruningVsTree->SetBinContent(itree+1,nNodesBeforePruning);
00790          
00791          fForest.back()->SetPruneMethod(fPruneMethod); // set the pruning method for the tree
00792          fForest.back()->SetPruneStrength(fPruneStrength); // set the strength parameter
00793          
00794          std::vector<Event*> * validationSample = NULL;
00795          if(fAutomatic) validationSample = &fValidationSample;
00796          
00797          if(fBoostType=="Grad"){
00798             if(fBaggedGradBoost)
00799                fBoostWeights.push_back(this->Boost(fSubSample, fForest.back(), itree));
00800             else
00801                fBoostWeights.push_back(this->Boost(fEventSample, fForest.back(), itree));
00802          }
00803          else {
00804             if(!fPruneBeforeBoost) { // only prune after boosting
00805                fBoostWeights.push_back( this->Boost(fEventSample, fForest.back(), itree) );
00806                // if fAutomatic == true, pruneStrength will be the optimal pruning strength
00807                // determined by the pruning algorithm; otherwise, it is simply the strength parameter
00808                // set by the user
00809                fForest.back()->PruneTree(validationSample);
00810             }
00811             else { // prune first, then apply a boosting cycle
00812                fForest.back()->PruneTree(validationSample);
00813                fBoostWeights.push_back( this->Boost(fEventSample, fForest.back(), itree) );
00814             }
00815             
00816             if (fUseYesNoLeaf && !DoRegression() ){ // remove leaf nodes where both daughter nodes are of same type
00817                fForest.back()->CleanTree();
00818             }
00819          }
00820          nNodesAfterPruning = fForest.back()->GetNNodes();
00821          nNodesAfterPruningCount += nNodesAfterPruning;
00822          nodesAfterPruningVsTree->SetBinContent(itree+1,nNodesAfterPruning);
00823          
00824          fITree = itree;
00825          fMonitorNtuple->Fill();
00826       }
00827    }
00828 
00829    // get elapsed time
00830    Log() << kINFO << "<Train> elapsed time: " << timer.GetElapsedTime()
00831          << "                              " << Endl;
00832    if (fPruneMethod == DecisionTree::kNoPruning) {
00833       Log() << kINFO << "<Train> average number of nodes (w/o pruning) : "
00834             << nNodesBeforePruningCount/fNTrees << Endl;
00835    }
00836    else {
00837       Log() << kINFO << "<Train> average number of nodes before/after pruning : "
00838             << nNodesBeforePruningCount/fNTrees << " / "
00839             << nNodesAfterPruningCount/fNTrees
00840             << Endl;
00841    }
00842    TMVA::DecisionTreeNode::fgIsTraining=false;
00843 }
00844 
00845 //_______________________________________________________________________
00846 void TMVA::MethodBDT::GetRandomSubSample()
00847 {
00848    // fills fSubSample with fSampleFraction*NEvents random training events
00849    UInt_t nevents = fEventSample.size();
00850    
00851    if (fSubSample.size()!=0) fSubSample.clear();
00852    TRandom3 *trandom   = new TRandom3(fForest.size()+1);
00853 
00854    for (UInt_t ievt=0; ievt<nevents; ievt++) { // recreate new random subsample
00855       if(trandom->Rndm()<fSampleFraction)
00856          fSubSample.push_back(fEventSample[ievt]);
00857    }
00858 }
00859 
00860 //_______________________________________________________________________
00861 Double_t TMVA::MethodBDT::GetGradBoostMVA(TMVA::Event& e, UInt_t nTrees)
00862 {
00863    //returns MVA value: -1 for background, 1 for signal
00864    Double_t sum=0;
00865    for (UInt_t itree=0; itree<nTrees; itree++) {
00866       //loop over all trees in forest
00867       sum += fForest[itree]->CheckEvent(e,kFALSE);
00868  
00869    }
00870    return 2.0/(1.0+exp(-2.0*sum))-1; //MVA output between -1 and 1
00871 }
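
// Note (added for clarification): the transformation above is mathematically identical to
// TMath::TanH(sum), i.e. the additive GradBoost score is mapped to the open interval (-1,1):
//
//    2/(1+exp(-2*s)) - 1  ==  tanh(s)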
00872 
00873 //_______________________________________________________________________
00874 void TMVA::MethodBDT::UpdateTargets(vector<TMVA::Event*> eventSample, UInt_t cls)
00875 {
00876    //Calculate residuals for all events
00877 
00878    if(DoMulticlass()){
00879       UInt_t nClasses = DataInfo().GetNClasses();
00880       for (vector<TMVA::Event*>::iterator e=eventSample.begin(); e!=eventSample.end();e++) {
00881          fResiduals[*e].at(cls)+=fForest.back()->CheckEvent(*(*e),kFALSE);
00882          if(cls == nClasses-1){
00883             for(UInt_t i=0;i<nClasses;i++){
00884                Double_t norm = 0.0;
00885                for(UInt_t j=0;j<nClasses;j++){
00886                   if(i!=j)
00887                      norm+=exp(fResiduals[*e].at(j)-fResiduals[*e].at(i));
00888                }
00889                Double_t p_cls = 1.0/(1.0+norm);
00890                Double_t res = ((*e)->GetClass()==i)?(1.0-p_cls):(-p_cls);
00891                (*e)->SetTarget(i,res);
00892             }
00893          }
00894       }
00895    }
00896    else{
00897       for (vector<TMVA::Event*>::iterator e=eventSample.begin(); e!=eventSample.end();e++) {
00898          fResiduals[*e].at(0)+=fForest.back()->CheckEvent(*(*e),kFALSE);
00899          Double_t p_sig=1.0/(1.0+exp(-2.0*fResiduals[*e].at(0)));
00900          Double_t res = (DataInfo().IsSignal(*e)?1:0)-p_sig;
00901          (*e)->SetTarget(0,res);
00902       }
00903    }   
00904 }
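
// Background note (added for clarification): for two-class GradBoost the loss is the binomial
// log-likelihood L = -[ y*ln(p) + (1-y)*ln(1-p) ] with y in {0,1} and p = 1/(1+exp(-2*F(x)));
// its negative gradient with respect to F is proportional to (y - p), which is exactly the
// per-event target set in UpdateTargets above (and, class by class, in the multiclass case).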
00905 
00906 //_______________________________________________________________________
00907 void TMVA::MethodBDT::UpdateTargetsRegression(vector<TMVA::Event*> eventSample, Bool_t first)
00908 {
00909    //Calculate current residuals for all events and update targets for next iteration
00910    for (vector<TMVA::Event*>::iterator e=fEventSample.begin(); e!=fEventSample.end();e++) {
00911       if(!first){
00912          fWeightedResiduals[*e].first -= fForest.back()->CheckEvent(*(*e),kFALSE);
00913       }
00914       
00915    }
00916    
00917    fSumOfWeights = 0;
00918    vector< pair<Double_t, Double_t> > temp;
00919    for (vector<TMVA::Event*>::iterator e=eventSample.begin(); e!=eventSample.end();e++){
00920       temp.push_back(make_pair(fabs(fWeightedResiduals[*e].first),fWeightedResiduals[*e].second));
00921       fSumOfWeights += (*e)->GetWeight();
00922    }
00923    fTransitionPoint = GetWeightedQuantile(temp,0.7,fSumOfWeights);
00924 
00925    Int_t i=0;
00926    for (vector<TMVA::Event*>::iterator e=eventSample.begin(); e!=eventSample.end();e++) {
00927  
00928       if(temp[i].first<=fTransitionPoint)
00929          (*e)->SetTarget(0,fWeightedResiduals[*e].first);
00930       else
00931          (*e)->SetTarget(0,fTransitionPoint*(fWeightedResiduals[*e].first<0?-1.0:1.0));
00932       i++;
00933    }
00934 }
00935 
00936 //_______________________________________________________________________
00937 Double_t TMVA::MethodBDT::GetWeightedQuantile(vector<  pair<Double_t, Double_t> > vec, const Double_t quantile, const Double_t norm){
00938    //calculates the quantile of the distribution of the first pair entries weighted with the values in the second pair entries
00939    Double_t temp = 0.0;
00940    std::sort(vec.begin(), vec.end());
00941    UInt_t i = 0;
00942    while(i < vec.size() && temp <= norm*quantile){
00943       temp += vec[i].second;
00944       i++;
00945    }
00946    if (i >= vec.size()) return 0.; // protect against running past the end of the sorted vector
00947    return vec[i].first;
00948 }
00949 
00950 //_______________________________________________________________________
00951 Double_t TMVA::MethodBDT::GradBoost( vector<TMVA::Event*> eventSample, DecisionTree *dt, UInt_t cls)
00952 {
00953    //Calculate the desired response value for each region
00954    std::map<TMVA::DecisionTreeNode*,vector<Double_t> > leaves;
00955    for (vector<TMVA::Event*>::iterator e=eventSample.begin(); e!=eventSample.end();e++) {
00956       Double_t weight = (*e)->GetWeight();
00957       TMVA::DecisionTreeNode* node = dt->GetEventNode(*(*e));
00958       if ((leaves[node]).size()==0){
00959          (leaves[node]).push_back((*e)->GetTarget(cls)* weight);
00960          (leaves[node]).push_back(fabs((*e)->GetTarget(cls))*(1.0-fabs((*e)->GetTarget(cls))) * weight* weight);
00961       }
00962       else {
00963          (leaves[node])[0]+=((*e)->GetTarget(cls)* weight);
00964          (leaves[node])[1]+=fabs((*e)->GetTarget(cls))*(1.0-fabs((*e)->GetTarget(cls))) * weight* weight;
00965       }
00966    }
00967    for (std::map<TMVA::DecisionTreeNode*,vector<Double_t> >::iterator iLeave=leaves.begin();
00968         iLeave!=leaves.end();++iLeave){
00969       if ((iLeave->second)[1]<1e-30) (iLeave->second)[1]=1e-30;
00970 
00971       (iLeave->first)->SetResponse(fShrinkage/DataInfo().GetNClasses()*(iLeave->second)[0]/((iLeave->second)[1]));
00972    }
00973    
00974    //call UpdateTargets before next tree is grown
00975    if (fBaggedGradBoost){
00976       GetRandomSubSample();
00977    }
00978    DoMulticlass() ? UpdateTargets(fEventSample, cls) : UpdateTargets(fEventSample);
00979    return 1; //trees all have the same weight
00980 }
00981 
00982 //_______________________________________________________________________
00983 Double_t TMVA::MethodBDT::GradBoostRegression( vector<TMVA::Event*> eventSample, DecisionTree *dt )
00984 {
00985    // Implementation of M_TreeBoost using a Huber loss function as described by Friedman 1999
00986    std::map<TMVA::DecisionTreeNode*,Double_t > leaveWeights;
00987    std::map<TMVA::DecisionTreeNode*,vector< pair<Double_t, Double_t> > > leaves;
00988    UInt_t i =0;
00989    for (vector<TMVA::Event*>::iterator e=eventSample.begin(); e!=eventSample.end();e++) {
00990       TMVA::DecisionTreeNode* node = dt->GetEventNode(*(*e));      
00991       (leaves[node]).push_back(make_pair(fWeightedResiduals[*e].first,(*e)->GetWeight()));
00992       (leaveWeights[node]) += (*e)->GetWeight();
00993       i++;
00994    }
00995 
00996    for (std::map<TMVA::DecisionTreeNode*,vector< pair<Double_t, Double_t> > >::iterator iLeave=leaves.begin();
00997         iLeave!=leaves.end();++iLeave){
00998       Double_t shift=0,diff= 0;
00999       Double_t ResidualMedian = GetWeightedQuantile(iLeave->second,0.5,leaveWeights[iLeave->first]);
01000       for(UInt_t j=0;j<((iLeave->second).size());j++){
01001          diff = (iLeave->second)[j].first-ResidualMedian;
01002          shift+=1.0/((iLeave->second).size())*((diff<0)?-1.0:1.0)*TMath::Min(fTransitionPoint,fabs(diff));
01003       }
01004       (iLeave->first)->SetResponse(fShrinkage*(ResidualMedian+shift));          
01005    }
01006    
01007    if (fBaggedGradBoost){
01008       GetRandomSubSample();
01009       UpdateTargetsRegression(fSubSample);
01010    }
01011    else
01012       UpdateTargetsRegression(fEventSample);
01013    return 1;
01014 }
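
// Background note (added for clarification): the Huber loss used above is quadratic for small
// residuals and linear beyond the transition point delta (here the 0.7 weighted quantile of
// |residual|, stored in fTransitionPoint). Following Friedman (1999), each leaf response is
// the weighted median of its residuals plus the mean of the residual deviations clipped at
// delta, scaled by the shrinkage; this is what the loop over "leaves" above computes.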
01015 
01016 //_______________________________________________________________________
01017 void TMVA::MethodBDT::InitGradBoost( vector<TMVA::Event*> eventSample)
01018 {
01019    // initialize targets for first tree
01020    fSumOfWeights = 0;
01021    fSepType=NULL; //set fSepType to NULL (regression trees are used for both classification and regression)
01022    std::vector<std::pair<Double_t, Double_t> > temp;
01023    if(DoRegression()){
01024       for (vector<TMVA::Event*>::iterator e=eventSample.begin(); e!=eventSample.end();e++) {
01025          fWeightedResiduals[*e]= make_pair((*e)->GetTarget(0), (*e)->GetWeight());
01026          fSumOfWeights+=(*e)->GetWeight();
01027          temp.push_back(make_pair(fWeightedResiduals[*e].first,fWeightedResiduals[*e].second));
01028       }
01029       Double_t weightedMedian = GetWeightedQuantile(temp,0.5, fSumOfWeights);
01030      
01031      //Store the weighted median as a first boost weight for later use
01032       fBoostWeights.push_back(weightedMedian);
01033       std::map<TMVA::Event*, std::pair<Double_t, Double_t> >::iterator res = fWeightedResiduals.begin();
01034       for (; res!=fWeightedResiduals.end(); ++res ) {
01035          //subtract the global median from all residuals
01036          (*res).second.first -= weightedMedian;  
01037       }
01038       if (fBaggedGradBoost){
01039          GetRandomSubSample();
01040          UpdateTargetsRegression(fSubSample,kTRUE);
01041       }
01042       else
01043          UpdateTargetsRegression(fEventSample,kTRUE);
01044       return;
01045    }
01046    else if(DoMulticlass()){
01047       UInt_t nClasses = DataInfo().GetNClasses();
01048       for (vector<TMVA::Event*>::iterator e=eventSample.begin(); e!=eventSample.end();e++) {
01049          for (UInt_t i=0;i<nClasses;i++){
01050             //Calculate initial residuals, assuming equal probability for all classes
01051             Double_t r = (*e)->GetClass()==i?(1-1.0/nClasses):(-1.0/nClasses);
01052             (*e)->SetTarget(i,r);
01053             fResiduals[*e].push_back(0);   
01054          }
01055       }
01056    }
01057    else{
01058       for (vector<TMVA::Event*>::iterator e=eventSample.begin(); e!=eventSample.end();e++) {
01059          Double_t r = (DataInfo().IsSignal(*e)?1:0)-0.5; //Calculate initial residuals
01060          (*e)->SetTarget(0,r);
01061          fResiduals[*e].push_back(0);         
01062       }
01063    }
01064    if (fBaggedGradBoost) GetRandomSubSample(); 
01065 }
01066 //_______________________________________________________________________
01067 Double_t TMVA::MethodBDT::TestTreeQuality( DecisionTree *dt )
01068 {
01069    // test the tree quality, in terms of misclassification
01070 
01071    Double_t ncorrect=0, nfalse=0;
01072    for (UInt_t ievt=0; ievt<fValidationSample.size(); ievt++) {
01073       Bool_t isSignalType= (dt->CheckEvent(*(fValidationSample[ievt])) > fNodePurityLimit ) ? 1 : 0;
01074 
01075       if (isSignalType == (DataInfo().IsSignal(fValidationSample[ievt])) ) {
01076          ncorrect += fValidationSample[ievt]->GetWeight();
01077       }
01078       else{
01079          nfalse += fValidationSample[ievt]->GetWeight();
01080       }
01081    }
01082 
01083    return  ncorrect / (ncorrect + nfalse);
01084 }
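// Example: if the correctly classified validation events carry a summed weight of 80
// and the misclassified ones a summed weight of 20, TestTreeQuality returns
// 80/(80+20) = 0.8, i.e. the weighted classification accuracy on the validation sample.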
01085 
01086 //_______________________________________________________________________
01087 Double_t TMVA::MethodBDT::Boost( vector<TMVA::Event*> eventSample, DecisionTree *dt, Int_t iTree, UInt_t cls )
01088 {
01089    // apply the boosting algorithm (the algorithm is selected via the "option" given
01090    // in the constructor). The return value is the boosting weight.
01091 
01092    if      (fBoostType=="AdaBoost")    return this->AdaBoost  (eventSample, dt);
01093    else if (fBoostType=="Bagging")     return this->Bagging   (eventSample, iTree);
01094    else if (fBoostType=="RegBoost")    return this->RegBoost  (eventSample, dt);
01095    else if (fBoostType=="AdaBoostR2")  return this->AdaBoostR2(eventSample, dt);
01096    else if (fBoostType=="Grad"){
01097       if(DoRegression())
01098          return this->GradBoostRegression(eventSample, dt);
01099       else if(DoMulticlass())
01100          return this->GradBoost (eventSample, dt, cls);
01101       else
01102          return this->GradBoost (eventSample, dt);
01103    }
01104    else {
01105       Log() << kINFO << GetOptions() << Endl;
01106       Log() << kFATAL << "<Boost> unknown boost option " << fBoostType<< " called" << Endl;
01107    }
01108 
01109    return -1;
01110 }
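// Usage sketch (hypothetical option values): the branch taken above is selected via
// the booking option string, e.g.
//    factory->BookMethod( TMVA::Types::kBDT, "BDT",
//                         "NTrees=400:BoostType=AdaBoost:AdaBoostBeta=0.5" );
// with "Grad", "Bagging", "RegBoost" or "AdaBoostR2" selecting the other branches.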
01111 
01112 //_______________________________________________________________________
01113 Double_t TMVA::MethodBDT::AdaBoost( vector<TMVA::Event*> eventSample, DecisionTree *dt )
01114 {
01115    // the AdaBoost implementation.
01116    // A new training sample is generated by re-weighting
01117    // events that are misclassified by the decision tree. The weight
01118    // applied is w = (1-err)/err or, more generally,
01119    //            w = ((1-err)/err)^beta
01120    // where err is the fraction of misclassified events in the tree (err < 0.5,
01121    // assuming that the previous selection was better than random guessing)
01122    // and "beta" is a free parameter (default: beta = 1) that modifies the
01123    // strength of the boosting.
01124 
01125    Double_t err=0, sumGlobalw=0, sumGlobalwfalse=0, sumGlobalwfalse2=0;
01126 
01127    vector<Double_t> sumw; // for individually re-scaling each class
01128 
01129    Double_t maxDev=0;
01130    for (vector<TMVA::Event*>::iterator e=eventSample.begin(); e!=eventSample.end();e++) {
01131       Double_t w = (*e)->GetWeight();
01132       sumGlobalw += w;
01133       UInt_t iclass=(*e)->GetClass();
01134       if (iclass+1 > sumw.size()) sumw.resize(iclass+1,0);
01135       sumw[iclass] += w;
01136 
01137       if ( DoRegression() ) {
01138          Double_t tmpDev = TMath::Abs(dt->CheckEvent(*(*e),kFALSE) - (*e)->GetTarget(0) ); 
01139          sumGlobalwfalse += w * tmpDev;
01140          sumGlobalwfalse2 += w * tmpDev*tmpDev;
01141          if (tmpDev > maxDev) maxDev = tmpDev;
01142       }else{
01143          Bool_t isSignalType = (dt->CheckEvent(*(*e),fUseYesNoLeaf) > fNodePurityLimit );
01144 
01145          if (!(isSignalType == DataInfo().IsSignal(*e))) {
01146             sumGlobalwfalse+= w;
01147          }
01148       }
01149    }
01150    err = sumGlobalwfalse/sumGlobalw ;
01151    if ( DoRegression() ) {
01152       // compute the error according to the chosen loss type:
01153       if (fAdaBoostR2Loss=="linear"){
01154          err = sumGlobalwfalse/maxDev/sumGlobalw ;
01155       }
01156       else if (fAdaBoostR2Loss=="quadratic"){
01157          err = sumGlobalwfalse2/maxDev/maxDev/sumGlobalw ;
01158       }
01159       else if (fAdaBoostR2Loss=="exponential"){
01160          err = 0;
01161          for (vector<TMVA::Event*>::iterator e=eventSample.begin(); e!=eventSample.end();e++) {
01162             Double_t w = (*e)->GetWeight();
01163             Double_t  tmpDev = TMath::Abs(dt->CheckEvent(*(*e),kFALSE) - (*e)->GetTarget(0) ); 
01164             err += w * (1 - exp (-tmpDev/maxDev)) / sumGlobalw;
01165          }
01166          
01167       }
01168       else {
01169          Log() << kFATAL << " You have chosen an AdaBoost loss type other than linear, quadratic or exponential,"
01170                << " namely " << fAdaBoostR2Loss << ","
01171                << " which is not implemented. Perhaps a typo in the options?" << Endl;
01172       }
01173    }
01174    Double_t newSumGlobalw=0;
01175    vector<Double_t> newSumw(sumw.size(),0);
01176 
01177    Double_t boostWeight=1.;
01178    if (err >= 0.5) { // sanity check ... should never happen, as otherwise something is
01179       // odd with the assignment of the leaf nodes (reminder: the training events are
01180       // used for this determination of the error rate)
01181       if (dt->GetNNodes() == 1){
01182          Log() << kERROR << " Your tree has only one node, which cannot be boosted: after one step"
01183                << " the error rate is already 0.5."
01184                << Endl
01185                << " Please check why this happens; perhaps too many events per node were requested?"
01186                << Endl;
01187 
01188       }else{
01189          Log() << kERROR << " The error rate in the BDT boosting is > 0.5 (" << err
01190                << "). That should not happen; please check the BDT code. The error rate is"
01191                << " set to 0.5 in order to continue." << Endl;
01192       }
01193       err = 0.5;
01194    } else if (err < 0) {
01195       Log() << kERROR << " The error rate in the BDT boosting is < 0. This can happen"
01196             << " due to improper treatment of negative event weights in a Monte Carlo sample"
01197             << " (if you have an idea how to handle this better, please contact Helge.Voss@cern.ch);"
01198             << " for the time being the error rate is set to its absolute value in order to continue." << Endl;
01199       err = TMath::Abs(err);
01200    }
01201    if (fAdaBoostBeta == 1) {
01202       boostWeight = (1.-err)/err;
01203    }
01204    else {
01205       boostWeight =  TMath::Power((1.0 - err)/err, fAdaBoostBeta);
01206    }
01207 
01208    Results* results = Data()->GetResults(GetMethodName(),Types::kTraining, Types::kMaxAnalysisType);
01209 
01210    for (vector<TMVA::Event*>::iterator e=eventSample.begin(); e!=eventSample.end();e++) {
01211  
01212       if ((!( (dt->CheckEvent(*(*e),fUseYesNoLeaf) > fNodePurityLimit ) == DataInfo().IsSignal(*e))) || DoRegression()) {
01213          Double_t boostfactor = boostWeight;
01214          if (DoRegression()) boostfactor = TMath::Power(1/boostWeight,(1.-TMath::Abs(dt->CheckEvent(*(*e),kFALSE) - (*e)->GetTarget(0) )/maxDev ) );
01215          if ( (*e)->GetWeight() > 0 ){
01216             (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);
01217             // Helge change back            (*e)->ScaleBoostWeight(boostfactor);
01218             if (DoRegression()) results->GetHist("BoostWeights")->Fill(boostfactor);
01219          } else {
01220             (*e)->ScaleBoostWeight( 1. / boostfactor); // if the original event weight is negative and its influence should effectively be "increased", its absolute value is made smaller while keeping the weight negative
01221          }
01222       }
01223       newSumGlobalw+=(*e)->GetWeight();
01224       newSumw[(*e)->GetClass()] += (*e)->GetWeight();
01225    }
01226 
01227    // re-normalise the weights (independent for Signal and Background)
01228    Double_t globalNormWeight=sumGlobalw/newSumGlobalw;
01229    vector<Double_t>  normWeightByClass;
01230    for (UInt_t i=0; i<sumw.size(); i++) normWeightByClass.push_back(sumw[i]/newSumw[i]);
01231 
01232    for (vector<TMVA::Event*>::iterator e=eventSample.begin(); e!=eventSample.end();e++) {
01233       if (fRenormByClass) (*e)->ScaleBoostWeight( normWeightByClass[(*e)->GetClass()] );
01234       else                (*e)->ScaleBoostWeight( globalNormWeight );
01235    }
01236 
01237    if (!(DoRegression()))results->GetHist("BoostWeights")->Fill(boostWeight);
01238    results->GetHist("BoostWeightsVsTree")->SetBinContent(fForest.size(),boostWeight);
01239    results->GetHist("ErrorFrac")->SetBinContent(fForest.size(),err);
01240 
01241    fBoostWeight = boostWeight;
01242    fErrorFraction = err;
01243 
01244    return TMath::Log(boostWeight);
01245 }
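// Numerical sketch of the update applied above: a misclassified event with positive
// weight is rescaled by
//    w_new = w_old * ((1-err)/err)^fAdaBoostBeta
// e.g. err = 0.25 and beta = 1 triple its weight, while correctly classified events
// keep theirs; all weights are then renormalised to the previous sum, and the tree
// itself enters the forest with the weight log((1-err)/err).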
01246 
01247 //_______________________________________________________________________
01248 Double_t TMVA::MethodBDT::Bagging( vector<TMVA::Event*> eventSample, Int_t iTree )
01249 {
01250    // call it bootstrapping, re-sampling or whatever you like; in the end it is nothing
01251    // but applying "random" weights to each event.
01252    Double_t newSumw=0;
01253    Double_t newWeight;
01254    TRandom3 *trandom   = new TRandom3(iTree);
01255    Double_t eventFraction = fUseNTrainEvents/Data()->GetNTrainingEvents();
01256    for (vector<TMVA::Event*>::iterator e=eventSample.begin(); e!=eventSample.end();e++) {
01257       newWeight = trandom->PoissonD(eventFraction);
01258       (*e)->SetBoostWeight(newWeight);
01259       newSumw+=(*e)->GetBoostWeight();
01260    }
01261    Double_t normWeight =  eventSample.size() / newSumw ;
01262    for (vector<TMVA::Event*>::iterator e=eventSample.begin(); e!=eventSample.end();e++) {
01263       (*e)->SetBoostWeight( (*e)->GetBoostWeight() * normWeight );
01264       // change this backwards      (*e)->ScaleBoostWeight( normWeight );
01265    }
01266    delete trandom;
01267    return 1.;  // since random weights are applied per event, just return a constant boost weight of 1
01268 }
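// Sketch of the resampling above: each event weight is drawn from a Poisson
// distribution with mean nu = fUseNTrainEvents / N_train (nu = 1 for a full-size
// resample), which on average mimics a bootstrap sample of fUseNTrainEvents events;
// the weights are then rescaled so that their sum equals the original sample size.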
01269 
01270 //_______________________________________________________________________
01271 Double_t TMVA::MethodBDT::RegBoost( vector<TMVA::Event*> /* eventSample */, DecisionTree* /* dt */ )
01272 {
01273    // a special boosting only for regression;
01274    // not implemented yet.
01275 
01276    return 1;
01277 }
01278 
01279 //_______________________________________________________________________
01280 Double_t TMVA::MethodBDT::AdaBoostR2( vector<TMVA::Event*> eventSample, DecisionTree *dt )
01281 {
01282    // adaptation of AdaBoost to regression problems (see H. Drucker 1997)
01283 
01284    if ( !DoRegression() ) Log() << kFATAL << "Somehow you chose a regression boost method for a classification job" << Endl;
01285 
01286    Double_t err=0, sumw=0, sumwfalse=0, sumwfalse2=0;
01287    Double_t maxDev=0;
01288    for (vector<TMVA::Event*>::iterator e=eventSample.begin(); e!=eventSample.end();e++) {
01289       Double_t w = (*e)->GetWeight();
01290       sumw += w;
01291 
01292       Double_t  tmpDev = TMath::Abs(dt->CheckEvent(*(*e),kFALSE) - (*e)->GetTarget(0) );
01293       sumwfalse  += w * tmpDev;
01294       sumwfalse2 += w * tmpDev*tmpDev;
01295       if (tmpDev > maxDev) maxDev = tmpDev;
01296    }
01297 
01298    // compute the error according to the chosen loss type:
01299    if (fAdaBoostR2Loss=="linear"){
01300       err = sumwfalse/maxDev/sumw ;
01301    }
01302    else if (fAdaBoostR2Loss=="quadratic"){
01303       err = sumwfalse2/maxDev/maxDev/sumw ;
01304    }
01305    else if (fAdaBoostR2Loss=="exponential"){
01306       err = 0;
01307       for (vector<TMVA::Event*>::iterator e=eventSample.begin(); e!=eventSample.end();e++) {
01308          Double_t w = (*e)->GetWeight();
01309          Double_t  tmpDev = TMath::Abs(dt->CheckEvent(*(*e),kFALSE) - (*e)->GetTarget(0) ); 
01310          err += w * (1 - exp (-tmpDev/maxDev)) / sumw;
01311       }
01312       
01313    }
01314    else {
01315       Log() << kFATAL << " You have chosen an AdaBoost loss type other than linear, quadratic or exponential,"
01316             << " namely " << fAdaBoostR2Loss << ","
01317             << " which is not implemented. Perhaps a typo in the options?" << Endl;
01318    }
01319 
01320 
01321    if (err >= 0.5) { // sanity check ... should never happen, as otherwise something is
01322       // odd with the assignment of the leaf nodes (reminder: the training events are
01323       // used for this determination of the error rate)
01324       if (dt->GetNNodes() == 1){
01325          Log() << kERROR << " Your tree has only one node, which cannot be boosted: after one step"
01326                << " the error rate is already 0.5."
01327                << Endl
01328                << " Please check why this happens; perhaps too many events per node were requested?"
01329                << Endl;
01330 
01331       }else{
01332          Log() << kERROR << " The error rate in the BDT boosting is > 0.5 (" << err
01333                << "). That should not happen, although it is possible for regression trees and"
01334                << " should stop the boosting; please check the BDT code. The error rate is"
01335                << " set to 0.5 in order to continue." << Endl;
01336       }
01337       err = 0.5;
01338    } else if (err < 0) {
01339       Log() << kERROR << " The error rate in the BDT boosting is < 0. This can happen"
01340             << " due to improper treatment of negative event weights in a Monte Carlo sample"
01341             << " (if you have an idea how to handle this better, please contact Helge.Voss@cern.ch);"
01342             << " for the time being the error rate is set to its absolute value in order to continue." << Endl;
01343       err = TMath::Abs(err);
01344    }
01345 
01346    Double_t boostWeight = err / (1.-err);
01347    Double_t newSumw=0;
01348 
01349    Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, Types::kMaxAnalysisType);
01350 
01351    for (vector<TMVA::Event*>::iterator e=eventSample.begin(); e!=eventSample.end();e++) {
01352       Double_t boostfactor =  TMath::Power(boostWeight,(1.-TMath::Abs(dt->CheckEvent(*(*e),kFALSE) - (*e)->GetTarget(0) )/maxDev ) );
01353       results->GetHist("BoostWeights")->Fill(boostfactor);
01354       //      cout << "R2  " << boostfactor << "   " << boostWeight << "   " << (1.-TMath::Abs(dt->CheckEvent(*(*e),kFALSE) - (*e)->GetTarget(0) )/maxDev)  << endl;
01355       if ( (*e)->GetWeight() > 0 ){
01356          Float_t newBoostWeight = (*e)->GetBoostWeight() * boostfactor;
01357          Float_t newWeight = (*e)->GetWeight() * (*e)->GetBoostWeight() * boostfactor;
01358          if (newWeight == 0) {
01359             Log() << kINFO << "Weight=    "   <<   (*e)->GetWeight() << Endl;
01360             Log() << kINFO  << "BoostWeight= " <<   (*e)->GetBoostWeight() << Endl;
01361             Log() << kINFO  << "boostweight="<<boostWeight << "  err= " <<err << Endl; 
01362             Log() << kINFO  << "NewBoostWeight= " <<   newBoostWeight << Endl;
01363             Log() << kINFO  << "boostfactor= " <<  boostfactor << Endl;
01364             Log() << kINFO  << "maxDev     = " <<  maxDev << Endl;
01365             Log() << kINFO  << "tmpDev     = " <<  TMath::Abs(dt->CheckEvent(*(*e),kFALSE) - (*e)->GetTarget(0) ) << Endl;
01366             Log() << kINFO  << "target     = " <<  (*e)->GetTarget(0)  << Endl; 
01367             Log() << kINFO  << "estimate   = " <<  dt->CheckEvent(*(*e),kFALSE)  << Endl;
01368          }
01369          (*e)->SetBoostWeight( newBoostWeight );
01370          //         (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);
01371       } else {
01372          (*e)->SetBoostWeight( (*e)->GetBoostWeight() / boostfactor);
01373       }
01374       newSumw+=(*e)->GetWeight();
01375    }
01376 
01377    // re-normalise the weights
01378    Double_t normWeight =  sumw / newSumw;
01379    for (vector<TMVA::Event*>::iterator e=eventSample.begin(); e!=eventSample.end();e++) {
01380       //Helge    (*e)->ScaleBoostWeight( sumw/newSumw);
01381       // (*e)->ScaleBoostWeight( normWeight);
01382       (*e)->SetBoostWeight( (*e)->GetBoostWeight() * normWeight );
01383    }
01384 
01385 
01386    results->GetHist("BoostWeightsVsTree")->SetBinContent(fForest.size(),1./boostWeight);
01387    results->GetHist("ErrorFrac")->SetBinContent(fForest.size(),err);
01388 
01389    fBoostWeight = boostWeight;
01390    fErrorFraction = err;
01391 
01392    return TMath::Log(1./boostWeight);
01393 }
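// Sketch of the AdaBoostR2 update applied above: every event is rescaled by
//    boostfactor = (err/(1-err))^(1 - |y_pred - y_true|/maxDev)
// so badly predicted events (deviation close to maxDev) keep their weight while
// well predicted ones are suppressed; the tree is stored with the weight
// log((1-err)/err) = log(1/boostWeight).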
01394 
01395 //_______________________________________________________________________
01396 void TMVA::MethodBDT::AddWeightsXMLTo( void* parent ) const
01397 {
01398    // write weights to XML
01399    void* wght = gTools().AddChild(parent, "Weights");
01400    gTools().AddAttr( wght, "NTrees", fForest.size() );
01401    gTools().AddAttr( wght, "AnalysisType", fForest.back()->GetAnalysisType() );
01402 
01403    for (UInt_t i=0; i< fForest.size(); i++) {
01404       void* trxml = fForest[i]->AddXMLTo(wght);
01405       gTools().AddAttr( trxml, "boostWeight", fBoostWeights[i] );
01406       gTools().AddAttr( trxml, "itree", i );
01407    }
01408 }
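// Sketch of the XML layout written above (the element for each tree is whatever
// DecisionTree::AddXMLTo produces; the attribute values are examples only):
//    <Weights NTrees="850" AnalysisType="0">
//       <... boostWeight="0.47" itree="0"> ... </...>
//       ...
//    </Weights>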
01409 
01410 //_______________________________________________________________________
01411 void TMVA::MethodBDT::ReadWeightsFromXML(void* parent) {
01412    // reads the BDT from the xml file
01413 
01414    UInt_t i;
01415    for (i=0; i<fForest.size(); i++) delete fForest[i];
01416    fForest.clear();
01417    fBoostWeights.clear();
01418 
01419    UInt_t ntrees;
01420    UInt_t analysisType;
01421    Float_t boostWeight;
01422 
01423    gTools().ReadAttr( parent, "NTrees", ntrees );
01424    
01425    if(gTools().HasAttr(parent, "TreeType")) { // pre 4.1.0 version
01426       gTools().ReadAttr( parent, "TreeType", analysisType );
01427    } else {                                 // from 4.1.0 onwards
01428       gTools().ReadAttr( parent, "AnalysisType", analysisType );      
01429    }
01430 
01431    void* ch = gTools().GetChild(parent);
01432    i=0;
01433    while(ch) {
01434       fForest.push_back( dynamic_cast<DecisionTree*>( DecisionTree::CreateFromXML(ch, GetTrainingTMVAVersionCode()) ) );
01435       fForest.back()->SetAnalysisType(Types::EAnalysisType(analysisType));
01436       fForest.back()->SetTreeID(i++);
01437       gTools().ReadAttr(ch,"boostWeight",boostWeight);
01438       fBoostWeights.push_back(boostWeight);
01439       ch = gTools().GetNextChild(ch);
01440    }
01441 }
01442 
01443 //_______________________________________________________________________
01444 void  TMVA::MethodBDT::ReadWeightsFromStream( istream& istr )
01445 {
01446    // read the weights (BDT coefficients)
01447    TString dummy;
01448    //   Types::EAnalysisType analysisType;
01449    Int_t analysisType(0);
01450 
01451    // coverity[tainted_data_argument]
01452    istr >> dummy >> fNTrees;
01453    Log() << kINFO << "Read " << fNTrees << " Decision trees" << Endl;
01454 
01455    for (UInt_t i=0;i<fForest.size();i++) delete fForest[i];
01456    fForest.clear();
01457    fBoostWeights.clear();
01458    Int_t iTree;
01459    Double_t boostWeight;
01460    for (int i=0;i<fNTrees;i++) {
01461       istr >> dummy >> iTree >> dummy >> boostWeight;
01462       if (iTree != i) {
01463          fForest.back()->Print( cout );
01464          Log() << kFATAL << "Error while reading weight file; mismatch iTree="
01465                << iTree << " i=" << i
01466                << " dummy " << dummy
01467                << " boostweight " << boostWeight
01468                << Endl;
01469       }
01470       fForest.push_back( new DecisionTree() );
01471       fForest.back()->SetAnalysisType(Types::EAnalysisType(analysisType));
01472       fForest.back()->SetTreeID(i);
01473       fForest.back()->Read(istr, GetTrainingTMVAVersionCode());
01474       fBoostWeights.push_back(boostWeight);
01475    }
01476 }
01477 
01478 //_______________________________________________________________________
01479 Double_t TMVA::MethodBDT::GetMvaValue( Double_t* err, Double_t* errUpper ){
01480    return this->GetMvaValue( err, errUpper, 0 );
01481 }
01482 
01483 //_______________________________________________________________________
01484 Double_t TMVA::MethodBDT::GetMvaValue( Double_t* err, Double_t* errUpper, UInt_t useNTrees )
01485 {
01486    // Return the MVA value (range [-1;1]) that classifies the
01487    // event according to the majority vote from the total number of
01488    // decision trees.
01489 
01490    // cannot determine error
01491    NoErrorCalc(err, errUpper);
01492    
01493    // allow for the possibility to use less trees in the actual MVA calculation
01494    // than have been originally trained.
01495    UInt_t nTrees = fForest.size();
01496    if (useNTrees > 0 ) nTrees = useNTrees;
01497 
01498    if (fBoostType=="Grad") return GetGradBoostMVA(const_cast<TMVA::Event&>(*GetEvent()),nTrees);
01499    
01500    Double_t myMVA = 0;
01501    Double_t norm  = 0;
01502    for (UInt_t itree=0; itree<nTrees; itree++) {
01503       //
01504       if (fUseWeightedTrees) {
01505          myMVA += fBoostWeights[itree] * fForest[itree]->CheckEvent(*GetEvent(),fUseYesNoLeaf);
01506          norm  += fBoostWeights[itree];
01507       }
01508       else {
01509          myMVA += fForest[itree]->CheckEvent(*GetEvent(),fUseYesNoLeaf);
01510          norm  += 1;
01511       }
01512    }
01513    return ( norm > std::numeric_limits<double>::epsilon() ) ? myMVA /= norm : 0 ;
01514 }
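// Sketch of the value returned above for the non-gradient boosts:
//    MVA(x) = sum_i alpha_i * t_i(x) / sum_i alpha_i
// with alpha_i = fBoostWeights[i] (or 1 if fUseWeightedTrees is off) and t_i(x) the
// per-tree response (leaf type or purity, depending on fUseYesNoLeaf). For
// BoostType=Grad the summed leaf responses are mapped to [-1,1] via
// 2/(1+exp(-2*sum)) - 1, as in the standalone code written by MakeClassSpecific.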
01515 
01516 //_______________________________________________________________________
01517 const std::vector<Float_t>& TMVA::MethodBDT::GetMulticlassValues()
01518 {
01519    // get the multiclass MVA response for the BDT classifier
01520 
01521    const TMVA::Event& e = *GetEvent();
01522    if (fMulticlassReturnVal == NULL) fMulticlassReturnVal = new std::vector<Float_t>();
01523    fMulticlassReturnVal->clear();
01524 
01525    std::vector<double> temp;
01526 
01527    UInt_t nClasses = DataInfo().GetNClasses();
01528    for(UInt_t iClass=0; iClass<nClasses; iClass++){
01529       temp.push_back(0.0);
01530       for(UInt_t itree = iClass; itree<fForest.size(); itree+=nClasses){
01531          temp[iClass] += fForest[itree]->CheckEvent(e,kFALSE);
01532       }
01533    }    
01534 
01535    for(UInt_t iClass=0; iClass<nClasses; iClass++){
01536       Double_t norm = 0.0;
01537       for(UInt_t j=0;j<nClasses;j++){
01538          if(iClass!=j)
01539             norm+=exp(temp[j]-temp[iClass]);
01540       }
01541       (*fMulticlassReturnVal).push_back(1.0/(1.0+norm));
01542    }
01543 
01544    
01545    return *fMulticlassReturnVal;
01546 }
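// Note: the normalisation above is algebraically the softmax,
//    p_k = exp(F_k) / sum_j exp(F_j) = 1 / (1 + sum_{j!=k} exp(F_j - F_k)),
// where F_k is the summed response of all trees trained for class k.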
01547 
01548 
01549 
01550 
01551 //_______________________________________________________________________
01552 const std::vector<Float_t> & TMVA::MethodBDT::GetRegressionValues()
01553 {
01554    // get the regression value generated by the BDTs
01555 
01556 
01557    if (fRegressionReturnVal == NULL) fRegressionReturnVal = new std::vector<Float_t>();
01558    fRegressionReturnVal->clear();
01559 
01560    const Event * ev = GetEvent();
01561    Event * evT = new Event(*ev);
01562 
01563    Double_t myMVA = 0;
01564    Double_t norm  = 0;
01565    if (fBoostType=="AdaBoostR2") {
01566       // rather than using the weighted average of the tree responses in the forest,
01567       // H. Drucker (1997) proposed to use the "weighted median":
01568       //
01569       // sort all individual tree responses according to the prediction value
01570       //   (keeping the association to their tree weight),
01571       // then sum up the associated weights (starting from the tree that yielded
01572       //   the smallest response) up to the tree "t" at which the accumulated tree
01573       //   weights exceed half of the sum of all tree weights,
01574       // and choose as the response of the forest the one that belongs to this "t".
01575 
01576       vector< Double_t > response(fForest.size());
01577       vector< Double_t > weight(fForest.size());
01578       Double_t           totalSumOfWeights = 0;
01579 
01580       for (UInt_t itree=0; itree<fForest.size(); itree++) {
01581          response[itree]    = fForest[itree]->CheckEvent(*ev,kFALSE);
01582          weight[itree]      = fBoostWeights[itree];
01583          totalSumOfWeights += fBoostWeights[itree];
01584       }
01585 
01586       vector< vector<Double_t> > vtemp;
01587       vtemp.push_back( response ); // this is the vector that will get sorted
01588       vtemp.push_back( weight ); 
01589       gTools().UsefulSortAscending( vtemp );
01590 
01591       Int_t t=0;
01592       Double_t sumOfWeights = 0;
01593       while (sumOfWeights <= totalSumOfWeights/2.) {
01594          sumOfWeights += vtemp[1][t];
01595          t++;
01596       }
01597 
01598       Double_t rVal=0;
01599       Int_t    count=0;
01600       for (UInt_t i= TMath::Max(UInt_t(0),UInt_t(t-(fForest.size()/6)-0.5)); 
01601            i< TMath::Min(UInt_t(fForest.size()),UInt_t(t+(fForest.size()/6)+0.5)); i++) {
01602          count++;
01603          rVal+=vtemp[0][i];
01604       }
01605 //      fRegressionReturnVal->push_back( rVal/Double_t(count));
01606       evT->SetTarget(0, rVal/Double_t(count) );
01607    }
01608    else if(fBoostType=="Grad"){
01609       for (UInt_t itree=0; itree<fForest.size(); itree++) {
01610          myMVA += fForest[itree]->CheckEvent(*ev,kFALSE);
01611       }
01612 //      fRegressionReturnVal->push_back( myMVA+fBoostWeights[0]);
01613       evT->SetTarget(0, myMVA+fBoostWeights[0] );
01614    }
01615    else{
01616       for (UInt_t itree=0; itree<fForest.size(); itree++) {
01617          //
01618          if (fUseWeightedTrees) {
01619             myMVA += fBoostWeights[itree] * fForest[itree]->CheckEvent(*ev,kFALSE);
01620             norm  += fBoostWeights[itree];
01621          }
01622          else {
01623             myMVA += fForest[itree]->CheckEvent(*ev,kFALSE);
01624             norm  += 1;
01625          }
01626       }
01627 //      fRegressionReturnVal->push_back( ( norm > std::numeric_limits<double>::epsilon() ) ? myMVA /= norm : 0 );
01628       evT->SetTarget(0, ( norm > std::numeric_limits<double>::epsilon() ) ? myMVA /= norm : 0 );
01629    }
01630 
01631 
01632 
01633    const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
01634    fRegressionReturnVal->push_back( evT2->GetTarget(0) );
01635 
01636    delete evT;
01637 
01638 
01639    return *fRegressionReturnVal;
01640 }
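// Sketch of the AdaBoostR2 forest prediction built above: the tree responses are
// sorted, the accumulated tree weights are summed until they exceed half of their
// total ("weighted median" position t), and the responses of roughly fForest.size()/3
// trees centred on t are averaged to smooth the estimate before the inverse target
// transformation is applied.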
01641 
01642 //_______________________________________________________________________
01643 void  TMVA::MethodBDT::WriteMonitoringHistosToFile( void ) const
01644 {
01645    // Here we could write some histograms created during the processing
01646    // to the output file.
01647    Log() << kINFO << "Write monitoring histograms to file: " << BaseDir()->GetPath() << Endl;
01648 
01649    //Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, Types::kMaxAnalysisType);
01650    //results->GetStorage()->Write();
01651    fMonitorNtuple->Write();
01652 }
01653 
01654 //_______________________________________________________________________
01655 vector< Double_t > TMVA::MethodBDT::GetVariableImportance()
01656 {
01657    // Return the relative variable importance, normalized such that all
01658    // variables together have an importance of 1. The importance is
01659    // evaluated as the total separation gain that this variable contributed in
01660    // the decision trees (weighted by the number of events).
01661 
01662    fVariableImportance.resize(GetNvar());
01663    Double_t  sum=0;
01664    for (int itree = 0; itree < fNTrees; itree++) {
01665       vector<Double_t> relativeImportance(fForest[itree]->GetVariableImportance());
01666       for (UInt_t i=0; i< relativeImportance.size(); i++) {
01667          fVariableImportance[i] += relativeImportance[i];
01668       }
01669    }
01670    for (UInt_t i=0; i< fVariableImportance.size(); i++) sum += fVariableImportance[i];
01671    for (UInt_t i=0; i< fVariableImportance.size(); i++) fVariableImportance[i] /= sum;
01672 
01673    return fVariableImportance;
01674 }
01675 
01676 //_______________________________________________________________________
01677 Double_t TMVA::MethodBDT::GetVariableImportance( UInt_t ivar )
01678 {
01679    // Returns the measure for the variable importance of variable "ivar"
01680    // which is later used in GetVariableImportance() to calculate the
01681    // relative variable importances.
01682 
01683    vector<Double_t> relativeImportance = this->GetVariableImportance();
01684    if (ivar < (UInt_t)relativeImportance.size()) return relativeImportance[ivar];
01685    else Log() << kFATAL << "<GetVariableImportance> ivar = " << ivar << " is out of range " << Endl;
01686 
01687    return -1;
01688 }
01689 
01690 //_______________________________________________________________________
01691 const TMVA::Ranking* TMVA::MethodBDT::CreateRanking()
01692 {
01693    // Compute ranking of input variables
01694 
01695    // create the ranking object
01696    fRanking = new Ranking( GetName(), "Variable Importance" );
01697    vector< Double_t> importance(this->GetVariableImportance());
01698 
01699    for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
01700 
01701       fRanking->AddRank( Rank( GetInputLabel(ivar), importance[ivar] ) );
01702    }
01703 
01704    return fRanking;
01705 }
01706 
01707 //_______________________________________________________________________
01708 void TMVA::MethodBDT::GetHelpMessage() const
01709 {
01710    // Get help message text
01711    //
01712    // typical length of text line:
01713    //         "|--------------------------------------------------------------|"
01714    Log() << Endl;
01715    Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
01716    Log() << Endl;
01717    Log() << "Boosted Decision Trees are a collection of individual decision" << Endl;
01718    Log() << "trees which form a multivariate classifier by (weighted) majority " << Endl;
01719    Log() << "vote of the individual trees. Consecutive decision trees are  " << Endl;
01720    Log() << "trained using the original training data set with re-weighted " << Endl;
01721    Log() << "events. By default, the AdaBoost method is employed, which gives " << Endl;
01722    Log() << "events that were misclassified in the previous tree a larger " << Endl;
01723    Log() << "weight in the training of the following tree." << Endl;
01724    Log() << Endl;
01725    Log() << "Decision trees are a sequence of binary splits of the data sample" << Endl;
01726    Log() << "using a single discriminant variable at a time. A test event " << Endl;
01727    Log() << "ending up after the sequence of left-right splits in a final " << Endl;
01728    Log() << "(\"leaf\") node is classified as either signal or background" << Endl;
01729    Log() << "depending on the majority type of training events in that node." << Endl;
01730    Log() << Endl;
01731    Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
01732    Log() << Endl;
01733    Log() << "By the nature of the binary splits performed on the individual" << Endl;
01734    Log() << "variables, decision trees do not deal well with linear correlations" << Endl;
01735    Log() << "between variables (they need to approximate the linear split in" << Endl;
01736    Log() << "the two dimensional space by a sequence of splits on the two " << Endl;
01737    Log() << "variables individually). Hence decorrelation could be useful " << Endl;
01738    Log() << "to optimise the BDT performance." << Endl;
01739    Log() << Endl;
01740    Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
01741    Log() << Endl;
01742    Log() << "The two most important parameters in the configuration are the " << Endl;
01743    Log() << "minimal number of events required in a leaf node (option " << Endl;
01744    Log() << "\"nEventsMin\") and the pruning strength (see below). If the former " << Endl;
01745    Log() << "is too large, detailed features in the parameter space cannot be " << Endl;
01746    Log() << "modelled; if it is too small, the risk of overtraining rises." << Endl;
01747    Log() << "   (Imagine the decision tree is split until the leaf node contains" << Endl;
01748    Log() << "    only a single event. In such a case, no training event is  " << Endl;
01749    Log() << "    misclassified, while the situation will look very different" << Endl;
01750    Log() << "    for the test sample.)" << Endl;
01751    Log() << Endl;
01752    Log() << "The default minimal number is currently set to " << Endl;
01753    Log() << "   max(20, (N_training_events / N_variables^2 / 10)) " << Endl;
01754    Log() << "and can be changed by the user." << Endl;
01755    Log() << Endl;
01756    Log() << "The other crucial parameter, the pruning strength (\"PruneStrength\")," << Endl;
01757    Log() << "is also related to overtraining. It is a regularisation parameter " << Endl;
01758    Log() << "that is used when determining after the training which splits " << Endl;
01759    Log() << "are considered statistically insignificant and are removed. The" << Endl;
01760    Log() << "user is advised to carefully watch the BDT screen output for" << Endl;
01761    Log() << "the comparison between efficiencies obtained on the training and" << Endl;
01762    Log() << "the independent test sample. They should agree within the statistical" << Endl;
01763    Log() << "fluctuations expected for the different samples." << Endl;
01764 }
01765 
01766 //_______________________________________________________________________
01767 void TMVA::MethodBDT::MakeClassSpecific( std::ostream& fout, const TString& className ) const
01768 {
01769    // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
01770 
01771    // write BDT-specific classifier response
01772    fout << "   std::vector<BDT_DecisionTreeNode*> fForest;       // i.e. root nodes of decision trees" << endl;
01773    fout << "   std::vector<double>                fBoostWeights; // the weights applied in the individual boosts" << endl;
01774    fout << "};" << endl << endl;
01775    fout << "double " << className << "::GetMvaValue__( const std::vector<double>& inputValues ) const" << endl;
01776    fout << "{" << endl;
01777    fout << "   double myMVA = 0;" << endl;
01778    if (fBoostType!="Grad"){
01779       fout << "   double norm  = 0;" << endl;
01780    }
01781    fout << "   for (unsigned int itree=0; itree<fForest.size(); itree++){" << endl;
01782    fout << "      BDT_DecisionTreeNode *current = fForest[itree];" << endl;
01783    fout << "      while (current->GetNodeType() == 0) { //intermediate node" << endl;
01784    fout << "         if (current->GoesRight(inputValues)) current=(BDT_DecisionTreeNode*)current->GetRight();" << endl;
01785    fout << "         else current=(BDT_DecisionTreeNode*)current->GetLeft();" << endl;
01786    fout << "      }" << endl;
01787    if (fBoostType=="Grad"){
01788       fout << "      myMVA += current->GetResponse();" << endl;
01789    }
01790    else if (fUseWeightedTrees) {
01791       if (fUseYesNoLeaf) fout << "      myMVA += fBoostWeights[itree] *  current->GetNodeType();" << endl;
01792       else               fout << "      myMVA += fBoostWeights[itree] *  current->GetPurity();" << endl;
01793       fout << "      norm  += fBoostWeights[itree];" << endl;
01794    }
01795    else {
01796       if (fUseYesNoLeaf) fout << "      myMVA += current->GetNodeType();" << endl;
01797       else               fout << "      myMVA += current->GetPurity();" << endl;
01798       fout << "      norm  += 1.;" << endl;
01799    }
01800    fout << "   }" << endl;
01801    if (fBoostType=="Grad"){
01802       fout << "   return 2.0/(1.0+exp(-2.0*myMVA))-1.0;" << endl;
01803    }
01804    else fout << "   return myMVA /= norm;" << endl;
01805    fout << "};" << endl << endl;
01806    fout << "void " << className << "::Initialize()" << endl;
01807    fout << "{" << endl;
01808    //Now for each decision tree, write directly the constructors of the nodes in the tree structure
01809    for (int itree=0; itree<fNTrees; itree++) {
01810       fout << "  // itree = " << itree << endl;
01811       fout << "  fBoostWeights.push_back(" << fBoostWeights[itree] << ");" << endl;
01812       fout << "  fForest.push_back( " << endl;
01813       this->MakeClassInstantiateNode((DecisionTreeNode*)fForest[itree]->GetRoot(), fout, className);
01814       fout <<"   );" << endl;
01815    }
01816    fout << "   return;" << endl;
01817    fout << "};" << endl;
01818    fout << " " << endl;
01819    fout << "// Clean up" << endl;
01820    fout << "inline void " << className << "::Clear() " << endl;
01821    fout << "{" << endl;
01822    fout << "   for (unsigned int itree=0; itree<fForest.size(); itree++) { " << endl;
01823    fout << "      delete fForest[itree]; " << endl;
01824    fout << "   }" << endl;
01825    fout << "}" << endl;
01826 }
01827 
01828 //_______________________________________________________________________
01829 void TMVA::MethodBDT::MakeClassSpecificHeader(  std::ostream& fout, const TString& ) const
01830 {
01831    // specific class header
01832    fout << "#ifndef NN" << endl;
01833    fout << "#define NN new BDT_DecisionTreeNode" << endl;
01834    fout << "#endif" << endl;
01835    fout << "   " << endl;
01836    fout << "#ifndef BDT_DecisionTreeNode__def" << endl;
01837    fout << "#define BDT_DecisionTreeNode__def" << endl;
01838    fout << "   " << endl;
01839    fout << "class BDT_DecisionTreeNode {" << endl;
01840    fout << "   " << endl;
01841    fout << "public:" << endl;
01842    fout << "   " << endl;
01843    fout << "   // constructor of an essentially \"empty\" node floating in space" << endl;
01844    fout << "   BDT_DecisionTreeNode ( BDT_DecisionTreeNode* left," << endl;
01845    fout << "                          BDT_DecisionTreeNode* right," << endl;
01846    fout << "                          double cutValue, bool cutType, int selector," << endl;
01847    fout << "                          int nodeType, double purity, double response ) :" << endl;
01848    fout << "   fLeft    ( left     )," << endl;
01849    fout << "   fRight   ( right    )," << endl;
01850    fout << "   fCutValue( cutValue )," << endl;
01851    fout << "   fCutType ( cutType  )," << endl;
01852    fout << "   fSelector( selector )," << endl;
01853    fout << "   fNodeType( nodeType )," << endl;
01854    fout << "   fPurity  ( purity   )," << endl;
01855    fout << "   fResponse( response ){}" << endl << endl;
01856    fout << "   virtual ~BDT_DecisionTreeNode();" << endl << endl;
01857    fout << "   // test whether the event descends the tree at this node to the right" << endl;
01858    fout << "   virtual bool GoesRight( const std::vector<double>& inputValues ) const;" << endl;
01859    fout << "   BDT_DecisionTreeNode* GetRight( void )  {return fRight; };" << endl << endl;
01860    fout << "   // test whether the event descends the tree at this node to the left " << endl;
01861    fout << "   virtual bool GoesLeft ( const std::vector<double>& inputValues ) const;" << endl;
01862    fout << "   BDT_DecisionTreeNode* GetLeft( void ) { return fLeft; };   " << endl << endl;
01863    fout << "   // return  S/(S+B) (purity) at this node (from  training)" << endl << endl;
01864    fout << "   double GetPurity( void ) const { return fPurity; } " << endl;
01865    fout << "   // return the node type" << endl;
01866    fout << "   int    GetNodeType( void ) const { return fNodeType; }" << endl;
01867    fout << "   double GetResponse(void) const {return fResponse;}" << endl << endl;
01868    fout << "private:" << endl << endl;
01869    fout << "   BDT_DecisionTreeNode*   fLeft;     // pointer to the left daughter node" << endl;
01870    fout << "   BDT_DecisionTreeNode*   fRight;    // pointer to the right daughter node" << endl;
01871    fout << "   double                  fCutValue; // cut value applied on this node to discriminate bkg against sig" << endl;
01872    fout << "   bool                    fCutType;  // true: if event variable > cutValue ==> signal , false otherwise" << endl;
01873    fout << "   int                     fSelector; // index of variable used in node selection (decision tree)   " << endl;
01874    fout << "   int                     fNodeType; // Type of node: -1 == Bkg-leaf, 1 == Signal-leaf, 0 = internal " << endl;
01875    fout << "   double                  fPurity;   // Purity of node from training"<< endl;
01876    fout << "   double                  fResponse; // Regression response value of node" << endl;
01877    fout << "}; " << endl;
01878    fout << "   " << endl;
01879    fout << "//_______________________________________________________________________" << endl;
01880    fout << "BDT_DecisionTreeNode::~BDT_DecisionTreeNode()" << endl;
01881    fout << "{" << endl;
01882    fout << "   if (fLeft  != NULL) delete fLeft;" << endl;
01883    fout << "   if (fRight != NULL) delete fRight;" << endl;
01884    fout << "}; " << endl;
01885    fout << "   " << endl;
01886    fout << "//_______________________________________________________________________" << endl;
01887    fout << "bool BDT_DecisionTreeNode::GoesRight( const std::vector<double>& inputValues ) const" << endl;
01888    fout << "{" << endl;
01889    fout << "   // test whether the event descends the tree at this node to the right" << endl;
01890    fout << "   bool result = (inputValues[fSelector] > fCutValue );" << endl;
01891    fout << "   if (fCutType == true) return result; //the cuts are selecting Signal ;" << endl;
01892    fout << "   else return !result;" << endl;
01893    fout << "}" << endl;
01894    fout << "   " << endl;
01895    fout << "//_______________________________________________________________________" << endl;
01896    fout << "bool BDT_DecisionTreeNode::GoesLeft( const std::vector<double>& inputValues ) const" << endl;
01897    fout << "{" << endl;
01898    fout << "   // test whether the event descends the tree at this node to the left" << endl;
01899    fout << "   if (!this->GoesRight(inputValues)) return true;" << endl;
01900    fout << "   else return false;" << endl;
01901    fout << "}" << endl;
01902    fout << "   " << endl;
01903    fout << "#endif" << endl;
01904    fout << "   " << endl;
01905 }
01906 
01907 //_______________________________________________________________________
01908 void TMVA::MethodBDT::MakeClassInstantiateNode( DecisionTreeNode *n, std::ostream& fout, const TString& className ) const
01909 {
01910    // recursively descends the tree and writes the node instances to the output stream
01911    if (n == NULL) {
01912       Log() << kFATAL << "MakeClassInstantiateNode: started with undefined node" <<Endl;
01913       return ;
01914    }
01915    fout << "NN("<<endl;
01916    if (n->GetLeft() != NULL){
01917       this->MakeClassInstantiateNode( (DecisionTreeNode*)n->GetLeft() , fout, className);
01918    }
01919    else {
01920       fout << "0";
01921    }
01922    fout << ", " <<endl;
01923    if (n->GetRight() != NULL){
01924       this->MakeClassInstantiateNode( (DecisionTreeNode*)n->GetRight(), fout, className );
01925    }
01926    else {
01927       fout << "0";
01928    }
01929    fout << ", " <<  endl
01930         << setprecision(6)
01931         << n->GetCutValue() << ", "
01932         << n->GetCutType() << ", "
01933         << n->GetSelector() << ", "
01934         << n->GetNodeType() << ", "
01935         << n->GetPurity() << ","
01936         << n->GetResponse() << ") ";
01937 
01938 }
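// Sketch of the generated output: for a depth-1 tree the recursion above emits
//    NN( NN(0, 0, cutL, typeL, selL, -1, purityL, respL),
//        NN(0, 0, cutR, typeR, selR,  1, purityR, respR),
//        cutValue, cutType, selector, 0, purity, response )
// which, together with the NN macro from MakeClassSpecificHeader, instantiates the
// whole tree when the standalone class is initialised.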
