MethodMLP.cxx

00001 // @(#)root/tmva $Id: MethodMLP.cxx 36966 2010-11-26 09:50:13Z evt $
00002 // Author: Andreas Hoecker, Matt Jachowski, Peter Speckmayer, Joerg Stelzer
00003 
00004 /**********************************************************************************
00005  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
00006  * Package: TMVA                                                                  *
00007  * Class  : MethodMLP                                                             *
00008  * Web    : http://tmva.sourceforge.net                                           *
00009  *                                                                                *
00010  * Description:                                                                   *
00011  *      ANN Multilayer Perceptron class for the discrimination of signal          *
00012  *      from background. BFGS implementation based on TMultiLayerPerceptron       *
00013  *      class from ROOT (http://root.cern.ch).                                    *
00014  *                                                                                *
00015  * Authors (alphabetical):                                                        *
00016  *      Krzysztof Danielowski <danielow@cern.ch>       - IFJ & AGH, Poland        *
00017  *      Andreas Hoecker       <Andreas.Hocker@cern.ch> - CERN, Switzerland        *
00018  *      Matt Jachowski        <jachowski@stanford.edu> - Stanford University, USA *
00019  *      Kamil Kraszewski      <kalq@cern.ch>           - IFJ & UJ, Poland         *
00020  *      Maciej Kruk           <mkruk@cern.ch>          - IFJ & AGH, Poland        *
00021  *      Peter Speckmayer      <peter.speckmayer@cern.ch> - CERN, Switzerland      *
00022  *      Joerg Stelzer         <stelzer@cern.ch>        - DESY, Germany            *
00023  *      Jiahang Zhong         <Jiahang.Zhong@cern.ch>  - Academia Sinica, Taipei  *
00024  *                                                                                *
00025  * Copyright (c) 2005:                                                            *
00026  *      CERN, Switzerland                                                         *
00027  *                                                                                *
00028  * Redistribution and use in source and binary forms, with or without             *
00029  * modification, are permitted according to the terms listed in LICENSE           *
00030  * (http://tmva.sourceforge.net/LICENSE)                                          *
00031  **********************************************************************************/
00032 
00033 //_______________________________________________________________________
00034 //
00035 // Multilayer Perceptron class built on top of MethodANNBase
00036 //_______________________________________________________________________
00037 
00038 #include "TString.h"
00039 #include <vector>
00040 #include <cmath>
00041 #include "TTree.h"
00042 #include "Riostream.h"
00043 #include "TFitter.h"
00044 #include "TMatrixD.h"
00045 #include "TMath.h"
00046 #include "TFile.h"
00047 
00048 #include "TMVA/ClassifierFactory.h"
00049 #include "TMVA/Interval.h"
00050 #include "TMVA/MethodMLP.h"
00051 #include "TMVA/TNeuron.h"
00052 #include "TMVA/TSynapse.h"
00053 #include "TMVA/Timer.h"
00054 #include "TMVA/Types.h"
00055 #include "TMVA/Tools.h"
00056 #include "TMVA/GeneticFitter.h"
00057 #include "TMVA/Config.h"
00058 
00059 #ifdef MethodMLP_UseMinuit__
00060 TMVA::MethodMLP* TMVA::MethodMLP::fgThis = 0;
00061 Bool_t MethodMLP_UseMinuit = kTRUE;
00062 #endif
00063 
00064 REGISTER_METHOD(MLP)
00065 
00066 ClassImp(TMVA::MethodMLP)
00067 
00068 using std::vector;
00069 
00070 //______________________________________________________________________________
00071 TMVA::MethodMLP::MethodMLP( const TString& jobName,
00072                             const TString& methodTitle,
00073                             DataSetInfo& theData,
00074                             const TString& theOption,
00075                             TDirectory* theTargetDir )
00076    : MethodANNBase( jobName, Types::kMLP, methodTitle, theData, theOption, theTargetDir ),
00077      fPrior(0.0),//zjh
00078      fSamplingFraction(1.0),
00079      fSamplingEpoch   (0.0)
00080 {
00081    // standard constructor
00082 }
00083 
00084 //______________________________________________________________________________
00085 TMVA::MethodMLP::MethodMLP( DataSetInfo& theData,
00086                             const TString& theWeightFile,
00087                             TDirectory* theTargetDir )
00088    : MethodANNBase( Types::kMLP, theData, theWeightFile, theTargetDir ),
00089      fPrior(0.0),//zjh
00090      fSamplingFraction(1.0),
00091      fSamplingEpoch(0.0)
00092 {
00093    // constructor from a weight file
00094 }
00095 
00096 //______________________________________________________________________________
00097 TMVA::MethodMLP::~MethodMLP()
00098 {
00099    // destructor
00100    // nothing to be done
00101 }
00102 
00103 //_______________________________________________________________________
00104 Bool_t TMVA::MethodMLP::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ )
00105 {
00106    // MLP can handle classification with 2 classes, multiclass classification, and regression with one regression-target
00107    if (type == Types::kClassification && numberClasses == 2 ) return kTRUE;
00108    if (type == Types::kMulticlass ) return kTRUE;
00109    if (type == Types::kRegression ) return kTRUE;
00110 
00111    return kFALSE;
00112 }
00113 
00114 //______________________________________________________________________________
00115 void TMVA::MethodMLP::Init()
00116 {
00117    // default initializations
00118 
00119    // the minimum requirement to declare an event signal-like
00120    SetSignalReferenceCut( 0.5 );
00121 #ifdef MethodMLP_UseMinuit__
00122    fgThis = this;
00123 #endif
00124 }
00125 
00126 //_______________________________________________________________________
00127 void TMVA::MethodMLP::DeclareOptions()
00128 {
00129    // define the options (their key words) that can be set in the option string
00130    // known options:
00131    // TrainingMethod  <string>     Training method
00132    //    available values are:         BP   Back-Propagation <default>
00133    //                                  GA   Genetic Algorithm (takes a LONG time)
00134    //
00135    // LearningRate    <float>      NN learning rate parameter
00136    // DecayRate       <float>      Decay rate for learning parameter
00137    // TestRate        <int>        Test for overtraining performed at each #th epoch
00138    //
00139    // BPMode          <string>     Back-propagation learning mode
00140    //    available values are:         sequential <default>
00141    //                                  batch
00142    //
00143    // BatchSize       <int>        Batch size: number of events/batch, only set if in Batch Mode,
00144    //                                          -1 for BatchSize=number_of_events
00145 
00146    DeclareOptionRef(fTrainMethodS="BP", "TrainingMethod",
00147                     "Train with Back-Propagation (BP), BFGS Algorithm (BFGS), or Genetic Algorithm (GA - slower and worse)");
00148    AddPreDefVal(TString("BP"));
00149    AddPreDefVal(TString("GA"));
00150    AddPreDefVal(TString("BFGS"));
00151 
00152    DeclareOptionRef(fLearnRate=0.02,    "LearningRate",    "ANN learning rate parameter");
00153    DeclareOptionRef(fDecayRate=0.01,    "DecayRate",       "Decay rate for learning parameter");
00154    DeclareOptionRef(fTestRate =10,      "TestRate",        "Test for overtraining performed at each #th epoch");
00155    DeclareOptionRef(fEpochMon = kFALSE, "EpochMonitoring", "Provide epoch-wise monitoring plots according to TestRate (caution: causes big ROOT output file!)" );
00156 
00157    DeclareOptionRef(fSamplingFraction=1.0, "Sampling","Only a fraction 'Sampling' of randomly selected events is used for training in each epoch");
00158    DeclareOptionRef(fSamplingEpoch=1.0,    "SamplingEpoch","Sampling is used for the first 'SamplingEpoch' fraction of epochs; afterwards, all events are used for training");
00159    DeclareOptionRef(fSamplingWeight=1.0,   "SamplingImportance","The sampling weights of events in epochs which were successful (worse estimator than before) are multiplied by SamplingImportance, otherwise they are divided by it.");
00160 
00161    DeclareOptionRef(fSamplingTraining=kTRUE,    "SamplingTraining","The training sample is sampled");
00162    DeclareOptionRef(fSamplingTesting= kFALSE,    "SamplingTesting" ,"The testing sample is sampled");
00163 
00164    DeclareOptionRef(fResetStep=50,   "ResetStep",    "How often BFGS should reset history");
00165    DeclareOptionRef(fTau      =3.0,  "Tau",          "LineSearch \"size step\"");
00166 
00167    DeclareOptionRef(fBpModeS="sequential", "BPMode",
00168                     "Back-propagation learning mode: sequential or batch");
00169    AddPreDefVal(TString("sequential"));
00170    AddPreDefVal(TString("batch"));
00171 
00172    DeclareOptionRef(fBatchSize=-1, "BatchSize",
00173                     "Batch size: number of events/batch, only set if in Batch Mode, -1 for BatchSize=number_of_events");
00174 
00175    DeclareOptionRef(fImprovement=1e-30, "ConvergenceImprove",
00176                     "Minimum improvement which counts as improvement (<0 means automatic convergence check is turned off)");
00177 
00178    DeclareOptionRef(fSteps=-1, "ConvergenceTests",
00179                     "Number of steps (without improvement) required for convergence (<0 means automatic convergence check is turned off)");
00180 
00181    DeclareOptionRef(fUseRegulator=kFALSE, "UseRegulator",
00182                     "Use regulator to avoid over-training");   //zjh
00183    DeclareOptionRef(fUpdateLimit=10, "UpdateLimit",
00184                     "Number of updates for regulator before stop training");   //zjh
00185    DeclareOptionRef(fCalculateErrors=kFALSE, "CalculateErrors",
00186                     "Calculates inverse Hessian matrix at the end of the training to be able to calculate the uncertainties of an MVA value");   //zjh
00187 }
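For reference, these keywords are what the booking string is parsed against. Below is a minimal booking sketch, assuming the usual TMVA::Factory workflow; the variable declarations and dataset preparation are omitted and the option values are purely illustrative, not recommendations:

   TFile* outputFile = TFile::Open( "TMVA_MLP.root", "RECREATE" );
   TMVA::Factory* factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:AnalysisType=Classification" );
   // ... factory->AddVariable(...), factory->PrepareTrainingAndTestTree(...) ...
   factory->BookMethod( TMVA::Types::kMLP, "MLP",
                        "TrainingMethod=BFGS:LearningRate=0.02:DecayRate=0.01:"
                        "TestRate=10:BPMode=sequential:BatchSize=-1:!UseRegulator" );
   factory->TrainAllMethods();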
00188 
00189 //_______________________________________________________________________
00190 void TMVA::MethodMLP::ProcessOptions()
00191 {
00192    // process user options
00193    MethodANNBase::ProcessOptions();
00194 
00195    if (IgnoreEventsWithNegWeightsInTraining()) {
00196       Log() << kFATAL << "Mechanism to ignore events with negative weights in training not yet available for method: "
00197             << GetMethodTypeName()
00198             << " --> please remove \"IgnoreNegWeightsInTraining\" option from booking string."
00199             << Endl;
00200    }
00201 
00202    if      (fTrainMethodS == "BP"  ) fTrainingMethod = kBP;
00203    else if (fTrainMethodS == "BFGS") fTrainingMethod = kBFGS;
00204    else if (fTrainMethodS == "GA"  ) fTrainingMethod = kGA;
00205 
00206    if      (fBpModeS == "sequential") fBPMode = kSequential;
00207    else if (fBpModeS == "batch")      fBPMode = kBatch;
00208 
00209    //   InitializeLearningRates();
00210 
00211    if (fBPMode == kBatch) {
00212       Data()->SetCurrentType(Types::kTraining);
00213       Int_t numEvents = Data()->GetNEvents();
00214       if (fBatchSize < 1 || fBatchSize > numEvents) fBatchSize = numEvents;
00215    }
00216 }
00217 
00218 //______________________________________________________________________________
00219 void TMVA::MethodMLP::InitializeLearningRates()
00220 {
00221    // initialize learning rates of synapses, used only by backpropagation
00222    Log() << kDEBUG << "Initialize learning rates" << Endl;
00223    TSynapse *synapse;
00224    Int_t numSynapses = fSynapses->GetEntriesFast();
00225    for (Int_t i = 0; i < numSynapses; i++) {
00226       synapse = (TSynapse*)fSynapses->At(i);
00227       synapse->SetLearningRate(fLearnRate);
00228    }
00229 }
00230 
00231 //______________________________________________________________________________
00232 Double_t TMVA::MethodMLP::CalculateEstimator( Types::ETreeType treeType, Int_t iEpoch )
00233 {
00234    // calculate the estimator that training is attempting to minimize
00235 
00236    // sanity check
00237    if (treeType!=Types::kTraining && treeType!=Types::kTesting) {
00238       Log() << kFATAL << "<CalculateEstimator> fatal error: wrong tree type: " << treeType << Endl;
00239    }
00240 
00241    Types::ETreeType saveType = Data()->GetCurrentType();
00242    Data()->SetCurrentType(treeType);
00243 
00244    // if epochs are counted create monitoring histograms (only available for classification)
00245    TString type  = (treeType == Types::kTraining ? "train" : "test");
00246    TString name  = Form("convergencetest___mlp_%s_epoch_%04i", type.Data(), iEpoch);
00247    TString nameB = name + "_B";
00248    TString nameS = name + "_S";
00249    Int_t   nbin  = 100;
00250    Float_t limit = 2;
00251    TH1*    histS = 0;
00252    TH1*    histB = 0;
00253    if (fEpochMon && iEpoch >= 0 && !DoRegression()) {
00254       histS = new TH1F( nameS, nameS, nbin, -limit, limit );
00255       histB = new TH1F( nameB, nameB, nbin, -limit, limit );
00256    }
00257 
00258    Double_t estimator = 0;
00259 
00260    // loop over all training events
00261    Int_t  nEvents  = GetNEvents();
00262    UInt_t nClasses = DataInfo().GetNClasses();
00263    UInt_t nTgts = DataInfo().GetNTargets();
00264    for (Int_t i = 0; i < nEvents; i++) {
00265 
00266       const Event* ev = GetEvent(i);
00267       Double_t     w  = ev->GetWeight();
00268 
00269       ForceNetworkInputs( ev );
00270       ForceNetworkCalculations();
00271 
00272       Double_t d = 0, v = 0;
00273       if (DoRegression()) {
00274          for (UInt_t itgt = 0; itgt < nTgts; itgt++) {
00275             v = GetOutputNeuron( itgt )->GetActivationValue();
00276             Double_t targetValue = ev->GetTarget( itgt );
00277             Double_t dt = v - targetValue;
00278             d += (dt*dt);
00279          }
00280          estimator += d*w;
00281       } else if (DoMulticlass() ) {
00282          UInt_t cls = ev->GetClass();
00283          if (fEstimator==kCE){
00284             Double_t norm(0);
00285             for (UInt_t icls = 0; icls < nClasses; icls++) {
00286                norm += exp( GetOutputNeuron( icls )->GetActivationValue());
00287                if(icls==cls)
00288                   d = exp( GetOutputNeuron( icls )->GetActivationValue());
00289             }
00290             d = -TMath::Log(d/norm);
00291          }
00292          else{
00293             for (UInt_t icls = 0; icls < nClasses; icls++) {
00294                Double_t desired = (icls==cls) ? 1.0 : 0.0;
00295                v = GetOutputNeuron( icls )->GetActivationValue();
00296                d = (desired-v)*(desired-v);
00297             }
00298          }
00299          estimator += d*w; //zjh
00300       } else {
00301          Double_t desired =  DataInfo().IsSignal(ev)?1.:0.;
00302          v = GetOutputNeuron()->GetActivationValue();
00303          if (fEstimator==kMSE) d = (desired-v)*(desired-v);                         //zjh
00304          else if (fEstimator==kCE) d = -2*(desired*TMath::Log(v)+(1-desired)*TMath::Log(1-v));     //zjh
00305          estimator += d*w; //zjh
00306       }
00307 
00308       // fill monitoring histograms
00309       if (DataInfo().IsSignal(ev) && histS != 0) histS->Fill( float(v), float(w) );
00310       else if              (histB != 0) histB->Fill( float(v), float(w) );
00311    }
00312 
00313    if (histS != 0) fEpochMonHistS.push_back( histS );
00314    if (histB != 0) fEpochMonHistB.push_back( histB );
00315 
00316    //if      (DoRegression()) estimator = TMath::Sqrt(estimator/Float_t(nEvents));
00317    //else if (DoMulticlass()) estimator = TMath::Sqrt(estimator/Float_t(nEvents));
00318    //else                     estimator = estimator*0.5/Float_t(nEvents);
00319    if      (DoRegression()) estimator = estimator/Float_t(nEvents);
00320    else if (DoMulticlass()) estimator = estimator/Float_t(nEvents);
00321    else                     estimator = estimator/Float_t(nEvents);
00322 
00323 
00324    //if (fUseRegulator) estimator+=fPrior/Float_t(nEvents);  //zjh
00325 
00326    Data()->SetCurrentType( saveType );
00327 
00328    // provide epoch-wise monitoring
00329    if (fEpochMon && iEpoch >= 0 && !DoRegression() && treeType == Types::kTraining) {
00330       CreateWeightMonitoringHists( Form("epochmonitoring___epoch_%04i_weights_hist", iEpoch), &fEpochMonHistW );
00331    }
00332 
00333    return estimator;
00334 }
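In formula form, the per-event contribution d accumulated in the loop above is, depending on the analysis type and estimator (v = output-neuron activation, t = desired output or target, w = event weight, N = number of events):

   d_reg   = \sum_{tgt} ( v_{tgt} - t_{tgt} )^2                      (regression)
   d_MSE   = ( t - v )^2                                             (classification, kMSE)
   d_CE    = -2 [ t \ln v + (1 - t) \ln(1 - v) ]                     (classification, kCE)
   d_CE^multi = -\ln\frac{e^{v_{cls}}}{\sum_c e^{v_c}}               (multiclass, kCE)

   E = \frac{1}{N} \sum_{events} w \, d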
00335 
00336 //______________________________________________________________________________
00337 void TMVA::MethodMLP::Train(Int_t nEpochs)
00338 {
00339    if (fNetwork == 0) {
00340       //Log() << kERROR <<"ANN Network is not initialized, doing it now!"<< Endl;
00341       Log() << kFATAL <<"ANN Network is not initialized, doing it now!"<< Endl;
00342       SetAnalysisType(GetAnalysisType());
00343    }
00344    Log() << kDEBUG << "reinitialize learning rates" << Endl;
00345    InitializeLearningRates();
00346    PrintMessage("Training Network");
00347 
00348    Int_t nEvents=GetNEvents();
00349    Int_t nSynapses=fSynapses->GetEntriesFast();
00350    if (nSynapses>nEvents) 
00351       Log()<<kFATAL<<"ANN too complicated: #events="<<nEvents<<"\t#synapses="<<nSynapses<<Endl;
00352 
00353 #ifdef MethodMLP_UseMinuit__
00354    if (useMinuit) MinuitMinimize();
00355 #else
00356    if (fTrainingMethod == kGA)        GeneticMinimize();
00357    else if (fTrainingMethod == kBFGS) BFGSMinimize(nEpochs);
00358    else                               BackPropagationMinimize(nEpochs);
00359 #endif
00360 
00361    float trainE = CalculateEstimator( Types::kTraining, 0 ) ; // estimator for training sample  //zjh
00362    float testE  = CalculateEstimator( Types::kTesting,  0 ) ; // estimator for test sample //zjh
00363    if (fUseRegulator){
00364       Log()<<kINFO<<"Finalizing handling of Regulator terms, trainE="<<trainE<<" testE="<<testE<<Endl;
00365       UpdateRegulators();
00366       Log()<<kINFO<<"Done with handling of Regulator terms"<<Endl;
00367    }
00368 
00369    if( fCalculateErrors || fUseRegulator )
00370    {
00371       Int_t numSynapses=fSynapses->GetEntriesFast();
00372       fInvHessian.ResizeTo(numSynapses,numSynapses);
00373       GetApproxInvHessian( fInvHessian ,false);
00374    }
00375 }
00376 
00377 //______________________________________________________________________________
00378 void TMVA::MethodMLP::BFGSMinimize( Int_t nEpochs )
00379 {
00380    // train network with BFGS algorithm
00381 
00382    Timer timer( (fSteps>0?100:nEpochs), GetName() );
00383 
00384    // create histograms for overtraining monitoring
00385    Int_t nbinTest = Int_t(nEpochs/fTestRate);
00386    fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
00387                                    nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
00388    fEstimatorHistTest  = new TH1F( "estimatorHistTest", "test estimator",
00389                                    nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
00390 
00391    Int_t nSynapses = fSynapses->GetEntriesFast();
00392    Int_t nWeights  = nSynapses;
00393 
00394    for (Int_t i=0;i<nSynapses;i++) {
00395       TSynapse* synapse = (TSynapse*)fSynapses->At(i);
00396       synapse->SetDEDw(0.0);
00397    }
00398 
00399    std::vector<Double_t> buffer( nWeights );
00400    for (Int_t i=0;i<nWeights;i++) buffer[i] = 0.;
00401 
00402    TMatrixD Dir     ( nWeights, 1 );
00403    TMatrixD Hessian ( nWeights, nWeights );
00404    TMatrixD Gamma   ( nWeights, 1 );
00405    TMatrixD Delta   ( nWeights, 1 );
00406    Int_t        RegUpdateCD=0;                  //zjh
00407    Int_t        RegUpdateTimes=0;               //zjh
00408    Double_t     AccuError=0;
00409 
00410    Double_t trainE = -1;
00411    Double_t testE  = -1;
00412 
00413    fLastAlpha = 0.;
00414 
00415    if(fSamplingTraining || fSamplingTesting)
00416       Data()->InitSampling(1.0,1.0,fRandomSeed); // initialize sampling to initialize the random generator with the given seed
00417 
00418    if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
00419    timer.DrawProgressBar( 0 );
00420 
00421    // start training cycles (epochs)
00422    for (Int_t i = 0; i < nEpochs; i++) {
00423       if (Float_t(i)/nEpochs < fSamplingEpoch) {
00424          if ((i+1)%fTestRate == 0 || (i == 0)) {
00425             if (fSamplingTraining) {
00426                Data()->SetCurrentType( Types::kTraining );
00427                Data()->InitSampling(fSamplingFraction,fSamplingWeight);
00428                Data()->CreateSampling();
00429             }
00430             if (fSamplingTesting) {
00431                Data()->SetCurrentType( Types::kTesting );
00432                Data()->InitSampling(fSamplingFraction,fSamplingWeight);
00433                Data()->CreateSampling();
00434             }
00435          }
00436       }
00437       else {
00438          Data()->SetCurrentType( Types::kTraining );
00439          Data()->InitSampling(1.0,1.0);
00440          Data()->SetCurrentType( Types::kTesting );
00441          Data()->InitSampling(1.0,1.0);
00442       }
00443       Data()->SetCurrentType( Types::kTraining );
00444 
00445       //zjh
00446       if (fUseRegulator) {
00447          UpdatePriors();
00448          RegUpdateCD++;
00449       }
00450       //zjh
00451 
00452       SetGammaDelta( Gamma, Delta, buffer );
00453 
00454       if (i % fResetStep == 0 && i<0.5*nEpochs) { //zjh
00455          SteepestDir( Dir );
00456          Hessian.UnitMatrix();
00457          RegUpdateCD=0;    //zjh
00458       }
00459       else {
00460          if (GetHessian( Hessian, Gamma, Delta )) {
00461             SteepestDir( Dir );
00462             Hessian.UnitMatrix();
00463             RegUpdateCD=0;    //zjh
00464          }
00465          else SetDir( Hessian, Dir );
00466       }
00467 
00468       Double_t dError=0;  //zjh
00469       if (DerivDir( Dir ) > 0) {
00470          SteepestDir( Dir );
00471          Hessian.UnitMatrix();
00472          RegUpdateCD=0;    //zjh
00473       }
00474       if (LineSearch( Dir, buffer, &dError )) { //zjh
00475          Hessian.UnitMatrix();
00476          SteepestDir( Dir );
00477          RegUpdateCD=0;    //zjh
00478          if (LineSearch(Dir, buffer, &dError)) {  //zjh
00479             i = nEpochs;
00480             Log() << kFATAL << "Line search failed! Huge troubles somewhere..." << Endl;
00481          }
00482       }
00483 
00484       //zjh+
00485       if (dError<0) Log()<<kWARNING<<"\nnegative dError=" <<dError<<Endl;
00486       AccuError+=dError;
00487       if (std::abs(dError)>0.0001) RegUpdateCD=0;
00488 
00489       if ( fUseRegulator && RegUpdateTimes<fUpdateLimit && RegUpdateCD>=((0.4*fResetStep)>50?50:(0.4*fResetStep)) && i<0.8*nEpochs && AccuError>0.01 ) {
00490          Log()<<kDEBUG <<Endl;
00491          Log()<<kDEBUG<<"\nUpdate regulators "<<RegUpdateTimes<<" on epoch "<<i<<"\tdError="<<dError<<Endl;
00492          UpdateRegulators();
00493          Hessian.UnitMatrix();
00494          RegUpdateCD=0;
00495          RegUpdateTimes++;
00496          AccuError=0;
00497       }
00498       //zjh-
00499 
00500       // monitor convergence of training and control sample
00501       if ((i+1)%fTestRate == 0) {
00502          //trainE = CalculateEstimator( Types::kTraining, i ) - fPrior/Float_t(GetNEvents()); // estimator for training sample  //zjh
00503          //testE  = CalculateEstimator( Types::kTesting,  i ) - fPrior/Float_t(GetNEvents()); // estimator for test sample //zjh
00504          trainE = CalculateEstimator( Types::kTraining, i ) ; // estimator for training sample  //zjh
00505          testE  = CalculateEstimator( Types::kTesting,  i ) ; // estimator for test sample //zjh
00506          fEstimatorHistTrain->Fill( i+1, trainE );
00507          fEstimatorHistTest ->Fill( i+1, testE );
00508 
00509          Bool_t success = kFALSE;
00510          if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
00511             success = kTRUE;
00512          }
00513          Data()->EventResult( success );
00514 
00515          SetCurrentValue( testE );
00516          if (HasConverged()) {
00517             if (Float_t(i)/nEpochs < fSamplingEpoch) {
00518                Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
00519                i = newEpoch;
00520                ResetConvergenceCounter();
00521             }
00522             else break;
00523          }
00524       }
00525 
00526       // draw progress
00527       TString convText = Form( "<D^2> (train/test): %.4g/%.4g", trainE, testE ); //zjh
00528       if (fSteps > 0) {
00529          Float_t progress = 0;
00530          if (Float_t(i)/nEpochs < fSamplingEpoch)
00531             progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
00532          else
00533             progress = 100.0*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
00534          Float_t progress2= 100.0*RegUpdateTimes/fUpdateLimit; //zjh
00535          if (progress2>progress) progress=progress2; //zjh
00536          timer.DrawProgressBar( Int_t(progress), convText );
00537       }
00538       else {
00539          Int_t progress=Int_t(nEpochs*RegUpdateTimes/Float_t(fUpdateLimit)); //zjh
00540          if (progress<i) progress=i; //zjh
00541          timer.DrawProgressBar( progress, convText ); //zjh
00542       }
00543 
00544       // some verbose output
00545       if (fgPRINT_SEQ) {
00546          PrintNetwork();
00547          WaitForKeyboard();
00548       }
00549    }
00550 }
00551 
00552 //______________________________________________________________________________
00553 void TMVA::MethodMLP::SetGammaDelta( TMatrixD &Gamma, TMatrixD &Delta, std::vector<Double_t> &buffer )
00554 {
00555    Int_t nWeights = fSynapses->GetEntriesFast();
00556 
00557    Int_t IDX = 0;
00558    Int_t nSynapses = fSynapses->GetEntriesFast();
00559    for (Int_t i=0;i<nSynapses;i++) {
00560       TSynapse *synapse = (TSynapse*)fSynapses->At(i);
00561       Gamma[IDX++][0] = -synapse->GetDEDw();
00562    }
00563 
00564    for (Int_t i=0;i<nWeights;i++) Delta[i][0] = buffer[i];
00565 
00566    ComputeDEDw();
00567 
00568    IDX = 0;
00569    for (Int_t i=0;i<nSynapses;i++)
00570       {
00571          TSynapse *synapse = (TSynapse*)fSynapses->At(i);
00572          Gamma[IDX++][0] += synapse->GetDEDw();
00573       }
00574 }
00575 
00576 //______________________________________________________________________________
00577 void TMVA::MethodMLP::ComputeDEDw()
00578 {
00579    Int_t nSynapses = fSynapses->GetEntriesFast();
00580    for (Int_t i=0;i<nSynapses;i++) {
00581       TSynapse *synapse = (TSynapse*)fSynapses->At(i);
00582       synapse->SetDEDw( 0.0 );
00583    }
00584 
00585    Int_t nEvents = GetNEvents();
00586    for (Int_t i=0;i<nEvents;i++) {
00587 
00588       const Event* ev = GetEvent(i);
00589 
00590       SimulateEvent( ev );
00591 
00592       for (Int_t j=0;j<nSynapses;j++) {
00593          TSynapse *synapse = (TSynapse*)fSynapses->At(j);
00594          synapse->SetDEDw( synapse->GetDEDw() + synapse->GetDelta() );
00595       }
00596    }
00597 
00598    for (Int_t i=0;i<nSynapses;i++) {
00599       TSynapse *synapse = (TSynapse*)fSynapses->At(i);
00600       Double_t DEDw=synapse->GetDEDw();     //zjh
00601       if (fUseRegulator) DEDw+=fPriorDev[i]; //zjh
00602       synapse->SetDEDw( DEDw / nEvents );   //zjh
00603    }
00604 }
00605 
00606 //______________________________________________________________________________
00607 void TMVA::MethodMLP::SimulateEvent( const Event* ev )
00608 {
00609    Double_t eventWeight = ev->GetWeight();
00610 
00611    ForceNetworkInputs( ev );
00612    ForceNetworkCalculations();
00613 
00614    if (DoRegression()) {
00615       UInt_t ntgt = DataInfo().GetNTargets();
00616       for (UInt_t itgt = 0; itgt < ntgt; itgt++) {
00617          Double_t desired     = ev->GetTarget(itgt);
00618          Double_t error = ( GetOutputNeuron( itgt )->GetActivationValue() - desired )*eventWeight;
00619          GetOutputNeuron( itgt )->SetError(error);
00620       }
00621    } else if (DoMulticlass()) {
00622       UInt_t nClasses = DataInfo().GetNClasses();
00623       UInt_t cls      = ev->GetClass();
00624       for (UInt_t icls = 0; icls < nClasses; icls++) {
00625          Double_t desired  = ( cls==icls ? 1.0 : 0.0 );
00626          Double_t error    = ( GetOutputNeuron( icls )->GetActivationValue() - desired )*eventWeight;
00627          GetOutputNeuron( icls )->SetError(error);
00628       }
00629    } else {
00630       Double_t desired     = GetDesiredOutput( ev );
00631       Double_t error=-1;                                //zjh
00632       if (fEstimator==kMSE) error = ( GetOutputNeuron()->GetActivationValue() - desired )*eventWeight;       //zjh
00633       else if (fEstimator==kCE) error = -eventWeight/(GetOutputNeuron()->GetActivationValue() -1 + desired);  //zjh
00634       GetOutputNeuron()->SetError(error);
00635    }
00636 
00637    CalculateNeuronDeltas();
00638    for (Int_t j=0;j<fSynapses->GetEntriesFast();j++) {
00639       TSynapse *synapse = (TSynapse*)fSynapses->At(j);
00640       synapse->InitDelta();
00641       synapse->CalculateDelta();
00642    }
00643 }
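The kCE branch above sets the output-neuron error to the derivative of the per-event cross entropy with respect to the activation v; since the desired output t is restricted to {0,1}, the compact expression used in the code coincides with that derivative:

   E = -[ t \ln v + (1 - t) \ln(1 - v) ], \qquad
   \frac{\partial E}{\partial v} = -\frac{t}{v} + \frac{1-t}{1-v}
      \;\stackrel{t \in \{0,1\}}{=}\; -\frac{1}{\,v - 1 + t\,}

which, multiplied by the event weight, is exactly the error assigned to the output neuron.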
00644 
00645 //______________________________________________________________________________
00646 void TMVA::MethodMLP::SteepestDir( TMatrixD &Dir )
00647 {
00648    Int_t IDX = 0;
00649    Int_t nSynapses = fSynapses->GetEntriesFast();
00650 
00651    for (Int_t i=0;i<nSynapses;i++) {
00652       TSynapse *synapse = (TSynapse*)fSynapses->At(i);
00653       Dir[IDX++][0] = -synapse->GetDEDw();
00654    }
00655 }
00656 
00657 //______________________________________________________________________________
00658 Bool_t TMVA::MethodMLP::GetHessian( TMatrixD &Hessian, TMatrixD &Gamma, TMatrixD &Delta )
00659 {
00660    TMatrixD gd(Gamma, TMatrixD::kTransposeMult, Delta);
00661    if ((Double_t) gd[0][0] == 0.) return kTRUE;
00662    TMatrixD aHg(Hessian, TMatrixD::kMult, Gamma);
00663    TMatrixD tmp(Gamma,   TMatrixD::kTransposeMult, Hessian);
00664    TMatrixD gHg(Gamma,   TMatrixD::kTransposeMult, aHg);
00665    Double_t a = 1 / (Double_t) gd[0][0];
00666    Double_t f = 1 + ((Double_t)gHg[0][0]*a);
00667    TMatrixD res(TMatrixD(Delta, TMatrixD::kMult, TMatrixD(TMatrixD::kTransposed,Delta)));
00668    res *= f;
00669    res -= (TMatrixD(Delta, TMatrixD::kMult, tmp) + TMatrixD(aHg, TMatrixD::kMult,
00670                                                             TMatrixD(TMatrixD::kTransposed,Delta)));
00671    res *= a;
00672    Hessian += res;
00673 
00674    return kFALSE;
00675 }
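The matrix manipulations above implement the standard BFGS update of the running inverse-Hessian approximation, with Delta (\delta) the last weight step and Gamma (\gamma) the corresponding change of the gradient; the function returns kTRUE (forcing a reset to the steepest-descent direction) when \gamma^{T}\delta vanishes:

   H \;\leftarrow\; H
      + \Bigl( 1 + \frac{\gamma^{T} H \gamma}{\gamma^{T}\delta} \Bigr)\,
        \frac{\delta\,\delta^{T}}{\gamma^{T}\delta}
      - \frac{\delta\,\gamma^{T} H + H\,\gamma\,\delta^{T}}{\gamma^{T}\delta}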
00676 
00677 //______________________________________________________________________________
00678 void TMVA::MethodMLP::SetDir( TMatrixD &Hessian, TMatrixD &dir )
00679 {
00680    Int_t IDX = 0;
00681    Int_t nSynapses = fSynapses->GetEntriesFast();
00682    TMatrixD DEDw(nSynapses, 1);
00683 
00684    for (Int_t i=0;i<nSynapses;i++) {
00685       TSynapse *synapse = (TSynapse*)fSynapses->At(i);
00686       DEDw[IDX++][0] = synapse->GetDEDw();
00687    }
00688 
00689    dir = Hessian * DEDw;
00690    for (Int_t i=0;i<IDX;i++) dir[i][0] = -dir[i][0];
00691 }
00692 
00693 //______________________________________________________________________________
00694 Double_t TMVA::MethodMLP::DerivDir( TMatrixD &Dir )
00695 {
00696    Int_t IDX = 0;
00697    Int_t nSynapses = fSynapses->GetEntriesFast();
00698    Double_t Result = 0.0;
00699 
00700    for (Int_t i=0;i<nSynapses;i++) {
00701       TSynapse *synapse = (TSynapse*)fSynapses->At(i);
00702       Result += Dir[IDX++][0] * synapse->GetDEDw();
00703    }
00704    return Result;
00705 }
00706 
00707 //______________________________________________________________________________
00708 Bool_t TMVA::MethodMLP::LineSearch(TMatrixD &Dir, std::vector<Double_t> &buffer, Double_t* dError)
00709 {
00710    Int_t IDX = 0;
00711    Int_t nSynapses = fSynapses->GetEntriesFast();
00712    Int_t nWeights = nSynapses;
00713 
00714    std::vector<Double_t> Origin(nWeights);
00715    for (Int_t i=0;i<nSynapses;i++) {
00716       TSynapse *synapse = (TSynapse*)fSynapses->At(i);
00717       Origin[i] = synapse->GetWeight();
00718    }
00719 
00720    Double_t err1 = GetError();
00721    Double_t errOrigin=err1;     //zjh
00722    Double_t alpha1 = 0.;
00723    Double_t alpha2 = fLastAlpha;
00724 
00725 
00726    if      (alpha2 < 0.01) alpha2 = 0.01;
00727    else if (alpha2 > 2.0)  alpha2 = 2.0;
00728    Double_t alpha_original = alpha2;
00729    Double_t alpha3 = alpha2;
00730 
00731    SetDirWeights( Origin, Dir, alpha2 );
00732    Double_t err2 = GetError();
00733    //Double_t err2 = err1; 
00734    Double_t err3 = err2;
00735    Bool_t bingo = kFALSE;
00736 
00737 
00738    if (err1 > err2) {
00739       for (Int_t i=0;i<100;i++)  {
00740          alpha3 *= fTau;
00741          SetDirWeights(Origin, Dir, alpha3);
00742          err3 = GetError();
00743          if (err3 > err2) {
00744             bingo = kTRUE;
00745             break;
00746          }
00747          alpha1 = alpha2;
00748          err1 = err2;
00749          alpha2 = alpha3;
00750          err2 = err3;
00751       }
00752       if (!bingo) {
00753          SetDirWeights(Origin, Dir, 0.);
00754          return kTRUE;
00755       }
00756    }
00757    else {
00758       for (Int_t i=0;i<100;i++) {
00759          alpha2 /= fTau;
00760          if (i==50) {
00761             Log() << kWARNING << "linesearch, starting to investigate direction opposite of steepestDIR" << Endl;
00762             alpha2 = -alpha_original;
00763          }
00764          SetDirWeights(Origin, Dir, alpha2);
00765          err2 = GetError();
00766          if (err1 > err2) {
00767             bingo = kTRUE;
00768             break;
00769          }
00770          alpha3 = alpha2;
00771          err3 = err2;
00772       }
00773       if (!bingo) {
00774          SetDirWeights(Origin, Dir, 0.);
00775          Log() << kWARNING << "linesearch, failed even in opposite direction of steepestDIR" << Endl;
00776          fLastAlpha = 0.05;
00777          return kTRUE;
00778       }
00779    }
00780 
00781    if (alpha1>0 && alpha2>0 && alpha3 > 0) {
00782       fLastAlpha = 0.5 * (alpha1 + alpha3 -
00783                           (err3 - err1) / ((err3 - err2) / ( alpha3 - alpha2 )
00784                                            - ( err2 - err1 ) / (alpha2 - alpha1 )));
00785    }
00786    else {
00787       fLastAlpha = alpha2;
00788    }
00789 
00790    fLastAlpha = fLastAlpha < 10000 ? fLastAlpha : 10000;
00791 
00792    SetDirWeights(Origin, Dir, fLastAlpha);
00793 
00794    // leaving these lines uncommented is a heavy price to pay for only a warning message
00795    // (which shouldn't appear anyway)
00796    // --> about 15% of time is spent in the final GetError().
00797    //
00798    Double_t finalError = GetError();
00799    if (finalError > err1) {
00800       Log() << kWARNING << "Line search increased error! Something is wrong."
00801             << "fLastAlpha=" << fLastAlpha << "al123=" << alpha1 << " "
00802             << alpha2 << " " << alpha3 << " err1="<< err1 << " errfinal=" << finalError << Endl;
00803    }
00804 
00805    for (Int_t i=0;i<nSynapses;i++) {
00806       TSynapse *synapse = (TSynapse*)fSynapses->At(i);
00807       buffer[IDX] = synapse->GetWeight() - Origin[IDX];
00808       IDX++;
00809    }
00810 
00811    if (dError) (*dError)=(errOrigin-finalError)/finalError; //zjh
00812 
00813    return kFALSE;
00814 }
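When all three trial step sizes are positive, the fLastAlpha computed above is the vertex of the parabola through the three sampled points (\alpha_1, e_1), (\alpha_2, e_2), (\alpha_3, e_3):

   \alpha_{\min} = \frac{1}{2}\left[ \alpha_1 + \alpha_3
      - \frac{e_3 - e_1}{\dfrac{e_3 - e_2}{\alpha_3 - \alpha_2} - \dfrac{e_2 - e_1}{\alpha_2 - \alpha_1}} \right]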
00815 
00816 //______________________________________________________________________________
00817 void TMVA::MethodMLP::SetDirWeights( std::vector<Double_t> &Origin, TMatrixD &Dir, Double_t alpha )
00818 {
00819    Int_t IDX = 0;
00820    Int_t nSynapses = fSynapses->GetEntriesFast();
00821 
00822    for (Int_t i=0;i<nSynapses;i++) {
00823       TSynapse *synapse = (TSynapse*)fSynapses->At(i);
00824       synapse->SetWeight( Origin[IDX] + Dir[IDX][0] * alpha );
00825       IDX++;
00826    }
00827    if (fUseRegulator) UpdatePriors();   //zjh
00828 }
00829 
00830 
00831 //______________________________________________________________________________
00832 Double_t TMVA::MethodMLP::GetError()
00833 {
00834    Int_t nEvents = GetNEvents();
00835    UInt_t ntgts = GetNTargets();
00836    Double_t Result = 0.;
00837 
00838    for (Int_t i=0;i<nEvents;i++) {
00839       const Event* ev = GetEvent(i);
00840 
00841       SimulateEvent( ev );
00842 
00843       Double_t error = 0.;
00844       if (DoRegression()) {
00845          for (UInt_t itgt = 0; itgt < ntgts; itgt++) {
00846             error += GetMSEErr( ev, itgt );     //zjh
00847          }
00848       } else if ( DoMulticlass() ){
00849          for( UInt_t icls = 0, iclsEnd = DataInfo().GetNClasses(); icls < iclsEnd; icls++ ){
00850             error += GetMSEErr( ev, icls );
00851          }
00852       } else {
00853          if (fEstimator==kMSE) error = GetMSEErr( ev );  //zjh
00854          else if (fEstimator==kCE) error= GetCEErr( ev ); //zjh
00855       }
00856       Result += error * ev->GetWeight();
00857    }
00858    if (fUseRegulator) Result+=fPrior;  //zjh
00859    if (Result<0) Log()<<kWARNING<<"\nNegative Error!!! :"<<Result-fPrior<<"+"<<fPrior<<Endl;
00860    return Result;
00861 }
00862 
00863 //______________________________________________________________________________
00864 Double_t TMVA::MethodMLP::GetMSEErr( const Event* ev, UInt_t index )
00865 {
00866    Double_t error = 0;
00867    Double_t output = GetOutputNeuron( index )->GetActivationValue();
00868    Double_t target = 0;
00869    if      (DoRegression()) target = ev->GetTarget( index );
00870    else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
00871    else                     target = GetDesiredOutput( ev );
00872 
00873    error = 0.5*(output-target)*(output-target); //zjh
00874 
00875    return error;
00876 
00877 }
00878 
00879 //______________________________________________________________________________
00880 Double_t TMVA::MethodMLP::GetCEErr( const Event* ev, UInt_t index )  //zjh
00881 {
00882    Double_t error = 0;
00883    Double_t output = GetOutputNeuron( index )->GetActivationValue();
00884    Double_t target = 0;
00885    if      (DoRegression()) target = ev->GetTarget( index );
00886    else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
00887    else                     target = GetDesiredOutput( ev );
00888 
00889    error = -(target*TMath::Log(output)+(1-target)*TMath::Log(1-output));
00890 
00891    return error;
00892 }
00893 
00894 //______________________________________________________________________________
00895 void TMVA::MethodMLP::BackPropagationMinimize(Int_t nEpochs)
00896 {
00897    // minimize estimator / train network with backpropagation algorithm
00898 
00899    //    Timer timer( nEpochs, GetName() );
00900    Timer timer( (fSteps>0?100:nEpochs), GetName() );
00901    Int_t lateEpoch = (Int_t)(nEpochs*0.95) - 1;
00902 
00903    // create histograms for overtraining monitoring
00904    Int_t nbinTest = Int_t(nEpochs/fTestRate);
00905    fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
00906                                    nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
00907    fEstimatorHistTest  = new TH1F( "estimatorHistTest", "test estimator",
00908                                    nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
00909 
00910    if(fSamplingTraining || fSamplingTesting)
00911       Data()->InitSampling(1.0,1.0,fRandomSeed); // initialize sampling to initialize the random generator with the given seed
00912 
00913    if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
00914    timer.DrawProgressBar(0);
00915 
00916    // estimators
00917    Double_t trainE = -1;
00918    Double_t testE  = -1;
00919 
00920    // start training cycles (epochs)
00921    for (Int_t i = 0; i < nEpochs; i++) {
00922 
00923       if (Float_t(i)/nEpochs < fSamplingEpoch) {
00924          if ((i+1)%fTestRate == 0 || (i == 0)) {
00925             if (fSamplingTraining) {
00926                Data()->SetCurrentType( Types::kTraining );
00927                Data()->InitSampling(fSamplingFraction,fSamplingWeight);
00928                Data()->CreateSampling();
00929             }
00930             if (fSamplingTesting) {
00931                Data()->SetCurrentType( Types::kTesting );
00932                Data()->InitSampling(fSamplingFraction,fSamplingWeight);
00933                Data()->CreateSampling();
00934             }
00935          }
00936       }
00937       else {
00938          Data()->SetCurrentType( Types::kTraining );
00939          Data()->InitSampling(1.0,1.0);
00940          Data()->SetCurrentType( Types::kTesting );
00941          Data()->InitSampling(1.0,1.0);
00942       }
00943       Data()->SetCurrentType( Types::kTraining );
00944 
00945       TrainOneEpoch();
00946       DecaySynapseWeights(i >= lateEpoch);
00947 
00948       // monitor convergence of training and control sample
00949       if ((i+1)%fTestRate == 0) {
00950          trainE = CalculateEstimator( Types::kTraining, i ); // estimator for training sample
00951          testE  = CalculateEstimator( Types::kTesting,  i );  // estimator for test sample
00952          fEstimatorHistTrain->Fill( i+1, trainE );
00953          fEstimatorHistTest ->Fill( i+1, testE );
00954 
00955          Bool_t success = kFALSE;
00956          if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
00957             success = kTRUE;
00958          }
00959          Data()->EventResult( success );
00960 
00961          SetCurrentValue( testE );
00962          if (HasConverged()) {
00963             if (Float_t(i)/nEpochs < fSamplingEpoch) {
00964                Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
00965                i = newEpoch;
00966                ResetConvergenceCounter();
00967             }
00968             else {
00969                if (lateEpoch > i) lateEpoch = i;
00970                else                break;
00971             }
00972          }
00973       }
00974 
00975       // draw progress bar (add convergence value)
00976       TString convText = Form( "<D^2> (train/test): %.4g/%.4g", trainE, testE );
00977       if (fSteps > 0) {
00978          Float_t progress = 0;
00979          if (Float_t(i)/nEpochs < fSamplingEpoch)
00980             progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
00981          else
00982             progress = 100*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
00983 
00984          timer.DrawProgressBar( Int_t(progress), convText );
00985       }
00986       else {
00987         timer.DrawProgressBar( i, convText );
00988       }
00989    }
00990 }
00991 
00992 //______________________________________________________________________________
00993 void TMVA::MethodMLP::TrainOneEpoch()
00994 {
00995    // train network over a single epoch/cycle of events
00996 
00997    Int_t nEvents = Data()->GetNEvents();
00998 
00999    // randomize the order events will be presented, important for sequential mode
01000    Int_t* index = new Int_t[nEvents];
01001    for (Int_t i = 0; i < nEvents; i++) index[i] = i;
01002    Shuffle(index, nEvents);
01003 
01004    // loop over all training events
01005    for (Int_t i = 0; i < nEvents; i++) {
01006 
01007       TrainOneEvent(index[i]);
01008 
01009       // do adjustments if in batch mode
01010       if (fBPMode == kBatch && (i+1)%fBatchSize == 0) {
01011          AdjustSynapseWeights();
01012          if (fgPRINT_BATCH) {
01013             PrintNetwork();
01014             WaitForKeyboard();
01015          }
01016       }
01017 
01018       // debug in sequential mode
01019       if (fgPRINT_SEQ) {
01020          PrintNetwork();
01021          WaitForKeyboard();
01022       }
01023    }
01024 
01025    delete[] index;
01026 }
01027 
01028 //______________________________________________________________________________
01029 void TMVA::MethodMLP::Shuffle(Int_t* index, Int_t n)
01030 {
01031    // Input:
01032    //   index: the array to shuffle
01033    //   n: the size of the array
01034    // Output:
01035    //   index: the shuffled indexes
01036    // This method is used for sequential training
01037 
01038    Int_t j, k;
01039    Int_t a = n - 1;
01040    for (Int_t i = 0; i < n; i++) {
01041       j = (Int_t) (frgen->Rndm() * a);
01042       k = index[j];
01043       index[j] = index[i];
01044       index[i] = k;
01045    }
01046 }
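Note that the loop above draws the swap partner j from the full range for every i, which is the simple (not exactly uniform) variant of a random shuffle. For comparison only, a Fisher-Yates shuffle of the same index array would look like the sketch below; this is not the shuffle used by TMVA, just an illustration reusing the same frgen generator:

   // Illustrative Fisher-Yates sketch: swap position i with a uniformly
   // chosen position j in [0, i], walking down from the end of the array.
   for (Int_t i = n - 1; i > 0; i--) {
      Int_t j = Int_t( frgen->Rndm() * (i + 1) );
      if (j > i) j = i;            // guard in case Rndm() returns exactly 1
      Int_t tmp = index[i];
      index[i]  = index[j];
      index[j]  = tmp;
   }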
01047 
01048 //______________________________________________________________________________
01049 void TMVA::MethodMLP::DecaySynapseWeights(Bool_t lateEpoch)
01050 {
01051    // decay synapse weights
01052    // in the late epochs (roughly the last 5% of training cycles), lower the learning rate even more to find a good minimum
01053 
01054    TSynapse* synapse;
01055    Int_t numSynapses = fSynapses->GetEntriesFast();
01056    for (Int_t i = 0; i < numSynapses; i++) {
01057       synapse = (TSynapse*)fSynapses->At(i);
01058       if (lateEpoch) synapse->DecayLearningRate(fDecayRate*fDecayRate);
01059       else           synapse->DecayLearningRate(fDecayRate);
01060    }
01061 }
01062 
01063 //______________________________________________________________________________
01064 void TMVA::MethodMLP::TrainOneEventFast(Int_t ievt, Float_t*& branchVar, Int_t& type)
01065 {
01066    // fast per-event training
01067 
01068    GetEvent(ievt);
01069 
01070    // as soon as we know how to get event weights, get that here
01071    
01072    // note: the normalization of event weights will affect the choice
01073    // of learning rate, one will have to experiment to get the right value.
01074    // in general, if the "average" event weight is 1, the learning rate
01075    // should be good if set around 0.02 (a good value if all event weights are 1)
01076    Double_t eventWeight = 1.0;
01077    
01078    // get the desired output of this event
01079    Double_t desired;
01080    if (type == 0) desired = fOutput->GetMin();  // background //zjh
01081    else           desired = fOutput->GetMax();  // signal     //zjh
01082 
01083    // force the value for each input neuron
01084    Double_t x;
01085    TNeuron* neuron;
01086    
01087    for (UInt_t j = 0; j < GetNvar(); j++) {
01088       x = branchVar[j];
01089       if (IsNormalised()) x = gTools().NormVariable( x, GetXmin( j ), GetXmax( j ) );
01090       neuron = GetInputNeuron(j);
01091       neuron->ForceValue(x);
01092    }
01093 
01094    ForceNetworkCalculations();
01095    UpdateNetwork(desired, eventWeight);
01096 }
01097 
01098 //______________________________________________________________________________
01099 void TMVA::MethodMLP::TrainOneEvent(Int_t ievt)
01100 {
01101    // train network over a single event
01102    // this uses the new event model
01103 
01104    // note: the normalization of event weights will affect the choice
01105    // of learning rate, one will have to experiment to get the right value.
01106    // in general, if the "average" event weight is 1, the learning rate
01107    // should be good if set around 0.02 (a good value if all event weights are 1)
01108 
01109    const Event * ev = GetEvent(ievt);
01110    Double_t eventWeight = ev->GetWeight();
01111    ForceNetworkInputs( ev );
01112    ForceNetworkCalculations();
01113    if (DoRegression()) UpdateNetwork( ev->GetTargets(),       eventWeight );
01114    else if (DoMulticlass()) UpdateNetwork( *DataInfo().GetTargetsForMulticlass( ev ), eventWeight );
01115    else                UpdateNetwork( GetDesiredOutput( ev ), eventWeight );
01116 }
01117 
01118 //______________________________________________________________________________
01119 Double_t TMVA::MethodMLP::GetDesiredOutput( const Event* ev )
01120 {
01121    // get the desired output of this event
01122    return DataInfo().IsSignal(ev)?fOutput->GetMax():fOutput->GetMin(); //zjh
01123 }
01124 
01125 
01126 //______________________________________________________________________________
01127 void TMVA::MethodMLP::UpdateNetwork(Double_t desired, Double_t eventWeight)
01128 {
01129    // update the network based on how closely
01130    // the output matched the desired output
01131    Double_t error = GetOutputNeuron()->GetActivationValue() - desired;
01132    if (fEstimator==kMSE)  error = GetOutputNeuron()->GetActivationValue() - desired ;  //zjh
01133    else if (fEstimator==kCE)  error = -1./(GetOutputNeuron()->GetActivationValue() -1 + desired); //zjh
01134    else  Log() << kFATAL << "Estimator type unspecified!!" << Endl;              //zjh
01135    error *= eventWeight;
01136    GetOutputNeuron()->SetError(error);
01137    CalculateNeuronDeltas();
01138    UpdateSynapses();
01139 }
01140 
01141 //______________________________________________________________________________
01142 void TMVA::MethodMLP::UpdateNetwork(std::vector<Float_t>& desired, Double_t eventWeight)
01143 {
01144    // update the network based on how closely
01145    // the output matched the desired output
01146    for (UInt_t i = 0; i < desired.size(); i++) {
01147       Double_t error = GetOutputNeuron( i )->GetActivationValue() - desired.at(i);
01148       error *= eventWeight;
01149       GetOutputNeuron( i )->SetError(error);
01150    }
01151    CalculateNeuronDeltas();
01152    UpdateSynapses();
01153 }
01154 
01155 
01156 //______________________________________________________________________________
01157 void TMVA::MethodMLP::CalculateNeuronDeltas()
01158 {
01159    // have each neuron calculate its delta by backpropagation
01160 
01161    TNeuron* neuron;
01162    Int_t    numNeurons;
01163    Int_t    numLayers = fNetwork->GetEntriesFast();
01164    TObjArray* curLayer;
01165 
01166    // step backwards through the network (backpropagation)
01167    // deltas calculated starting at output layer
01168    for (Int_t i = numLayers-1; i >= 0; i--) {
01169       curLayer = (TObjArray*)fNetwork->At(i);
01170       numNeurons = curLayer->GetEntriesFast();
01171   
01172       for (Int_t j = 0; j < numNeurons; j++) {
01173          neuron = (TNeuron*) curLayer->At(j);
01174          neuron->CalculateDelta();
01175       }
01176    }
01177 }
01178 
01179 //______________________________________________________________________________
01180 void TMVA::MethodMLP::GeneticMinimize()
01181 {
01182    // create genetics class similar to GeneticCut
01183    // give it vector of parameter ranges (parameters = weights)
01184    // link fitness function of this class to ComputeEstimator
01185    // instantiate GA (see MethodCuts)
01186    // run it
01187    // then this should exist for GA, Minuit and random sampling
01188 
01189    PrintMessage("Minimizing Estimator with GA");
01190 
01191    // define GA parameters
01192    fGA_preCalc   = 1;
01193    fGA_SC_steps  = 10;
01194    fGA_SC_rate   = 5;
01195    fGA_SC_factor = 0.95;
01196    fGA_nsteps    = 30;
01197 
01198    // ranges
01199    vector<Interval*> ranges;
01200 
01201    Int_t numWeights = fSynapses->GetEntriesFast();
01202    for (Int_t ivar=0; ivar< numWeights; ivar++) {
01203       ranges.push_back( new Interval( 0, GetXmax(ivar) - GetXmin(ivar) ));
01204    }
01205 
01206    FitterBase *gf = new GeneticFitter( *this, Log().GetPrintedSource(), ranges, GetOptions() );
01207    gf->Run();
01208 
01209    Double_t estimator = CalculateEstimator();
01210    Log() << kINFO << "GA: estimator after optimization: " << estimator << Endl;
01211 }
01212 
01213 //______________________________________________________________________________
01214 Double_t TMVA::MethodMLP::EstimatorFunction( std::vector<Double_t>& parameters)
01215 {
01216    // interface to the estimate
01217    return ComputeEstimator( parameters );
01218 }
01219 
01220 //______________________________________________________________________________
01221 Double_t TMVA::MethodMLP::ComputeEstimator( std::vector<Double_t>& parameters)
01222 {
01223    // this function is called by GeneticANN for GA optimization
01224 
01225    TSynapse* synapse;
01226    Int_t numSynapses = fSynapses->GetEntriesFast();
01227 
01228    for (Int_t i = 0; i < numSynapses; i++) {
01229       synapse = (TSynapse*)fSynapses->At(i);
01230       synapse->SetWeight(parameters.at(i));
01231    }
01232    if (fUseRegulator) UpdatePriors(); //zjh
01233 
01234    Double_t estimator = CalculateEstimator();
01235 
01236    return estimator;
01237 }
01238 
01239 //______________________________________________________________________________
01240 void TMVA::MethodMLP::UpdateSynapses()
01241 {
01242    // update synapse error fields and adjust the weights (if in sequential mode)
01243 
01244    TNeuron* neuron;
01245    Int_t numNeurons;
01246    TObjArray* curLayer;
01247    Int_t numLayers = fNetwork->GetEntriesFast();
01248 
01249    for (Int_t i = 0; i < numLayers; i++) {
01250       curLayer = (TObjArray*)fNetwork->At(i);
01251       numNeurons = curLayer->GetEntriesFast();
01252 
01253       for (Int_t j = 0; j < numNeurons; j++) {
01254          neuron = (TNeuron*) curLayer->At(j);
01255          if (fBPMode == kBatch) neuron->UpdateSynapsesBatch();
01256          else                neuron->UpdateSynapsesSequential();
01257       }
01258    }
01259 }
01260 
01261 //______________________________________________________________________________
01262 void TMVA::MethodMLP::AdjustSynapseWeights()
01263 {
01264    // just adjust the synapse weights (should be called in batch mode)
01265 
01266    TNeuron* neuron;
01267    Int_t numNeurons;
01268    TObjArray* curLayer;
01269    Int_t numLayers = fNetwork->GetEntriesFast();
01270 
01271    for (Int_t i = numLayers-1; i >= 0; i--) {
01272       curLayer = (TObjArray*)fNetwork->At(i);
01273       numNeurons = curLayer->GetEntriesFast();
01274 
01275       for (Int_t j = 0; j < numNeurons; j++) {
01276          neuron = (TNeuron*) curLayer->At(j);
01277          neuron->AdjustSynapseWeights();
01278       }
01279    }
01280 }
01281 
01282 //_______________________________________________________________________
01283 void TMVA::MethodMLP::UpdatePriors()  //zjh
01284 {
01285    fPrior=0;
01286    fPriorDev.clear();
01287    Int_t nSynapses = fSynapses->GetEntriesFast();
01288    for (Int_t i=0;i<nSynapses;i++) {
01289       TSynapse* synapse = (TSynapse*)fSynapses->At(i);
01290       fPrior+=0.5*fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight())*(synapse->GetWeight());
01291       fPriorDev.push_back(fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight()));
01292    }
01293 }
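In other words, the regulator prior is a per-class weight-decay term and fPriorDev stores its gradient; with \alpha_{c(i)} the regulator assigned to synapse i and w_i its weight:

   P = \frac{1}{2} \sum_i \alpha_{c(i)}\, w_i^2 , \qquad
   \frac{\partial P}{\partial w_i} = \alpha_{c(i)}\, w_i

P is added to the error in GetError() and \partial P / \partial w_i to the gradient in ComputeDEDw() whenever UseRegulator is set.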
01294 
01295 //_______________________________________________________________________
01296 void TMVA::MethodMLP::UpdateRegulators()  //zjh
01297 {
01298    TMatrixD InvH(0,0);
01299    GetApproxInvHessian(InvH);
01300    Int_t numSynapses=fSynapses->GetEntriesFast();
01301    Int_t numRegulators=fRegulators.size();
01302    Float_t gamma=0,
01303       variance=1.;    // Gaussian noise
01304    vector<Int_t> nWDP(numRegulators);
01305    vector<Double_t> trace(numRegulators),weightSum(numRegulators);
01306    for (int i=0;i<numSynapses;i++) {
01307       TSynapse* synapses = (TSynapse*)fSynapses->At(i);
01308       Int_t idx=fRegulatorIdx[i];
01309       nWDP[idx]++;
01310       trace[idx]+=InvH[i][i];
01311       gamma+=1-fRegulators[idx]*InvH[i][i];
01312       weightSum[idx]+=(synapses->GetWeight())*(synapses->GetWeight());
01313    }
01314    if (fEstimator==kMSE) {
01315       if (GetNEvents()>gamma) variance=CalculateEstimator( Types::kTraining, 0 )/(1-(gamma/GetNEvents()));
01316       else variance=CalculateEstimator( Types::kTraining, 0 );
01317    }
01318 
01319    //Log() << kDEBUG << Endl;
01320    for (int i=0;i<numRegulators;i++)
01321       {
01322          //fRegulators[i]=variance*(nWDP[i]-fRegulators[i]*trace[i])/weightSum[i];
01323          fRegulators[i]=variance*nWDP[i]/(weightSum[i]+variance*trace[i]);
01324          if (fRegulators[i]<0) fRegulators[i]=0;
01325          Log()<<kDEBUG<<"R"<<i<<":"<<fRegulators[i]<<"\t";
01326       }
01327    float trainE = CalculateEstimator( Types::kTraining, 0 ) ; // estimator for training sample  //zjh
01328    float testE  = CalculateEstimator( Types::kTesting,  0 ) ; // estimator for test sample //zjh
01329 
01330    Log()<<kDEBUG<<"\n"<<"trainE:"<<trainE<<"\ttestE:"<<testE<<"\tvariance:"<<variance<<"\tgamma:"<<gamma<<Endl;
01331 
01332 }
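
// Sketch of the regulator update implemented above (a variant of the Bayesian
// "evidence" update; the symbols below are illustrative shorthands): for each
// regulator class c with n_c synapses, weight sum W_c = sum_{i in c} w_i^2 and
// trace term T_c = sum_{i in c} (H^-1)_ii,
//
//    alpha_c  <-  sigma^2 * n_c / ( W_c + sigma^2 * T_c ),   clipped at 0,
//
// where the noise variance sigma^2 is estimated from the training error for the
// MSE estimator and kept at 1 otherwise.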
01333 
01334 //_______________________________________________________________________
01335 void TMVA::MethodMLP::GetApproxInvHessian(TMatrixD& InvHessian, bool regulate)  //zjh
01336 {
01337    Int_t numSynapses=fSynapses->GetEntriesFast();
01338    InvHessian.ResizeTo( numSynapses, numSynapses );
01339    InvHessian=0;
01340    TMatrixD sens(numSynapses,1);
01341    TMatrixD sensT(1,numSynapses);
01342    Int_t nEvents = GetNEvents();
01343    for (Int_t i=0;i<nEvents;i++) {
01344       GetEvent(i);
01345       double outputValue=GetMvaValue(); // force calculation
01346       GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
01347       CalculateNeuronDeltas();
01348       for (Int_t j = 0; j < numSynapses; j++){
01349          TSynapse* synapses = (TSynapse*)fSynapses->At(j);
01350          synapses->InitDelta();
01351          synapses->CalculateDelta();
01352          sens[j][0]=sensT[0][j]=synapses->GetDelta();
01353       }
01354       if (fEstimator==kMSE ) InvHessian+=sens*sensT;
01355       else if (fEstimator==kCE) InvHessian+=(outputValue*(1-outputValue))*sens*sensT;
01356    }
01357 
01358    // TVectorD eValue(numSynapses);
01359    if (regulate) {
01360       for (Int_t i = 0; i < numSynapses; i++){
01361          InvHessian[i][i]+=fRegulators[fRegulatorIdx[i]];
01362       }
01363    }
01364    else {
01365       for (Int_t i = 0; i < numSynapses; i++){
01366          InvHessian[i][i]+=1e-6; // small diagonal term to avoid precision problems that would spoil positive-definiteness
01367       }
01368    }
01369 
01370    InvHessian.Invert();
01371 
01372 }
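
// What is assembled above, schematically: an outer-product (Gauss-Newton-like)
// approximation of the error Hessian,
//
//    H  ~  sum_{events} g g^T                       (MSE)
//    H  ~  sum_{events} y(1-y) * g g^T              (cross entropy, y = outputValue)
//    H_ii += alpha_{c(i)}   or   H_ii += 1e-6       (regularised / unregularised diagonal)
//
// with g the per-event vector of synapse deltas (dMVA/dw_i).  After Invert(),
// the argument InvHessian holds H^-1, not H.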
01373 
01374 //_______________________________________________________________________
01375 Double_t TMVA::MethodMLP::GetMvaValueAsymError( Double_t* errLower, Double_t* errUpper )
01376 {
01377    Double_t MvaValue = GetMvaValue(); // forward pass; caches the neuron values used below
01378 
01379    // no Hessian available (old training file) or no error requested
01380    if (fInvHessian.GetNcols()==0 || errLower==0 || errUpper==0)
01381       return MvaValue;
01382 
01383    Double_t MvaUpper,MvaLower,median,variance;
01384    Int_t numSynapses=fSynapses->GetEntriesFast();
01385    if (fInvHessian.GetNcols()!=numSynapses) {
01386       Log() << kWARNING << "inconsistent dimensions: " << fInvHessian.GetNcols() << " Hessian columns vs. " << numSynapses << " synapses" << Endl;
01387    }
01388    TMatrixD sens(numSynapses,1);
01389    TMatrixD sensT(1,numSynapses);
01390    GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
01391    //GetOutputNeuron()->SetError(1.);
01392    CalculateNeuronDeltas();
01393    for (Int_t i = 0; i < numSynapses; i++){
01394       TSynapse* synapses = (TSynapse*)fSynapses->At(i);
01395       synapses->InitDelta();
01396       synapses->CalculateDelta();
01397       sensT[0][i]=synapses->GetDelta();
01398    }
01399    sens.Transpose(sensT);
01400    TMatrixD sig=sensT*fInvHessian*sens;
01401    variance=sig[0][0];
01402    median=GetOutputNeuron()->GetValue();
01403    //Log()<<kDEBUG<<"median="<<median<<"\tvariance="<<variance<<Endl;
01404 
01405    //upper
01406    MvaUpper=fOutput->Eval(median+variance);
01407    if(errUpper)
01408       *errUpper=MvaUpper-MvaValue;
01409 
01410    //lower
01411    MvaLower=fOutput->Eval(median-variance);
01412    if(errLower)
01413       *errLower=MvaValue-MvaLower;
01414 
01415    if (variance<0) {
01416       Log()<<kWARNING<<"median=" << median << "\tvariance=" << variance
01417            <<"MvaLower=" << MvaLower <<"\terrLower=" << (errLower?*errLower:0)
01418            <<"MvaUpper=" << MvaUpper <<"\terrUpper=" << (errUpper?*errUpper:0)
01419            <<Endl;
01420    }
01421    return MvaValue;
01422 }
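
// Error propagation used above, as a sketch: with g = dMVA/dw at the current
// event and H^-1 the stored approximate inverse Hessian,
//
//    sig = g^T H^-1 g
//
// is stored in "variance" and taken as the spread of the raw output-neuron
// value ("median"); the asymmetric errors follow from pushing median +/- variance
// through the output activation fOutput->Eval().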
01423 
01424 
01425 #ifdef MethodMLP_UseMinuit__
01426 
01427 //______________________________________________________________________________
01428 void TMVA::MethodMLP::MinuitMinimize()
01429 {
01430    // minimize using Minuit
01431    fNumberOfWeights = fSynapses->GetEntriesFast();
01432 
01433    TFitter* tfitter = new TFitter( fNumberOfWeights );
01434 
01435    // minuit-specific settings
01436    Double_t args[10];
01437 
01438    // output level
01439    args[0] = 2; // verbosity: 2 = verbose, 0 = results only, -1 = silent
01440    tfitter->ExecuteCommand( "SET PRINTOUT", args, 1 );
01441    tfitter->ExecuteCommand( "SET NOWARNINGS", args, 0 );
01442 
01443    std::vector<Double_t> w(fNumberOfWeights, 0.0); // start values for the fit parameters
01444 
01445    // init parameters
01446    for (Int_t ipar=0; ipar < fNumberOfWeights; ipar++) {
01447       TString parName = Form("w%i", ipar);
01448       tfitter->SetParameter( ipar,
01449                              parName, w[ipar], 0.1, 0, 0 );
01450    }
01451 
01452    // define the FCN function
01453    tfitter->SetFCN( &IFCN );
01454 
01455    // define fit strategy
01456    args[0] = 2;
01457    tfitter->ExecuteCommand( "SET STRATEGY", args, 1 );
01458 
01459    // now do the fit !
01460    args[0] = 1e-04;
01461    tfitter->ExecuteCommand( "MIGRAD", args, 1 );
01462 
01463    Bool_t doBetter     = kFALSE;
01464    Bool_t doEvenBetter = kFALSE;
01465    if (doBetter) {
01466       args[0] = 1e-04;
01467       tfitter->ExecuteCommand( "IMPROVE", args, 1 );
01468 
01469       if (doEvenBetter) {
01470          args[0] = 500;
01471          tfitter->ExecuteCommand( "MINOS", args, 1 );
01472       }
01473    }
01474 }
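
// A minimal, self-contained sketch of the TFitter/Minuit driving pattern used in
// MinuitMinimize() above.  This toy is not part of TMVA; the function and
// parameter names are made up, only the TFitter calls mirror the ones above.
//
//    #include "TFitter.h"
//
//    static void ToyFCN( Int_t& /*npar*/, Double_t* /*grad*/, Double_t& f,
//                        Double_t* par, Int_t /*iflag*/ )
//    {
//       // simple convex function with minimum at (1, 2)
//       f = (par[0]-1.)*(par[0]-1.) + (par[1]-2.)*(par[1]-2.);
//    }
//
//    void RunToyMinimization()
//    {
//       TFitter fitter( 2 );                                // two fit parameters
//       Double_t args[1] = { -1 };
//       fitter.ExecuteCommand( "SET PRINTOUT", args, 1 );   // silence Minuit
//       fitter.SetFCN( ToyFCN );
//       fitter.SetParameter( 0, "x", 0., 0.1, 0, 0 );
//       fitter.SetParameter( 1, "y", 0., 0.1, 0, 0 );
//       args[0] = 1e-04;                                    // tolerance, as in MinuitMinimize()
//       fitter.ExecuteCommand( "MIGRAD", args, 1 );
//    }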
01475 
01476 //______________________________________________________________________________
01477 void TMVA::MethodMLP::IFCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
01478 {
01479    // Evaluate the minimisation function ----------------------------------------------------
01480    //
01481    //  Input parameters:
01482    //    npars:   number of currently variable parameters
01483    //             CAUTION: this is not (necessarily) the dimension of the fitPars vector !
01484    //    fitPars: array of (constant and variable) parameters
01485    //    iflag:   indicates what is to be calculated (see example below)
01486    //    grad:    array of gradients
01487    //
01488    //  Output parameters:
01489    //    f:       the calculated function value.
01490    //    grad:    the (optional) vector of first derivatives.
01491    // ---------------------------------------------------------------------------------------
01492    ((MethodMLP*)GetThisPtr())->FCN( npars, grad, f, fitPars, iflag );
01493 }
01494 
01495 static Int_t  nc   = 0;
01496 static double minf = 1000000;
01497 
01498 void TMVA::MethodMLP::FCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
01499 {
01500    // first update the weights
01501    for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) {
01502       TSynapse* synapse = (TSynapse*)fSynapses->At(ipar);
01503       synapse->SetWeight(fitPars[ipar]);
01504    }
01505 
01506    // now compute the estimator
01507    f = CalculateEstimator();
01508 
01509    nc++;
01510    if (f < minf) minf = f;
01511    for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) Log() << kDEBUG << fitPars[ipar] << " ";
01512    Log() << kDEBUG << Endl;
01513    Log() << kDEBUG << "***** New estimator: " << f << "  min: " << minf << " --> ncalls: " << nc << Endl;
01514 }
01515 
01516 //_______________________________________________________________________
01517 TMVA::MethodMLP* TMVA::MethodMLP::GetThisPtr()
01518 {
01519    // global "this" pointer to be used in minuit
01520    return fgThis;
01521 }
01522 
01523 #endif
01524 
01525 
01526 //_______________________________________________________________________
01527 void TMVA::MethodMLP::MakeClassSpecific( std::ostream& fout, const TString& className ) const
01528 {
01529    // write specific classifier response
01530    MethodANNBase::MakeClassSpecific(fout, className);
01531 }
01532 
01533 //_______________________________________________________________________
01534 void TMVA::MethodMLP::GetHelpMessage() const
01535 {
01536    // get help message text
01537    //
01538    // typical length of text line:
01539    //         "|--------------------------------------------------------------|"
01540    TString col    = gConfig().WriteOptionsReference() ? "" : gTools().Color("bold");
01541    TString colres = gConfig().WriteOptionsReference() ? "" : gTools().Color("reset");
01542 
01543    Log() << Endl;
01544    Log() << col << "--- Short description:" << colres << Endl;
01545    Log() << Endl;
01546    Log() << "The MLP artificial neural network (ANN) is a traditional feed-" << Endl;
01547    Log() << "forward multilayer perceptron impementation. The MLP has a user-" << Endl;
01548    Log() << "defined hidden layer architecture, while the number of input (output)" << Endl;
01549    Log() << "nodes is determined by the input variables (output classes, i.e., " << Endl;
01550    Log() << "signal and one background). " << Endl;
01551    Log() << Endl;
01552    Log() << col << "--- Performance optimisation:" << colres << Endl;
01553    Log() << Endl;
01554    Log() << "Neural networks are stable and performing for a large variety of " << Endl;
01555    Log() << "linear and non-linear classification problems. However, in contrast" << Endl;
01556    Log() << "to (e.g.) boosted decision trees, the user is advised to reduce the " << Endl;
01557    Log() << "number of input variables that have only little discrimination power. " << Endl;
01558    Log() << "" << Endl;
01559    Log() << "In the tests we have carried out so far, the MLP and ROOT networks" << Endl;
01560    Log() << "(TMlpANN, interfaced via TMVA) performed equally well, with however" << Endl;
01561    Log() << "a clear speed advantage for the MLP. The Clermont-Ferrand neural " << Endl;
01562    Log() << "net (CFMlpANN) exhibited worse classification performance in these" << Endl;
01563    Log() << "tests, which is partly due to the slow convergence of its training" << Endl;
01564    Log() << "(at least 10k training cycles are required to achieve approximately" << Endl;
01565    Log() << "competitive results)." << Endl;
01566    Log() << Endl;
01567    Log() << col << "Overtraining: " << colres
01568          << "only the TMlpANN performs an explicit separation of the" << Endl;
01569    Log() << "full training sample into independent training and validation samples." << Endl;
01570    Log() << "We have found that in most high-energy physics applications the " << Endl;
01571    Log() << "avaliable degrees of freedom (training events) are sufficient to " << Endl;
01572    Log() << "constrain the weights of the relatively simple architectures required" << Endl;
01573    Log() << "to achieve good performance. Hence no overtraining should occur, and " << Endl;
01574    Log() << "the use of validation samples would only reduce the available training" << Endl;
01575    Log() << "information. However, if the perrormance on the training sample is " << Endl;
01576    Log() << "found to be significantly better than the one found with the inde-" << Endl;
01577    Log() << "pendent test sample, caution is needed. The results for these samples " << Endl;
01578    Log() << "are printed to standard output at the end of each training job." << Endl;
01579    Log() << Endl;
01580    Log() << col << "--- Performance tuning via configuration options:" << colres << Endl;
01581    Log() << Endl;
01582    Log() << "The hidden layer architecture for all ANNs is defined by the option" << Endl;
01583    Log() << "\"HiddenLayers=N+1,N,...\", where here the first hidden layer has N+1" << Endl;
01584    Log() << "neurons and the second N neurons (and so on), and where N is the number  " << Endl;
01585    Log() << "of input variables. Excessive numbers of hidden layers should be avoided," << Endl;
01586    Log() << "in favour of more neurons in the first hidden layer." << Endl;
01587    Log() << "" << Endl;
01588    Log() << "The number of cycles should be above 500. As said, if the number of" << Endl;
01589    Log() << "adjustable weights is small compared to the training sample size," << Endl;
01590    Log() << "using a large number of training samples should not lead to overtraining." << Endl;
01591 }
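
// A minimal usage sketch of the options discussed in the help text above.
// The macro and string values are illustrative; the option names ("HiddenLayers",
// "NCycles", "TrainingMethod", "UseRegulator") follow the TMVA option reference.
//
//    #include "TMVA/Factory.h"
//    #include "TMVA/Types.h"
//
//    void BookToyMLP( TMVA::Factory& factory )
//    {
//       // two hidden layers with N+1 and N neurons (N = number of input variables),
//       // 600 training cycles, BFGS training with Bayesian regulators
//       factory.BookMethod( TMVA::Types::kMLP, "MLP",
//                           "HiddenLayers=N+1,N:NCycles=600:"
//                           "TrainingMethod=BFGS:UseRegulator" );
//    }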
01592 
