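//______________________________________________________________________________
//
// MethodMLP
//
// Multilayer Perceptron (MLP) artificial neural network method of TMVA.
// The network is trained either with back-propagation (sequential or batch
// mode), with the BFGS quasi-Newton algorithm, or with a genetic algorithm.
// Optional Bayesian regulators (weight decay) can be used to suppress
// overtraining, and an approximate inverse Hessian can be calculated at the
// end of the training to provide an uncertainty on the MVA value.
//______________________________________________________________________________
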
#include "TString.h"
#include <vector>
#include <cmath>
#include "TTree.h"
#include "Riostream.h"
#include "TFitter.h"
#include "TMatrixD.h"
#include "TMath.h"
#include "TFile.h"

#include "TMVA/ClassifierFactory.h"
#include "TMVA/Interval.h"
#include "TMVA/MethodMLP.h"
#include "TMVA/TNeuron.h"
#include "TMVA/TSynapse.h"
#include "TMVA/Timer.h"
#include "TMVA/Types.h"
#include "TMVA/Tools.h"
#include "TMVA/GeneticFitter.h"
#include "TMVA/Config.h"

#ifdef MethodMLP_UseMinuit__
TMVA::MethodMLP* TMVA::MethodMLP::fgThis = 0;
Bool_t MethodMLP_UseMinuit = kTRUE;
#endif

REGISTER_METHOD(MLP)

ClassImp(TMVA::MethodMLP)

using std::vector;

//______________________________________________________________________________
TMVA::MethodMLP::MethodMLP( const TString& jobName,
                            const TString& methodTitle,
                            DataSetInfo& theData,
                            const TString& theOption,
                            TDirectory* theTargetDir )
   : MethodANNBase( jobName, Types::kMLP, methodTitle, theData, theOption, theTargetDir ),
     fPrior(0.0),
     fSamplingFraction(1.0),
     fSamplingEpoch(0.0)
{
   // standard constructor
}

//______________________________________________________________________________
TMVA::MethodMLP::MethodMLP( DataSetInfo& theData,
                            const TString& theWeightFile,
                            TDirectory* theTargetDir )
   : MethodANNBase( Types::kMLP, theData, theWeightFile, theTargetDir ),
     fPrior(0.0),
     fSamplingFraction(1.0),
     fSamplingEpoch(0.0)
{
   // constructor from a weight file
}

//______________________________________________________________________________
TMVA::MethodMLP::~MethodMLP()
{
   // destructor: nothing to be done
}

//______________________________________________________________________________
Bool_t TMVA::MethodMLP::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t )
{
   // MLP can handle two-class classification, multiclass classification and regression
   if (type == Types::kClassification && numberClasses == 2) return kTRUE;
   if (type == Types::kMulticlass) return kTRUE;
   if (type == Types::kRegression) return kTRUE;

   return kFALSE;
}

//______________________________________________________________________________
void TMVA::MethodMLP::Init()
{
   // default initializations
   SetSignalReferenceCut( 0.5 );

#ifdef MethodMLP_UseMinuit__
   fgThis = this;
#endif
}

//______________________________________________________________________________
void TMVA::MethodMLP::DeclareOptions()
{
   // define the options (their key words) that can be set in the option string
   // known options:
   // TrainingMethod     <string>  training method: back-propagation (BP), BFGS, or genetic algorithm (GA)
   // LearningRate       <float>   ANN learning rate parameter
   // DecayRate          <float>   decay rate of the learning parameter
   // TestRate           <int>     test for overtraining every # epochs
   // BPMode             <string>  back-propagation learning mode: sequential or batch
   // BatchSize          <int>     batch size, only used in batch mode
   // ConvergenceImprove <float>   minimum improvement that counts as an improvement
   // ConvergenceTests   <int>     number of steps without improvement required for convergence
   // UseRegulator       <bool>    use a Bayesian regulator (weight decay) to avoid overtraining
   // CalculateErrors    <bool>    compute the inverse Hessian to estimate MVA-value uncertainties

   DeclareOptionRef(fTrainMethodS="BP", "TrainingMethod",
                    "Train with Back-Propagation (BP), BFGS Algorithm (BFGS), or Genetic Algorithm (GA - slower and worse)");
   AddPreDefVal(TString("BP"));
   AddPreDefVal(TString("GA"));
   AddPreDefVal(TString("BFGS"));

   DeclareOptionRef(fLearnRate=0.02,    "LearningRate",    "ANN learning rate parameter");
   DeclareOptionRef(fDecayRate=0.01,    "DecayRate",       "Decay rate for learning parameter");
   DeclareOptionRef(fTestRate =10,      "TestRate",        "Test for overtraining performed at each #th epoch");
   DeclareOptionRef(fEpochMon = kFALSE, "EpochMonitoring", "Provide epoch-wise monitoring plots according to TestRate (caution: causes big ROOT output file!)" );

   DeclareOptionRef(fSamplingFraction=1.0, "Sampling",           "Only 'Sampling' (randomly selected) events are trained each epoch");
   DeclareOptionRef(fSamplingEpoch=1.0,    "SamplingEpoch",      "Sampling is used for the first 'SamplingEpoch' epochs, afterwards, all events are taken for training");
   DeclareOptionRef(fSamplingWeight=1.0,   "SamplingImportance", "The sampling weights of events in epochs which were successful (worse estimator than before) are multiplied with SamplingImportance, else they are divided.");

   DeclareOptionRef(fSamplingTraining=kTRUE,  "SamplingTraining", "The training sample is sampled");
   DeclareOptionRef(fSamplingTesting= kFALSE, "SamplingTesting" , "The testing sample is sampled");

   DeclareOptionRef(fResetStep=50,  "ResetStep", "How often BFGS should reset history");
   DeclareOptionRef(fTau      =3.0, "Tau",       "LineSearch \"size step\"");

   DeclareOptionRef(fBpModeS="sequential", "BPMode",
                    "Back-propagation learning mode: sequential or batch");
   AddPreDefVal(TString("sequential"));
   AddPreDefVal(TString("batch"));

   DeclareOptionRef(fBatchSize=-1, "BatchSize",
                    "Batch size: number of events/batch, only set if in Batch Mode, -1 for BatchSize=number_of_events");

   DeclareOptionRef(fImprovement=1e-30, "ConvergenceImprove",
                    "Minimum improvement which counts as improvement (<0 means automatic convergence check is turned off)");

   DeclareOptionRef(fSteps=-1, "ConvergenceTests",
                    "Number of steps (without improvement) required for convergence (<0 means automatic convergence check is turned off)");

   DeclareOptionRef(fUseRegulator=kFALSE, "UseRegulator",
                    "Use regulator to avoid over-training");
   DeclareOptionRef(fUpdateLimit=10, "UpdateLimit",
                    "Maximum number of regulator updates allowed during training");
   DeclareOptionRef(fCalculateErrors=kFALSE, "CalculateErrors",
                    "Calculates inverse Hessian matrix at the end of the training to be able to calculate the uncertainties of an MVA value");
}
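
// An illustrative booking string using some of the options declared above (the exact
// set of options depends on the analysis; NCycles and HiddenLayers are declared in the
// MethodANNBase base class):
//
//   factory->BookMethod( TMVA::Types::kMLP, "MLP",
//                        "!H:!V:NCycles=600:HiddenLayers=N+5:TestRate=5:"
//                        "TrainingMethod=BFGS:UseRegulator" );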

//______________________________________________________________________________
void TMVA::MethodMLP::ProcessOptions()
{
   // process user options
   MethodANNBase::ProcessOptions();

   if (IgnoreEventsWithNegWeightsInTraining()) {
      Log() << kFATAL << "Mechanism to ignore events with negative weights in training not yet available for method: "
            << GetMethodTypeName()
            << " --> please remove \"IgnoreNegWeightsInTraining\" option from booking string."
            << Endl;
   }

   if      (fTrainMethodS == "BP"  ) fTrainingMethod = kBP;
   else if (fTrainMethodS == "BFGS") fTrainingMethod = kBFGS;
   else if (fTrainMethodS == "GA"  ) fTrainingMethod = kGA;

   if      (fBpModeS == "sequential") fBPMode = kSequential;
   else if (fBpModeS == "batch")      fBPMode = kBatch;

   // set the batch size to the full training sample if it was not set, or set out of range
   if (fBPMode == kBatch) {
      Data()->SetCurrentType(Types::kTraining);
      Int_t numEvents = Data()->GetNEvents();
      if (fBatchSize < 1 || fBatchSize > numEvents) fBatchSize = numEvents;
   }
}

//______________________________________________________________________________
void TMVA::MethodMLP::InitializeLearningRates()
{
   // initialize learning rates of synapses, used only by back-propagation
   Log() << kDEBUG << "Initialize learning rates" << Endl;
   TSynapse *synapse;
   Int_t numSynapses = fSynapses->GetEntriesFast();
   for (Int_t i = 0; i < numSynapses; i++) {
      synapse = (TSynapse*)fSynapses->At(i);
      synapse->SetLearningRate(fLearnRate);
   }
}

//______________________________________________________________________________
Double_t TMVA::MethodMLP::CalculateEstimator( Types::ETreeType treeType, Int_t iEpoch )
{
   // calculate the estimator that training is attempting to minimize

   // sanity check
   if (treeType != Types::kTraining && treeType != Types::kTesting) {
      Log() << kFATAL << "<CalculateEstimator> fatal error: wrong tree type: " << treeType << Endl;
   }

   Types::ETreeType saveType = Data()->GetCurrentType();
   Data()->SetCurrentType(treeType);

   // monitoring histograms of the network response, filled once per monitored epoch
   TString type  = (treeType == Types::kTraining ? "train" : "test");
   TString name  = Form("convergencetest___mlp_%s_epoch_%04i", type.Data(), iEpoch);
   TString nameB = name + "_B";
   TString nameS = name + "_S";
   Int_t   nbin  = 100;
   Float_t limit = 2;
   TH1*    histS = 0;
   TH1*    histB = 0;
   if (fEpochMon && iEpoch >= 0 && !DoRegression()) {
      histS = new TH1F( nameS, nameS, nbin, -limit, limit );
      histB = new TH1F( nameB, nameB, nbin, -limit, limit );
   }

   Double_t estimator = 0;

   // loop over all events of the current tree type
   Int_t  nEvents  = GetNEvents();
   UInt_t nClasses = DataInfo().GetNClasses();
   UInt_t nTgts    = DataInfo().GetNTargets();
   for (Int_t i = 0; i < nEvents; i++) {

      const Event* ev = GetEvent(i);
      Double_t     w  = ev->GetWeight();

      ForceNetworkInputs( ev );
      ForceNetworkCalculations();

      Double_t d = 0, v = 0;
      if (DoRegression()) {
         for (UInt_t itgt = 0; itgt < nTgts; itgt++) {
            v = GetOutputNeuron( itgt )->GetActivationValue();
            Double_t targetValue = ev->GetTarget( itgt );
            Double_t dt = v - targetValue;
            d += (dt*dt);
         }
         estimator += d*w;
      } else if (DoMulticlass()) {
         UInt_t cls = ev->GetClass();
         if (fEstimator==kCE) {
            Double_t norm(0);
            for (UInt_t icls = 0; icls < nClasses; icls++) {
               norm += exp( GetOutputNeuron( icls )->GetActivationValue() );
               if (icls==cls)
                  d = exp( GetOutputNeuron( icls )->GetActivationValue() );
            }
            d = -TMath::Log(d/norm);
         }
         else {
            // sum of squared deviations over all output nodes
            for (UInt_t icls = 0; icls < nClasses; icls++) {
               Double_t desired = (icls==cls) ? 1.0 : 0.0;
               v = GetOutputNeuron( icls )->GetActivationValue();
               d += (desired-v)*(desired-v);
            }
         }
         estimator += d*w;
      } else {
         Double_t desired = DataInfo().IsSignal(ev) ? 1. : 0.;
         v = GetOutputNeuron()->GetActivationValue();
         if      (fEstimator==kMSE) d = (desired-v)*(desired-v);
         else if (fEstimator==kCE)  d = -2*(desired*TMath::Log(v)+(1-desired)*TMath::Log(1-v));
         estimator += d*w;
      }

      // fill monitoring histograms
      if      (DataInfo().IsSignal(ev) && histS != 0) histS->Fill( float(v), float(w) );
      else if (histB != 0)                            histB->Fill( float(v), float(w) );
   }

   if (histS != 0) fEpochMonHistS.push_back( histS );
   if (histB != 0) fEpochMonHistB.push_back( histB );

   // normalize the estimator to the number of events
   estimator = estimator/Float_t(nEvents);

   // restore the data set type that was active before this call
   Data()->SetCurrentType( saveType );

   // provide epoch-wise monitoring of the network weights
   if (fEpochMon && iEpoch >= 0 && !DoRegression() && treeType == Types::kTraining) {
      CreateWeightMonitoringHists( Form("epochmonitoring___epoch_%04i_weights_hist", iEpoch), &fEpochMonHistW );
   }

   return estimator;
}
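
// For reference, the per-event contributions d summed in CalculateEstimator above are
//   regression        : d = sum_t ( y_t - target_t )^2
//   multiclass, kCE   : d = -log( exp(y_cls) / sum_c exp(y_c) )   (softmax cross-entropy)
//   multiclass, kMSE  : d = sum_c ( delta_{c,cls} - y_c )^2
//   two-class,  kMSE  : d = ( desired - y )^2
//   two-class,  kCE   : d = -2*( desired*log(y) + (1-desired)*log(1-y) )
// each weighted by the event weight; the total is finally divided by the number of events.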

//______________________________________________________________________________
void TMVA::MethodMLP::Train(Int_t nEpochs)
{
   // train the network with the chosen minimization method
   if (fNetwork == 0) {
      Log() << kFATAL << "ANN Network is not initialized, doing it now!" << Endl;
      SetAnalysisType(GetAnalysisType());
   }
   Log() << kDEBUG << "reinitialize learning rates" << Endl;
   InitializeLearningRates();
   PrintMessage("Training Network");

   Int_t nEvents   = GetNEvents();
   Int_t nSynapses = fSynapses->GetEntriesFast();
   if (nSynapses > nEvents)
      Log() << kFATAL << "ANN too complicated: #events=" << nEvents << "\t#synapses=" << nSynapses << Endl;

#ifdef MethodMLP_UseMinuit__
   if (MethodMLP_UseMinuit) MinuitMinimize();
#else
   if      (fTrainingMethod == kGA)   GeneticMinimize();
   else if (fTrainingMethod == kBFGS) BFGSMinimize(nEpochs);
   else                               BackPropagationMinimize(nEpochs);
#endif

   float trainE = CalculateEstimator( Types::kTraining, 0 );
   float testE  = CalculateEstimator( Types::kTesting,  0 );
   if (fUseRegulator) {
      Log() << kINFO << "Finalizing handling of Regulator terms, trainE=" << trainE << " testE=" << testE << Endl;
      UpdateRegulators();
      Log() << kINFO << "Done with handling of Regulator terms" << Endl;
   }

   if (fCalculateErrors || fUseRegulator)
   {
      Int_t numSynapses = fSynapses->GetEntriesFast();
      fInvHessian.ResizeTo(numSynapses,numSynapses);
      GetApproxInvHessian( fInvHessian, false );
   }
}

//______________________________________________________________________________
void TMVA::MethodMLP::BFGSMinimize( Int_t nEpochs )
{
   // train the network using the BFGS quasi-Newton method

   Timer timer( (fSteps>0?100:nEpochs), GetName() );

   // create histograms for overtraining monitoring
   Int_t nbinTest = Int_t(nEpochs/fTestRate);
   fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
                                   nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
   fEstimatorHistTest  = new TH1F( "estimatorHistTest", "test estimator",
                                   nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );

   Int_t nSynapses = fSynapses->GetEntriesFast();
   Int_t nWeights  = nSynapses;

   for (Int_t i=0;i<nSynapses;i++) {
      TSynapse* synapse = (TSynapse*)fSynapses->At(i);
      synapse->SetDEDw(0.0);
   }

   std::vector<Double_t> buffer( nWeights );
   for (Int_t i=0;i<nWeights;i++) buffer[i] = 0.;

   TMatrixD Dir     ( nWeights, 1 );
   TMatrixD Hessian ( nWeights, nWeights );
   TMatrixD Gamma   ( nWeights, 1 );
   TMatrixD Delta   ( nWeights, 1 );
   Int_t    RegUpdateCD    = 0;   // epochs since the last regulator update
   Int_t    RegUpdateTimes = 0;   // total number of regulator updates
   Double_t AccuError      = 0;

   Double_t trainE = -1;
   Double_t testE  = -1;

   fLastAlpha = 0.;

   if (fSamplingTraining || fSamplingTesting)
      Data()->InitSampling(1.0,1.0,fRandomSeed);

   if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
   timer.DrawProgressBar( 0 );

   // start the training loop
   for (Int_t i = 0; i < nEpochs; i++) {
      if (Float_t(i)/nEpochs < fSamplingEpoch) {
         if ((i+1)%fTestRate == 0 || (i == 0)) {
            if (fSamplingTraining) {
               Data()->SetCurrentType( Types::kTraining );
               Data()->InitSampling(fSamplingFraction,fSamplingWeight);
               Data()->CreateSampling();
            }
            if (fSamplingTesting) {
               Data()->SetCurrentType( Types::kTesting );
               Data()->InitSampling(fSamplingFraction,fSamplingWeight);
               Data()->CreateSampling();
            }
         }
      }
      else {
         Data()->SetCurrentType( Types::kTraining );
         Data()->InitSampling(1.0,1.0);
         Data()->SetCurrentType( Types::kTesting );
         Data()->InitSampling(1.0,1.0);
      }
      Data()->SetCurrentType( Types::kTraining );

      if (fUseRegulator) {
         UpdatePriors();
         RegUpdateCD++;
      }

      SetGammaDelta( Gamma, Delta, buffer );

      // reset the Hessian periodically, otherwise apply the BFGS update
      if (i % fResetStep == 0 && i < 0.5*nEpochs) {
         SteepestDir( Dir );
         Hessian.UnitMatrix();
         RegUpdateCD = 0;
      }
      else {
         if (GetHessian( Hessian, Gamma, Delta )) {
            SteepestDir( Dir );
            Hessian.UnitMatrix();
            RegUpdateCD = 0;
         }
         else SetDir( Hessian, Dir );
      }

      Double_t dError = 0;
      if (DerivDir( Dir ) > 0) {
         SteepestDir( Dir );
         Hessian.UnitMatrix();
         RegUpdateCD = 0;
      }
      if (LineSearch( Dir, buffer, &dError )) {
         Hessian.UnitMatrix();
         SteepestDir( Dir );
         RegUpdateCD = 0;
         if (LineSearch(Dir, buffer, &dError)) {
            i = nEpochs;
            Log() << kFATAL << "Line search failed! Huge troubles somewhere..." << Endl;
         }
      }

      // monitor the improvement of the error
      if (dError < 0) Log() << kWARNING << "\nnegative dError=" << dError << Endl;
      AccuError += dError;
      if (std::abs(dError) > 0.0001) RegUpdateCD = 0;

      if ( fUseRegulator && RegUpdateTimes<fUpdateLimit && RegUpdateCD>=((0.4*fResetStep)>50?50:(0.4*fResetStep)) && i<0.8*nEpochs && AccuError>0.01 ) {
         Log() << kDEBUG << Endl;
         Log() << kDEBUG << "\nUpdate regulators " << RegUpdateTimes << " on epoch " << i << "\tdError=" << dError << Endl;
         UpdateRegulators();
         Hessian.UnitMatrix();
         RegUpdateCD = 0;
         RegUpdateTimes++;
         AccuError = 0;
      }

      // monitor convergence of training and control sample
      if ((i+1)%fTestRate == 0) {
         trainE = CalculateEstimator( Types::kTraining, i );
         testE  = CalculateEstimator( Types::kTesting,  i );
         fEstimatorHistTrain->Fill( i+1, trainE );
         fEstimatorHistTest ->Fill( i+1, testE );

         Bool_t success = kFALSE;
         if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
            success = kTRUE;
         }
         Data()->EventResult( success );

         SetCurrentValue( testE );
         if (HasConverged()) {
            if (Float_t(i)/nEpochs < fSamplingEpoch) {
               Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
               i = newEpoch;
               ResetConvergenceCounter();
            }
            else break;
         }
      }

      // draw progress bar (add convergence value)
      TString convText = Form( "<D^2> (train/test): %.4g/%.4g", trainE, testE );
      if (fSteps > 0) {
         Float_t progress = 0;
         if (Float_t(i)/nEpochs < fSamplingEpoch)
            progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
         else
            progress = 100.0*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
         Float_t progress2 = 100.0*RegUpdateTimes/fUpdateLimit;
         if (progress2 > progress) progress = progress2;
         timer.DrawProgressBar( Int_t(progress), convText );
      }
      else {
         Int_t progress = Int_t(nEpochs*RegUpdateTimes/Float_t(fUpdateLimit));
         if (progress < i) progress = i;
         timer.DrawProgressBar( progress, convText );
      }

      // some verbose output
      if (fgPRINT_SEQ) {
         PrintNetwork();
         WaitForKeyboard();
      }
   }
}

//______________________________________________________________________________
void TMVA::MethodMLP::SetGammaDelta( TMatrixD &Gamma, TMatrixD &Delta, std::vector<Double_t> &buffer )
{
   // Gamma is the change of the gradient, Delta the last weight step (taken from buffer)
   Int_t nWeights = fSynapses->GetEntriesFast();

   Int_t IDX = 0;
   Int_t nSynapses = fSynapses->GetEntriesFast();
   for (Int_t i=0;i<nSynapses;i++) {
      TSynapse *synapse = (TSynapse*)fSynapses->At(i);
      Gamma[IDX++][0] = -synapse->GetDEDw();
   }

   for (Int_t i=0;i<nWeights;i++) Delta[i][0] = buffer[i];

   ComputeDEDw();

   IDX = 0;
   for (Int_t i=0;i<nSynapses;i++)
   {
      TSynapse *synapse = (TSynapse*)fSynapses->At(i);
      Gamma[IDX++][0] += synapse->GetDEDw();
   }
}

//______________________________________________________________________________
void TMVA::MethodMLP::ComputeDEDw()
{
   // compute the derivative of the error (dE/dw) with respect to each synapse weight,
   // averaged over all training events
   Int_t nSynapses = fSynapses->GetEntriesFast();
   for (Int_t i=0;i<nSynapses;i++) {
      TSynapse *synapse = (TSynapse*)fSynapses->At(i);
      synapse->SetDEDw( 0.0 );
   }

   Int_t nEvents = GetNEvents();
   for (Int_t i=0;i<nEvents;i++) {

      const Event* ev = GetEvent(i);

      SimulateEvent( ev );

      for (Int_t j=0;j<nSynapses;j++) {
         TSynapse *synapse = (TSynapse*)fSynapses->At(j);
         synapse->SetDEDw( synapse->GetDEDw() + synapse->GetDelta() );
      }
   }

   for (Int_t i=0;i<nSynapses;i++) {
      TSynapse *synapse = (TSynapse*)fSynapses->At(i);
      Double_t DEDw = synapse->GetDEDw();
      if (fUseRegulator) DEDw += fPriorDev[i];
      synapse->SetDEDw( DEDw / nEvents );
   }
}
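
// Note: the per-event contributions accumulated in ComputeDEDw are summed and then
// divided by the number of events, so each synapse ends up holding the average
// gradient <dE/dw>; when the Bayesian regulator is active the derivative of the
// weight-decay prior (fPriorDev) is added to the sum before that division.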

//______________________________________________________________________________
void TMVA::MethodMLP::SimulateEvent( const Event* ev )
{
   // propagate one event through the network and set the error of the output
   // neuron(s) and the synapse deltas accordingly
   Double_t eventWeight = ev->GetWeight();

   ForceNetworkInputs( ev );
   ForceNetworkCalculations();

   if (DoRegression()) {
      UInt_t ntgt = DataInfo().GetNTargets();
      for (UInt_t itgt = 0; itgt < ntgt; itgt++) {
         Double_t desired = ev->GetTarget(itgt);
         Double_t error   = ( GetOutputNeuron( itgt )->GetActivationValue() - desired )*eventWeight;
         GetOutputNeuron( itgt )->SetError(error);
      }
   } else if (DoMulticlass()) {
      UInt_t nClasses = DataInfo().GetNClasses();
      UInt_t cls      = ev->GetClass();
      for (UInt_t icls = 0; icls < nClasses; icls++) {
         Double_t desired = ( cls==icls ? 1.0 : 0.0 );
         Double_t error   = ( GetOutputNeuron( icls )->GetActivationValue() - desired )*eventWeight;
         GetOutputNeuron( icls )->SetError(error);
      }
   } else {
      Double_t desired = GetDesiredOutput( ev );
      Double_t error = -1;
      if      (fEstimator==kMSE) error = ( GetOutputNeuron()->GetActivationValue() - desired )*eventWeight;
      else if (fEstimator==kCE)  error = -eventWeight/(GetOutputNeuron()->GetActivationValue() -1 + desired);
      GetOutputNeuron()->SetError(error);
   }

   CalculateNeuronDeltas();
   for (Int_t j=0;j<fSynapses->GetEntriesFast();j++) {
      TSynapse *synapse = (TSynapse*)fSynapses->At(j);
      synapse->InitDelta();
      synapse->CalculateDelta();
   }
}

//______________________________________________________________________________
void TMVA::MethodMLP::SteepestDir( TMatrixD &Dir )
{
   // steepest-descent direction: the negative of the current gradient
   Int_t IDX = 0;
   Int_t nSynapses = fSynapses->GetEntriesFast();

   for (Int_t i=0;i<nSynapses;i++) {
      TSynapse *synapse = (TSynapse*)fSynapses->At(i);
      Dir[IDX++][0] = -synapse->GetDEDw();
   }
}

//______________________________________________________________________________
Bool_t TMVA::MethodMLP::GetHessian( TMatrixD &Hessian, TMatrixD &Gamma, TMatrixD &Delta )
{
   // update the approximate inverse Hessian; returns kTRUE if the update is
   // ill-defined (vanishing Gamma^T Delta) so that a steepest-descent restart is needed
   TMatrixD gd(Gamma, TMatrixD::kTransposeMult, Delta);
   if ((Double_t) gd[0][0] == 0.) return kTRUE;
   TMatrixD aHg(Hessian, TMatrixD::kMult, Gamma);
   TMatrixD tmp(Gamma, TMatrixD::kTransposeMult, Hessian);
   TMatrixD gHg(Gamma, TMatrixD::kTransposeMult, aHg);
   Double_t a = 1 / (Double_t) gd[0][0];
   Double_t f = 1 + ((Double_t)gHg[0][0]*a);
   TMatrixD res(TMatrixD(Delta, TMatrixD::kMult, TMatrixD(TMatrixD::kTransposed,Delta)));
   res *= f;
   res -= (TMatrixD(Delta, TMatrixD::kMult, tmp) + TMatrixD(aHg, TMatrixD::kMult,
                                                            TMatrixD(TMatrixD::kTransposed,Delta)));
   res *= a;
   Hessian += res;

   return kFALSE;
}
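
// For reference, the rank-two correction applied in GetHessian corresponds to
//   H <- H + [ (1 + g^T H g / g^T d) * d d^T  -  d (g^T H)  -  (H g) d^T ] / (g^T d)
// with g = Gamma (change of the gradient) and d = Delta (last weight step), i.e. the
// BFGS update of the inverse Hessian; g^T d = 0 triggers a steepest-descent restart.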

//______________________________________________________________________________
void TMVA::MethodMLP::SetDir( TMatrixD &Hessian, TMatrixD &dir )
{
   // quasi-Newton search direction: dir = -Hessian * gradient
   Int_t IDX = 0;
   Int_t nSynapses = fSynapses->GetEntriesFast();
   TMatrixD DEDw(nSynapses, 1);

   for (Int_t i=0;i<nSynapses;i++) {
      TSynapse *synapse = (TSynapse*)fSynapses->At(i);
      DEDw[IDX++][0] = synapse->GetDEDw();
   }

   dir = Hessian * DEDw;
   for (Int_t i=0;i<IDX;i++) dir[i][0] = -dir[i][0];
}

//______________________________________________________________________________
Double_t TMVA::MethodMLP::DerivDir( TMatrixD &Dir )
{
   // scalar product of the search direction with the gradient; a positive value
   // means Dir is not a descent direction
   Int_t IDX = 0;
   Int_t nSynapses = fSynapses->GetEntriesFast();
   Double_t Result = 0.0;

   for (Int_t i=0;i<nSynapses;i++) {
      TSynapse *synapse = (TSynapse*)fSynapses->At(i);
      Result += Dir[IDX++][0] * synapse->GetDEDw();
   }
   return Result;
}

//______________________________________________________________________________
Bool_t TMVA::MethodMLP::LineSearch(TMatrixD &Dir, std::vector<Double_t> &buffer, Double_t* dError)
{
   // line search along the direction Dir; returns kTRUE in case of failure
   Int_t IDX = 0;
   Int_t nSynapses = fSynapses->GetEntriesFast();
   Int_t nWeights = nSynapses;

   std::vector<Double_t> Origin(nWeights);
   for (Int_t i=0;i<nSynapses;i++) {
      TSynapse *synapse = (TSynapse*)fSynapses->At(i);
      Origin[i] = synapse->GetWeight();
   }

   Double_t err1 = GetError();
   Double_t errOrigin = err1;
   Double_t alpha1 = 0.;
   Double_t alpha2 = fLastAlpha;

   // constrain the starting step size
   if      (alpha2 < 0.01) alpha2 = 0.01;
   else if (alpha2 > 2.0)  alpha2 = 2.0;
   Double_t alpha_original = alpha2;
   Double_t alpha3 = alpha2;

   SetDirWeights( Origin, Dir, alpha2 );
   Double_t err2 = GetError();

   Double_t err3 = err2;
   Bool_t bingo = kFALSE;

   // bracket the minimum: grow the step while the error keeps decreasing,
   // otherwise shrink it (and eventually try the opposite direction)
   if (err1 > err2) {
      for (Int_t i=0;i<100;i++) {
         alpha3 *= fTau;
         SetDirWeights(Origin, Dir, alpha3);
         err3 = GetError();
         if (err3 > err2) {
            bingo = kTRUE;
            break;
         }
         alpha1 = alpha2;
         err1 = err2;
         alpha2 = alpha3;
         err2 = err3;
      }
      if (!bingo) {
         SetDirWeights(Origin, Dir, 0.);
         return kTRUE;
      }
   }
   else {
      for (Int_t i=0;i<100;i++) {
         alpha2 /= fTau;
         if (i==50) {
            Log() << kWARNING << "linesearch, starting to investigate direction opposite of steepestDIR" << Endl;
            alpha2 = -alpha_original;
         }
         SetDirWeights(Origin, Dir, alpha2);
         err2 = GetError();
         if (err1 > err2) {
            bingo = kTRUE;
            break;
         }
         alpha3 = alpha2;
         err3 = err2;
      }
      if (!bingo) {
         SetDirWeights(Origin, Dir, 0.);
         Log() << kWARNING << "linesearch, failed even in opposite direction of steepestDIR" << Endl;
         fLastAlpha = 0.05;
         return kTRUE;
      }
   }

   // parabolic interpolation between the three bracketing points
   if (alpha1 > 0 && alpha2 > 0 && alpha3 > 0) {
      fLastAlpha = 0.5 * (alpha1 + alpha3 -
                          (err3 - err1) / ((err3 - err2) / ( alpha3 - alpha2 )
                                           - ( err2 - err1 ) / (alpha2 - alpha1 )));
   }
   else {
      fLastAlpha = alpha2;
   }

   fLastAlpha = fLastAlpha < 10000 ? fLastAlpha : 10000;

   SetDirWeights(Origin, Dir, fLastAlpha);

   Double_t finalError = GetError();
   if (finalError > err1) {
      Log() << kWARNING << "Line search increased error! Something is wrong. "
            << "fLastAlpha=" << fLastAlpha << " al123=" << alpha1 << " "
            << alpha2 << " " << alpha3 << " err1=" << err1 << " errfinal=" << finalError << Endl;
   }

   // store the weight step taken in this line search
   for (Int_t i=0;i<nSynapses;i++) {
      TSynapse *synapse = (TSynapse*)fSynapses->At(i);
      buffer[IDX] = synapse->GetWeight() - Origin[IDX];
      IDX++;
   }

   if (dError) (*dError) = (errOrigin-finalError)/finalError;

   return kFALSE;
}
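
// The final step length fLastAlpha chosen in LineSearch is the minimum of the parabola
// through the three bracketing points (alpha1,err1), (alpha2,err2), (alpha3,err3):
//   alpha* = 0.5 * ( alpha1 + alpha3 - (err3-err1) / ( (err3-err2)/(alpha3-alpha2)
//                                                    - (err2-err1)/(alpha2-alpha1) ) )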

//______________________________________________________________________________
void TMVA::MethodMLP::SetDirWeights( std::vector<Double_t> &Origin, TMatrixD &Dir, Double_t alpha )
{
   // set the synapse weights to Origin + alpha * Dir
   Int_t IDX = 0;
   Int_t nSynapses = fSynapses->GetEntriesFast();

   for (Int_t i=0;i<nSynapses;i++) {
      TSynapse *synapse = (TSynapse*)fSynapses->At(i);
      synapse->SetWeight( Origin[IDX] + Dir[IDX][0] * alpha );
      IDX++;
   }
   if (fUseRegulator) UpdatePriors();
}

//______________________________________________________________________________
Double_t TMVA::MethodMLP::GetError()
{
   // sum up the weighted error of all events for the current set of weights
   Int_t  nEvents = GetNEvents();
   UInt_t ntgts   = GetNTargets();
   Double_t Result = 0.;

   for (Int_t i=0;i<nEvents;i++) {
      const Event* ev = GetEvent(i);

      SimulateEvent( ev );

      Double_t error = 0.;
      if (DoRegression()) {
         for (UInt_t itgt = 0; itgt < ntgts; itgt++) {
            error += GetMSEErr( ev, itgt );
         }
      } else if ( DoMulticlass() ) {
         for ( UInt_t icls = 0, iclsEnd = DataInfo().GetNClasses(); icls < iclsEnd; icls++ ) {
            error += GetMSEErr( ev, icls );
         }
      } else {
         if      (fEstimator==kMSE) error = GetMSEErr( ev );
         else if (fEstimator==kCE)  error = GetCEErr( ev );
      }
      Result += error * ev->GetWeight();
   }
   if (fUseRegulator) Result += fPrior;
   if (Result < 0) Log() << kWARNING << "\nNegative Error!!! : " << Result-fPrior << " + " << fPrior << Endl;
   return Result;
}

//______________________________________________________________________________
Double_t TMVA::MethodMLP::GetMSEErr( const Event* ev, UInt_t index )
{
   // mean-squared-error contribution of one output node for one event
   Double_t error  = 0;
   Double_t output = GetOutputNeuron( index )->GetActivationValue();
   Double_t target = 0;
   if      (DoRegression()) target = ev->GetTarget( index );
   else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
   else                     target = GetDesiredOutput( ev );

   error = 0.5*(output-target)*(output-target);

   return error;
}

//______________________________________________________________________________
Double_t TMVA::MethodMLP::GetCEErr( const Event* ev, UInt_t index )
{
   // cross-entropy contribution of one output node for one event
   Double_t error  = 0;
   Double_t output = GetOutputNeuron( index )->GetActivationValue();
   Double_t target = 0;
   if      (DoRegression()) target = ev->GetTarget( index );
   else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
   else                     target = GetDesiredOutput( ev );

   error = -(target*TMath::Log(output)+(1-target)*TMath::Log(1-output));

   return error;
}

//______________________________________________________________________________
void TMVA::MethodMLP::BackPropagationMinimize(Int_t nEpochs)
{
   // minimize the estimator / network error using gradient descent (back-propagation)

   Timer timer( (fSteps>0?100:nEpochs), GetName() );
   Int_t lateEpoch = (Int_t)(nEpochs*0.95) - 1;

   // create histograms for overtraining monitoring
   Int_t nbinTest = Int_t(nEpochs/fTestRate);
   fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
                                   nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
   fEstimatorHistTest  = new TH1F( "estimatorHistTest", "test estimator",
                                   nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );

   if (fSamplingTraining || fSamplingTesting)
      Data()->InitSampling(1.0,1.0,fRandomSeed);

   if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
   timer.DrawProgressBar(0);

   // estimators to be improved by the training
   Double_t trainE = -1;
   Double_t testE  = -1;

   // start the training loop
   for (Int_t i = 0; i < nEpochs; i++) {

      if (Float_t(i)/nEpochs < fSamplingEpoch) {
         if ((i+1)%fTestRate == 0 || (i == 0)) {
            if (fSamplingTraining) {
               Data()->SetCurrentType( Types::kTraining );
               Data()->InitSampling(fSamplingFraction,fSamplingWeight);
               Data()->CreateSampling();
            }
            if (fSamplingTesting) {
               Data()->SetCurrentType( Types::kTesting );
               Data()->InitSampling(fSamplingFraction,fSamplingWeight);
               Data()->CreateSampling();
            }
         }
      }
      else {
         Data()->SetCurrentType( Types::kTraining );
         Data()->InitSampling(1.0,1.0);
         Data()->SetCurrentType( Types::kTesting );
         Data()->InitSampling(1.0,1.0);
      }
      Data()->SetCurrentType( Types::kTraining );

      TrainOneEpoch();
      DecaySynapseWeights(i >= lateEpoch);

      // monitor convergence of training and control sample
      if ((i+1)%fTestRate == 0) {
         trainE = CalculateEstimator( Types::kTraining, i );
         testE  = CalculateEstimator( Types::kTesting,  i );
         fEstimatorHistTrain->Fill( i+1, trainE );
         fEstimatorHistTest ->Fill( i+1, testE );

         Bool_t success = kFALSE;
         if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
            success = kTRUE;
         }
         Data()->EventResult( success );

         SetCurrentValue( testE );
         if (HasConverged()) {
            if (Float_t(i)/nEpochs < fSamplingEpoch) {
               Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
               i = newEpoch;
               ResetConvergenceCounter();
            }
            else {
               if (lateEpoch > i) lateEpoch = i;
               else               break;
            }
         }
      }

      // draw progress bar (add convergence value)
      TString convText = Form( "<D^2> (train/test): %.4g/%.4g", trainE, testE );
      if (fSteps > 0) {
         Float_t progress = 0;
         if (Float_t(i)/nEpochs < fSamplingEpoch)
            progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
         else
            progress = 100*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());

         timer.DrawProgressBar( Int_t(progress), convText );
      }
      else {
         timer.DrawProgressBar( i, convText );
      }
   }
}

//______________________________________________________________________________
void TMVA::MethodMLP::TrainOneEpoch()
{
   // train the network over a single epoch, presenting the events in random order

   Int_t nEvents = Data()->GetNEvents();

   // randomize the order in which the events are presented to the network
   Int_t* index = new Int_t[nEvents];
   for (Int_t i = 0; i < nEvents; i++) index[i] = i;
   Shuffle(index, nEvents);

   // loop over all training events
   for (Int_t i = 0; i < nEvents; i++) {

      TrainOneEvent(index[i]);

      // do adjustments if in batch mode
      if (fBPMode == kBatch && (i+1)%fBatchSize == 0) {
         AdjustSynapseWeights();
         if (fgPRINT_BATCH) {
            PrintNetwork();
            WaitForKeyboard();
         }
      }

      // debug in sequential mode
      if (fgPRINT_SEQ) {
         PrintNetwork();
         WaitForKeyboard();
      }
   }

   delete[] index;
}

//______________________________________________________________________________
void TMVA::MethodMLP::Shuffle(Int_t* index, Int_t n)
{
   // Input:
   //   index: the array to shuffle
   //   n: the size of the array
   // Output:
   //   index: the shuffled indexes
   // This method is used for sequential training

   Int_t j, k;
   Int_t a = n - 1;
   for (Int_t i = 0; i < n; i++) {
      j = (Int_t) (frgen->Rndm() * a);
      k = index[j];
      index[j] = index[i];
      index[i] = k;
   }
}

//______________________________________________________________________________
void TMVA::MethodMLP::DecaySynapseWeights(Bool_t lateEpoch)
{
   // decay the learning rates of all synapses; in late epochs a faster decay
   // (squared decay rate) is used to help the network converge

   TSynapse* synapse;
   Int_t numSynapses = fSynapses->GetEntriesFast();
   for (Int_t i = 0; i < numSynapses; i++) {
      synapse = (TSynapse*)fSynapses->At(i);
      if (lateEpoch) synapse->DecayLearningRate(fDecayRate*fDecayRate);
      else           synapse->DecayLearningRate(fDecayRate);
   }
}

//______________________________________________________________________________
void TMVA::MethodMLP::TrainOneEventFast(Int_t ievt, Float_t*& branchVar, Int_t& type)
{
   // fast per-event training: reads the variables directly from the branch buffer
   // instead of going through the Event interface

   GetEvent(ievt);

   // note: the normalization of event weights will affect the choice of the
   // learning rate; in general, if the "average" event weight is 1, a learning
   // rate of about 0.02 should be adequate (here all weights are taken as 1)
   Double_t eventWeight = 1.0;

   // get the desired output of this event
   Double_t desired;
   if (type == 0) desired = fOutput->GetMin();   // background
   else           desired = fOutput->GetMax();   // signal

   // force the value for each input neuron
   Double_t x;
   TNeuron* neuron;

   for (UInt_t j = 0; j < GetNvar(); j++) {
      x = branchVar[j];
      if (IsNormalised()) x = gTools().NormVariable( x, GetXmin( j ), GetXmax( j ) );
      neuron = GetInputNeuron(j);
      neuron->ForceValue(x);
   }

   ForceNetworkCalculations();
   UpdateNetwork(desired, eventWeight);
}

//______________________________________________________________________________
void TMVA::MethodMLP::TrainOneEvent(Int_t ievt)
{
   // train the network over a single event

   const Event * ev = GetEvent(ievt);
   Double_t eventWeight = ev->GetWeight();
   ForceNetworkInputs( ev );
   ForceNetworkCalculations();
   if      (DoRegression()) UpdateNetwork( ev->GetTargets(), eventWeight );
   else if (DoMulticlass()) UpdateNetwork( *DataInfo().GetTargetsForMulticlass( ev ), eventWeight );
   else                     UpdateNetwork( GetDesiredOutput( ev ), eventWeight );
}

//______________________________________________________________________________
Double_t TMVA::MethodMLP::GetDesiredOutput( const Event* ev )
{
   // get the desired output of this event: maximum of the output range for signal,
   // minimum for background
   return DataInfo().IsSignal(ev) ? fOutput->GetMax() : fOutput->GetMin();
}

//______________________________________________________________________________
void TMVA::MethodMLP::UpdateNetwork(Double_t desired, Double_t eventWeight)
{
   // update the network based on how closely the output matched the desired output
   Double_t error = GetOutputNeuron()->GetActivationValue() - desired;
   if      (fEstimator==kMSE) error = GetOutputNeuron()->GetActivationValue() - desired;
   else if (fEstimator==kCE)  error = -1./(GetOutputNeuron()->GetActivationValue() -1 + desired);
   else    Log() << kFATAL << "Estimator type unspecified!!" << Endl;
   error *= eventWeight;
   GetOutputNeuron()->SetError(error);
   CalculateNeuronDeltas();
   UpdateSynapses();
}

//______________________________________________________________________________
void TMVA::MethodMLP::UpdateNetwork(std::vector<Float_t>& desired, Double_t eventWeight)
{
   // update the network based on how closely the outputs matched the desired
   // outputs (regression and multiclass case)
   for (UInt_t i = 0; i < desired.size(); i++) {
      Double_t error = GetOutputNeuron( i )->GetActivationValue() - desired.at(i);
      error *= eventWeight;
      GetOutputNeuron( i )->SetError(error);
   }
   CalculateNeuronDeltas();
   UpdateSynapses();
}

//______________________________________________________________________________
void TMVA::MethodMLP::CalculateNeuronDeltas()
{
   // have each neuron calculate its delta by back propagation

   TNeuron* neuron;
   Int_t numNeurons;
   Int_t numLayers = fNetwork->GetEntriesFast();
   TObjArray* curLayer;

   // step backwards through the network (back propagation):
   // deltas are calculated starting at the last layer
   for (Int_t i = numLayers-1; i >= 0; i--) {
      curLayer = (TObjArray*)fNetwork->At(i);
      numNeurons = curLayer->GetEntriesFast();

      for (Int_t j = 0; j < numNeurons; j++) {
         neuron = (TNeuron*) curLayer->At(j);
         neuron->CalculateDelta();
      }
   }
}

//______________________________________________________________________________
void TMVA::MethodMLP::GeneticMinimize()
{
   // create a genetic fitter similar to the one used for cut optimisation,
   // give it a vector of parameter ranges (parameters = synapse weights),
   // link its fitness function to ComputeEstimator, and run it

   PrintMessage("Minimizing Estimator with GA");

   // define GA parameters
   fGA_preCalc   = 1;
   fGA_SC_steps  = 10;
   fGA_SC_rate   = 5;
   fGA_SC_factor = 0.95;
   fGA_nsteps    = 30;

   // parameter ranges
   vector<Interval*> ranges;

   Int_t numWeights = fSynapses->GetEntriesFast();
   for (Int_t ivar=0; ivar< numWeights; ivar++) {
      ranges.push_back( new Interval( 0, GetXmax(ivar) - GetXmin(ivar) ));
   }

   FitterBase *gf = new GeneticFitter( *this, Log().GetPrintedSource(), ranges, GetOptions() );
   gf->Run();

   Double_t estimator = CalculateEstimator();
   Log() << kINFO << "GA: estimator after optimization: " << estimator << Endl;
}

//______________________________________________________________________________
Double_t TMVA::MethodMLP::EstimatorFunction( std::vector<Double_t>& parameters)
{
   // interface to the estimator, used by the fitters
   return ComputeEstimator( parameters );
}

//______________________________________________________________________________
Double_t TMVA::MethodMLP::ComputeEstimator( std::vector<Double_t>& parameters)
{
   // set the weights to the given parameters and compute the estimator;
   // this function is called by the GA optimization

   TSynapse* synapse;
   Int_t numSynapses = fSynapses->GetEntriesFast();

   for (Int_t i = 0; i < numSynapses; i++) {
      synapse = (TSynapse*)fSynapses->At(i);
      synapse->SetWeight(parameters.at(i));
   }
   if (fUseRegulator) UpdatePriors();

   Double_t estimator = CalculateEstimator();

   return estimator;
}

//______________________________________________________________________________
void TMVA::MethodMLP::UpdateSynapses()
{
   // update synapse error fields and adjust the weights (if in sequential mode)

   TNeuron* neuron;
   Int_t numNeurons;
   TObjArray* curLayer;
   Int_t numLayers = fNetwork->GetEntriesFast();

   for (Int_t i = 0; i < numLayers; i++) {
      curLayer = (TObjArray*)fNetwork->At(i);
      numNeurons = curLayer->GetEntriesFast();

      for (Int_t j = 0; j < numNeurons; j++) {
         neuron = (TNeuron*) curLayer->At(j);
         if (fBPMode == kBatch) neuron->UpdateSynapsesBatch();
         else                   neuron->UpdateSynapsesSequential();
      }
   }
}

//______________________________________________________________________________
void TMVA::MethodMLP::AdjustSynapseWeights()
{
   // just adjust the synapse weights (should be called in batch mode)

   TNeuron* neuron;
   Int_t numNeurons;
   TObjArray* curLayer;
   Int_t numLayers = fNetwork->GetEntriesFast();

   for (Int_t i = numLayers-1; i >= 0; i--) {
      curLayer = (TObjArray*)fNetwork->At(i);
      numNeurons = curLayer->GetEntriesFast();

      for (Int_t j = 0; j < numNeurons; j++) {
         neuron = (TNeuron*) curLayer->At(j);
         neuron->AdjustSynapseWeights();
      }
   }
}

//______________________________________________________________________________
void TMVA::MethodMLP::UpdatePriors()
{
   // update the weight-decay prior 0.5 * sum_i alpha_i * w_i^2 and its derivative
   fPrior = 0;
   fPriorDev.clear();
   Int_t nSynapses = fSynapses->GetEntriesFast();
   for (Int_t i=0;i<nSynapses;i++) {
      TSynapse* synapse = (TSynapse*)fSynapses->At(i);
      fPrior += 0.5*fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight())*(synapse->GetWeight());
      fPriorDev.push_back(fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight()));
   }
}

//______________________________________________________________________________
void TMVA::MethodMLP::UpdateRegulators()
{
   // update the regulator (weight-decay) coefficients using the approximate inverse Hessian
   TMatrixD InvH(0,0);
   GetApproxInvHessian(InvH);
   Int_t numSynapses   = fSynapses->GetEntriesFast();
   Int_t numRegulators = fRegulators.size();
   Float_t gamma    = 0,
           variance = 1.;
   vector<Int_t> nWDP(numRegulators);
   vector<Double_t> trace(numRegulators), weightSum(numRegulators);
   for (int i=0;i<numSynapses;i++) {
      TSynapse* synapses = (TSynapse*)fSynapses->At(i);
      Int_t idx = fRegulatorIdx[i];
      nWDP[idx]++;
      trace[idx] += InvH[i][i];
      gamma += 1 - fRegulators[idx]*InvH[i][i];
      weightSum[idx] += (synapses->GetWeight())*(synapses->GetWeight());
   }
   if (fEstimator==kMSE) {
      if (GetNEvents() > gamma) variance = CalculateEstimator( Types::kTraining, 0 )/(1-(gamma/GetNEvents()));
      else                      variance = CalculateEstimator( Types::kTraining, 0 );
   }

   // recompute the regulator coefficients
   for (int i=0;i<numRegulators;i++)
   {
      fRegulators[i] = variance*nWDP[i]/(weightSum[i]+variance*trace[i]);
      if (fRegulators[i] < 0) fRegulators[i] = 0;
      Log() << kDEBUG << "R" << i << ":" << fRegulators[i] << "\t";
   }
   float trainE = CalculateEstimator( Types::kTraining, 0 );
   float testE  = CalculateEstimator( Types::kTesting,  0 );

   Log() << kDEBUG << "\n" << "trainE:" << trainE << "\ttestE:" << testE << "\tvariance:" << variance << "\tgamma:" << gamma << Endl;
}
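
// The coefficient update in UpdateRegulators resembles the usual evidence-approximation
// prescription for weight-decay hyper-parameters: for each regulator class
//   alpha_i = variance * n_i / ( sum_w w^2 + variance * trace_i(H^-1) )
// where n_i is the number of weights in the class, sum_w w^2 their squared sum and
// trace_i(H^-1) the corresponding diagonal part of the approximate inverse Hessian;
// for the MSE estimator the noise variance is estimated from the training error.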

//______________________________________________________________________________
void TMVA::MethodMLP::GetApproxInvHessian(TMatrixD& InvHessian, bool regulate)
{
   // compute an approximate inverse Hessian of the error from the outer products of
   // the per-event output sensitivities (optionally adding the regulator terms on the diagonal)
   Int_t numSynapses = fSynapses->GetEntriesFast();
   InvHessian.ResizeTo( numSynapses, numSynapses );
   InvHessian = 0;
   TMatrixD sens(numSynapses,1);
   TMatrixD sensT(1,numSynapses);
   Int_t nEvents = GetNEvents();
   for (Int_t i=0;i<nEvents;i++) {
      GetEvent(i);
      double outputValue = GetMvaValue();   // force evaluation of the network
      GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
      CalculateNeuronDeltas();
      for (Int_t j = 0; j < numSynapses; j++){
         TSynapse* synapses = (TSynapse*)fSynapses->At(j);
         synapses->InitDelta();
         synapses->CalculateDelta();
         sens[j][0] = sensT[0][j] = synapses->GetDelta();
      }
      if      (fEstimator==kMSE) InvHessian += sens*sensT;
      else if (fEstimator==kCE)  InvHessian += (outputValue*(1-outputValue))*sens*sensT;
   }

   // add the regulators or a small constant on the diagonal to keep the matrix invertible
   if (regulate) {
      for (Int_t i = 0; i < numSynapses; i++){
         InvHessian[i][i] += fRegulators[fRegulatorIdx[i]];
      }
   }
   else {
      for (Int_t i = 0; i < numSynapses; i++){
         InvHessian[i][i] += 1e-6;
      }
   }

   InvHessian.Invert();
}
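
// Note on GetApproxInvHessian: the Hessian is approximated by the sum of outer products
// of the output sensitivities dy/dw (a Gauss-Newton style approximation); regulator
// terms or a small constant (1e-6) are added to the diagonal to keep the matrix well
// conditioned before it is inverted in place.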

//______________________________________________________________________________
Double_t TMVA::MethodMLP::GetMvaValueAsymError( Double_t* errLower, Double_t* errUpper )
{
   // get the MVA value together with an asymmetric error estimate, obtained by
   // propagating the weight uncertainties (inverse Hessian) to the network output
   Double_t MvaValue = GetMvaValue();

   // no weight uncertainties available -> return the plain MVA value
   if (fInvHessian.GetNcols()==0 || errLower==0 || errUpper==0)
      return MvaValue;

   Double_t MvaUpper, MvaLower, median, variance;
   Int_t numSynapses = fSynapses->GetEntriesFast();
   if (fInvHessian.GetNcols() != numSynapses) {
      Log() << kWARNING << "inconsistent dimension " << fInvHessian.GetNcols() << " vs " << numSynapses << Endl;
   }
   TMatrixD sens(numSynapses,1);
   TMatrixD sensT(1,numSynapses);
   GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));

   // calculate the sensitivity of the output with respect to each weight
   CalculateNeuronDeltas();
   for (Int_t i = 0; i < numSynapses; i++){
      TSynapse* synapses = (TSynapse*)fSynapses->At(i);
      synapses->InitDelta();
      synapses->CalculateDelta();
      sensT[0][i] = synapses->GetDelta();
   }
   sens.Transpose(sensT);
   TMatrixD sig = sensT*fInvHessian*sens;
   variance = sig[0][0];
   median   = GetOutputNeuron()->GetValue();

   // upper error
   MvaUpper = fOutput->Eval(median+variance);
   if (errUpper)
      *errUpper = MvaUpper-MvaValue;

   // lower error
   MvaLower = fOutput->Eval(median-variance);
   if (errLower)
      *errLower = MvaValue-MvaLower;

   if (variance < 0) {
      Log() << kWARNING << "median=" << median << "\tvariance=" << variance
            << "\tMvaLower=" << MvaLower << "\terrLower=" << (errLower?*errLower:0)
            << "\tMvaUpper=" << MvaUpper << "\terrUpper=" << (errUpper?*errUpper:0)
            << Endl;
   }
   return MvaValue;
}
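
// Illustrative use of the asymmetric error (the inverse Hessian is only filled when the
// method is booked with the CalculateErrors or UseRegulator options switched on); "mlp"
// below is a hypothetical pointer to a trained MethodMLP instance:
//
//   Double_t errLower = 0, errUpper = 0;
//   Double_t mva = mlp->GetMvaValueAsymError( &errLower, &errUpper );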

#ifdef MethodMLP_UseMinuit__

//______________________________________________________________________________
void TMVA::MethodMLP::MinuitMinimize()
{
   // minimize using Minuit
   fNumberOfWeights = fSynapses->GetEntriesFast();

   TFitter* tfitter = new TFitter( fNumberOfWeights );

   // minuit-specific settings
   Double_t args[10];

   // output level
   args[0] = 2;
   tfitter->ExecuteCommand( "SET PRINTOUT", args, 1 );
   tfitter->ExecuteCommand( "SET NOWARNINGS", args, 0 );

   // starting values of the fit parameters: use the current synapse weights
   // (note: the previous version used an uninitialised fixed-size array here,
   // which could overflow for networks with more than 54 weights)
   std::vector<Double_t> w(fNumberOfWeights);

   // init parameters
   for (Int_t ipar=0; ipar < fNumberOfWeights; ipar++) {
      w[ipar] = ((TSynapse*)fSynapses->At(ipar))->GetWeight();
      TString parName = Form("w%i", ipar);
      tfitter->SetParameter( ipar,
                             parName, w[ipar], 0.1, 0, 0 );
   }

   // set the objective function (FCN)
   tfitter->SetFCN( &IFCN );

   // define fit strategy
   args[0] = 2;
   tfitter->ExecuteCommand( "SET STRATEGY", args, 1 );

   // now do the fit !
   args[0] = 1e-04;
   tfitter->ExecuteCommand( "MIGRAD", args, 1 );

   Bool_t doBetter     = kFALSE;
   Bool_t doEvenBetter = kFALSE;
   if (doBetter) {
      args[0] = 1e-04;
      tfitter->ExecuteCommand( "IMPROVE", args, 1 );

      if (doEvenBetter) {
         args[0] = 500;
         tfitter->ExecuteCommand( "MINOS", args, 1 );
      }
   }
}

//______________________________________________________________________________
void TMVA::MethodMLP::IFCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
{
   // Evaluate the minimisation function
   //
   // Input parameters:
   //    npars:   number of currently variable parameters
   //    fitPars: array of (constant and variable) parameters
   //    iflag:   indicates what is to be calculated
   //    grad:    array of gradients
   //
   // Output parameter:
   //    f:       the calculated function value
   ((MethodMLP*)GetThisPtr())->FCN( npars, grad, f, fitPars, iflag );
}

static Int_t  nc   = 0;
static double minf = 1000000;

void TMVA::MethodMLP::FCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
{
   // first update the weights
   for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) {
      TSynapse* synapse = (TSynapse*)fSynapses->At(ipar);
      synapse->SetWeight(fitPars[ipar]);
   }

   // now compute the estimator
   f = CalculateEstimator();

   nc++;
   if (f < minf) minf = f;
   for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) Log() << kDEBUG << fitPars[ipar] << " ";
   Log() << kDEBUG << Endl;
   Log() << kDEBUG << "***** New estimator: " << f << " min: " << minf << " --> ncalls: " << nc << Endl;
}

//______________________________________________________________________________
TMVA::MethodMLP* TMVA::MethodMLP::GetThisPtr()
{
   // global "this" pointer to be used by Minuit
   return fgThis;
}

#endif

//______________________________________________________________________________
void TMVA::MethodMLP::MakeClassSpecific( std::ostream& fout, const TString& className ) const
{
   // write specific classifier response
   MethodANNBase::MakeClassSpecific(fout, className);
}

//______________________________________________________________________________
void TMVA::MethodMLP::GetHelpMessage() const
{
   // get help message text
   TString col    = gConfig().WriteOptionsReference() ? "" : gTools().Color("bold");
   TString colres = gConfig().WriteOptionsReference() ? "" : gTools().Color("reset");

   Log() << Endl;
   Log() << col << "--- Short description:" << colres << Endl;
   Log() << Endl;
   Log() << "The MLP artificial neural network (ANN) is a traditional feed-" << Endl;
   Log() << "forward multilayer perceptron implementation. The MLP has a user-" << Endl;
   Log() << "defined hidden layer architecture, while the number of input (output)" << Endl;
   Log() << "nodes is determined by the input variables (output classes, i.e., " << Endl;
   Log() << "signal and one background). " << Endl;
   Log() << Endl;
   Log() << col << "--- Performance optimisation:" << colres << Endl;
   Log() << Endl;
   Log() << "Neural networks are stable and perform well for a large variety of " << Endl;
   Log() << "linear and non-linear classification problems. However, in contrast" << Endl;
   Log() << "to (e.g.) boosted decision trees, the user is advised to reduce the " << Endl;
   Log() << "number of input variables that have only little discrimination power. " << Endl;
   Log() << "" << Endl;
   Log() << "In the tests we have carried out so far, the MLP and ROOT networks" << Endl;
   Log() << "(TMlpANN, interfaced via TMVA) performed equally well, with however" << Endl;
   Log() << "a clear speed advantage for the MLP. The Clermont-Ferrand neural " << Endl;
   Log() << "net (CFMlpANN) exhibited worse classification performance in these" << Endl;
   Log() << "tests, which is partly due to the slow convergence of its training" << Endl;
   Log() << "(at least 10k training cycles are required to achieve approximately" << Endl;
   Log() << "competitive results)." << Endl;
   Log() << Endl;
   Log() << col << "Overtraining: " << colres
         << "only the TMlpANN performs an explicit separation of the" << Endl;
   Log() << "full training sample into independent training and validation samples." << Endl;
   Log() << "We have found that in most high-energy physics applications the " << Endl;
   Log() << "available degrees of freedom (training events) are sufficient to " << Endl;
   Log() << "constrain the weights of the relatively simple architectures required" << Endl;
   Log() << "to achieve good performance. Hence no overtraining should occur, and " << Endl;
   Log() << "the use of validation samples would only reduce the available training" << Endl;
   Log() << "information. However, if the performance on the training sample is " << Endl;
   Log() << "found to be significantly better than the one found with the inde-" << Endl;
   Log() << "pendent test sample, caution is needed. The results for these samples " << Endl;
   Log() << "are printed to standard output at the end of each training job." << Endl;
   Log() << Endl;
   Log() << col << "--- Performance tuning via configuration options:" << colres << Endl;
   Log() << Endl;
   Log() << "The hidden layer architecture for all ANNs is defined by the option" << Endl;
   Log() << "\"HiddenLayers=N+1,N,...\", where here the first hidden layer has N+1" << Endl;
   Log() << "neurons and the second N neurons (and so on), and where N is the number " << Endl;
   Log() << "of input variables. Excessive numbers of hidden layers should be avoided," << Endl;
   Log() << "in favour of more neurons in the first hidden layer." << Endl;
   Log() << "" << Endl;
   Log() << "The number of cycles should be above 500. As said, if the number of" << Endl;
   Log() << "adjustable weights is small compared to the training sample size," << Endl;
   Log() << "using a large number of training samples should not lead to overtraining." << Endl;
}