00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #include <iostream>
00028 #include <iomanip>
00029 #include <numeric>
00030 #include <algorithm>
00031
00032 #include "TTree.h"
00033 #include "TMath.h"
00034
00035 #include "TMVA/Timer.h"
00036 #include "TMVA/RuleFitParams.h"
00037 #include "TMVA/RuleFit.h"
00038 #include "TMVA/RuleEnsemble.h"
00039 #include "TMVA/MethodRuleFit.h"
00040 #include "TMVA/Tools.h"
00041
// File-scope state flags and timing accumulators for the GD path search.
Bool_t gFIRSTTST=kTRUE;   // cleared after the first iteration of the tau scan (FindGDTau)
Bool_t gFIRSTORG=kTRUE;   // cleared after the first error check in MakeGDPath

// Timing accumulators (seconds), printed per loop in MakeGDPath's verbose
// summary. NOTE(review): none of them is incremented in this file's visible
// code -- presumably updated elsewhere; confirm before relying on the output.
Double_t gGDInit=0;
Double_t gGDPtr=0;
Double_t gGDNorm=0;
Double_t gGDEval=0;
Double_t gGDEvalRule=0;
Double_t gGDRuleLoop=0;
Double_t gGDLinLoop=0;
00052
00053
////////////////////////////////////////////////////////////////////////////////
/// Constructor: null-initialise all pointers and index ranges, set the
/// gradient-descent tuning parameters to their defaults, create the message
/// logger and call Init().

TMVA::RuleFitParams::RuleFitParams()
   : fRuleFit ( 0 )
   , fRuleEnsemble ( 0 )
   , fNRules ( 0 )
   , fNLinear ( 0 )
   , fPathIdx1 ( 0 )
   , fPathIdx2 ( 0 )
   , fPerfIdx1 ( 0 )
   , fPerfIdx2 ( 0 )
   , fGDNTauTstOK( 0 )
   , fGDNTau ( 51 )        // number of tau values tested in the scan
   , fGDTauPrec ( 0.02 )
   , fGDTauScan ( 1000 )   // number of steps in the tau scan
   , fGDTauMin ( 0.0 )
   , fGDTauMax ( 1.0 )
   , fGDTau ( -1.0 )       // negative => tau is determined by scanning (see InitGD)
   , fGDPathStep ( 0.01 )
   , fGDNPathSteps ( 1000 )
   , fGDErrScale ( 1.1 )
   , fAverageTruth( 0 )
   , fFstarMedian ( 0 )
   , fGDNtuple ( 0 )
   , fNTRisk ( 0 )
   , fNTErrorRate ( 0 )
   , fNTNuval ( 0 )
   , fNTCoefRad ( 0 )
   , fNTOffset ( 0 )
   , fNTCoeff ( 0 )        // rule-coefficient buffer, allocated in InitNtuple
   , fNTLinCoeff ( 0 )     // linear-coefficient buffer, allocated in InitNtuple
   , fsigave( 0 )
   , fsigrms( 0 )
   , fbkgave( 0 )
   , fbkgrms( 0 )
   , fLogger( new MsgLogger("RuleFit") )
{
   Init();
}
00092
00093 TMVA::RuleFitParams::~RuleFitParams()
00094 {
00095
00096 if (fNTCoeff) { delete fNTCoeff; fNTCoeff = 0; }
00097 if (fNTLinCoeff) { delete fNTLinCoeff;fNTLinCoeff = 0; }
00098 delete fLogger;
00099 }
00100
00101
////////////////////////////////////////////////////////////////////////////////
/// Initialise the event-index windows used for the path search
/// ([fPathIdx1,fPathIdx2]) and for the performance/error estimation
/// ([fPerfIdx1,fPerfIdx2]), and compute the effective number of events
/// (sum of training weights) in each window.

void TMVA::RuleFitParams::Init()
{
   if (fRuleFit==0) return;
   if (fRuleFit->GetMethodRuleFit()==0) {
      Log() << kFATAL << "RuleFitParams::Init() - MethodRuleFit ptr is null" << Endl;
   }
   UInt_t neve = fRuleFit->GetTrainingEvents().size();

   fRuleEnsemble = fRuleFit->GetRuleEnsemblePtr();
   fNRules = fRuleEnsemble->GetNRules();
   fNLinear = fRuleEnsemble->GetNLinear();

   // Performance-estimation window: a fraction (GDValidEveFrac) of the
   // training sample; the offset below shifts the window so that it ends
   // on the LAST training event.
   UInt_t ofs;
   fPerfIdx1 = 0;
   if (neve>1) {
      fPerfIdx2 = static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDValidEveFrac());
   }
   else {
      fPerfIdx2 = 0;
   }
   ofs = neve - fPerfIdx2 - 1;
   fPerfIdx1 += ofs;
   fPerfIdx2 += ofs;

   // Path-search window: the first fraction (GDPathEveFrac) of the
   // training sample. NOTE(review): may overlap the performance window if
   // the two fractions sum to more than 1 -- confirm whether intended.
   fPathIdx1 = 0;
   if (neve>1) {
      fPathIdx2 = static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDPathEveFrac());
   }
   else {
      fPathIdx2 = 0;
   }

   // effective number of events (sum of weights) in the path window
   fNEveEffPath = 0;;
   for (UInt_t ie=fPathIdx1; ie<fPathIdx2+1; ie++) {
      fNEveEffPath += fRuleFit->GetTrainingEventWeight(ie);
   }

   // effective number of events in the performance window
   fNEveEffPerf=0;
   for (UInt_t ie=fPerfIdx1; ie<fPerfIdx2+1; ie++) {
      fNEveEffPerf += fRuleFit->GetTrainingEventWeight(ie);
   }

   Log() << kVERBOSE << "Path constr. - event index range = [ " << fPathIdx1 << ", " << fPathIdx2 << " ]"
         << ", effective N(events) = " << fNEveEffPath << Endl;
   Log() << kVERBOSE << "Error estim. - event index range = [ " << fPerfIdx1 << ", " << fPerfIdx2 << " ]"
         << ", effective N(events) = " << fNEveEffPerf << Endl;

   if (fRuleEnsemble->DoRules())
      Log() << kDEBUG << "Number of rules in ensemble: " << fNRules << Endl;
   else
      Log() << kDEBUG << "Rules are disabled " << Endl;

   if (fRuleEnsemble->DoLinear())
      Log() << kDEBUG << "Number of linear terms: " << fNLinear << Endl;
   else
      Log() << kDEBUG << "Linear terms are disabled " << Endl;
}
00170
00171
00172 void TMVA::RuleFitParams::InitNtuple()
00173 {
00174
00175
00176 fGDNtuple= new TTree("MonitorNtuple_RuleFitParams","RuleFit path search");
00177 fGDNtuple->Branch("risk", &fNTRisk, "risk/D");
00178 fGDNtuple->Branch("error", &fNTErrorRate,"error/D");
00179 fGDNtuple->Branch("nuval", &fNTNuval, "nuval/D");
00180 fGDNtuple->Branch("coefrad", &fNTCoefRad, "coefrad/D");
00181 fGDNtuple->Branch("offset", &fNTOffset, "offset/D");
00182
00183 fNTCoeff = (fNRules >0 ? new Double_t[fNRules] : 0);
00184 fNTLinCoeff = (fNLinear>0 ? new Double_t[fNLinear] : 0);
00185
00186 for (UInt_t i=0; i<fNRules; i++) {
00187 fGDNtuple->Branch(Form("a%d",i+1),&fNTCoeff[i],Form("a%d/D",i+1));
00188 }
00189 for (UInt_t i=0; i<fNLinear; i++) {
00190 fGDNtuple->Branch(Form("b%d",i+1),&fNTLinCoeff[i],Form("b%d/D",i+1));
00191 }
00192 }
00193
00194
00195 void TMVA::RuleFitParams::EvaluateAverage( UInt_t ind1, UInt_t ind2,
00196 std::vector<Double_t> &avsel,
00197 std::vector<Double_t> &avrul )
00198 {
00199
00200 UInt_t neve = ind2-ind1+1;
00201 if (neve<1) {
00202 Log() << kFATAL << "<EvaluateAverage> - no events selected for path search -> BUG!" << Endl;
00203 }
00204
00205 avsel.clear();
00206 avrul.clear();
00207
00208 if (fNLinear>0) avsel.resize(fNLinear,0);
00209 if (fNRules>0) avrul.resize(fNRules,0);
00210 const std::vector<UInt_t> *eventRuleMap=0;
00211 Double_t ew;
00212 Double_t sumew=0;
00213
00214
00215
00216 if (fRuleEnsemble->IsRuleMapOK()) {
00217 for ( UInt_t i=ind1; i<ind2+1; i++) {
00218 ew = fRuleFit->GetTrainingEventWeight(i);
00219 sumew += ew;
00220 for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
00221 avsel[sel] += ew*fRuleEnsemble->EvalLinEvent(i,sel);
00222 }
00223
00224 UInt_t nrules=0;
00225 if (fRuleEnsemble->DoRules()) {
00226 eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
00227 nrules = (*eventRuleMap).size();
00228 }
00229 for (UInt_t r=0; r<nrules; r++) {
00230 avrul[(*eventRuleMap)[r]] += ew;
00231 }
00232 }
00233 }
00234 else {
00235 const std::vector<Event *> *events = &(fRuleFit->GetTrainingEvents());
00236 for ( UInt_t i=ind1; i<ind2+1; i++) {
00237 ew = fRuleFit->GetTrainingEventWeight(i);
00238 sumew += ew;
00239
00240 Double_t val = fRuleEnsemble->EvalLinEvent(*((*events)[i]));
00241 val = fRuleEnsemble->EvalEvent(*((*events)[i]));
00242
00243 for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
00244 avsel[sel] += ew*fRuleEnsemble->GetEventLinearValNorm(sel);
00245 }
00246
00247 for (UInt_t r=0; r<fNRules; r++) {
00248 avrul[r] += ew*fRuleEnsemble->GetEventRuleVal(r);
00249 }
00250 }
00251 }
00252
00253 for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
00254 avsel[sel] = avsel[sel] / sumew;
00255 }
00256
00257 for (UInt_t r=0; r<fNRules; r++) {
00258 avrul[r] = avrul[r] / sumew;
00259 }
00260 }
00261
00262
00263 Double_t TMVA::RuleFitParams::LossFunction( const Event& e ) const
00264 {
00265
00266
00267 Double_t h = TMath::Max( -1.0, TMath::Min(1.0,fRuleEnsemble->EvalEvent( e )) );
00268 Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e)?1:-1) - h;
00269
00270 return diff*diff*e.GetWeight();
00271 }
00272
00273
00274 Double_t TMVA::RuleFitParams::LossFunction( UInt_t evtidx ) const
00275 {
00276
00277
00278 Double_t h = TMath::Max( -1.0, TMath::Min(1.0,fRuleEnsemble->EvalEvent( evtidx )) );
00279 Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) - h;
00280
00281 return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
00282 }
00283
00284
00285 Double_t TMVA::RuleFitParams::LossFunction( UInt_t evtidx, UInt_t itau ) const
00286 {
00287
00288
00289 Double_t e = fRuleEnsemble->EvalEvent( evtidx , fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau]);
00290 Double_t h = TMath::Max( -1.0, TMath::Min(1.0,e) );
00291 Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) - h;
00292
00293 return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
00294 }
00295
00296
00297 Double_t TMVA::RuleFitParams::Risk(UInt_t ind1,UInt_t ind2, Double_t neff) const
00298 {
00299
00300 UInt_t neve = ind2-ind1+1;
00301 if (neve<1) {
00302 Log() << kFATAL << "<Risk> Invalid start/end indices! BUG!!!" << Endl;
00303 }
00304 Double_t rval=0;
00305
00306
00307 for ( UInt_t i=ind1; i<ind2+1; i++) {
00308 rval += LossFunction(i);
00309 }
00310 rval = rval/neff;
00311
00312 return rval;
00313 }
00314
00315
00316 Double_t TMVA::RuleFitParams::Risk(UInt_t ind1,UInt_t ind2, Double_t neff, UInt_t itau) const
00317 {
00318
00319 UInt_t neve = ind2-ind1+1;
00320 if (neve<1) {
00321 Log() << kFATAL << "<Risk> Invalid start/end indices! BUG!!!" << Endl;
00322 }
00323 Double_t rval=0;
00324
00325
00326 for ( UInt_t i=ind1; i<ind2+1; i++) {
00327 rval += LossFunction(i,itau);
00328 }
00329 rval = rval/neff;
00330
00331 return rval;
00332 }
00333
00334
00335 Double_t TMVA::RuleFitParams::Penalty() const
00336 {
00337
00338
00339
00340 Log() << kWARNING << "<Penalty> Using unverified code! Check!" << Endl;
00341 Double_t rval=0;
00342 const std::vector<Double_t> *lincoeff = & (fRuleEnsemble->GetLinCoefficients());
00343 for (UInt_t i=0; i<fNRules; i++) {
00344 rval += TMath::Abs(fRuleEnsemble->GetRules(i)->GetCoefficient());
00345 }
00346 for (UInt_t i=0; i<fNLinear; i++) {
00347 rval += TMath::Abs((*lincoeff)[i]);
00348 }
00349 return rval;
00350 }
00351
00352
////////////////////////////////////////////////////////////////////////////////
/// Initialise the gradient-descent path search: build the list of tau
/// (cutoff) candidates and size all per-tau coefficient, gradient and
/// error containers.

void TMVA::RuleFitParams::InitGD()
{
   if (fGDNTau<2) {
      fGDNTau = 1;
      fGDTauScan = 0;
   }
   if (fGDTau<0.0) {
      // negative tau means: scan fGDNTau candidates in [fGDTauMin,fGDTauMax]
      fGDTauScan = 1000;
      fGDTauMin = 0.0;
      fGDTauMax = 1.0;
   }
   else {
      // user-supplied fixed tau -> no scan
      fGDNTau = 1;
      fGDTauScan = 0;
   }

   fGDTauVec.clear();
   fGDTauVec.resize( fGDNTau );
   if (fGDNTau==1) {
      fGDTauVec[0] = fGDTau;
   }
   else {
      // equidistant tau values in [fGDTauMin,fGDTauMax], clipped at 1.0
      Double_t dtau = (fGDTauMax - fGDTauMin)/static_cast<Double_t>(fGDNTau-1);
      for (UInt_t itau=0; itau<fGDNTau; itau++) {
         fGDTauVec[itau] = static_cast<Double_t>(itau)*dtau + fGDTauMin;
         if (fGDTauVec[itau]>1.0) fGDTauVec[itau]=1.0;
      }
   }

   // reset all gradient and per-tau coefficient containers
   fGradVec.clear();
   fGradVecLin.clear();
   fGradVecTst.clear();
   fGradVecLinTst.clear();
   fGDErrTst.clear();
   fGDErrTstOK.clear();
   fGDOfsTst.clear();
   fGDCoefTst.clear();
   fGDCoefLinTst.clear();

   // rule coefficients and gradients: one inner vector per tau candidate
   fGDCoefTst.resize(fGDNTau);
   fGradVec.resize(fNRules,0);
   fGradVecTst.resize(fGDNTau);
   for (UInt_t i=0; i<fGDNTau; i++) {
      fGradVecTst[i].resize(fNRules,0);
      fGDCoefTst[i].resize(fNRules,0);
   }

   // linear-term coefficients and gradients, same layout
   fGDCoefLinTst.resize(fGDNTau);
   fGradVecLin.resize(fNLinear,0);
   fGradVecLinTst.resize(fGDNTau);
   for (UInt_t i=0; i<fGDNTau; i++) {
      fGradVecLinTst[i].resize(fNLinear,0);
      fGDCoefLinTst[i].resize(fNLinear,0);
   }

   // per-tau error estimates and offsets; initially every tau candidate
   // is active (fGDErrTstOK all kTRUE)
   fGDErrTst.resize(fGDNTau,0);
   fGDErrTstOK.resize(fGDNTau,kTRUE);
   fGDOfsTst.resize(fGDNTau,0);
   fGDNTauTstOK = fGDNTau;
}
00426
00427
////////////////////////////////////////////////////////////////////////////////
/// Scan the gradient path for all tau candidates in parallel, periodically
/// pruning candidates with poor error estimates (RiskPerfTst), and adopt
/// the coefficients of the best tau found.
/// Returns the number of requested scan steps (0 if no scan is done).

Int_t TMVA::RuleFitParams::FindGDTau()
{
   if (fGDNTau<2) return 0;
   if (fGDTauScan==0) return 0;

   if (fGDOfsTst.size()<1)
      Log() << kFATAL << "BUG! FindGDTau() has been called BEFORE InitGD()." << Endl;

   Log() << kINFO << "Estimating the cutoff parameter tau. The estimated time is a pessimistic maximum." << Endl;

   // scan length and error-check period (at most every 100 steps)
   UInt_t nscan = fGDTauScan;
   UInt_t netst = std::min(nscan,UInt_t(100));
   UInt_t nscanned=0;

   // loop: one gradient step per iteration for every still-active tau;
   // stops when the scan length is exhausted or fewer than 4 candidates
   // remain active
   Bool_t doloop=kTRUE;
   UInt_t ip=0;
   UInt_t itauMin=0;    // index of the tau with the smallest error so far
   Timer timer( nscan, "RuleFit" );
   while (doloop) {
      // compute gradients for all active tau candidates
      MakeTstGradientVector();
      // take a step for all active tau candidates
      UpdateTstCoefficients();

      nscanned++;
      if ( (ip==0) || ((ip+1)%netst==0) ) {
         // evaluate errors, prune weak candidates, find the current best
         itauMin = RiskPerfTst();
         Log() << kVERBOSE << Form("%4d",ip+1) << ". tau = " << Form("%4.4f",fGDTauVec[itauMin])
               << " => error rate = " << fGDErrTst[itauMin] << Endl;
      }
      ip++;
      doloop = ((ip<nscan) && (fGDNTauTstOK>3));
      gFIRSTTST=kFALSE;
      if (Log().GetMinType()>kVERBOSE)
         timer.DrawProgressBar(ip);
   }

   if (nscanned==0) {
      Log() << kERROR << "<FindGDTau> number of scanned loops is zero! Should NOT see this message." << Endl;
   }
   // adopt the best tau: copy its coefficients/offset into the ensemble
   fGDTau = fGDTauVec[itauMin];
   fRuleEnsemble->SetCoefficients( fGDCoefTst[itauMin] );
   fRuleEnsemble->SetLinCoefficients( fGDCoefLinTst[itauMin] );
   fRuleEnsemble->SetOffset( fGDOfsTst[itauMin] );
   Log() << kINFO << "Best path found with tau = " << Form("%4.4f",fGDTau)
         << " after " << timer.GetElapsedTime() << " " << Endl;

   return nscan;
}
00494
00495
////////////////////////////////////////////////////////////////////////////////
/// Create the gradient-directed regularisation path and select the point
/// (coefficient set) with the smallest estimated error rate.
///
/// Outline:
///   1. InitGD() and evaluation of average responses
///   2. initial offset from (minus) the average truth; optional tau scan
///   3. main loop: gradient step + coefficient update; every 'imod' steps
///      compute the risk (path sample) and the error rate (performance
///      sample), remember the best point, and stop on a clear minimum,
///      chaotic risk evolution, or the step limit
///   4. restore the best coefficients found

void TMVA::RuleFitParams::MakeGDPath()
{
   Log() << kINFO << "GD path scan - the scan stops when the max num. of steps is reached or a min is found"
         << Endl;
   Log() << kVERBOSE << "Number of events used per path step = " << fPathIdx2-fPathIdx1+1 << Endl;
   Log() << kVERBOSE << "Number of events used for error estimation = " << fPerfIdx2-fPerfIdx1+1 << Endl;

   // verbosity flags control timing measurements and ntuple output
   const Bool_t isVerbose = (Log().GetMinType()<=kVERBOSE);
   const Bool_t isDebug = (Log().GetMinType()<=kDEBUG);

   // initialise gradient-descent containers and the tau candidates
   InitGD();

   // average rule/linear responses on path and performance samples
   EvaluateAveragePath();
   EvaluateAveragePerf();

   Log() << kVERBOSE << "Creating GD path" << Endl;
   Log() << kVERBOSE << " N(steps) = " << fGDNPathSteps << Endl;
   Log() << kVERBOSE << " step = " << fGDPathStep << Endl;
   Log() << kVERBOSE << " N(tau) = " << fGDNTau << Endl;
   Log() << kVERBOSE << " N(tau steps) = " << fGDTauScan << Endl;
   Log() << kVERBOSE << " tau range = [ " << fGDTauVec[0] << " , " << fGDTauVec[fGDNTau-1] << " ]" << Endl;

   // book the monitoring ntuple (debug mode only)
   if (isDebug) InitNtuple();

   // risk bookkeeping along the path
   Int_t nbadrisk=0;       // number of steps where the risk increased
   Double_t trisk=0;       // time of last risk evaluation
   Double_t strisk=0;      // accumulated risk-evaluation time
   Double_t rprev=1e32;    // risk at the previous check

   // best point found so far
   Double_t errmin=1e32;   // smallest error rate seen
   Double_t riskMin=0;     // risk at that point
   Int_t indMin=-1;        // step index of that point (-1 = none yet)
   std::vector<Double_t> coefsMin;     // rule coefficients at the minimum
   std::vector<Double_t> lincoefsMin;  // linear coefficients at the minimum
   Double_t offsetMin;                 // offset at the minimum

   // timing accumulators (verbose mode only)
   clock_t t0=0;
   Double_t tgradvec;
   Double_t tupgrade;
   Double_t tperf;
   Double_t stgradvec=0;
   Double_t stupgrade=0;
   Double_t stperf=0;

   // sliding window of the last npreg (nu, error) points, used for the
   // end-of-path slope estimate
   const UInt_t npreg=5;
   std::vector<Double_t> valx;
   std::vector<Double_t> valy;
   std::vector<Double_t> valxy;

   // loop control flags
   Bool_t docheck;           // true when an error check is due this step
   Int_t iloop=0;            // step counter
   Bool_t found=kFALSE;      // true once a minimum has been declared
   Bool_t riskFlat=kFALSE;   // true when the risk evolution is chaotic
   Bool_t done = kFALSE;     // loop termination flag

   // period of the error checks, clamped to [1,100] steps
   int imod = fGDNPathSteps/100;
   if (imod<100) imod = std::min(100,fGDNPathSteps);
   if (imod>100) imod=100;

   // start from zero coefficients with the offset set to minus the
   // average truth, for all tau candidates
   fAverageTruth = -CalcAverageTruth();
   offsetMin = fAverageTruth;
   fRuleEnsemble->SetOffset(offsetMin);
   fRuleEnsemble->ClearCoefficients(0);
   fRuleEnsemble->ClearLinCoefficients(0);
   for (UInt_t i=0; i<fGDOfsTst.size(); i++) {
      fGDOfsTst[i] = offsetMin;
   }
   Log() << kVERBOSE << "Obtained initial offset = " << offsetMin << Endl;

   // optionally scan for the best tau first (returns scan step count)
   Int_t nprescan = FindGDTau();

   // seed the ntuple buffers for the first fill
   fNTRisk = rprev;
   fNTCoefRad = -1.0;
   fNTErrorRate = 0;

   // reason the loop ended: 0=minimum found, 1=chaotic risk, 2=step limit
   Int_t stopCondition=0;

   Log() << kINFO << "Fitting model..." << Endl;

   Timer timer( fGDNPathSteps, "RuleFit" );
   while (!done) {
      // compute the gradient vector for the current coefficients
      if (isVerbose) t0 = clock();
      MakeGradientVector();
      if (isVerbose) {
         tgradvec = Double_t(clock()-t0)/CLOCKS_PER_SEC;
         stgradvec += tgradvec;
      }

      // take a step: update the ensemble coefficients
      if (isVerbose) t0 = clock();
      UpdateCoefficients();
      if (isVerbose) {
         tupgrade = Double_t(clock()-t0)/CLOCKS_PER_SEC;
         stupgrade += tupgrade;
      }

      // error/risk check at the first step and then every imod steps
      docheck = ((iloop==0) ||((iloop+1)%imod==0));

      if (docheck) {
         // progress bar only when not printing verbose lines
         if (!isVerbose)
            timer.DrawProgressBar(iloop);
         fNTNuval = Double_t(iloop)*fGDPathStep;
         fNTRisk = 0.0;

         // fill coefficient ntuple buffers (debug mode)
         if (isDebug) FillCoefficients();
         fNTCoefRad = fRuleEnsemble->CoefficientRadius();

         // risk on the path sample
         t0 = clock();
         fNTRisk = RiskPath();
         trisk = Double_t(clock()-t0)/CLOCKS_PER_SEC;
         strisk += trisk;

         // the risk should decrease monotonically along the path; more
         // than 3 increases is treated as chaotic behaviour and stops
         // the minimisation
         if (fNTRisk>=rprev) {
            if (fNTRisk>rprev) {
               nbadrisk++;
               Log() << kWARNING << "Risk(i+1)>=Risk(i) in path" << Endl;
               riskFlat=(nbadrisk>3);
               if (riskFlat) {
                  Log() << kWARNING << "Chaotic behaviour of risk evolution" << Endl;
                  Log() << kWARNING << "--- STOPPING MINIMISATION ---" << Endl;
                  Log() << kWARNING << "This may be OK if minimum is already found" << Endl;
               }
            }
         }
         rprev = fNTRisk;

         // error-rate estimate on the performance sample
         if (isVerbose) t0 = clock();
         fNTErrorRate = 0;

         Double_t errroc;
         Double_t riskPerf = RiskPerf();
         // here the error rate is simply the risk on the performance
         // sample (no ROC-based estimate is used)
         errroc = riskPerf;

         fNTErrorRate = errroc;

         if (isVerbose) {
            tperf = Double_t(clock()-t0)/CLOCKS_PER_SEC;
            stperf +=tperf;
         }

         // remember the best point seen so far
         if (fNTErrorRate<=errmin) {
            errmin = fNTErrorRate;
            riskMin = fNTRisk;
            indMin = iloop;
            fRuleEnsemble->GetCoefficients(coefsMin);
            lincoefsMin = fRuleEnsemble->GetLinCoefficients();
            offsetMin = fRuleEnsemble->GetOffset();
         }
         // declare a minimum once the error exceeds the best value by
         // the factor fGDErrScale
         if ( fNTErrorRate > fGDErrScale*errmin) found = kTRUE;

         // keep only the last npreg points for the slope estimate below
         if (valx.size()==npreg) {
            valx.erase(valx.begin());
            valy.erase(valy.begin());
            valxy.erase(valxy.begin());
         }
         valx.push_back(fNTNuval);
         valy.push_back(fNTErrorRate);
         valxy.push_back(fNTErrorRate*fNTNuval);

         gFIRSTORG=kFALSE;

         if (isDebug) fGDNtuple->Fill();
         if (isVerbose) {
            Log() << kVERBOSE << "ParamsIRE : "
                  << std::setw(10)
                  << Form("%8d",iloop+1) << " "
                  << Form("%4.4f",fNTRisk) << " "
                  << Form("%4.4f",riskPerf) << " "
                  << Form("%4.4f",fNTRisk+riskPerf) << " "
                  << Endl;
         }
      }
      iloop++;

      // on chaotic risk or step-limit without a declared minimum, fall
      // back to the best point recorded so far
      Bool_t endOfLoop = (iloop==fGDNPathSteps);
      if ( ((riskFlat) || (endOfLoop)) && (!found) ) {
         if (riskFlat) {
            stopCondition = 1;
         }
         else if (endOfLoop) {
            stopCondition = 2;
         }
         if (indMin<0) {
            Log() << kWARNING << "BUG TRAP: should not be here - still, this bug is harmless;)" << Endl;
            errmin = fNTErrorRate;
            riskMin = fNTRisk;
            indMin = iloop;
            fRuleEnsemble->GetCoefficients(coefsMin);
            lincoefsMin = fRuleEnsemble->GetLinCoefficients();
            offsetMin = fRuleEnsemble->GetOffset();
         }
         found = kTRUE;
      }
      done = (found);
   }
   Log() << kINFO << "Minimisation elapsed time : " << timer.GetElapsedTime() << " " << Endl;
   Log() << kINFO << "----------------------------------------------------------------" << Endl;
   Log() << kINFO << "Found minimum at step " << indMin+1 << " with error = " << errmin << Endl;
   Log() << kINFO << "Reason for ending loop: ";
   switch (stopCondition) {
   case 0:
      Log() << kINFO << "clear minima found";
      break;
   case 1:
      Log() << kINFO << "chaotic behaviour of risk";
      break;
   case 2:
      Log() << kINFO << "end of loop reached";
      break;
   default:
      Log() << kINFO << "unknown!";
      break;
   }
   Log() << Endl;
   Log() << kINFO << "----------------------------------------------------------------" << Endl;

   // warn if the minimum occurred suspiciously early in the path
   if ( Double_t(indMin)/Double_t(nprescan+fGDNPathSteps) < 0.05 ) {
      Log() << kWARNING << "Reached minimum early in the search" << Endl;
      Log() << kWARNING << "Check results and maybe decrease GDStep size" << Endl;
   }

   // slope of error-vs-nu over the last npreg points; a negative value
   // means the error was still decreasing when the path ended
   Double_t sumx = std::accumulate( valx.begin(), valx.end(), Double_t() );
   Double_t sumxy = std::accumulate( valxy.begin(), valxy.end(), Double_t() );
   Double_t sumy = std::accumulate( valy.begin(), valy.end(), Double_t() );
   Double_t slope = Double_t(valx.size())*sumxy - sumx*sumy;
   if (slope<0) {
      Log() << kINFO << "The error rate was still decreasing at the end of the path" << Endl;
      Log() << kINFO << "Increase number of steps (GDNSteps)." << Endl;
   }

   // restore the coefficients of the best point found
   if (found) {
      fRuleEnsemble->SetCoefficients( coefsMin );
      fRuleEnsemble->SetLinCoefficients( lincoefsMin );
      fRuleEnsemble->SetOffset( offsetMin );
   }
   else {
      Log() << kFATAL << "BUG TRAP: minimum not found in MakeGDPath()" << Endl;
   }

   // timing summary (verbose mode). The gGD* accumulators are not updated
   // in this file's visible code -- presumably filled elsewhere; confirm
   // before relying on those lines.
   if (isVerbose) {
      Double_t stloop = strisk +stupgrade + stgradvec + stperf;
      Log() << kVERBOSE << "Timing per loop (ms):" << Endl;
      Log() << kVERBOSE << " gradvec = " << 1000*stgradvec/iloop << Endl;
      Log() << kVERBOSE << " upgrade = " << 1000*stupgrade/iloop << Endl;
      Log() << kVERBOSE << " risk = " << 1000*strisk/iloop << Endl;
      Log() << kVERBOSE << " perf = " << 1000*stperf/iloop << Endl;
      Log() << kVERBOSE << " loop = " << 1000*stloop/iloop << Endl;

      Log() << kVERBOSE << " GDInit = " << 1000*gGDInit/iloop << Endl;
      Log() << kVERBOSE << " GDPtr = " << 1000*gGDPtr/iloop << Endl;
      Log() << kVERBOSE << " GDEval = " << 1000*gGDEval/iloop << Endl;
      Log() << kVERBOSE << " GDEvalRule = " << 1000*gGDEvalRule/iloop << Endl;
      Log() << kVERBOSE << " GDNorm = " << 1000*gGDNorm/iloop << Endl;
      Log() << kVERBOSE << " GDRuleLoop = " << 1000*gGDRuleLoop/iloop << Endl;
      Log() << kVERBOSE << " GDLinLoop = " << 1000*gGDLinLoop/iloop << Endl;
   }

   // persist the monitoring ntuple (debug mode)
   if (isDebug) fGDNtuple->Write();
}
00841
00842
00843 void TMVA::RuleFitParams::FillCoefficients()
00844 {
00845
00846
00847 fNTOffset = fRuleEnsemble->GetOffset();
00848
00849 for (UInt_t i=0; i<fNRules; i++) {
00850 fNTCoeff[i] = fRuleEnsemble->GetRules(i)->GetCoefficient();
00851 }
00852 for (UInt_t i=0; i<fNLinear; i++) {
00853 fNTLinCoeff[i] = fRuleEnsemble->GetLinCoefficients(i);
00854 }
00855 }
00856
00857
00858 void TMVA::RuleFitParams::CalcFStar()
00859 {
00860
00861
00862
00863
00864 Log() << kWARNING << "<CalcFStar> Using unverified code! Check!" << Endl;
00865 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
00866 if (neve<1) {
00867 Log() << kFATAL << "<CalcFStar> Invalid start/end indices!" << Endl;
00868 return;
00869 }
00870
00871 const std::vector<Event *> *events = &(fRuleFit->GetTrainingEvents());
00872
00873 fFstar.clear();
00874 std::vector<Double_t> fstarSorted;
00875 Double_t fstarVal;
00876
00877 for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
00878 const Event& e = *(*events)[i];
00879 fstarVal = fRuleEnsemble->FStar(e);
00880 fFstar.push_back(fstarVal);
00881 fstarSorted.push_back(fstarVal);
00882 if (isnan(fstarVal)) Log() << kFATAL << "F* is NAN!" << Endl;
00883 }
00884
00885 std::sort( fstarSorted.begin(), fstarSorted.end() );
00886 UInt_t ind = neve/2;
00887 if (neve&1) {
00888 fFstarMedian = 0.5*(fstarSorted[ind]+fstarSorted[ind-1]);
00889 }
00890 else {
00891 fFstarMedian = fstarSorted[ind];
00892 }
00893 }
00894
00895
////////////////////////////////////////////////////////////////////////////////
/// Estimate the "optimism" 2*cov(yhat,y)/(1-sum(w^2)) over the
/// performance sample, where w are the event weights normalised by the
/// effective number of events, y = +-1 is the true class and yhat the
/// ensemble response.

Double_t TMVA::RuleFitParams::Optimism()
{
   Log() << kWARNING << "<Optimism> Using unverified code! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<Optimism> Invalid start/end indices!" << Endl;
   }

   const std::vector<Event *> *events = &(fRuleFit->GetTrainingEvents());

   Double_t sumy=0;      // sum w*y
   Double_t sumyhat=0;   // sum w*yhat
   Double_t sumyhaty=0;  // sum w*yhat*y
   Double_t sumw2=0;     // sum w^2
   Double_t sumw=0;      // sum w (accumulated but not used below)
   Double_t yhat;        // model response for the event
   Double_t y;           // true label: +1 signal, -1 background
   Double_t w;           // normalised event weight

   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      yhat = fRuleEnsemble->EvalEvent(i);
      y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e) ? 1.0:-1.0);
      w = fRuleFit->GetTrainingEventWeight(i)/fNEveEffPerf;
      sumy += w*y;
      sumyhat += w*yhat;
      sumyhaty += w*yhat*y;
      sumw2 += w*w;
      sumw += w;
   }
   // NOTE(review): div is zero when sum(w^2)==1 (e.g. a single event) --
   // there is no guard against division by zero here.
   Double_t div = 1.0-sumw2;
   Double_t cov = sumyhaty - sumyhat*sumy;
   return 2.0*cov/div;
}
00936
00937
////////////////////////////////////////////////////////////////////////////////
/// Regression-style error estimate on the performance sample:
/// sum |F* - F(x)| normalised by sum |F* - median(F*)|.
/// Requires fFstar to have been filled by CalcFStar() first.

Double_t TMVA::RuleFitParams::ErrorRateReg()
{
   Log() << kWARNING << "<ErrorRateReg> Using unverified code! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateReg> Invalid start/end indices!" << Endl;
   }
   if (fFstar.size()!=neve) {
      Log() << kFATAL << "--- RuleFitParams::ErrorRateReg() - F* not initialized! BUG!!!"
            << " Fstar.size() = " << fFstar.size() << " , N(events) = " << neve << Endl;
   }

   Double_t sF; // ensemble response for the current event

   const std::vector<Event *> *events = &(fRuleFit->GetTrainingEvents());

   Double_t sumdf = 0;    // accumulates |F* - F|
   Double_t sumdfmed = 0; // accumulates |F* - median(F*)|

   // fFstar is indexed relative to the start of the performance range
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      sF = fRuleEnsemble->EvalEvent( e );

      sumdf += TMath::Abs(fFstar[i-fPerfIdx1] - sF);
      sumdfmed += TMath::Abs(fFstar[i-fPerfIdx1] - fFstarMedian);
   }

   // normalised absolute deviation from F*
   return sumdf/sumdfmed;
}
00978
00979
00980 Double_t TMVA::RuleFitParams::ErrorRateBin()
00981 {
00982
00983
00984
00985
00986
00987
00988
00989 Log() << kWARNING << "<ErrorRateBin> Using unverified code! Check!" << Endl;
00990 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
00991 if (neve<1) {
00992 Log() << kFATAL << "<ErrorRateBin> Invalid start/end indices!" << Endl;
00993 }
00994
00995 const std::vector<Event *> *events = &(fRuleFit->GetTrainingEvents());
00996
00997 Double_t sumdfbin = 0;
00998 Double_t dneve = Double_t(neve);
00999 Int_t signF, signy;
01000 Double_t sF;
01001
01002 for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
01003 const Event& e = *(*events)[i];
01004 sF = fRuleEnsemble->EvalEvent( e );
01005
01006 signF = (sF>0 ? +1:-1);
01007
01008 signy = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e) ? +1:-1);
01009 sumdfbin += TMath::Abs(Double_t(signF-signy))*0.5;
01010 }
01011 Double_t f = sumdfbin/dneve;
01012
01013 return f;
01014 }
01015
01016
01017 Double_t TMVA::RuleFitParams::ErrorRateRocRaw( std::vector<Double_t> & sFsig,
01018 std::vector<Double_t> & sFbkg )
01019
01020 {
01021
01022
01023
01024
01025
01026 std::sort(sFsig.begin(), sFsig.end());
01027 std::sort(sFbkg.begin(), sFbkg.end());
01028 const Double_t minsig = sFsig.front();
01029 const Double_t minbkg = sFbkg.front();
01030 const Double_t maxsig = sFsig.back();
01031 const Double_t maxbkg = sFbkg.back();
01032 const Double_t minf = std::min(minsig,minbkg);
01033 const Double_t maxf = std::max(maxsig,maxbkg);
01034 const Int_t nsig = Int_t(sFsig.size());
01035 const Int_t nbkg = Int_t(sFbkg.size());
01036 const Int_t np = std::min((nsig+nbkg)/4,50);
01037 const Double_t df = (maxf-minf)/(np-1);
01038
01039
01040
01041 Double_t fcut;
01042 std::vector<Double_t>::const_iterator indit;
01043 Int_t nrbkg;
01044 Int_t nesig;
01045 Int_t pnesig=0;
01046 Double_t rejb=0;
01047 Double_t effs=1.0;
01048 Double_t prejb=0;
01049 Double_t peffs=1.0;
01050 Double_t drejb;
01051 Double_t deffs;
01052 Double_t area=0;
01053 Int_t npok=0;
01054
01055
01056
01057 for (Int_t i=0; i<np; i++) {
01058 fcut = minf + df*Double_t(i);
01059 indit = std::find_if( sFsig.begin(), sFsig.end(), std::bind2nd(std::greater_equal<Double_t>(), fcut));
01060 nesig = sFsig.end()-indit;
01061 if (TMath::Abs(pnesig-nesig)>0) {
01062 npok++;
01063 indit = std::find_if( sFbkg.begin(), sFbkg.end(), std::bind2nd(std::greater_equal<Double_t>(), fcut));
01064 nrbkg = indit-sFbkg.begin();
01065 rejb = Double_t(nrbkg)/Double_t(nbkg);
01066 effs = Double_t(nesig)/Double_t(nsig);
01067
01068 drejb = rejb-prejb;
01069 deffs = effs-peffs;
01070 area += 0.5*TMath::Abs(deffs)*(rejb+prejb);
01071 prejb = rejb;
01072 peffs = effs;
01073 }
01074 pnesig = nesig;
01075 }
01076 area += 0.5*(1+rejb)*effs;
01077
01078 return (1.0-area);
01079 }
01080
01081
////////////////////////////////////////////////////////////////////////////////
/// ROC-based error estimate (1-AUC) on the performance sample. As a side
/// effect, fills the per-class score averages and RMS
/// (fsigave/fsigrms/fbkgave/fbkgrms).

Double_t TMVA::RuleFitParams::ErrorRateRoc()
{
   Log() << kWARNING << "<ErrorRateRoc> Should not be used in the current version! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateRoc> Invalid start/end indices!" << Endl;
   }

   const std::vector<Event *> *events = &(fRuleFit->GetTrainingEvents());

   Double_t sF; // ensemble response for the current event

   std::vector<Double_t> sFsig; // scores of true-signal events
   std::vector<Double_t> sFbkg; // scores of true-background events
   Double_t sumfsig=0;
   Double_t sumfbkg=0;
   Double_t sumf2sig=0;
   Double_t sumf2bkg=0;

   // split scores by true class and accumulate first/second moments
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      sF = fRuleEnsemble->EvalEvent(i);
      if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e)) {
         sFsig.push_back(sF);
         sumfsig +=sF;
         sumf2sig +=sF*sF;
      }
      else {
         sFbkg.push_back(sF);
         sumfbkg +=sF;
         sumf2bkg +=sF*sF;
      }
   }
   // NOTE(review): divides by the per-class counts -- no guard against an
   // empty signal or background sample here.
   fsigave = sumfsig/sFsig.size();
   fbkgave = sumfbkg/sFbkg.size();
   fsigrms = TMath::Sqrt(gTools().ComputeVariance(sumf2sig,sumfsig,sFsig.size()));
   fbkgrms = TMath::Sqrt(gTools().ComputeVariance(sumf2bkg,sumfbkg,sFbkg.size()));

   return ErrorRateRocRaw( sFsig, sFbkg );
}
01128
01129
////////////////////////////////////////////////////////////////////////////////
/// ROC-based error estimate (1-AUC) computed separately for every tau
/// candidate, using that candidate's offset/coefficient set; results are
/// stored in fGDErrTst.

void TMVA::RuleFitParams::ErrorRateRocTst()
{
   Log() << kWARNING << "<ErrorRateRocTst> Should not be used in the current version! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateRocTst> Invalid start/end indices!" << Endl;
      return;
   }

   const std::vector<Event *> *events = &(fRuleFit->GetTrainingEvents());

   Double_t sF;
   std::vector< std::vector<Double_t> > sFsig; // per-tau scores, signal events
   std::vector< std::vector<Double_t> > sFbkg; // per-tau scores, background events

   sFsig.resize( fGDNTau );
   sFbkg.resize( fGDNTau );

   // evaluate each event once per tau with that tau's coefficient set
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      for (UInt_t itau=0; itau<fGDNTau; itau++) {
         sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) {
            sFsig[itau].push_back(sF);
         }
         else {
            sFbkg[itau].push_back(sF);
         }
      }
   }
   Double_t err;
   // 1-AUC per tau candidate
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      err = ErrorRateRocRaw( sFsig[itau], sFbkg[itau] );
      fGDErrTst[itau] = err;
   }
}
01177
01178
////////////////////////////////////////////////////////////////////////////////
/// Evaluate the error (risk on the performance sample) for every
/// still-active tau candidate, deactivate candidates whose error exceeds
/// the minimum plus one standard deviation, update fGDNTauTstOK and
/// return the index of the tau with the smallest error.

UInt_t TMVA::RuleFitParams::RiskPerfTst()
{
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateRocTst> Invalid start/end indices!" << Endl;
      return 0;
   }

   Double_t sumx = 0;       // sum of errors over active candidates
   Double_t sumx2 = 0;      // sum of squared errors
   Double_t maxx = -100.0;  // largest error seen
   Double_t minx = 1e30;    // smallest error seen
   UInt_t itaumin = 0;      // index of the best tau
   UInt_t nok=0;            // number of active candidates
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      if (fGDErrTstOK[itau]) {
         nok++;
         fGDErrTst[itau] = RiskPerf(itau);
         sumx += fGDErrTst[itau];
         sumx2 += fGDErrTst[itau]*fGDErrTst[itau];
         if (fGDErrTst[itau]>maxx) maxx=fGDErrTst[itau];
         if (fGDErrTst[itau]<minx) {
            minx=fGDErrTst[itau];
            itaumin = itau;
         }
      }
   }
   Double_t sigx = TMath::Sqrt(gTools().ComputeVariance( sumx2, sumx, nok ) );
   Double_t maxacc = minx+sigx; // acceptance threshold: best error + 1 sigma

   // prune candidates above the threshold and recount the survivors
   if (nok>0) {
      nok = 0;
      for (UInt_t itau=0; itau<fGDNTau; itau++) {
         if (fGDErrTstOK[itau]) {
            if (fGDErrTst[itau] > maxacc) {
               fGDErrTstOK[itau] = kFALSE;
            }
            else {
               nok++;
            }
         }
      }
   }
   fGDNTauTstOK = nok;
   Log() << kVERBOSE << "TAU: "
         << itaumin << " "
         << nok << " "
         << minx << " "
         << maxx << " "
         << sigx << Endl;

   return itaumin;
}
01237
01238
01239 void TMVA::RuleFitParams::MakeTstGradientVector()
01240 {
01241
01242
01243 UInt_t neve = fPathIdx1-fPathIdx2+1;
01244 if (neve<1) {
01245 Log() << kFATAL << "<MakeTstGradientVector> Invalid start/end indices!" << Endl;
01246 return;
01247 }
01248
01249 Double_t norm = 2.0/fNEveEffPath;
01250
01251 const std::vector<Event *> *events = &(fRuleFit->GetTrainingEvents());
01252
01253
01254 for (UInt_t itau=0; itau<fGDNTau; itau++) {
01255 if (fGDErrTstOK[itau]) {
01256 for (UInt_t ir=0; ir<fNRules; ir++) {
01257 fGradVecTst[itau][ir]=0;
01258 }
01259 for (UInt_t il=0; il<fNLinear; il++) {
01260 fGradVecLinTst[itau][il]=0;
01261 }
01262 }
01263 }
01264
01265
01266 Double_t sF;
01267 Double_t r;
01268 Double_t y;
01269 const std::vector<UInt_t> *eventRuleMap=0;
01270 UInt_t rind;
01271
01272
01273
01274 UInt_t nsfok=0;
01275 for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
01276 const Event *e = (*events)[i];
01277 UInt_t nrules=0;
01278 if (fRuleEnsemble->DoRules()) {
01279 eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
01280 nrules = (*eventRuleMap).size();
01281 }
01282 for (UInt_t itau=0; itau<fGDNTau; itau++) {
01283
01284
01285 if (fGDErrTstOK[itau]) {
01286 sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
01287 if (TMath::Abs(sF)<1.0) {
01288 nsfok++;
01289 r = 0;
01290 y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e)?1.0:-1.0);
01291 r = norm*(y - sF) * fRuleFit->GetTrainingEventWeight(i);
01292
01293 for (UInt_t ir=0; ir<nrules; ir++) {
01294 rind = (*eventRuleMap)[ir];
01295 fGradVecTst[itau][rind] += r;
01296 }
01297
01298 for (UInt_t il=0; il<fNLinear; il++) {
01299 fGradVecLinTst[itau][il] += r*fRuleEnsemble->EvalLinEventRaw( il,i, kTRUE );
01300 }
01301 }
01302 }
01303 }
01304 }
01305 }
01306
01307
01308 void TMVA::RuleFitParams::UpdateTstCoefficients()
01309 {
01310
01311
01312
01313 Double_t maxr, maxl, cthresh, val;
01314
01315 for (UInt_t itau=0; itau<fGDNTau; itau++) {
01316 if (fGDErrTstOK[itau]) {
01317
01318 maxr = ( (fNRules>0 ?
01319 TMath::Abs(*(std::max_element( fGradVecTst[itau].begin(), fGradVecTst[itau].end(), AbsValue()))):0) );
01320 maxl = ( (fNLinear>0 ?
01321 TMath::Abs(*(std::max_element( fGradVecLinTst[itau].begin(), fGradVecLinTst[itau].end(), AbsValue()))):0) );
01322
01323
01324 Double_t maxv = (maxr>maxl ? maxr:maxl);
01325 cthresh = maxv * fGDTauVec[itau];
01326
01327
01328
01329
01330
01331
01332
01333 if (maxv>0) {
01334 const Double_t stepScale=1.0;
01335 for (UInt_t i=0; i<fNRules; i++) {
01336 val = fGradVecTst[itau][i];
01337
01338 if (TMath::Abs(val)>=cthresh) {
01339 fGDCoefTst[itau][i] += fGDPathStep*val*stepScale;
01340 }
01341 }
01342
01343 for (UInt_t i=0; i<fNLinear; i++) {
01344 val = fGradVecLinTst[itau][i];
01345 if (TMath::Abs(val)>=cthresh) {
01346 fGDCoefLinTst[itau][i] += fGDPathStep*val*stepScale/fRuleEnsemble->GetLinNorm(i);
01347 }
01348 }
01349 }
01350 }
01351 }
01352
01353 CalcTstAverageResponse();
01354 }
01355
01356
01357 void TMVA::RuleFitParams::MakeGradientVector()
01358 {
01359
01360
01361 clock_t t0;
01362
01363 t0 = clock();
01364
01365 UInt_t neve = fPathIdx2-fPathIdx1+1;
01366 if (neve<1) {
01367 Log() << kFATAL << "<MakeGradientVector> Invalid start/end indices!" << Endl;
01368 return;
01369 }
01370
01371 const Double_t norm = 2.0/fNEveEffPath;
01372
01373 const std::vector<Event *> *events = &(fRuleFit->GetTrainingEvents());
01374
01375
01376 for (UInt_t ir=0; ir<fNRules; ir++) {
01377 fGradVec[ir]=0;
01378 }
01379 for (UInt_t il=0; il<fNLinear; il++) {
01380 fGradVecLin[il]=0;
01381 }
01382
01383
01384 Double_t sF;
01385 Double_t r;
01386 Double_t y;
01387 const std::vector<UInt_t> *eventRuleMap=0;
01388 UInt_t rind;
01389
01390 gGDInit += Double_t(clock()-t0)/CLOCKS_PER_SEC;
01391
01392 for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
01393 const Event *e = (*events)[i];
01394
01395
01396 sF = fRuleEnsemble->EvalEvent( i );
01397
01398 if (TMath::Abs(sF)<1.0) {
01399 UInt_t nrules=0;
01400 if (fRuleEnsemble->DoRules()) {
01401 eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
01402 nrules = (*eventRuleMap).size();
01403 }
01404 y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e)?1.0:-1.0);
01405 r = norm*(y - sF) * fRuleFit->GetTrainingEventWeight(i);
01406
01407 for (UInt_t ir=0; ir<nrules; ir++) {
01408 rind = (*eventRuleMap)[ir];
01409 fGradVec[rind] += r;
01410 }
01411
01412
01413
01414 for (UInt_t il=0; il<fNLinear; il++) {
01415 fGradVecLin[il] += r*fRuleEnsemble->EvalLinEventRaw( il, i, kTRUE );
01416 }
01417
01418 }
01419 }
01420 }
01421
01422
01423
01424 void TMVA::RuleFitParams::UpdateCoefficients()
01425 {
01426
01427
01428 Double_t maxr = ( (fRuleEnsemble->DoRules() ?
01429 TMath::Abs(*(std::max_element( fGradVec.begin(), fGradVec.end(), AbsValue()))):0) );
01430 Double_t maxl = ( (fRuleEnsemble->DoLinear() ?
01431 TMath::Abs(*(std::max_element( fGradVecLin.begin(), fGradVecLin.end(), AbsValue()))):0) );
01432
01433 Double_t maxv = (maxr>maxl ? maxr:maxl);
01434 Double_t cthresh = maxv * fGDTau;
01435
01436 Double_t useRThresh;
01437 Double_t useLThresh;
01438
01439
01440
01441 useRThresh = cthresh;
01442 useLThresh = cthresh;
01443
01444 Double_t gval, lval, coef, lcoef;
01445
01446
01447
01448
01449 if (maxv>0) {
01450 for (UInt_t i=0; i<fGradVec.size(); i++) {
01451 gval = fGradVec[i];
01452 if (TMath::Abs(gval)>=useRThresh) {
01453 coef = fRuleEnsemble->GetRulesConst(i)->GetCoefficient() + fGDPathStep*gval;
01454 fRuleEnsemble->GetRules(i)->SetCoefficient(coef);
01455 }
01456 }
01457
01458
01459 for (UInt_t i=0; i<fGradVecLin.size(); i++) {
01460 lval = fGradVecLin[i];
01461 if (TMath::Abs(lval)>=useLThresh) {
01462 lcoef = fRuleEnsemble->GetLinCoefficients(i) + (fGDPathStep*lval/fRuleEnsemble->GetLinNorm(i));
01463 fRuleEnsemble->SetLinCoefficient(i,lcoef);
01464 }
01465 }
01466
01467 Double_t offset = CalcAverageResponse();
01468 fRuleEnsemble->SetOffset( offset );
01469 }
01470 }
01471
01472
01473 void TMVA::RuleFitParams::CalcTstAverageResponse()
01474 {
01475
01476
01477 for (UInt_t itau=0; itau<fGDNTau; itau++) {
01478 if (fGDErrTstOK[itau]) {
01479 fGDOfsTst[itau] = 0;
01480 for (UInt_t s=0; s<fNLinear; s++) {
01481 fGDOfsTst[itau] -= fGDCoefLinTst[itau][s] * fAverageSelectorPath[s];
01482 }
01483 for (UInt_t r=0; r<fNRules; r++) {
01484 fGDOfsTst[itau] -= fGDCoefTst[itau][r] * fAverageRulePath[r];
01485 }
01486 }
01487 }
01488
01489 }
01490
01491
01492 Double_t TMVA::RuleFitParams::CalcAverageResponse()
01493 {
01494
01495
01496
01497 Double_t ofs = 0;
01498 for (UInt_t s=0; s<fNLinear; s++) {
01499 ofs -= fRuleEnsemble->GetLinCoefficients(s) * fAverageSelectorPath[s];
01500 }
01501 for (UInt_t r=0; r<fNRules; r++) {
01502 ofs -= fRuleEnsemble->GetRules(r)->GetCoefficient() * fAverageRulePath[r];
01503 }
01504 return ofs;
01505 }
01506
01507
01508 Double_t TMVA::RuleFitParams::CalcAverageTruth()
01509 {
01510
01511
01512 if (fPathIdx2<=fPathIdx1) {
01513 Log() << kFATAL << "<CalcAverageTruth> Invalid start/end indices!" << Endl;
01514 return 0;
01515 }
01516 Double_t sum=0;
01517 Double_t ensig=0;
01518 Double_t enbkg=0;
01519 const std::vector<Event *> *events = &(fRuleFit->GetTrainingEvents());
01520 for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
01521 Double_t ew = fRuleFit->GetTrainingEventWeight(i);
01522 if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) ensig += ew;
01523 else enbkg += ew;
01524 sum += ew*(fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])?1.0:-1.0);
01525 }
01526 Log() << kVERBOSE << "Effective number of signal / background = " << ensig << " / " << enbkg << Endl;
01527
01528 return sum/fNEveEffPath;
01529 }
01530
01531
01532
01533 Int_t TMVA::RuleFitParams::Type( const Event * e ) const {
01534 return (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e) ? 1:-1);
01535 }
01536
01537
01538
void TMVA::RuleFitParams::SetMsgType( EMsgType t ) {
   // Set the minimum message severity that the logger will print.
   fLogger->SetMinType(t);
}