00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103 #include <iomanip>
00104 #include <vector>
00105 #include <cstdlib>
00106
00107 #include "TMatrixD.h"
00108 #include "TVector.h"
00109 #include "TMath.h"
00110 #include "TObjString.h"
00111 #include "TFile.h"
00112 #include "TKey.h"
00113 #include "TH1.h"
00114 #include "TClass.h"
00115 #include "Riostream.h"
00116
00117 #include "TMVA/ClassifierFactory.h"
00118 #include "TMVA/MethodLikelihood.h"
00119 #include "TMVA/Tools.h"
00120 #include "TMVA/Ranking.h"
00121
00122 REGISTER_METHOD(Likelihood)
00123
00124 ClassImp(TMVA::MethodLikelihood)
00125
00126
00127 TMVA::MethodLikelihood::MethodLikelihood( const TString& jobName,
00128 const TString& methodTitle,
00129 DataSetInfo& theData,
00130 const TString& theOption,
00131 TDirectory* theTargetDir ) :
00132 TMVA::MethodBase( jobName, Types::kLikelihood, methodTitle, theData, theOption, theTargetDir ),
00133 fEpsilon ( 1.e3 * DBL_MIN ),
00134 fTransformLikelihoodOutput( kFALSE ),
00135 fDropVariable ( 0 ),
00136 fHistSig ( 0 ),
00137 fHistBgd ( 0 ),
00138 fHistSig_smooth( 0 ),
00139 fHistBgd_smooth( 0 ),
00140 fDefaultPDFLik ( 0 ),
00141 fPDFSig ( 0 ),
00142 fPDFBgd ( 0 ),
00143 fNsmooth ( 2 ),
00144 fAverageEvtPerBin( 0 ),
00145 fKDEfineFactor ( 0 )
00146 {
00147
00148 }
00149
00150
00151 TMVA::MethodLikelihood::MethodLikelihood( DataSetInfo& theData,
00152 const TString& theWeightFile,
00153 TDirectory* theTargetDir ) :
00154 TMVA::MethodBase( Types::kLikelihood, theData, theWeightFile, theTargetDir ),
00155 fEpsilon ( 1.e3 * DBL_MIN ),
00156 fTransformLikelihoodOutput( kFALSE ),
00157 fDropVariable ( 0 ),
00158 fHistSig ( 0 ),
00159 fHistBgd ( 0 ),
00160 fHistSig_smooth( 0 ),
00161 fHistBgd_smooth( 0 ),
00162 fDefaultPDFLik ( 0 ),
00163 fPDFSig ( 0 ),
00164 fPDFBgd ( 0 ),
00165 fNsmooth ( 2 ),
00166 fAverageEvtPerBin( 0 ),
00167 fKDEfineFactor ( 0 )
00168 {
00169
00170 }
00171
00172
00173 TMVA::MethodLikelihood::~MethodLikelihood( void )
00174 {
00175
00176 if (NULL != fDefaultPDFLik) delete fDefaultPDFLik;
00177 if (NULL != fHistSig) delete fHistSig;
00178 if (NULL != fHistBgd) delete fHistBgd;
00179 if (NULL != fHistSig_smooth) delete fHistSig_smooth;
00180 if (NULL != fHistBgd_smooth) delete fHistBgd_smooth;
00181 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00182 if ((*fPDFSig)[ivar] !=0) delete (*fPDFSig)[ivar];
00183 if ((*fPDFBgd)[ivar] !=0) delete (*fPDFBgd)[ivar];
00184 }
00185 if (NULL != fPDFSig) delete fPDFSig;
00186 if (NULL != fPDFBgd) delete fPDFBgd;
00187 }
00188
00189
00190 Bool_t TMVA::MethodLikelihood::HasAnalysisType( Types::EAnalysisType type,
00191 UInt_t numberClasses, UInt_t )
00192 {
00193
00194 if (type == Types::kClassification && numberClasses == 2) return kTRUE;
00195 return kFALSE;
00196 }
00197
00198
00199 void TMVA::MethodLikelihood::Init( void )
00200 {
00201
00202
00203
00204 fDropVariable = -1;
00205 fHistSig = new std::vector<TH1*> ( GetNvar(), (TH1*)0 );
00206 fHistBgd = new std::vector<TH1*> ( GetNvar(), (TH1*)0 );
00207 fHistSig_smooth = new std::vector<TH1*> ( GetNvar(), (TH1*)0 );
00208 fHistBgd_smooth = new std::vector<TH1*> ( GetNvar(), (TH1*)0 );
00209 fPDFSig = new std::vector<TMVA::PDF*>( GetNvar(), (TMVA::PDF*)0 );
00210 fPDFBgd = new std::vector<TMVA::PDF*>( GetNvar(), (TMVA::PDF*)0 );
00211 }
00212
00213
00214 void TMVA::MethodLikelihood::DeclareOptions()
00215 {
00216
00217
00218
00219 DeclareOptionRef( fTransformLikelihoodOutput = kFALSE, "TransformOutput",
00220 "Transform likelihood output by inverse sigmoid function" );
00221
00222
00223
00224
00225 TString updatedOptions = GetOptions();
00226 fDefaultPDFLik = new PDF( TString(GetName()) + " PDF", updatedOptions );
00227 fDefaultPDFLik->DeclareOptions();
00228 fDefaultPDFLik->ParseOptions();
00229 updatedOptions = fDefaultPDFLik->GetOptions();
00230 for (UInt_t ivar = 0; ivar< DataInfo().GetNVariables(); ivar++) {
00231 (*fPDFSig)[ivar] = new PDF( Form("%s PDF Sig[%d]", GetName(), ivar), updatedOptions,
00232 Form("Sig[%d]",ivar), fDefaultPDFLik );
00233 (*fPDFSig)[ivar]->DeclareOptions();
00234 (*fPDFSig)[ivar]->ParseOptions();
00235 updatedOptions = (*fPDFSig)[ivar]->GetOptions();
00236 (*fPDFBgd)[ivar] = new PDF( Form("%s PDF Bkg[%d]", GetName(), ivar), updatedOptions,
00237 Form("Bkg[%d]",ivar), fDefaultPDFLik );
00238 (*fPDFBgd)[ivar]->DeclareOptions();
00239 (*fPDFBgd)[ivar]->ParseOptions();
00240 updatedOptions = (*fPDFBgd)[ivar]->GetOptions();
00241 }
00242
00243
00244 SetOptions( updatedOptions );
00245 }
00246
00247
00248 void TMVA::MethodLikelihood::DeclareCompatibilityOptions()
00249 {
00250 MethodBase::DeclareCompatibilityOptions();
00251 DeclareOptionRef( fNsmooth = 1, "NSmooth",
00252 "Number of smoothing iterations for the input histograms");
00253 DeclareOptionRef( fAverageEvtPerBin = 50, "NAvEvtPerBin",
00254 "Average number of events per PDF bin");
00255 DeclareOptionRef( fKDEfineFactor =1. , "KDEFineFactor",
00256 "Fine tuning factor for Adaptive KDE: Factor to multyply the width of the kernel");
00257 DeclareOptionRef( fBorderMethodString = "None", "KDEborder",
00258 "Border effects treatment (1=no treatment , 2=kernel renormalization, 3=sample mirroring)" );
00259 DeclareOptionRef( fKDEiterString = "Nonadaptive", "KDEiter",
00260 "Number of iterations (1=non-adaptive, 2=adaptive)" );
00261 DeclareOptionRef( fKDEtypeString = "Gauss", "KDEtype",
00262 "KDE kernel type (1=Gauss)" );
00263 fAverageEvtPerBinVarS = new Int_t[GetNvar()];
00264 fAverageEvtPerBinVarB = new Int_t[GetNvar()];
00265 fNsmoothVarS = new Int_t[GetNvar()];
00266 fNsmoothVarB = new Int_t[GetNvar()];
00267 fInterpolateString = new TString[GetNvar()];
00268 for(UInt_t i=0; i<GetNvar(); ++i) {
00269 fAverageEvtPerBinVarS[i] = fAverageEvtPerBinVarB[i] = 0;
00270 fNsmoothVarS[i] = fNsmoothVarB[i] = 0;
00271 fInterpolateString[i] = "";
00272 }
00273 DeclareOptionRef( fAverageEvtPerBinVarS, GetNvar(), "NAvEvtPerBinSig",
00274 "Average num of events per PDF bin and variable (signal)");
00275 DeclareOptionRef( fAverageEvtPerBinVarB, GetNvar(), "NAvEvtPerBinBkg",
00276 "Average num of events per PDF bin and variable (background)");
00277 DeclareOptionRef(fNsmoothVarS, GetNvar(), "NSmoothSig",
00278 "Number of smoothing iterations for the input histograms");
00279 DeclareOptionRef(fNsmoothVarB, GetNvar(), "NSmoothBkg",
00280 "Number of smoothing iterations for the input histograms");
00281 DeclareOptionRef(fInterpolateString, GetNvar(), "PDFInterpol", "Method of interpolating reference histograms (e.g. Spline2 or KDE)");
00282 }
00283
00284
00285
00286
00287
00288 void TMVA::MethodLikelihood::ProcessOptions()
00289 {
00290
00291
00292
00293 SetSignalReferenceCut( TransformLikelihoodOutput( 0.5, 0.5 ) );
00294
00295 fDefaultPDFLik->ProcessOptions();
00296 for (UInt_t ivar = 0; ivar< DataInfo().GetNVariables(); ivar++) {
00297 (*fPDFBgd)[ivar]->ProcessOptions();
00298 (*fPDFSig)[ivar]->ProcessOptions();
00299 }
00300 }
00301
00302
00303 void TMVA::MethodLikelihood::Train( void )
00304 {
00305
00306
00307
00308
00309
00310
00311 vector<Double_t> xmin(GetNvar()), xmax(GetNvar());
00312 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {xmin[ivar]=1e30; xmax[ivar]=-1e30;}
00313 for (Int_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
00314
00315
00316 const Event* origEv = Data()->GetEvent(ievt);
00317 if (IgnoreEventsWithNegWeightsInTraining() && origEv->GetWeight()<=0) continue;
00318
00319 for (int cls=0;cls<2;cls++){
00320 GetTransformationHandler().SetTransformationReferenceClass(cls);
00321 const Event* ev = GetTransformationHandler().Transform( origEv );
00322 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00323 Float_t value = ev->GetValue(ivar);
00324 if (value < xmin[ivar]) xmin[ivar] = value;
00325 if (value > xmax[ivar]) xmax[ivar] = value;
00326 }
00327 }
00328 }
00329
00330
00331
00332 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00333 TString var = (*fInputVars)[ivar];
00334
00335
00336
00337
00338
00339
00340 if (DataInfo().GetVariableInfo(ivar).GetVarType() == 'I') {
00341
00342 Int_t ixmin = TMath::Nint( xmin[ivar] );
00343 xmax[ivar]=xmax[ivar]+1;
00344 Int_t ixmax = TMath::Nint( xmax[ivar] );
00345 Int_t nbins = ixmax - ixmin;
00346
00347 (*fHistSig)[ivar] = new TH1F( var + "_sig", var + " signal training", nbins, ixmin, ixmax );
00348 (*fHistBgd)[ivar] = new TH1F( var + "_bgd", var + " background training", nbins, ixmin, ixmax );
00349 } else {
00350
00351 UInt_t minNEvt = TMath::Min(Data()->GetNEvtSigTrain(),Data()->GetNEvtBkgdTrain());
00352 Int_t nbinsS = (*fPDFSig)[ivar]->GetHistNBins( minNEvt );
00353 Int_t nbinsB = (*fPDFBgd)[ivar]->GetHistNBins( minNEvt );
00354
00355 (*fHistSig)[ivar] = new TH1F( Form("%s_sig",var.Data()),
00356 Form("%s signal training",var.Data()), nbinsS, xmin[ivar], xmax[ivar] );
00357 (*fHistBgd)[ivar] = new TH1F( Form("%s_bgd",var.Data()),
00358 Form("%s background training",var.Data()), nbinsB, xmin[ivar], xmax[ivar] );
00359 }
00360 }
00361
00362
00363 Log() << kINFO << "Filling reference histograms" << Endl;
00364
00365
00366 for (Int_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
00367
00368
00369
00370 const Event* origEv = Data()->GetEvent(ievt);
00371 if (IgnoreEventsWithNegWeightsInTraining() && origEv->GetWeight()<=0) continue;
00372 GetTransformationHandler().SetTransformationReferenceClass( origEv->GetClass() );
00373 const Event* ev = GetTransformationHandler().Transform( origEv );
00374
00375
00376 Float_t weight = ev->GetWeight();
00377
00378
00379 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00380 Double_t value = ev->GetValue(ivar);
00381
00382 if (value >= xmax[ivar]) value = xmax[ivar] - 1.0e-10;
00383 else if (value < xmin[ivar]) value = xmin[ivar] + 1.0e-10;
00384
00385 if (value >=(*fHistSig)[ivar]->GetXaxis()->GetXmax() ||
00386 value <(*fHistSig)[ivar]->GetXaxis()->GetXmin()){
00387 Log()<<kWARNING
00388 <<"error in filling likelihood reference histograms var="
00389 <<(*fInputVars)[ivar]
00390 << ", xmin="<<(*fHistSig)[ivar]->GetXaxis()->GetXmin()
00391 << ", value="<<value
00392 << ", xmax="<<(*fHistSig)[ivar]->GetXaxis()->GetXmax()
00393 << Endl;
00394 }
00395 if (DataInfo().IsSignal(ev)) (*fHistSig)[ivar]->Fill( value, weight );
00396 else (*fHistBgd)[ivar]->Fill( value, weight );
00397 }
00398 }
00399
00400
00401 Log() << kINFO << "Building PDF out of reference histograms" << Endl;
00402 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00403
00404
00405
00406 (*fPDFSig)[ivar]->BuildPDF( (*fHistSig)[ivar] );
00407 (*fPDFBgd)[ivar]->BuildPDF( (*fHistBgd)[ivar] );
00408
00409 (*fPDFSig)[ivar]->ValidatePDF( (*fHistSig)[ivar] );
00410 (*fPDFBgd)[ivar]->ValidatePDF( (*fHistBgd)[ivar] );
00411
00412
00413 if ((*fPDFSig)[ivar]->GetSmoothedHist() != 0) (*fHistSig_smooth)[ivar] = (*fPDFSig)[ivar]->GetSmoothedHist();
00414 if ((*fPDFBgd)[ivar]->GetSmoothedHist() != 0) (*fHistBgd_smooth)[ivar] = (*fPDFBgd)[ivar]->GetSmoothedHist();
00415 }
00416 }
00417
00418
00419 Double_t TMVA::MethodLikelihood::GetMvaValue( Double_t* err, Double_t* errUpper )
00420 {
00421
00422
00423 UInt_t ivar;
00424
00425
00426 NoErrorCalc(err, errUpper);
00427
00428
00429 TVector vs( GetNvar() );
00430 TVector vb( GetNvar() );
00431
00432
00433
00434
00435
00436
00437 GetTransformationHandler().SetTransformationReferenceClass( 0 );
00438 const Event* ev = GetEvent();
00439 for (ivar=0; ivar<GetNvar(); ivar++) vs(ivar) = ev->GetValue(ivar);
00440
00441
00442
00443 GetTransformationHandler().SetTransformationReferenceClass( 1 );
00444 ev = GetEvent();
00445 for (ivar=0; ivar<GetNvar(); ivar++) vb(ivar) = ev->GetValue(ivar);
00446
00447
00448 Double_t ps(1), pb(1), p(0);
00449 for (ivar=0; ivar<GetNvar(); ivar++) {
00450
00451
00452 if ((Int_t)ivar == fDropVariable) continue;
00453
00454 Double_t x[2] = { vs(ivar), vb(ivar) };
00455
00456 for (UInt_t itype=0; itype < 2; itype++) {
00457
00458
00459 if (x[itype] >= (*fPDFSig)[ivar]->GetXmax()) x[itype] = (*fPDFSig)[ivar]->GetXmax() - 1.0e-10;
00460 else if (x[itype] < (*fPDFSig)[ivar]->GetXmin()) x[itype] = (*fPDFSig)[ivar]->GetXmin();
00461
00462
00463 PDF* pdf = (itype == 0) ? (*fPDFSig)[ivar] : (*fPDFBgd)[ivar];
00464 if (pdf == 0) Log() << kFATAL << "<GetMvaValue> Reference histograms don't exist" << Endl;
00465 TH1* hist = pdf->GetPDFHist();
00466
00467
00468
00469 Int_t bin = hist->FindBin(x[itype]);
00470
00471
00472
00473 if ((*fPDFSig)[ivar]->GetInterpolMethod() == TMVA::PDF::kSpline0 ||
00474 DataInfo().GetVariableInfo(ivar).GetVarType() == 'N') {
00475 p = TMath::Max( hist->GetBinContent(bin), fEpsilon );
00476 } else {
00477 Int_t nextbin = bin;
00478 if ((x[itype] > hist->GetBinCenter(bin) && bin != hist->GetNbinsX()) || bin == 1)
00479 nextbin++;
00480 else
00481 nextbin--;
00482
00483
00484 Double_t dx = hist->GetBinCenter(bin) - hist->GetBinCenter(nextbin);
00485 Double_t dy = hist->GetBinContent(bin) - hist->GetBinContent(nextbin);
00486 Double_t like = hist->GetBinContent(bin) + (x[itype] - hist->GetBinCenter(bin)) * dy/dx;
00487
00488 p = TMath::Max( like, fEpsilon );
00489 }
00490
00491 if (itype == 0) ps *= p;
00492 else pb *= p;
00493 }
00494 }
00495
00496
00497 return TransformLikelihoodOutput( ps, pb );
00498 }
00499
00500
00501 Double_t TMVA::MethodLikelihood::TransformLikelihoodOutput( Double_t ps, Double_t pb ) const
00502 {
00503
00504 if (ps < fEpsilon) ps = fEpsilon;
00505 if (pb < fEpsilon) pb = fEpsilon;
00506 Double_t r = ps/(ps + pb);
00507 if (r >= 1.0) r = 1. - 1.e-15;
00508
00509 if (fTransformLikelihoodOutput) {
00510
00511
00512
00513 if (r <= 0.0) r = fEpsilon;
00514 else if (r >= 1.0) r = 1. - 1.e-15;
00515
00516 Double_t tau = 15.0;
00517 r = - TMath::Log(1.0/r - 1.0)/tau;
00518 }
00519
00520 return r;
00521 }
00522
00523
00524 void TMVA::MethodLikelihood::WriteOptionsToStream( ostream& o, const TString& prefix ) const
00525 {
00526
00527 Configurable::WriteOptionsToStream( o, prefix);
00528
00529
00530 if (fDefaultPDFLik != 0) {
00531 o << prefix << endl << prefix << "#Default Likelihood PDF Options:" << endl << prefix << endl;
00532 fDefaultPDFLik->WriteOptionsToStream( o, prefix );
00533 }
00534 for (UInt_t ivar = 0; ivar < fPDFSig->size(); ivar++) {
00535 if ((*fPDFSig)[ivar] != 0) {
00536 o << prefix << endl << prefix << Form("#Signal[%d] Likelihood PDF Options:",ivar) << endl << prefix << endl;
00537 (*fPDFSig)[ivar]->WriteOptionsToStream( o, prefix );
00538 }
00539 if ((*fPDFBgd)[ivar] != 0) {
00540 o << prefix << endl << prefix << "#Background[%d] Likelihood PDF Options:" << endl << prefix << endl;
00541 (*fPDFBgd)[ivar]->WriteOptionsToStream( o, prefix );
00542 }
00543 }
00544 }
00545
00546
00547 void TMVA::MethodLikelihood::AddWeightsXMLTo( void* parent ) const
00548 {
00549
00550 void* wght = gTools().AddChild(parent, "Weights");
00551 gTools().AddAttr(wght, "NVariables", GetNvar());
00552 gTools().AddAttr(wght, "NClasses", 2);
00553 void* pdfwrap;
00554 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00555 if ( (*fPDFSig)[ivar]==0 || (*fPDFBgd)[ivar]==0 )
00556 Log() << kFATAL << "Reference histograms for variable " << ivar
00557 << " don't exist, can't write it to weight file" << Endl;
00558 pdfwrap = gTools().AddChild(wght, "PDFDescriptor");
00559 gTools().AddAttr(pdfwrap, "VarIndex", ivar);
00560 gTools().AddAttr(pdfwrap, "ClassIndex", 0);
00561 (*fPDFSig)[ivar]->AddXMLTo(pdfwrap);
00562 pdfwrap = gTools().AddChild(wght, "PDFDescriptor");
00563 gTools().AddAttr(pdfwrap, "VarIndex", ivar);
00564 gTools().AddAttr(pdfwrap, "ClassIndex", 1);
00565 (*fPDFBgd)[ivar]->AddXMLTo(pdfwrap);
00566 }
00567 }
00568
00569
00570 const TMVA::Ranking* TMVA::MethodLikelihood::CreateRanking()
00571 {
00572
00573
00574
00575 if (fRanking) delete fRanking;
00576 fRanking = new Ranking( GetName(), "Delta Separation" );
00577
00578 Double_t sepRef = -1, sep = -1;
00579 for (Int_t ivar=-1; ivar<(Int_t)GetNvar(); ivar++) {
00580
00581
00582 fDropVariable = ivar;
00583
00584 TString nameS = Form( "rS_%i", ivar+1 );
00585 TString nameB = Form( "rB_%i", ivar+1 );
00586 TH1* rS = new TH1F( nameS, nameS, 80, 0, 1 );
00587 TH1* rB = new TH1F( nameB, nameB, 80, 0, 1 );
00588
00589
00590 for (Int_t ievt=0; ievt<Data()->GetNTrainingEvents(); ievt++) {
00591
00592 const Event* origEv = Data()->GetEvent(ievt);
00593 GetTransformationHandler().SetTransformationReferenceClass( origEv->GetClass() );
00594 const Event* ev = GetTransformationHandler().Transform(Data()->GetEvent(ievt));
00595
00596 Double_t lk = this->GetMvaValue();
00597 Double_t w = ev->GetWeight();
00598 if (DataInfo().IsSignal(ev)) rS->Fill( lk, w );
00599 else rB->Fill( lk, w );
00600 }
00601
00602
00603 sep = TMVA::gTools().GetSeparation( rS, rB );
00604 if (ivar == -1) sepRef = sep;
00605 sep = sepRef - sep;
00606
00607
00608 delete rS;
00609 delete rB;
00610
00611 if (ivar >= 0) fRanking->AddRank( Rank( DataInfo().GetVariableInfo(ivar).GetInternalName(), sep ) );
00612 }
00613
00614 fDropVariable = -1;
00615
00616 return fRanking;
00617 }
00618
00619
00620 void TMVA::MethodLikelihood::WriteWeightsToStream( TFile& ) const
00621 {
00622
00623 TString pname = "PDF_";
00624 for (UInt_t ivar=0; ivar<GetNvar(); ivar++){
00625 (*fPDFSig)[ivar]->Write( pname + GetInputVar( ivar ) + "_S" );
00626 (*fPDFBgd)[ivar]->Write( pname + GetInputVar( ivar ) + "_B" );
00627 }
00628 }
00629
00630 void TMVA::MethodLikelihood::ReadWeightsFromXML(void* wghtnode)
00631 {
00632
00633 TString pname = "PDF_";
00634 Bool_t addDirStatus = TH1::AddDirectoryStatus();
00635 TH1::AddDirectory(0);
00636 UInt_t nvars=0;
00637 gTools().ReadAttr(wghtnode, "NVariables",nvars);
00638 void* descnode = gTools().GetChild(wghtnode);
00639 for (UInt_t ivar=0; ivar<nvars; ivar++){
00640 void* pdfnode = gTools().GetChild(descnode);
00641 Log() << kINFO << "Reading signal and background PDF for variable: " << GetInputVar( ivar ) << Endl;
00642 if ((*fPDFSig)[ivar] !=0) delete (*fPDFSig)[ivar];
00643 if ((*fPDFBgd)[ivar] !=0) delete (*fPDFBgd)[ivar];
00644 (*fPDFSig)[ivar] = new PDF( GetInputVar( ivar ) + " PDF Sig" );
00645 (*fPDFBgd)[ivar] = new PDF( GetInputVar( ivar ) + " PDF Bkg" );
00646 (*fPDFSig)[ivar]->SetReadingVersion( GetTrainingTMVAVersionCode() );
00647 (*fPDFBgd)[ivar]->SetReadingVersion( GetTrainingTMVAVersionCode() );
00648 (*(*fPDFSig)[ivar]).ReadXML(pdfnode);
00649 descnode = gTools().GetNextChild(descnode);
00650 pdfnode = gTools().GetChild(descnode);
00651 (*(*fPDFBgd)[ivar]).ReadXML(pdfnode);
00652 descnode = gTools().GetNextChild(descnode);
00653 }
00654 TH1::AddDirectory(addDirStatus);
00655 }
00656
00657 void TMVA::MethodLikelihood::ReadWeightsFromStream( istream & istr )
00658 {
00659
00660
00661 TString pname = "PDF_";
00662 Bool_t addDirStatus = TH1::AddDirectoryStatus();
00663 TH1::AddDirectory(0);
00664 for (UInt_t ivar=0; ivar<GetNvar(); ivar++){
00665 Log() << kINFO << "Reading signal and background PDF for variable: " << GetInputVar( ivar ) << Endl;
00666 if ((*fPDFSig)[ivar] !=0) delete (*fPDFSig)[ivar];
00667 if ((*fPDFBgd)[ivar] !=0) delete (*fPDFBgd)[ivar];
00668 (*fPDFSig)[ivar] = new PDF(GetInputVar( ivar ) + " PDF Sig" );
00669 (*fPDFBgd)[ivar] = new PDF(GetInputVar( ivar ) + " PDF Bkg");
00670 (*fPDFSig)[ivar]->SetReadingVersion( GetTrainingTMVAVersionCode() );
00671 (*fPDFBgd)[ivar]->SetReadingVersion( GetTrainingTMVAVersionCode() );
00672 istr >> *(*fPDFSig)[ivar];
00673 istr >> *(*fPDFBgd)[ivar];
00674 }
00675 TH1::AddDirectory(addDirStatus);
00676 }
00677
00678
00679 void TMVA::MethodLikelihood::ReadWeightsFromStream( TFile& rf )
00680 {
00681
00682 TString pname = "PDF_";
00683 Bool_t addDirStatus = TH1::AddDirectoryStatus();
00684 TH1::AddDirectory(0);
00685 for (UInt_t ivar=0; ivar<GetNvar(); ivar++){
00686 (*fPDFSig)[ivar] = (TMVA::PDF*)rf.Get( Form( "PDF_%s_S", GetInputVar( ivar ).Data() ) );
00687 (*fPDFBgd)[ivar] = (TMVA::PDF*)rf.Get( Form( "PDF_%s_B", GetInputVar( ivar ).Data() ) );
00688 }
00689 TH1::AddDirectory(addDirStatus);
00690 }
00691
00692
00693 void TMVA::MethodLikelihood::WriteMonitoringHistosToFile( void ) const
00694 {
00695
00696
00697 Log() << kINFO << "Write monitoring histograms to file: " << BaseDir()->GetPath() << Endl;
00698 BaseDir()->cd();
00699
00700 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00701 (*fHistSig)[ivar]->Write();
00702 (*fHistBgd)[ivar]->Write();
00703 if ((*fHistSig_smooth)[ivar] != 0) (*fHistSig_smooth)[ivar]->Write();
00704 if ((*fHistBgd_smooth)[ivar] != 0) (*fHistBgd_smooth)[ivar]->Write();
00705 (*fPDFSig)[ivar]->GetPDFHist()->Write();
00706 (*fPDFBgd)[ivar]->GetPDFHist()->Write();
00707
00708 if ((*fPDFSig)[ivar]->GetNSmoothHist() != 0) (*fPDFSig)[ivar]->GetNSmoothHist()->Write();
00709 if ((*fPDFBgd)[ivar]->GetNSmoothHist() != 0) (*fPDFBgd)[ivar]->GetNSmoothHist()->Write();
00710
00711
00712 Float_t xmin=((*fPDFSig)[ivar]->GetPDFHist()->GetXaxis())->GetXmin();
00713 Float_t xmax=((*fPDFSig)[ivar]->GetPDFHist()->GetXaxis())->GetXmax();
00714 TH1F* mm = new TH1F( (*fInputVars)[ivar]+"_additional_check",
00715 (*fInputVars)[ivar]+"_additional_check", 15000, xmin, xmax );
00716 Double_t intBin = (xmax-xmin)/15000;
00717 for (Int_t bin=0; bin < 15000; bin++) {
00718 Double_t x = (bin + 0.5)*intBin + xmin;
00719 mm->SetBinContent(bin+1 ,(*fPDFSig)[ivar]->GetVal(x));
00720 }
00721 mm->Write();
00722
00723
00724 TH1* h[2] = { (*fHistSig)[ivar], (*fHistBgd)[ivar] };
00725 for (UInt_t i=0; i<2; i++) {
00726 TH1* hclone = (TH1F*)h[i]->Clone( TString(h[i]->GetName()) + "_nice" );
00727 hclone->SetName ( TString(h[i]->GetName()) + "_nice" );
00728 hclone->SetTitle( TString(h[i]->GetTitle()) + "" );
00729 if (hclone->GetNbinsX() > 100) {
00730 Int_t resFactor = 5;
00731 hclone->Rebin( resFactor );
00732 hclone->Scale( 1.0/resFactor );
00733 }
00734 hclone->Write();
00735 }
00736
00737 }
00738 }
00739
00740
00741 void TMVA::MethodLikelihood::MakeClassSpecificHeader( std::ostream& fout, const TString& ) const
00742 {
00743
00744 fout << "#include <math.h>" << endl;
00745 }
00746
00747
00748 void TMVA::MethodLikelihood::MakeClassSpecific( std::ostream& fout, const TString& className ) const
00749 {
00750
00751 Int_t dp = fout.precision();
00752 fout << " double fEpsilon;" << endl;
00753
00754 Int_t * nbin = new Int_t[GetNvar()];
00755
00756 Int_t nbinMax=-1;
00757 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00758 nbin[ivar]=(*fPDFSig)[ivar]->GetPDFHist()->GetNbinsX();
00759 if (nbin[ivar] > nbinMax) nbinMax=nbin[ivar];
00760 }
00761
00762 fout << " static float fRefS[][" << nbinMax << "]; "
00763 << "// signal reference vector [nvars][max_nbins]" << endl;
00764 fout << " static float fRefB[][" << nbinMax << "]; "
00765 << "// backgr reference vector [nvars][max_nbins]" << endl << endl;
00766 fout << "// if a variable has its PDF encoded as a spline0 --> treat it like an Integer valued one" <<endl;
00767 fout << " bool fHasDiscretPDF[" << GetNvar() <<"]; "<< endl;
00768 fout << " int fNbin[" << GetNvar() << "]; "
00769 << "// number of bins (discrete variables may have less bins)" << endl;
00770 fout << " double fHistMin[" << GetNvar() << "]; " << endl;
00771 fout << " double fHistMax[" << GetNvar() << "]; " << endl;
00772
00773 fout << " double TransformLikelihoodOutput( double, double ) const;" << endl;
00774 fout << "};" << endl;
00775 fout << "" << endl;
00776 fout << "inline void " << className << "::Initialize() " << endl;
00777 fout << "{" << endl;
00778 fout << " fEpsilon = " << fEpsilon << ";" << endl;
00779 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00780 fout << " fNbin[" << ivar << "] = " << (*fPDFSig)[ivar]->GetPDFHist()->GetNbinsX() << ";" << endl;
00781 fout << " fHistMin[" << ivar << "] = " << (*fPDFSig)[ivar]->GetPDFHist()->GetXaxis()->GetXmin() << ";" << endl;
00782 fout << " fHistMax[" << ivar << "] = " << (*fPDFSig)[ivar]->GetPDFHist()->GetXaxis()->GetXmax() << ";" << endl;
00783
00784 if ((((*fPDFSig)[ivar]->GetPDFHist()->GetNbinsX() != nbin[ivar] ||
00785 (*fPDFBgd)[ivar]->GetPDFHist()->GetNbinsX() != nbin[ivar])
00786 ) ||
00787 (*fPDFSig)[ivar]->GetPDFHist()->GetNbinsX() != (*fPDFBgd)[ivar]->GetPDFHist()->GetNbinsX()) {
00788 Log() << kFATAL << "<MakeClassSpecific> Mismatch in binning of variable "
00789 << "\"" << GetOriginalVarName(ivar) << "\" of type: \'" << DataInfo().GetVariableInfo(ivar).GetVarType()
00790 << "\' : "
00791 << "nxS = " << (*fPDFSig)[ivar]->GetPDFHist()->GetNbinsX() << ", "
00792 << "nxB = " << (*fPDFBgd)[ivar]->GetPDFHist()->GetNbinsX()
00793 << " while we expect " << nbin[ivar]
00794 << Endl;
00795 }
00796 }
00797 for (UInt_t ivar=0; ivar<GetNvar(); ivar++){
00798 if ((*fPDFSig)[ivar]->GetInterpolMethod() == TMVA::PDF::kSpline0)
00799 fout << " fHasDiscretPDF[" << ivar <<"] = true; " << endl;
00800 else
00801 fout << " fHasDiscretPDF[" << ivar <<"] = false; " << endl;
00802 }
00803
00804 fout << "}" << endl << endl;
00805
00806 fout << "inline double " << className
00807 << "::GetMvaValue__( const std::vector<double>& inputValues ) const" << endl;
00808 fout << "{" << endl;
00809 fout << " double ps(1), pb(1);" << endl;
00810 fout << " std::vector<double> inputValuesSig = inputValues;" << endl;
00811 fout << " std::vector<double> inputValuesBgd = inputValues;" << endl;
00812 if (GetTransformationHandler().GetTransformationList().GetSize() != 0) {
00813 fout << " Transform(inputValuesSig,0);" << endl;
00814 fout << " Transform(inputValuesBgd,1);" << endl;
00815 }
00816 fout << " for (size_t ivar = 0; ivar < GetNvar(); ivar++) {" << endl;
00817 fout << endl;
00818 fout << " // dummy at present... will be used for variable transforms" << endl;
00819 fout << " double x[2] = { inputValuesSig[ivar], inputValuesBgd[ivar] };" << endl;
00820 fout << endl;
00821 fout << " for (int itype=0; itype < 2; itype++) {" << endl;
00822 fout << endl;
00823 fout << " // interpolate linearly between adjacent bins" << endl;
00824 fout << " // this is not useful for discrete variables (or forced Spline0)" << endl;
00825 fout << " int bin = int((x[itype] - fHistMin[ivar])/(fHistMax[ivar] - fHistMin[ivar])*fNbin[ivar]) + 0;" << endl;
00826 fout << endl;
00827 fout << " // since the test data sample is in general different from the training sample" << endl;
00828 fout << " // it can happen that the min/max of the training sample are trespassed --> correct this" << endl;
00829 fout << " if (bin < 0) {" << endl;
00830 fout << " bin = 0;" << endl;
00831 fout << " x[itype] = fHistMin[ivar];" << endl;
00832 fout << " }" << endl;
00833 fout << " else if (bin >= fNbin[ivar]) {" << endl;
00834 fout << " bin = fNbin[ivar]-1;" << endl;
00835 fout << " x[itype] = fHistMax[ivar];" << endl;
00836 fout << " }" << endl;
00837 fout << endl;
00838 fout << " // find corresponding histogram from cached indices" << endl;
00839 fout << " float ref = (itype == 0) ? fRefS[ivar][bin] : fRefB[ivar][bin];" << endl;
00840 fout << endl;
00841 fout << " // sanity check" << endl;
00842 fout << " if (ref < 0) {" << endl;
00843 fout << " std::cout << \"Fatal error in " << className
00844 << ": bin entry < 0 ==> abort\" << std::endl;" << endl;
00845 fout << " exit(1);" << endl;
00846 fout << " }" << endl;
00847 fout << endl;
00848 fout << " double p = ref;" << endl;
00849 fout << endl;
00850 fout << " if (GetType(ivar) != 'I' && !fHasDiscretPDF[ivar]) {" << endl;
00851 fout << " float bincenter = (bin + 0.5)/fNbin[ivar]*(fHistMax[ivar] - fHistMin[ivar]) + fHistMin[ivar];" << endl;
00852 fout << " int nextbin = bin;" << endl;
00853 fout << " if ((x[itype] > bincenter && bin != fNbin[ivar]-1) || bin == 0) " << endl;
00854 fout << " nextbin++;" << endl;
00855 fout << " else" << endl;
00856 fout << " nextbin--; " << endl;
00857 fout << endl;
00858 fout << " double refnext = (itype == 0) ? fRefS[ivar][nextbin] : fRefB[ivar][nextbin];" << endl;
00859 fout << " float nextbincenter = (nextbin + 0.5)/fNbin[ivar]*(fHistMax[ivar] - fHistMin[ivar]) + fHistMin[ivar];" << endl;
00860 fout << endl;
00861 fout << " double dx = bincenter - nextbincenter;" << endl;
00862 fout << " double dy = ref - refnext;" << endl;
00863 fout << " p += (x[itype] - bincenter) * dy/dx;" << endl;
00864 fout << " }" << endl;
00865 fout << endl;
00866 fout << " if (p < fEpsilon) p = fEpsilon; // avoid zero response" << endl;
00867 fout << endl;
00868 fout << " if (itype == 0) ps *= p;" << endl;
00869 fout << " else pb *= p;" << endl;
00870 fout << " } " << endl;
00871 fout << " } " << endl;
00872 fout << endl;
00873 fout << " // the likelihood ratio (transform it ?)" << endl;
00874 fout << " return TransformLikelihoodOutput( ps, pb ); " << endl;
00875 fout << "}" << endl << endl;
00876
00877 fout << "inline double " << className << "::TransformLikelihoodOutput( double ps, double pb ) const" << endl;
00878 fout << "{" << endl;
00879 fout << " // returns transformed or non-transformed output" << endl;
00880 fout << " if (ps < fEpsilon) ps = fEpsilon;" << endl;
00881 fout << " if (pb < fEpsilon) pb = fEpsilon;" << endl;
00882 fout << " double r = ps/(ps + pb);" << endl;
00883 fout << " if (r >= 1.0) r = 1. - 1.e-15;" << endl;
00884 fout << endl;
00885 fout << " if (" << (fTransformLikelihoodOutput ? "true" : "false") << ") {" << endl;
00886 fout << " // inverse Fermi function" << endl;
00887 fout << endl;
00888 fout << " // sanity check" << endl;
00889 fout << " if (r <= 0.0) r = fEpsilon;" << endl;
00890 fout << " else if (r >= 1.0) r = 1. - 1.e-15;" << endl;
00891 fout << endl;
00892 fout << " double tau = 15.0;" << endl;
00893 fout << " r = - log(1.0/r - 1.0)/tau;" << endl;
00894 fout << " }" << endl;
00895 fout << endl;
00896 fout << " return r;" << endl;
00897 fout << "}" << endl;
00898 fout << endl;
00899
00900 fout << "// Clean up" << endl;
00901 fout << "inline void " << className << "::Clear() " << endl;
00902 fout << "{" << endl;
00903 fout << " // nothing to clear" << endl;
00904 fout << "}" << endl << endl;
00905
00906 fout << "// signal map" << endl;
00907 fout << "float " << className << "::fRefS[][" << nbinMax << "] = " << endl;
00908 fout << "{ " << endl;
00909 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00910 fout << " { ";
00911 for (Int_t ibin=1; ibin<=nbinMax; ibin++) {
00912 if (ibin-1 < nbin[ivar])
00913 fout << (*fPDFSig)[ivar]->GetPDFHist()->GetBinContent(ibin);
00914 else
00915 fout << -1;
00916
00917 if (ibin < nbinMax) fout << ", ";
00918 }
00919 fout << " }, " << endl;
00920 }
00921 fout << "}; " << endl;
00922 fout << endl;
00923
00924 fout << "// background map" << endl;
00925 fout << "float " << className << "::fRefB[][" << nbinMax << "] = " << endl;
00926 fout << "{ " << endl;
00927 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00928 fout << " { ";
00929 fout << std::setprecision(8);
00930 for (Int_t ibin=1; ibin<=nbinMax; ibin++) {
00931 if (ibin-1 < nbin[ivar])
00932 fout << (*fPDFBgd)[ivar]->GetPDFHist()->GetBinContent(ibin);
00933 else
00934 fout << -1;
00935
00936 if (ibin < nbinMax) fout << ", ";
00937 }
00938 fout << " }, " << endl;
00939 }
00940 fout << "}; " << endl;
00941 fout << endl;
00942 fout << std::setprecision(dp);
00943
00944 delete[] nbin;
00945 }
00946
00947
00948 void TMVA::MethodLikelihood::GetHelpMessage() const
00949 {
00950
00951
00952
00953
00954 Log() << Endl;
00955 Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
00956 Log() << Endl;
00957 Log() << "The maximum-likelihood classifier models the data with probability " << Endl;
00958 Log() << "density functions (PDF) reproducing the signal and background" << Endl;
00959 Log() << "distributions of the input variables. Correlations among the " << Endl;
00960 Log() << "variables are ignored." << Endl;
00961 Log() << Endl;
00962 Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
00963 Log() << Endl;
00964 Log() << "Required for good performance are decorrelated input variables" << Endl;
00965 Log() << "(PCA transformation via the option \"VarTransform=Decorrelate\"" << Endl;
00966 Log() << "may be tried). Irreducible non-linear correlations may be reduced" << Endl;
00967 Log() << "by precombining strongly correlated input variables, or by simply" << Endl;
00968 Log() << "removing one of the variables." << Endl;
00969 Log() << Endl;
00970 Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
00971 Log() << Endl;
00972 Log() << "High fidelity PDF estimates are mandatory, i.e., sufficient training " << Endl;
00973 Log() << "statistics is required to populate the tails of the distributions" << Endl;
00974 Log() << "It would be a surprise if the default Spline or KDE kernel parameters" << Endl;
00975 Log() << "provide a satisfying fit to the data. The user is advised to properly" << Endl;
00976 Log() << "tune the events per bin and smooth options in the spline cases" << Endl;
00977 Log() << "individually per variable. If the KDE kernel is used, the adaptive" << Endl;
00978 Log() << "Gaussian kernel may lead to artefacts, so please always also try" << Endl;
00979 Log() << "the non-adaptive one." << Endl;
00980 Log() << "" << Endl;
00981 Log() << "All tuning parameters must be adjusted individually for each input" << Endl;
00982 Log() << "variable!" << Endl;
00983 }
00984