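// Project : TMVA - a ROOT-integrated toolkit for multivariate data analysis
// Class   : MethodLD
//
// Linear discriminant (LD) method for classification and linear regression.
// The response is a linear combination of the input variables,
//    f(x) = c_0 + sum_i c_i * x_i,
// with coefficients obtained from the weighted least-squares normal equations
//    c = (X^T W X)^{-1} X^T W Y,
// where X holds the event coordinates (with a leading column of ones for the
// offset), W is the diagonal matrix of event weights, and Y is the class flag
// (classification) or the regression target(s).
//
// Minimal booking sketch (assumes a configured TMVA::Factory named "factory";
// the option string is illustrative only):
//
//    factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V" );
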
#include <iomanip>

#include "TMath.h"
#include "Riostream.h"
#include "TMatrix.h"
#include "TMatrixD.h"

#include "TMVA/VariableTransformBase.h"
#include "TMVA/MethodLD.h"
#include "TMVA/Tools.h"
#include "TMVA/Ranking.h"
#include "TMVA/Types.h"
#include "TMVA/PDF.h"
#include "TMVA/ClassifierFactory.h"

REGISTER_METHOD(LD)

ClassImp(TMVA::MethodLD)

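////////////////////////////////////////////////////////////////////////////////
/// standard constructor for the LD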
TMVA::MethodLD::MethodLD( const TString& jobName,
                          const TString& methodTitle,
                          DataSetInfo& dsi,
                          const TString& theOption,
                          TDirectory* theTargetDir ) :
   MethodBase( jobName, Types::kLD, methodTitle, dsi, theOption, theTargetDir ),
   fNRegOut   ( 0 ),
   fSumMatx   ( 0 ),
   fSumValMatx( 0 ),
   fCoeffMatx ( 0 ),
   fLDCoeff   ( 0 )
{
}

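////////////////////////////////////////////////////////////////////////////////
/// constructor from weight file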
TMVA::MethodLD::MethodLD( DataSetInfo& theData, const TString& theWeightFile, TDirectory* theTargetDir )
   : MethodBase( Types::kLD, theData, theWeightFile, theTargetDir ),
   fNRegOut   ( 0 ),
   fSumMatx   ( 0 ),
   fSumValMatx( 0 ),
   fCoeffMatx ( 0 ),
   fLDCoeff   ( 0 )
{
}

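////////////////////////////////////////////////////////////////////////////////
/// default initialization called by all constructors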
void TMVA::MethodLD::Init( void )
{
   if (DataInfo().GetNTargets() != 0) fNRegOut = DataInfo().GetNTargets();
   else                               fNRegOut = 1;

   // allocate one coefficient vector (offset + one entry per variable) per output
   fLDCoeff = new std::vector< std::vector< Double_t >* >(fNRegOut);
   for (Int_t iout = 0; iout<fNRegOut; iout++)
      (*fLDCoeff)[iout] = new std::vector<Double_t>( GetNvar()+1 );

   // the minimum requirement to declare an event signal-like
   SetSignalReferenceCut( 0.0 );
}

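////////////////////////////////////////////////////////////////////////////////
/// destructor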
TMVA::MethodLD::~MethodLD( void )
{
   if (fSumMatx)    { delete fSumMatx;    fSumMatx    = 0; }
   if (fSumValMatx) { delete fSumValMatx; fSumValMatx = 0; }
   if (fCoeffMatx)  { delete fCoeffMatx;  fCoeffMatx  = 0; }
   if (fLDCoeff) {
      for (std::vector< std::vector< Double_t >* >::iterator vi=fLDCoeff->begin(); vi!=fLDCoeff->end(); ++vi)
         if (*vi) { delete *vi; *vi = 0; }
      delete fLDCoeff; fLDCoeff = 0;
   }
}

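////////////////////////////////////////////////////////////////////////////////
/// LD can handle classification with two classes and regression with one
/// regression target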
Bool_t TMVA::MethodLD::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets )
{
   if      (type == Types::kClassification && numberClasses == 2) return kTRUE;
   else if (type == Types::kRegression     && numberTargets == 1) {
      Log() << "regression with " << numberTargets << " targets." << Endl;
      return kTRUE;
   }
   else return kFALSE;
}

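////////////////////////////////////////////////////////////////////////////////
/// performs the training: fill the sum matrices, solve for the LD
/// coefficients, and print them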
void TMVA::MethodLD::Train( void )
{
   // compute fSumMatx
   GetSum();

   // compute fSumValMatx
   GetSumVal();

   // compute fCoeffMatx and fLDCoeff
   GetLDCoeff();

   // nice output
   PrintCoefficients();
}

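////////////////////////////////////////////////////////////////////////////////
/// Returns the MVA classification output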
Double_t TMVA::MethodLD::GetMvaValue( Double_t* err, Double_t* errUpper )
{
   const Event* ev = GetEvent();

   if (fRegressionReturnVal == NULL) fRegressionReturnVal = new std::vector< Float_t >();
   fRegressionReturnVal->resize( fNRegOut );

   // linear response: offset plus weighted sum of the input variables
   for (Int_t iout = 0; iout<fNRegOut; iout++) {
      (*fRegressionReturnVal)[iout] = (*(*fLDCoeff)[iout])[0];

      int icoeff = 0;
      for (std::vector<Float_t>::const_iterator it = ev->GetValues().begin(); it != ev->GetValues().end(); ++it) {
         (*fRegressionReturnVal)[iout] += (*(*fLDCoeff)[iout])[++icoeff] * (*it);
      }
   }

   // cannot determine error
   NoErrorCalc(err, errUpper);

   return (*fRegressionReturnVal)[0];
}

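////////////////////////////////////////////////////////////////////////////////
/// Calculates the regression output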
const std::vector< Float_t >& TMVA::MethodLD::GetRegressionValues()
{
   const Event* ev = GetEvent();

   if (fRegressionReturnVal == NULL) fRegressionReturnVal = new std::vector< Float_t >();
   fRegressionReturnVal->resize( fNRegOut );

   for (Int_t iout = 0; iout<fNRegOut; iout++) {
      (*fRegressionReturnVal)[iout] = (*(*fLDCoeff)[iout])[0];

      int icoeff = 0;
      for (std::vector<Float_t>::const_iterator it = ev->GetValues().begin(); it != ev->GetValues().end(); ++it) {
         (*fRegressionReturnVal)[iout] += (*(*fLDCoeff)[iout])[++icoeff] * (*it);
      }
   }

   // perform inverse transformation on the regression targets
   Event* evT = new Event(*ev);
   for (Int_t iout = 0; iout<fNRegOut; iout++) evT->SetTarget( iout, (*fRegressionReturnVal)[iout] );

   const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
   fRegressionReturnVal->clear();
   for (Int_t iout = 0; iout<fNRegOut; iout++) fRegressionReturnVal->push_back( evT2->GetTarget(iout) );

   delete evT;
   return (*fRegressionReturnVal);
}

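////////////////////////////////////////////////////////////////////////////////
/// Initialization method; creates global matrices and vectors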
void TMVA::MethodLD::InitMatrices( void )
{
   // the sum matrices carry one extra row/column for the offset term
   fSumMatx    = new TMatrixD( GetNvar()+1, GetNvar()+1 );
   fSumValMatx = new TMatrixD( GetNvar()+1, fNRegOut );
   fCoeffMatx  = new TMatrixD( GetNvar()+1, fNRegOut );
}

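////////////////////////////////////////////////////////////////////////////////
/// Calculates the matrix transposed(X)*W*X with W being the diagonal weight
/// matrix and X the coordinate values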
void TMVA::MethodLD::GetSum( void )
{
   const UInt_t nvar = DataInfo().GetNVariables();

   for (UInt_t ivar = 0; ivar<=nvar; ivar++)
      for (UInt_t jvar = 0; jvar<=nvar; jvar++) (*fSumMatx)( ivar, jvar ) = 0;

   // accumulate the weighted sums over the training events
   Long64_t nevts = Data()->GetNEvents();
   for (Long64_t ievt=0; ievt<nevts; ievt++) {
      const Event * ev = GetEvent(ievt);
      Double_t weight = ev->GetWeight();

      // in case events with negative weights are to be ignored
      if (IgnoreEventsWithNegWeightsInTraining() && weight <= 0) continue;

      // sum of weights
      (*fSumMatx)( 0, 0 ) += weight;

      // offset row and column: weighted sums of the single variables
      for (UInt_t ivar=0; ivar<nvar; ivar++) {
         (*fSumMatx)( ivar+1, 0 ) += ev->GetValue( ivar ) * weight;
         (*fSumMatx)( 0, ivar+1 ) += ev->GetValue( ivar ) * weight;
      }

      // inner block: weighted sums of the variable pairs
      for (UInt_t ivar=0; ivar<nvar; ivar++)
         for (UInt_t jvar=0; jvar<nvar; jvar++)
            (*fSumMatx)( ivar+1, jvar+1 ) += ev->GetValue( ivar ) * ev->GetValue( jvar ) * weight;
   }
}

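////////////////////////////////////////////////////////////////////////////////
/// Calculates the vector transposed(X)*W*Y with Y being the target vector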
void TMVA::MethodLD::GetSumVal( void )
{
   const UInt_t nvar = DataInfo().GetNVariables();

   for (Int_t ivar = 0; ivar<fNRegOut; ivar++)
      for (UInt_t jvar = 0; jvar<=nvar; jvar++)
         (*fSumValMatx)( jvar, ivar ) = 0;

   // sum of coordinates multiplied by the target values
   for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {

      // retrieve the event
      const Event* ev = GetEvent(ievt);
      Double_t weight = ev->GetWeight();

      // in case events with negative weights are to be ignored
      if (IgnoreEventsWithNegWeightsInTraining() && weight <= 0) continue;

      for (Int_t ivar=0; ivar<fNRegOut; ivar++) {

         Double_t val = weight;

         // the "target" is the signal/background flag for classification,
         // and the regression target otherwise
         if (!DoRegression())
            val *= DataInfo().IsSignal(ev);
         else
            val *= ev->GetTarget( ivar );

         (*fSumValMatx)( 0, ivar ) += val;
         for (UInt_t jvar=0; jvar<nvar; jvar++)
            (*fSumValMatx)( jvar+1, ivar ) += ev->GetValue(jvar) * val;
      }
   }
}

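////////////////////////////////////////////////////////////////////////////////
/// Calculates the coefficients used for classification/regression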
void TMVA::MethodLD::GetLDCoeff( void )
{
   const UInt_t nvar = DataInfo().GetNVariables();

   for (Int_t ivar = 0; ivar<fNRegOut; ivar++) {
      TMatrixD invSum( *fSumMatx );
      if ( TMath::Abs(invSum.Determinant()) < 10E-24 ) {
         Log() << kWARNING << "<GetCoeff> matrix is almost singular with determinant="
               << TMath::Abs(invSum.Determinant())
               << " did you use variables that are linear combinations or highly correlated?"
               << Endl;
      }
      if ( TMath::Abs(invSum.Determinant()) < 10E-120 ) {
         Log() << kFATAL << "<GetCoeff> matrix is singular with determinant="
               << TMath::Abs(invSum.Determinant())
               << " did you use variables that are linear combinations?"
               << Endl;
      }
      invSum.Invert();

      // solve the normal equations: coefficients = (X^T W X)^{-1} * (X^T W Y)
      if (fCoeffMatx) delete fCoeffMatx; // drop the matrix from InitMatrices (or the previous iteration) to avoid a leak
      fCoeffMatx = new TMatrixD( invSum * (*fSumValMatx) );
      for (UInt_t jvar = 0; jvar<nvar+1; jvar++) {
         (*(*fLDCoeff)[ivar])[jvar] = (*fCoeffMatx)( jvar, ivar );
      }

      // for classification, recompute the offset from the weighted sample means
      if (!DoRegression()) {
         (*(*fLDCoeff)[ivar])[0] = 0.0;
         for (UInt_t jvar = 1; jvar<nvar+1; jvar++)
            (*(*fLDCoeff)[ivar])[0] += (*fCoeffMatx)(jvar,ivar) * (*fSumMatx)(0,jvar) / (*fSumMatx)( 0, 0 );
         (*(*fLDCoeff)[ivar])[0] /= -2.0;
      }
   }
}

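////////////////////////////////////////////////////////////////////////////////
/// read LD coefficients from weight file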
void TMVA::MethodLD::ReadWeightsFromStream( istream& istr )
{
   for (Int_t iout=0; iout<fNRegOut; iout++)
      for (UInt_t icoeff=0; icoeff<GetNvar()+1; icoeff++)
         istr >> (*(*fLDCoeff)[iout])[icoeff];
}

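////////////////////////////////////////////////////////////////////////////////
/// create XML description for LD classification and regression
/// (for arbitrary number of output classes/targets)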
void TMVA::MethodLD::AddWeightsXMLTo( void* parent ) const
{
   void* wght = gTools().AddChild(parent, "Weights");
   gTools().AddAttr( wght, "NOut",   fNRegOut    );
   gTools().AddAttr( wght, "NCoeff", GetNvar()+1 );
   for (Int_t iout=0; iout<fNRegOut; iout++) {
      for (UInt_t icoeff=0; icoeff<GetNvar()+1; icoeff++) {
         void* coeffxml = gTools().AddChild( wght, "Coefficient" );
         gTools().AddAttr( coeffxml, "IndexOut",   iout   );
         gTools().AddAttr( coeffxml, "IndexCoeff", icoeff );
         gTools().AddAttr( coeffxml, "Value",      (*(*fLDCoeff)[iout])[icoeff] );
      }
   }
}

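////////////////////////////////////////////////////////////////////////////////
/// read coefficients from xml weight file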
void TMVA::MethodLD::ReadWeightsFromXML( void* wghtnode )
{
   UInt_t ncoeff;
   gTools().ReadAttr( wghtnode, "NOut",   fNRegOut );
   gTools().ReadAttr( wghtnode, "NCoeff", ncoeff   );

   // sanity check
   if (ncoeff != GetNvar()+1)
      Log() << kFATAL << "Mismatch in number of output variables/coefficients: "
            << ncoeff << " != " << GetNvar()+1 << Endl;

   // re-create the coefficient vectors (one vector per output dimension)
   if (fLDCoeff) {
      for (std::vector< std::vector< Double_t >* >::iterator vi=fLDCoeff->begin(); vi!=fLDCoeff->end(); ++vi)
         if (*vi) { delete *vi; *vi = 0; }
      delete fLDCoeff; fLDCoeff = 0;
   }
   fLDCoeff = new std::vector< std::vector< Double_t >* >(fNRegOut);
   for (Int_t ivar = 0; ivar<fNRegOut; ivar++) (*fLDCoeff)[ivar] = new std::vector<Double_t>( ncoeff );

   void* ch = gTools().GetChild(wghtnode);
   Double_t coeff;
   Int_t iout, icoeff;
   while (ch) {
      gTools().ReadAttr( ch, "IndexOut",   iout   );
      gTools().ReadAttr( ch, "IndexCoeff", icoeff );
      gTools().ReadAttr( ch, "Value",      coeff  );

      (*(*fLDCoeff)[iout])[icoeff] = coeff;

      ch = gTools().GetNextChild(ch);
   }
}

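////////////////////////////////////////////////////////////////////////////////
/// write LD-specific classifier response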
void TMVA::MethodLD::MakeClassSpecific( std::ostream& fout, const TString& className ) const
{
   fout << "   std::vector<double> fLDCoefficients;" << endl;
   fout << "};" << endl;
   fout << "" << endl;
   fout << "inline void " << className << "::Initialize() " << endl;
   fout << "{" << endl;
   for (UInt_t ivar=0; ivar<GetNvar()+1; ivar++) {
      Int_t dp = fout.precision();
      fout << "   fLDCoefficients.push_back( "
           << std::setprecision(12) << (*(*fLDCoeff)[0])[ivar]
           << std::setprecision(dp) << " );" << endl;
   }
   fout << endl;
   fout << "   // sanity check" << endl;
   fout << "   if (fLDCoefficients.size() != fNvars+1) {" << endl;
   fout << "      std::cout << \"Problem in class \\\"\" << fClassName << \"\\\"::Initialize: mismatch in number of input values\"" << endl;
   fout << "                << fLDCoefficients.size() << \" != \" << fNvars+1 << std::endl;" << endl;
   fout << "      fStatusIsClean = false;" << endl;
   fout << "   }" << endl;
   fout << "}" << endl;
   fout << endl;
   fout << "inline double " << className << "::GetMvaValue__( const std::vector<double>& inputValues ) const" << endl;
   fout << "{" << endl;
   fout << "   double retval = fLDCoefficients[0];" << endl;
   fout << "   for (size_t ivar = 1; ivar < fNvars+1; ivar++) {" << endl;
   fout << "      retval += fLDCoefficients[ivar]*inputValues[ivar-1];" << endl;
   fout << "   }" << endl;
   fout << endl;
   fout << "   return retval;" << endl;
   fout << "}" << endl;
   fout << endl;
   fout << "// Clean up" << endl;
   fout << "inline void " << className << "::Clear() " << endl;
   fout << "{" << endl;
   fout << "   // clear coefficients" << endl;
   fout << "   fLDCoefficients.clear();" << endl;
   fout << "}" << endl;
}

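////////////////////////////////////////////////////////////////////////////////
/// computes ranking of input variables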
const TMVA::Ranking* TMVA::MethodLD::CreateRanking()
{
   // create the ranking object
   fRanking = new Ranking( GetName(), "Discr. power" );

   // rank the variables by the absolute size of their LD coefficient
   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
      fRanking->AddRank( Rank( GetInputLabel(ivar), TMath::Abs( (*(*fLDCoeff)[0])[ivar+1] ) ) );
   }

   return fRanking;
}

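////////////////////////////////////////////////////////////////////////////////
/// MethodLD options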
void TMVA::MethodLD::DeclareOptions()
{
   AddPreDefVal(TString("LD"));
}

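////////////////////////////////////////////////////////////////////////////////
/// this is the preparation for training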
void TMVA::MethodLD::ProcessOptions()
{
   if (HasTrainingTree()) InitMatrices();
}

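////////////////////////////////////////////////////////////////////////////////
/// Display the classification/regression coefficients for each variable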
void TMVA::MethodLD::PrintCoefficients( void )
{
   Log() << kINFO << "Results for LD coefficients:" << Endl;

   if (GetTransformationHandler().GetTransformationList().GetSize() != 0) {
      Log() << kINFO << "NOTE: The coefficients must be applied to TRANSFORMED variables" << Endl;
      Log() << kINFO << "      List of the transformations: " << Endl;
      TListIter trIt(&GetTransformationHandler().GetTransformationList());
      while (VariableTransformBase *trf = (VariableTransformBase*) trIt() ) {
         Log() << kINFO << "  -- " << trf->GetName() << Endl;
      }
   }
   std::vector<TString>  vars;
   std::vector<Double_t> coeffs;
   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
      vars  .push_back( GetInputLabel(ivar) );
      coeffs.push_back( (*(*fLDCoeff)[0])[ivar+1] );
   }
   vars  .push_back( "(offset)" );
   coeffs.push_back( (*(*fLDCoeff)[0])[0] );
   TMVA::gTools().FormattedOutput( coeffs, vars, "Variable", "Coefficient", Log() );

   // print a note on the normalisation transformation, if used
   if (IsNormalised()) {
      Log() << kINFO << "NOTE: You have chosen to use the \"Normalise\" booking option. Hence, the" << Endl;
      Log() << kINFO << "      coefficients must be applied to NORMALISED (') variables as follows:" << Endl;
      Int_t maxL = 0;
      for (UInt_t ivar=0; ivar<GetNvar(); ivar++) if (GetInputLabel(ivar).Length() > maxL) maxL = GetInputLabel(ivar).Length();

      // print the normalisation expression for each variable
      for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
         Log() << kINFO
               << setw(maxL+9) << TString("[") + GetInputLabel(ivar) + "]' = 2*("
               << setw(maxL+2) << TString("[") + GetInputLabel(ivar) + "]"
               << setw(3)      << (GetXmin(ivar) > 0 ? " - " : " + ")
               << setw(6)      << TMath::Abs(GetXmin(ivar)) << setw(3) << ")/"
               << setw(6)      << (GetXmax(ivar) - GetXmin(ivar))
               << setw(3)      << " - 1"
               << Endl;
      }
      Log() << kINFO << "The TMVA Reader will properly account for this normalisation, but if the" << Endl;
      Log() << kINFO << "LD classifier is applied outside the Reader, the transformation must be" << Endl;
      Log() << kINFO << "implemented -- or the \"Normalise\" option is removed and LD retrained." << Endl;
      Log() << kINFO << Endl;
   }
}

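////////////////////////////////////////////////////////////////////////////////
/// get help message text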
void TMVA::MethodLD::GetHelpMessage() const
{
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "Linear discriminants select events by distinguishing the mean " << Endl;
   Log() << "values of the signal and background distributions in a trans- " << Endl;
   Log() << "formed variable space where linear correlations are removed." << Endl;
   Log() << "The LD implementation here is equivalent to the \"Fisher\" discriminant" << Endl;
   Log() << "for classification, but also provides linear regression." << Endl;
   Log() << Endl;
   Log() << "   (More precisely: the \"linear discriminator\" determines" << Endl;
   Log() << "    an axis in the (correlated) hyperspace of the input " << Endl;
   Log() << "    variables such that, when projecting the output classes " << Endl;
   Log() << "    (signal and background) upon this axis, they are pushed " << Endl;
   Log() << "    as far as possible away from each other, while events" << Endl;
   Log() << "    of a same class are confined in a close vicinity. The " << Endl;
   Log() << "    linearity property of this classifier is reflected in the " << Endl;
   Log() << "    metric with which \"far apart\" and \"close vicinity\" are " << Endl;
   Log() << "    determined: the covariance matrix of the discriminating" << Endl;
   Log() << "    variable space.)" << Endl;
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "Optimal performance for the linear discriminant is obtained for " << Endl;
   Log() << "linearly correlated Gaussian-distributed variables. Any deviation" << Endl;
   Log() << "from this ideal reduces the achievable separation power. In " << Endl;
   Log() << "particular, no discrimination at all is achieved for a variable" << Endl;
   Log() << "that has the same sample mean for signal and background, even if " << Endl;
   Log() << "the shapes of the distributions are very different. Thus, the linear " << Endl;
   Log() << "discriminant often benefits from a suitable transformation of the " << Endl;
   Log() << "input variables. For example, if a variable x in [-1,1] has a " << Endl;
   Log() << "parabolic signal distribution, and a uniform background" << Endl;
   Log() << "distribution, their mean value is zero in both cases, leading " << Endl;
   Log() << "to no separation. The simple transformation x -> |x| renders this " << Endl;
   Log() << "variable powerful for use in a linear discriminant." << Endl;
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "<None>" << Endl;
}