00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #include <iostream>
00028 #include <iomanip>
00029
00030 #include "TVectorF.h"
00031 #include "TVectorD.h"
00032 #include "TMatrixD.h"
00033 #include "TMatrixDBase.h"
00034
00035 #include "TMVA/VariablePCATransform.h"
00036
00037 #ifndef ROOT_TMVA_MsgLogger
00038 #include "TMVA/MsgLogger.h"
00039 #endif
00040 #include "TMVA/DataSet.h"
00041 #include "TMVA/Tools.h"
00042
00043 ClassImp(TMVA::VariablePCATransform)
00044
00045
00046 TMVA::VariablePCATransform::VariablePCATransform( DataSetInfo& dsi )
00047 : VariableTransformBase( dsi, Types::kPCA, "PCA" )
00048 {
00049
00050 }
00051
00052
00053 TMVA::VariablePCATransform::~VariablePCATransform()
00054 {
00055
00056 for (UInt_t i=0; i<fMeanValues.size(); i++) {
00057 if (fMeanValues.at(i) != 0) delete fMeanValues.at(i);
00058 if (fEigenVectors.at(i) != 0) delete fEigenVectors.at(i);
00059 }
00060 }
00061
00062
00063 void TMVA::VariablePCATransform::Initialize()
00064 {
00065
00066
00067
00068
00069 }
00070
00071
00072 Bool_t TMVA::VariablePCATransform::PrepareTransformation( const std::vector<Event*>& events )
00073 {
00074
00075
00076 Initialize();
00077
00078 if (!IsEnabled() || IsCreated()) return kTRUE;
00079
00080 Log() << kINFO << "Preparing the Principle Component (PCA) transformation..." << Endl;
00081
00082 SetNVariables(events[0]->GetNVariables());
00083
00084
00085 if (GetNVariables() <= 1) {
00086 Log() << kINFO << "Cannot perform PCA transformation for " << GetNVariables() << " variable only" << Endl;
00087 return kFALSE;
00088 }
00089
00090 if (GetNVariables() > 200) {
00091 Log() << kINFO << "----------------------------------------------------------------------------"
00092 << Endl;
00093 Log() << kINFO
00094 << ": More than 200 variables, will not calculate PCA!" << Endl;
00095 Log() << kINFO << "----------------------------------------------------------------------------"
00096 << Endl;
00097 return kFALSE;
00098 }
00099
00100 CalculatePrincipalComponents( events );
00101
00102 SetCreated( kTRUE );
00103
00104 return kTRUE;
00105 }
00106
00107
00108 const TMVA::Event* TMVA::VariablePCATransform::Transform( const Event* const ev, Int_t cls ) const
00109 {
00110
00111 if (!IsCreated()) return 0;
00112
00113 const Int_t nvar = ev->GetNVariables();
00114
00115
00116
00117
00118
00119
00120 if (cls < 0 || cls >= (int) fMeanValues.size()) cls = fMeanValues.size()-1;
00121
00122
00123
00124
00125 if (fTransformedEvent==0 || fTransformedEvent->GetNVariables()!=ev->GetNVariables()) {
00126 if(fTransformedEvent!=0) delete fTransformedEvent;
00127 fTransformedEvent = new Event();
00128 }
00129
00130
00131
00132 const std::vector<UInt_t>* varArrange = ev->GetVariableArrangement();
00133
00134 if(!varArrange) {
00135 std::vector<Float_t> rv = X2P( ev->GetValues(), cls );
00136 for (Int_t ivar=0; ivar<nvar; ++ivar)
00137 fTransformedEvent->SetVal(ivar, rv[ivar]);
00138 } else {
00139 std::vector<Float_t> rv(nvar);
00140 for (Int_t ivar=0; ivar<nvar; ++ivar)
00141 rv[ivar] = ev->GetValue(ivar);
00142 rv = X2P( rv, cls );
00143 for (Int_t ivar=0; ivar<nvar; ++ivar)
00144 fTransformedEvent->SetVal(ivar, rv[ivar]);
00145 }
00146
00147 for (UInt_t itgt=0; itgt<ev->GetNTargets(); itgt++)
00148 fTransformedEvent->SetTarget( itgt, ev->GetTarget(itgt) );
00149
00150 fTransformedEvent->SetWeight ( ev->GetWeight() );
00151 fTransformedEvent->SetBoostWeight( ev->GetBoostWeight() );
00152 fTransformedEvent->SetClass ( ev->GetClass() );
00153
00154 return fTransformedEvent;
00155 }
00156
00157
00158 const TMVA::Event* TMVA::VariablePCATransform::InverseTransform( const Event* const ev, Int_t cls ) const
00159 {
00160
00161
00162 Log() << kFATAL << "Inverse transformation for PCA transformation not yet implemented. Hence, this transformation cannot be applied together with regression. Please contact the authors if necessary." << Endl;
00163
00164 if (!IsCreated()) return 0;
00165 const Int_t nvar = ev->GetNVariables();
00166
00167
00168
00169
00170 if (cls < 0 || cls > GetNClasses()) cls = ( GetNClasses() == 1 ? 0 : 1 );
00171
00172
00173
00174 std::vector<Float_t> rv = X2P( ev->GetValues(), cls );
00175
00176 if (fBackTransformedEvent==0 || fBackTransformedEvent->GetNVariables()!=ev->GetNVariables()) {
00177 if(fBackTransformedEvent!=0) delete fBackTransformedEvent;
00178 fBackTransformedEvent = new Event( *ev );
00179 }
00180 for (Int_t ivar=0; ivar<nvar; ivar++) fBackTransformedEvent->SetVal(ivar, rv[ivar]);
00181 fBackTransformedEvent->SetClass ( ev->GetClass() );
00182 fBackTransformedEvent->SetWeight ( ev->GetWeight() );
00183 fBackTransformedEvent->SetBoostWeight( ev->GetBoostWeight() );
00184
00185 return fBackTransformedEvent;
00186 }
00187
00188
00189 void TMVA::VariablePCATransform::CalculatePrincipalComponents( const std::vector<Event*>& events )
00190 {
00191
00192
00193
00194 const Int_t nvar = GetNVariables();
00195
00196
00197 const UInt_t maxPCA = (GetNClasses()<=1) ? GetNClasses() : GetNClasses()+1;
00198
00199
00200 std::vector<TPrincipal*> pca(maxPCA);
00201 for (UInt_t i=0; i<maxPCA; i++) pca[i] = new TPrincipal(nvar,"");
00202
00203
00204
00205 Long64_t ievt, entries = events.size();
00206 Double_t *dvec = new Double_t[nvar];
00207
00208 for (ievt=0; ievt<entries; ievt++) {
00209 Event* ev = events[ievt];
00210 for (Int_t i = 0; i < nvar; i++) dvec[i] = (Double_t) ev->GetValue(i);
00211 pca.at(ev->GetClass())->AddRow( dvec );
00212 if (GetNClasses() > 1) pca.at(maxPCA-1)->AddRow( dvec );
00213 }
00214
00215
00216 for (UInt_t i=0; i<fMeanValues.size(); i++) if (fMeanValues[i] != 0) delete fMeanValues[i];
00217 for (UInt_t i=0; i<fEigenVectors.size(); i++) if (fEigenVectors[i] != 0) delete fEigenVectors[i];
00218 fMeanValues.resize(maxPCA,0);
00219 fEigenVectors.resize(maxPCA,0);
00220
00221 for (UInt_t i=0; i<maxPCA; i++ ) {
00222 pca.at(i)->MakePrincipals();
00223
00224
00225 fMeanValues[i] = new TVectorD( *(pca.at(i)->GetMeanValues()) );
00226 fEigenVectors[i] = new TMatrixD( *(pca.at(i)->GetEigenVectors()) );
00227 }
00228
00229 for (UInt_t i=0; i<maxPCA; i++) delete pca.at(i);
00230 delete [] dvec;
00231 }
00232
00233
00234 std::vector<Float_t> TMVA::VariablePCATransform::X2P( const std::vector<Float_t>& x, Int_t cls ) const
00235 {
00236
00237
00238
00239
00240 const Int_t nvar = x.size();
00241 std::vector<Float_t> p(nvar,0);
00242
00243 for (Int_t i = 0; i < nvar; i++) {
00244 Double_t pv = 0;
00245 for (Int_t j = 0; j < nvar; j++)
00246 pv += (((Double_t)x.at(j)) - (*fMeanValues.at(cls))(j)) * (*fEigenVectors.at(cls))(j,i);
00247 p[i] = pv;
00248 }
00249 return p;
00250 }
00251
00252
00253 void TMVA::VariablePCATransform::WriteTransformationToStream( std::ostream& o ) const
00254 {
00255
00256 for (Int_t sbType=0; sbType<2; sbType++) {
00257 o << "# PCA mean values " << std::endl;
00258 const TVectorD* means = fMeanValues[sbType];
00259 o << (sbType==0 ? "Signal" : "Background") << " " << means->GetNrows() << std::endl;
00260 for (Int_t row = 0; row<means->GetNrows(); row++) {
00261 o << std::setprecision(12) << std::setw(20) << (*means)[row];
00262 }
00263 o << std::endl;
00264 }
00265 o << "##" << std::endl;
00266
00267
00268 for (Int_t sbType=0; sbType<2; sbType++) {
00269 o << "# PCA eigenvectors " << std::endl;
00270 const TMatrixD* mat = fEigenVectors[sbType];
00271 o << (sbType==0 ? "Signal" : "Background") << " " << mat->GetNrows() << " x " << mat->GetNcols() << std::endl;
00272 for (Int_t row = 0; row<mat->GetNrows(); row++) {
00273 for (Int_t col = 0; col<mat->GetNcols(); col++) {
00274 o << std::setprecision(12) << std::setw(20) << (*mat)[row][col] << " ";
00275 }
00276 o << std::endl;
00277 }
00278 }
00279 o << "##" << std::endl;
00280 }
00281
00282
00283 void TMVA::VariablePCATransform::AttachXMLTo(void* parent) {
00284
00285
00286 void* trfxml = gTools().AddChild(parent, "Transform");
00287 gTools().AddAttr(trfxml, "Name", "PCA");
00288
00289
00290 for (UInt_t sbType=0; sbType<fMeanValues.size(); sbType++) {
00291 void* meanxml = gTools().AddChild( trfxml, "Statistics");
00292 const TVectorD* means = fMeanValues[sbType];
00293 gTools().AddAttr( meanxml, "Class", (sbType==0 ? "Signal" :(sbType==1 ? "Background":"Combined")) );
00294 gTools().AddAttr( meanxml, "ClassIndex", sbType );
00295 gTools().AddAttr( meanxml, "NRows", means->GetNrows() );
00296 TString meansdef = "";
00297 for (Int_t row = 0; row<means->GetNrows(); row++)
00298 meansdef += gTools().StringFromDouble((*means)[row]) + " ";
00299 gTools().AddRawLine( meanxml, meansdef );
00300 }
00301
00302
00303 for (UInt_t sbType=0; sbType<fEigenVectors.size(); sbType++) {
00304 void* evxml = gTools().AddChild( trfxml, "Eigenvectors");
00305 const TMatrixD* mat = fEigenVectors[sbType];
00306 gTools().AddAttr( evxml, "Class", (sbType==0 ? "Signal" :(sbType==1 ? "Background":"Combined") ) );
00307 gTools().AddAttr( evxml, "ClassIndex", sbType );
00308 gTools().AddAttr( evxml, "NRows", mat->GetNrows() );
00309 gTools().AddAttr( evxml, "NCols", mat->GetNcols() );
00310 TString evdef = "";
00311 for (Int_t row = 0; row<mat->GetNrows(); row++)
00312 for (Int_t col = 0; col<mat->GetNcols(); col++)
00313 evdef += gTools().StringFromDouble((*mat)[row][col]) + " ";
00314 gTools().AddRawLine( evxml, evdef );
00315 }
00316 }
00317
00318
00319 void TMVA::VariablePCATransform::ReadFromXML( void* trfnode )
00320 {
00321
00322
00323 Int_t nrows, ncols;
00324 UInt_t clsIdx;
00325 TString classtype;
00326 TString nodeName;
00327
00328 void* ch = gTools().GetChild(trfnode);
00329 while (ch) {
00330 nodeName = gTools().GetName(ch);
00331 if (nodeName == "Statistics") {
00332
00333 gTools().ReadAttr(ch, "Class", classtype);
00334 gTools().ReadAttr(ch, "ClassIndex", clsIdx);
00335 gTools().ReadAttr(ch, "NRows", nrows);
00336
00337
00338 if (fMeanValues.size()<=clsIdx) fMeanValues.resize(clsIdx+1,0);
00339 if (fMeanValues[clsIdx]==0) fMeanValues[clsIdx] = new TVectorD( nrows );
00340 fMeanValues[clsIdx]->ResizeTo( nrows );
00341
00342
00343 std::stringstream s(gTools().GetContent(ch));
00344 for (Int_t row = 0; row<nrows; row++) s >> (*fMeanValues[clsIdx])(row);
00345 }
00346 else if ( nodeName == "Eigenvectors" ) {
00347
00348 gTools().ReadAttr(ch, "Class", classtype);
00349 gTools().ReadAttr(ch, "ClassIndex", clsIdx);
00350 gTools().ReadAttr(ch, "NRows", nrows);
00351 gTools().ReadAttr(ch, "NCols", ncols);
00352
00353 if (fEigenVectors.size()<=clsIdx) fEigenVectors.resize(clsIdx+1,0);
00354 if (fEigenVectors[clsIdx]==0) fEigenVectors[clsIdx] = new TMatrixD( nrows, ncols );
00355 fEigenVectors[clsIdx]->ResizeTo( nrows, ncols );
00356
00357
00358 std::stringstream s(gTools().GetContent(ch));
00359 for (Int_t row = 0; row<nrows; row++)
00360 for (Int_t col = 0; col<ncols; col++)
00361 s >> (*fEigenVectors[clsIdx])[row][col];
00362 }
00363 ch = gTools().GetNextChild(ch);
00364 }
00365
00366 SetCreated();
00367 }
00368
00369
00370 void TMVA::VariablePCATransform::ReadTransformationFromStream( std::istream& istr, const TString& classname )
00371 {
00372
00373 char buf[512];
00374 istr.getline(buf,512);
00375 TString strvar, dummy;
00376 Int_t nrows(0), ncols(0);
00377 UInt_t classIdx=(classname=="signal"?0:1);
00378
00379 for (UInt_t i=0; i<fMeanValues.size(); i++) {
00380 if (fMeanValues.at(i) != 0) delete fMeanValues.at(i);
00381 if (fEigenVectors.at(i) != 0) delete fEigenVectors.at(i);
00382 }
00383 fMeanValues.resize(3);
00384 fEigenVectors.resize(3);
00385
00386 Log() << kINFO << "VariablePCATransform::ReadTransformationFromStream(): " << Endl;
00387
00388 while (!(buf[0]=='#'&& buf[1]=='#')) {
00389 char* p = buf;
00390 while (*p==' ' || *p=='\t') p++;
00391 if (*p=='#' || *p=='\0') {
00392 istr.getline(buf,512);
00393 continue;
00394 }
00395 std::stringstream sstr(buf);
00396 sstr >> strvar;
00397 if (strvar=="signal" || strvar=="background") {
00398
00399 sstr >> nrows;
00400 Int_t sbType = (strvar=="signal" ? 0 : 1);
00401
00402 if (fMeanValues[sbType] == 0) fMeanValues[sbType] = new TVectorD( nrows );
00403 else fMeanValues[sbType]->ResizeTo( nrows );
00404
00405
00406 for (Int_t row = 0; row<nrows; row++) istr >> (*fMeanValues[sbType])(row);
00407
00408 }
00409
00410 istr.getline(buf,512);
00411 }
00412
00413
00414 istr.getline(buf,512);
00415 while (!(buf[0]=='#'&& buf[1]=='#')) {
00416 char* p = buf;
00417 while(*p==' ' || *p=='\t') p++;
00418 if (*p=='#' || *p=='\0') {
00419 istr.getline(buf,512);
00420 continue;
00421 }
00422 std::stringstream sstr(buf);
00423 sstr >> strvar;
00424 if (strvar=="signal" || strvar=="background") {
00425
00426
00427 sstr >> nrows >> dummy >> ncols;
00428 Int_t sbType = (strvar=="signal" ? 0 : 1);
00429
00430 if (fEigenVectors[sbType] == 0) fEigenVectors[sbType] = new TMatrixD( nrows, ncols );
00431 else fEigenVectors[sbType]->ResizeTo( nrows, ncols );
00432
00433
00434 for (Int_t row = 0; row<fEigenVectors[sbType]->GetNrows(); row++) {
00435 for (Int_t col = 0; col<fEigenVectors[sbType]->GetNcols(); col++) {
00436 istr >> (*fEigenVectors[sbType])[row][col];
00437 }
00438 }
00439
00440 }
00441 istr.getline(buf,512);
00442 }
00443 fMeanValues[2] = new TVectorD( *fMeanValues[classIdx] );
00444 fEigenVectors[2] = new TMatrixD( *fEigenVectors[classIdx] );
00445
00446 SetCreated();
00447 }
00448
00449
00450 void TMVA::VariablePCATransform::MakeFunction( std::ostream& fout, const TString& fcncName,
00451 Int_t part, UInt_t trCounter, Int_t )
00452 {
00453
00454
00455 UInt_t nvar = fEigenVectors[0]->GetNrows();
00456
00457
00458 UInt_t numC = fMeanValues.size();
00459 if (part==1) {
00460 fout << std::endl;
00461 fout << " void X2P_"<<trCounter<<"( const double*, double*, int ) const;" << std::endl;
00462 fout << " double fMeanValues_"<<trCounter<<"["<<numC<<"]["
00463 << fMeanValues[0]->GetNrows() << "];" << std::endl;
00464 fout << " double fEigenVectors_"<<trCounter<<"["<<numC<<"]["
00465 << fEigenVectors[0]->GetNrows() << "]["
00466 << fEigenVectors[0]->GetNcols() <<"];" << std::endl;
00467 fout << std::endl;
00468 }
00469
00470
00471 if (numC>1){
00472 if (fMeanValues[0]->GetNrows() != fMeanValues[1]->GetNrows() ||
00473 fEigenVectors[0]->GetNrows() != fEigenVectors[1]->GetNrows() ||
00474 fEigenVectors[0]->GetNcols() != fEigenVectors[1]->GetNcols()) {
00475 Log() << kFATAL << "<MakeFunction> Mismatch in vector/matrix dimensions" << Endl;
00476 }
00477 }
00478
00479 if (part==2) {
00480
00481 fout << std::endl;
00482 fout << "//_______________________________________________________________________" << std::endl;
00483 fout << "inline void " << fcncName << "::X2P_"<<trCounter<<"( const double* x, double* p, int index ) const" << std::endl;
00484 fout << "{" << std::endl;
00485 fout << " // Calculate the principal components from the original data vector" << std::endl;
00486 fout << " // x, and return it in p (function extracted from TPrincipal::X2P)" << std::endl;
00487 fout << " // It's the users responsibility to make sure that both x and p are" << std::endl;
00488 fout << " // of the right size (i.e., memory must be allocated for p)." << std::endl;
00489 fout << " const int nvar = " << nvar << ";" << std::endl;
00490 fout << std::endl;
00491 fout << " for (int i = 0; i < nvar; i++) {" << std::endl;
00492 fout << " p[i] = 0;" << std::endl;
00493 fout << " for (int j = 0; j < nvar; j++) p[i] += (x[j] - fMeanValues_"<<trCounter<<"[index][j]) * fEigenVectors_"<<trCounter<<"[index][j][i];" << std::endl;
00494 fout << " }" << std::endl;
00495 fout << "}" << std::endl;
00496 fout << std::endl;
00497 fout << "//_______________________________________________________________________" << std::endl;
00498 fout << "inline void " << fcncName << "::InitTransform_"<<trCounter<<"()" << std::endl;
00499 fout << "{" << std::endl;
00500
00501
00502 fout << " // initialise vector of mean values" << std::endl;
00503 std::streamsize dp = fout.precision();
00504 for (UInt_t index=0; index<numC; index++) {
00505 for (int i=0; i<fMeanValues[index]->GetNrows(); i++) {
00506 fout << " fMeanValues_"<<trCounter<<"["<<index<<"]["<<i<<"] = " << std::setprecision(12)
00507 << (*fMeanValues[index])(i) << ";" << std::endl;
00508 }
00509 }
00510
00511
00512 fout << std::endl;
00513 fout << " // initialise matrix of eigenvectors" << std::endl;
00514 for (UInt_t index=0; index<numC; index++) {
00515 for (int i=0; i<fEigenVectors[index]->GetNrows(); i++) {
00516 for (int j=0; j<fEigenVectors[index]->GetNcols(); j++) {
00517 fout << " fEigenVectors_"<<trCounter<<"["<<index<<"]["<<i<<"]["<<j<<"] = " << std::setprecision(12)
00518 << (*fEigenVectors[index])(i,j) << ";" << std::endl;
00519 }
00520 }
00521 }
00522 fout << std::setprecision(dp);
00523 fout << "}" << std::endl;
00524 fout << std::endl;
00525 fout << "//_______________________________________________________________________" << std::endl;
00526 fout << "inline void " << fcncName << "::Transform_"<<trCounter<<"( std::vector<double>& iv, int cls ) const" << std::endl;
00527 fout << "{" << std::endl;
00528 fout << " const int nvar = " << nvar << ";" << std::endl;
00529 fout << " double *dv = new double[nvar];" << std::endl;
00530 fout << " double *rv = new double[nvar];" << std::endl;
00531 fout << " if (cls < 0 || cls > "<<GetNClasses()<<") {"<< std::endl;
00532 fout << " if ("<<GetNClasses()<<" > 1 ) cls = "<<GetNClasses()<<";"<< std::endl;
00533 fout << " else cls = "<<(numC==1?0:2)<<";"<< std::endl;
00534 fout << " }"<< std::endl;
00535 fout << " for (int ivar=0; ivar<nvar; ivar++) dv[ivar] = iv[ivar];" << std::endl;
00536 fout << std::endl;
00537 fout << " // Perform PCA and put it into PCAed events tree" << std::endl;
00538 fout << " this->X2P_"<<trCounter<<"( dv, rv, cls );" << std::endl;
00539 fout << " for (int ivar=0; ivar<nvar; ivar++) iv[ivar] = rv[ivar];" << std::endl;
00540 fout << std::endl;
00541 fout << " delete [] dv;" << std::endl;
00542 fout << " delete [] rv;" << std::endl;
00543 fout << "}" << std::endl;
00544 }
00545 }