ROOT_528-00b_version: tmva/src/VariableGaussTransform.cxx Source File

00001 // @(#)root/tmva $Id: VariableGaussTransform.cxx 37986 2011-02-04 21:42:15Z pcanal $
00002 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Eckhard v. Toerne
00003 
00004 /**********************************************************************************
00005  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
00006  * Package: TMVA                                                                  *
00007  * Class  : VariableGaussTransform                                                *
00008  * Web    : http://tmva.sourceforge.net                                           *
00009  *                                                                                *
00010  * Description:                                                                   *
00011  *      Implementation (see header for description)                               *
00012  *                                                                                *
00013  * Authors (alphabetical):                                                        *
00014  *      Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland              *
00015  *      Joerg Stelzer   <Joerg.Stelzer@cern.ch>  - CERN, Switzerland              *
00016  *      Eckhard v. Toerne     <evt@uni-bonn.de>  - Uni Bonn, Germany              *
00017  *      Helge Voss      <Helge.Voss@cern.ch>     - MPI-K Heidelberg, Germany      *
00018  *                                                                                *
00019  * Copyright (c) 2005:                                                            *
00020  *      CERN, Switzerland                                                         *
00021  *      MPI-K Heidelberg, Germany                                                 *
00022  *                                                                                *
00023  * Redistribution and use in source and binary forms, with or without             *
00024  * modification, are permitted according to the terms listed in LICENSE           *
00025  * (http://tmva.sourceforge.net/LICENSE)                                          *
00026  **********************************************************************************/
00027 
00028 ///////////////////////////////////////////////////////////////////////////
00029 //                                                                       //
00030 // Gaussian Transformation of input variables.                           //
00031 //                                                                       //
00032 ///////////////////////////////////////////////////////////////////////////
00033 
00034 #include <iostream>
00035 #include <iomanip>
00036 #include <list>
00037 #include <limits>
00038 
00039 #include "TVectorF.h"
00040 #include "TVectorD.h"
00041 #include "TMath.h"
00042 #include "TCanvas.h"
00043 
00044 #include "TMVA/VariableGaussTransform.h"
00045 #ifndef ROOT_TMVA_MsgLogger
00046 #include "TMVA/MsgLogger.h"
00047 #endif
00048 #ifndef ROOT_TMVA_Tools
00049 #include "TMVA/Tools.h"
00050 #endif
00051 #ifndef ROOT_TMVA_Version
00052 #include "TMVA/Version.h"
00053 #endif
00054 
00055 ClassImp(TMVA::VariableGaussTransform)
00056 
00057 //_______________________________________________________________________
00058 TMVA::VariableGaussTransform::VariableGaussTransform( DataSetInfo& dsi, TString strcor )
00059    : VariableTransformBase( dsi, Types::kGauss, "Gauss" ),
00060      fFlatNotGauss(kFALSE),
00061      fPdfMinSmooth(0),
00062      fPdfMaxSmooth(0),
00063      fElementsperbin(0)
00064 { 
00065    // constructor
00066    // can only be applied one after the other when they are created. But in order to
00067    // determine the Gauss transformation
00068   if (strcor=="Uniform") {fFlatNotGauss = kTRUE;
00069     SetName("Uniform");
00070   }
00071 }
00072 
00073 //_______________________________________________________________________
00074 TMVA::VariableGaussTransform::~VariableGaussTransform( void )
00075 {
00076    // destructor
00077    CleanUpCumulativeArrays();
00078 }
00079 
00080 //_______________________________________________________________________
00081 void TMVA::VariableGaussTransform::Initialize()
00082 {
00083 
00084 }
00085 
00086 //_______________________________________________________________________
00087 Bool_t TMVA::VariableGaussTransform::PrepareTransformation( const std::vector<Event*>& events )
00088 {
00089    // calculate the cumulative distributions
00090    Initialize();
00091 
00092    if (!IsEnabled() || IsCreated()) return kTRUE;
00093 
00094    Log() << kINFO << "Preparing the Gaussian transformation..." << Endl;
00095 
00096    SetNVariables(events[0]->GetNVariables());
00097 
00098    if (GetNVariables() > 200) { 
00099       Log() << kWARNING << "----------------------------------------------------------------------------" 
00100               << Endl;
00101       Log() << kWARNING 
00102               << ": More than 200 variables, I hope you have enough memory!!!!" << Endl;
00103       Log() << kWARNING << "----------------------------------------------------------------------------" 
00104               << Endl;
00105       //      return kFALSE;
00106    }   
00107 
00108    GetCumulativeDist( events );
00109    
00110    SetCreated( kTRUE );
00111 
00112    return kTRUE;
00113 }
00114 
00115 //_______________________________________________________________________
00116 const TMVA::Event* TMVA::VariableGaussTransform::Transform(const Event* const ev, Int_t cls ) const
00117 {
00118    // apply the Gauss transformation
00119 
00120    if (!IsCreated()) Log() << kFATAL << "Transformation not yet created" << Endl;
00121    //EVT this is a workaround to address the reader problem with transforma and EvaluateMVA(std::vector<float/double> ,...) 
00122    //EVT if (cls <0 || cls > GetNClasses() ) {
00123    //EVT   cls = GetNClasses();
00124    //EVT   if (GetNClasses() == 1 ) cls = (fCumulativePDF[0].size()==1?0:2);
00125    //EVT}
00126    if (cls <0 || cls >=  (int) fCumulativePDF[0].size()) cls = fCumulativePDF[0].size()-1;
00127    //EVT workaround end
00128  
00129   // get the variable vector of the current event
00130    const UInt_t nvar = GetNVariables();
00131    TVectorD vec( nvar );
00132    for (UInt_t ivar=0; ivar<nvar; ivar++) vec(ivar) = ev->GetValue(ivar);
00133    Double_t cumulant;
00134    //transformation   
00135    for (UInt_t ivar=0; ivar<nvar; ivar++) {
00136       if (0 != fCumulativePDF[ivar][cls]) { 
00137          // first make it flat
00138          if(fTMVAVersion>TMVA_VERSION(3,9,7))
00139             cumulant = (fCumulativePDF[ivar][cls])->GetVal(vec(ivar)); 
00140          else
00141             cumulant = OldCumulant(vec(ivar), fCumulativePDF[ivar][cls]->GetOriginalHist() );
00142          cumulant = TMath::Min(cumulant,1.-10e-10);
00143          cumulant = TMath::Max(cumulant,0.+10e-10);
00144 
00145          if (fFlatNotGauss)
00146             vec(ivar) = cumulant; 
00147          else {
00148             // sanity correction for out-of-range values
00149             Double_t maxErfInvArgRange = 0.99999999;
00150             Double_t arg = 2.0*cumulant - 1.0;
00151             arg = TMath::Min(+maxErfInvArgRange,arg);
00152             arg = TMath::Max(-maxErfInvArgRange,arg);
00153             
00154             vec(ivar) = 1.414213562*TMath::ErfInverse(arg);
00155          }
00156       }
00157    }
00158    
00159    if (fTransformedEvent==0 || fTransformedEvent->GetNVariables()!=ev->GetNVariables()) {
00160       if (fTransformedEvent!=0) { delete fTransformedEvent; fTransformedEvent = 0; }
00161       fTransformedEvent = new Event();
00162    }
00163 
00164    for (UInt_t itgt = 0; itgt < ev->GetNTargets(); itgt++) fTransformedEvent->SetTarget( itgt, ev->GetTarget(itgt) );
00165    for (UInt_t ivar=0; ivar<nvar; ivar++)                  fTransformedEvent->SetVal   ( ivar, vec(ivar) );
00166 
00167    fTransformedEvent->SetWeight     ( ev->GetWeight() );
00168    fTransformedEvent->SetBoostWeight( ev->GetBoostWeight() );
00169    fTransformedEvent->SetClass      ( ev->GetClass() );
00170 
00171    return fTransformedEvent;
00172 }
00173 
00174 //_______________________________________________________________________
00175 const TMVA::Event* TMVA::VariableGaussTransform::InverseTransform( const Event* const ev, Int_t cls ) const
00176 {
00177    // apply the Gauss transformation
00178    // TODO: implementation of inverse transformation
00179    Log() << kFATAL << "Inverse transformation for Gauss transformation not yet implemented. Hence, this transformation cannot be applied together with regression. Please contact the authors if necessary." << Endl;
00180 
00181    if (!IsCreated())
00182       Log() << kFATAL << "Transformation not yet created" 
00183               << Endl;
00184 
00185    if (cls <0 || cls >= GetNClasses() ) cls = GetNClasses();
00186    if (GetNClasses() == 1 ) cls = 0;
00187 
00188    // get the variable vector of the current event
00189    const UInt_t nvar = GetNVariables();
00190    TVectorD vec( nvar );
00191    for (UInt_t ivar=0; ivar<nvar; ivar++) vec(ivar) = ev->GetValue(ivar);
00192    for (UInt_t ivar=0; ivar<nvar; ivar++) {
00193       if (0 != fCumulativeDist[ivar][cls]) { 
00194          // first make it flat
00195          Int_t    thebin   = (fCumulativeDist[ivar][cls])->FindBin(vec(ivar));
00196          Double_t cumulant = (fCumulativeDist[ivar][cls])->GetBinContent(thebin);
00197          // now transfor to a gaussian
00198          // actually, the "sqrt(2) is not really necessary, who cares if it is totally normalised
00199          //            vec(ivar) =  TMath::Sqrt(2.)*TMath::ErfInverse(2*sum - 1);
00200          if (fFlatNotGauss) vec(ivar) = cumulant; 
00201          else {
00202             // sanity correction for out-of-range values
00203             Double_t maxErfInvArgRange = 0.99999999;
00204             Double_t arg = 2.0*cumulant - 1.0;
00205             arg = TMath::Min(+maxErfInvArgRange,arg);
00206             arg = TMath::Max(-maxErfInvArgRange,arg);
00207             
00208             vec(ivar) = 1.414213562*TMath::ErfInverse(arg);
00209          }
00210       }
00211    }
00212    
00213    if (fBackTransformedEvent==0 || fBackTransformedEvent->GetNVariables()!=ev->GetNVariables()) {
00214       if (fBackTransformedEvent!=0) { delete fBackTransformedEvent; fBackTransformedEvent = 0; }
00215       fBackTransformedEvent = new Event( *ev );
00216    }
00217 
00218    for (UInt_t itgt = 0; itgt < ev->GetNTargets(); itgt++) fBackTransformedEvent->SetTarget( itgt, ev->GetTarget(itgt) );
00219    for (UInt_t ivar=0; ivar<nvar; ivar++)                  fBackTransformedEvent->SetVal   ( ivar, vec(ivar) );
00220 
00221    fBackTransformedEvent->SetWeight     ( ev->GetWeight() );
00222    fBackTransformedEvent->SetBoostWeight( ev->GetBoostWeight() );
00223    fBackTransformedEvent->SetClass      ( ev->GetClass() );
00224 
00225    return fBackTransformedEvent;
00226 }
00227 
00228 //_______________________________________________________________________
00229 void TMVA::VariableGaussTransform::GetCumulativeDist( const std::vector<Event*>& events )
00230 {
00231    // fill the cumulative distributions
00232 
00233    const UInt_t nvar = GetNVariables();
00234    UInt_t nevt = events.size();
00235    
00236    const UInt_t nClasses = GetNClasses();
00237    UInt_t numDist  = nClasses+1; // calculate cumulative distributions for all "event" classes seperately + one where all classes are treated (added) together
00238       
00239    if (GetNClasses() == 1 ) numDist = nClasses; // for regression, if there is only one class, there is no "sum" of classes, hence 
00240  
00241    UInt_t **nbins = new UInt_t*[numDist];
00242 
00243    std::list< TMVA::TMVAGaussPair >  **listsForBinning = new std::list<TMVA::TMVAGaussPair>* [numDist];
00244    std::vector< Float_t >   **vsForBinning = new std::vector<Float_t>* [numDist];
00245    for (UInt_t i=0; i < numDist; i++) {
00246       listsForBinning[i] = new std::list<TMVA::TMVAGaussPair> [nvar];
00247       vsForBinning[i]    = new std::vector<Float_t> [nvar];
00248       nbins[i] = new UInt_t[nvar];  // nbins[0] = number of bins for signal distributions. It depends on the number of entries, thus it's the same for all the input variables, but it isn't necessary for some "weird" reason.
00249    }
00250 
00251 
00252    // perform event loop
00253    Float_t *sumOfWeights = new Float_t[numDist]; 
00254    Float_t *minWeight = new Float_t[numDist]; 
00255    Float_t *maxWeight = new Float_t[numDist]; 
00256    for (UInt_t i=0; i<numDist; i++) {
00257       sumOfWeights[i]=0;
00258       minWeight[i]=10E10;
00259       maxWeight[i]=0;
00260    }
00261    for (UInt_t ievt=0; ievt < nevt; ievt++) {
00262       const Event* ev= events[ievt];
00263       Int_t cls = ev->GetClass();
00264       sumOfWeights[cls] += ev->GetWeight();
00265       if (minWeight[cls] > ev->GetWeight()) minWeight[cls]=ev->GetWeight();
00266       if (maxWeight[cls] < ev->GetWeight()) maxWeight[cls]=ev->GetWeight();
00267       if (numDist>1) sumOfWeights[numDist-1] += ev->GetWeight();
00268       for (UInt_t ivar=0; ivar<nvar; ivar++) {
00269          listsForBinning[cls][ivar].push_back(TMVA::TMVAGaussPair(ev->GetValue(ivar),ev->GetWeight()));  
00270          if (numDist>1)listsForBinning[numDist-1][ivar].push_back(TMVA::TMVAGaussPair(ev->GetValue(ivar),ev->GetWeight()));  
00271       }  
00272    }
00273    if (numDist > 1) {
00274       for (UInt_t icl=0; icl<numDist-1; icl++){
00275          minWeight[numDist-1] = TMath::Min(minWeight[icl],minWeight[numDist-1]);
00276          maxWeight[numDist-1] = TMath::Max(maxWeight[icl],maxWeight[numDist-1]);
00277       }
00278    }
00279 
00280    // Sorting the lists, getting nbins ...
00281    const UInt_t nevmin=10;  // minimum number of events per bin (to make sure we get reasonable distributions)
00282    const UInt_t nbinsmax=2000; // maximum number of bins
00283 
00284    for (UInt_t icl=0; icl< numDist; icl++){
00285       for (UInt_t ivar=0; ivar<nvar; ivar++) {
00286          listsForBinning[icl][ivar].sort();  
00287          std::list< TMVA::TMVAGaussPair >::iterator it;
00288          Float_t sumPerBin = sumOfWeights[icl]/nbinsmax;
00289          sumPerBin=TMath::Max(minWeight[icl]*nevmin,sumPerBin);
00290          Float_t sum=0;
00291          Float_t ev_value=listsForBinning[icl][ivar].begin()->GetValue();
00292          Float_t lastev_value=ev_value;
00293          const Float_t eps = 1.e-4;
00294          vsForBinning[icl][ivar].push_back(ev_value-eps);
00295          vsForBinning[icl][ivar].push_back(ev_value);
00296 
00297          for (it=listsForBinning[icl][ivar].begin(); it != listsForBinning[icl][ivar].end(); it++){
00298             sum+= it->GetWeight();
00299             if (sum >= sumPerBin) {
00300                ev_value=it->GetValue();
00301                if (ev_value>lastev_value) {   // protection against bin width of 0
00302                   vsForBinning[icl][ivar].push_back(ev_value);
00303                   sum = 0.;
00304                   lastev_value=ev_value;
00305                }
00306             }
00307          }
00308          if (sum!=0) vsForBinning[icl][ivar].push_back(listsForBinning[icl][ivar].back().GetValue()); 
00309          nbins[icl][ivar] = vsForBinning[icl][ivar].size();
00310       }
00311    }
00312 
00313    delete[] sumOfWeights;
00314    delete[] minWeight;
00315    delete[] maxWeight;
00316 
00317    // create histogram for the cumulative distribution.
00318    fCumulativeDist.resize(nvar);
00319    for (UInt_t icls = 0; icls < numDist; icls++) {
00320       for (UInt_t ivar=0; ivar < nvar; ivar++){
00321          Float_t* binnings = new Float_t[nbins[icls][ivar]];
00322          //the binning for this particular histogram:
00323          for (UInt_t k =0 ; k < nbins[icls][ivar]; k++){
00324             binnings[k] = vsForBinning[icls][ivar][k];
00325          }
00326          fCumulativeDist[ivar].resize(numDist);         
00327          if (0 != fCumulativeDist[ivar][icls] ) {
00328             delete fCumulativeDist[ivar][icls]; 
00329          }
00330          fCumulativeDist[ivar][icls] = new TH1F(Form("Cumulative_Var%d_cls%d",ivar,icls),
00331                                                 Form("Cumulative_Var%d_cls%d",ivar,icls),
00332                                                 nbins[icls][ivar] -1, // class icls
00333                                                 binnings);
00334          fCumulativeDist[ivar][icls]->SetDirectory(0);
00335          delete [] binnings;
00336       }
00337    }
00338    
00339    // Deallocation
00340    for (UInt_t i=0; i<numDist; i++) {
00341       delete [] listsForBinning[numDist-i-1];
00342       delete [] vsForBinning[numDist-i-1];
00343       delete [] nbins[numDist-i-1];
00344    }
00345    delete [] listsForBinning;
00346    delete [] vsForBinning;
00347    delete [] nbins;
00348 
00349    // perform event loop
00350    std::vector<Int_t> ic(numDist);
00351    for (UInt_t ievt=0; ievt<nevt; ievt++) {
00352       
00353       const Event* ev= events[ievt];
00354       Int_t cls = ev->GetClass();
00355       
00356       for (UInt_t ivar=0; ivar<nvar; ivar++) {
00357          fCumulativeDist[ivar][cls]->Fill(ev->GetValue(ivar),ev->GetWeight());;               
00358          if (numDist>1) fCumulativeDist[ivar][numDist-1]->Fill(ev->GetValue(ivar),ev->GetWeight());;               
00359       }
00360    }         
00361    
00362    // clean up 
00363    CleanUpCumulativeArrays("PDF");
00364 
00365    // now sum up in order to get the real cumulative distribution   
00366    Double_t  sum = 0, total=0;
00367    for (UInt_t ivar=0; ivar<nvar; ivar++) {
00368       fCumulativePDF.resize(ivar+1);
00369       for (UInt_t icls=0; icls<numDist; icls++) {      
00370          (fCumulativeDist[ivar][icls])->Smooth(); 
00371          sum = 0;
00372          total = 0.;
00373          for (Int_t ibin=1; ibin <=fCumulativeDist[ivar][icls]->GetNbinsX() ; ibin++){
00374             Float_t val = (fCumulativeDist[ivar][icls])->GetBinContent(ibin);
00375             if (val>0) total += val;
00376          }
00377          for (Int_t ibin=1; ibin <=fCumulativeDist[ivar][icls]->GetNbinsX() ; ibin++){
00378             Float_t val = (fCumulativeDist[ivar][icls])->GetBinContent(ibin);
00379             if (val>0) sum += val;
00380             (fCumulativeDist[ivar][icls])->SetBinContent(ibin,sum/total);
00381          }
00382          // create PDf
00383          fCumulativePDF[ivar].push_back(new PDF( Form("GaussTransform var%d cls%d",ivar,icls),  fCumulativeDist[ivar][icls], PDF::kSpline1, fPdfMinSmooth, fPdfMaxSmooth,kFALSE,kFALSE));
00384       }
00385    }
00386 }
00387 
00388 //_______________________________________________________________________
00389 void TMVA::VariableGaussTransform::WriteTransformationToStream( std::ostream& ) const
00390 {
00391    Log() << kFATAL << "VariableGaussTransform::WriteTransformationToStream is obsolete" << Endl; 
00392 }
00393 
00394 //_______________________________________________________________________
00395 void TMVA::VariableGaussTransform::CleanUpCumulativeArrays(TString opt) {
00396    // clean up of cumulative arrays
00397    if (opt == "ALL" || opt == "PDF"){ 
00398       for (UInt_t ivar=0; ivar<fCumulativePDF.size(); ivar++) {
00399          for (UInt_t icls=0; icls<fCumulativePDF[ivar].size(); icls++) { 
00400             if (0 != fCumulativePDF[ivar][icls]) delete fCumulativePDF[ivar][icls];
00401          }
00402       }
00403       fCumulativePDF.clear();
00404    }
00405    if (opt == "ALL" || opt == "Dist"){ 
00406       for (UInt_t ivar=0; ivar<fCumulativeDist.size(); ivar++) {
00407          for (UInt_t icls=0; icls<fCumulativeDist[ivar].size(); icls++) { 
00408             if (0 != fCumulativeDist[ivar][icls]) delete fCumulativeDist[ivar][icls];
00409          }
00410       }
00411       fCumulativeDist.clear();
00412    }
00413 }
00414 //_______________________________________________________________________
00415 void TMVA::VariableGaussTransform::AttachXMLTo(void* parent) {
00416    // create XML description of Gauss transformation
00417    void* trfxml = gTools().AddChild(parent, "Transform");
00418    gTools().AddAttr(trfxml, "Name",        "Gauss");
00419    gTools().AddAttr(trfxml, "FlatOrGauss", (fFlatNotGauss?"Flat":"Gauss") );
00420 
00421    for (UInt_t ivar=0; ivar<GetNVariables(); ivar++) {
00422       void* varxml = gTools().AddChild( trfxml, "Variable");
00423       gTools().AddAttr( varxml, "Name",     Variables()[ivar].GetLabel() );
00424       gTools().AddAttr( varxml, "VarIndex", ivar );
00425          
00426       if ( fCumulativePDF[ivar][0]==0 || fCumulativePDF[ivar][1]==0 )
00427          Log() << kFATAL << "Cumulative histograms for variable " << ivar << " don't exist, can't write it to weight file" << Endl;
00428       
00429       for (UInt_t icls=0; icls<fCumulativePDF[ivar].size(); icls++){
00430          void* pdfxml = gTools().AddChild( varxml, Form("CumulativePDF_cls%d",icls));
00431          (fCumulativePDF[ivar][icls])->AddXMLTo(pdfxml);
00432       }
00433    }
00434 }
00435 
00436 //_______________________________________________________________________
00437 void TMVA::VariableGaussTransform::ReadFromXML( void* trfnode ) {
00438    // Read the transformation matrices from the xml node
00439 
00440    // clean up first
00441    CleanUpCumulativeArrays();
00442    TString FlatOrGauss;
00443    gTools().ReadAttr(trfnode, "FlatOrGauss", FlatOrGauss );
00444    if (FlatOrGauss == "Flat") fFlatNotGauss = kTRUE;
00445    else                       fFlatNotGauss = kFALSE;
00446 
00447    // Read the cumulative distribution
00448    void* varnode = gTools().GetChild( trfnode );
00449 
00450    TString varname, histname, classname;
00451    UInt_t ivar;
00452    while(varnode) {
00453       gTools().ReadAttr(varnode, "Name", varname);
00454       gTools().ReadAttr(varnode, "VarIndex", ivar);
00455       
00456       void* clsnode = gTools().GetChild( varnode);
00457 
00458       while(clsnode) {
00459          void* pdfnode = gTools().GetChild( clsnode);
00460          PDF* pdfToRead = new PDF(TString("tempName"),kFALSE);
00461          pdfToRead->ReadXML(pdfnode); // pdfnode
00462          // push_back PDF
00463          fCumulativePDF.resize( ivar+1 );
00464          fCumulativePDF[ivar].push_back(pdfToRead);
00465          clsnode = gTools().GetNextChild(clsnode);
00466       }
00467       
00468       varnode = gTools().GetNextChild(varnode);    
00469    }
00470    SetCreated();
00471 }
00472 
00473 //_______________________________________________________________________
00474 void TMVA::VariableGaussTransform::ReadTransformationFromStream( std::istream& istr, const TString& classname)
00475 {
00476    // Read the cumulative distribution
00477    Bool_t addDirStatus = TH1::AddDirectoryStatus();
00478    TH1::AddDirectory(0); // this avoids the binding of the hists in TMVA::PDF to the current ROOT file
00479    char buf[512];
00480    istr.getline(buf,512);
00481 
00482    TString strvar, dummy;
00483 
00484    while (!(buf[0]=='#'&& buf[1]=='#')) { // if line starts with ## return
00485       char* p = buf;
00486       while (*p==' ' || *p=='\t') p++; // 'remove' leading whitespace
00487       if (*p=='#' || *p=='\0') {
00488          istr.getline(buf,512);
00489          continue; // if comment or empty line, read the next line
00490       }
00491       std::stringstream sstr(buf);
00492       sstr >> strvar;
00493       
00494       if (strvar=="CumulativeHistogram") {
00495          UInt_t  type(0), ivar(0);
00496          TString devnullS(""),hname("");
00497          Int_t   nbins(0);
00498 
00499          // coverity[tainted_data_argument]
00500          sstr  >> type >> ivar >> hname >> nbins >> fElementsperbin;
00501 
00502          Float_t *Binnings = new Float_t[nbins+1];
00503          Float_t val;
00504          istr >> devnullS; // read the line "BinBoundaries" ..
00505          for (Int_t ibin=0; ibin<nbins+1; ibin++) {
00506             istr >> val;
00507             Binnings[ibin]=val;
00508          }
00509 
00510          if(ivar>=fCumulativeDist.size()) fCumulativeDist.resize(ivar+1);
00511          if(type>=fCumulativeDist[ivar].size()) fCumulativeDist[ivar].resize(type+1);
00512 
00513          TH1F * histToRead = fCumulativeDist[ivar][type];
00514          if ( histToRead !=0 ) delete histToRead;
00515          // recreate the cumulative histogram to be filled with the values read
00516          histToRead = new TH1F( hname, hname, nbins, Binnings );
00517          histToRead->SetDirectory(0);
00518          fCumulativeDist[ivar][type]=histToRead;
00519 
00520          istr >> devnullS; // read the line "BinContent" .. 
00521          for (Int_t ibin=0; ibin<nbins; ibin++) {
00522             istr >> val;
00523             histToRead->SetBinContent(ibin+1,val);
00524          }
00525 
00526          PDF* pdf = new PDF(hname,histToRead,PDF::kSpline0, 0, 0, kFALSE, kFALSE);
00527          // push_back PDF
00528          fCumulativePDF.resize(ivar+1);
00529          fCumulativePDF[ivar].resize(type+1);
00530          fCumulativePDF[ivar][type] = pdf;
00531          delete [] Binnings;
00532       }
00533 
00534       //      if (strvar=="TransformToFlatInsetadOfGauss=") { // don't correct this spelling mistake
00535       if (strvar=="Uniform") { // don't correct this spelling mistake
00536          sstr >> fFlatNotGauss;
00537          istr.getline(buf,512);
00538          break;
00539       }
00540 
00541       istr.getline(buf,512); // reading the next line   
00542    }
00543    TH1::AddDirectory(addDirStatus);
00544 
00545    UInt_t classIdx=(classname=="signal")?0:1;
00546    for(UInt_t ivar=0; ivar<fCumulativePDF.size(); ++ivar) {
00547       PDF* src = fCumulativePDF[ivar][classIdx];
00548       fCumulativePDF[ivar].push_back(new PDF(src->GetName(),fCumulativeDist[ivar][classIdx],PDF::kSpline0, 0, 0, kFALSE, kFALSE) );
00549    }
00550    
00551    SetTMVAVersion(TMVA_VERSION(3,9,7));
00552 
00553    SetCreated();
00554 }
00555 
00556 Double_t TMVA::VariableGaussTransform::OldCumulant(Float_t x, TH1* h ) const {
00557 
00558    Int_t bin = h->FindBin(x);
00559    bin = TMath::Max(bin,1);
00560    bin = TMath::Min(bin,h->GetNbinsX());
00561 
00562    Double_t cumulant;
00563    Double_t x0, x1, y0, y1;
00564    Double_t total = h->GetNbinsX()*fElementsperbin;
00565    Double_t supmin = 0.5/total;
00566    
00567    x0 = h->GetBinLowEdge(TMath::Max(bin,1)); 
00568    x1 = h->GetBinLowEdge(TMath::Min(bin,h->GetNbinsX())+1); 
00569 
00570    y0 = h->GetBinContent(TMath::Max(bin-1,0)); // Y0 = F(x0); Y0 >= 0
00571    y1 = h->GetBinContent(TMath::Min(bin, h->GetNbinsX()+1));  // Y1 = F(x1);  Y1 <= 1
00572 
00573    if (bin == 0) {
00574       y0 = supmin;
00575       y1 = supmin;
00576    }
00577    if (bin == 1) {
00578       y0 = supmin;
00579    }
00580    if (bin > h->GetNbinsX()) { 
00581       y0 = 1.-supmin;
00582       y1 = 1.-supmin; 
00583    }
00584    if (bin == h->GetNbinsX()) { 
00585       y1 = 1.-supmin; 
00586    }
00587 
00588    if (x0 == x1) { 
00589       cumulant = y1;
00590    } else {
00591       cumulant = y0 + (y1-y0)*(x-x0)/(x1-x0);
00592    }
00593 
00594    if (x <= h->GetBinLowEdge(1)){
00595       cumulant = supmin;
00596    }
00597    if (x >= h->GetBinLowEdge(h->GetNbinsX()+1)){
00598       cumulant = 1-supmin;
00599    }
00600    return cumulant;
00601 }
00602 
00603 
00604 
00605 //_______________________________________________________________________
00606 void TMVA::VariableGaussTransform::PrintTransformation( ostream& ) 
00607 {
00608    // prints the transformation 
00609    Int_t cls = 0;
00610    Log() << kINFO << "I do not know yet how to print this... look in the weight file " << cls << ":" << Endl;
00611    cls++;
00612 }
00613 
00614 //_______________________________________________________________________
00615 void TMVA::VariableGaussTransform::MakeFunction( std::ostream& fout, const TString& fcncName, 
00616                                                  Int_t part, UInt_t trCounter, Int_t ) 
00617 {
00618    // creates the transformation function
00619    //
00620    const UInt_t nvar = GetNVariables();
00621    UInt_t numDist  = GetNClasses() + 1;
00622    Int_t nBins = 1000; 
00623    // creates the gauss transformation function
00624    if (part==1) {
00625       fout << std::endl;
00626       // declare variables
00627       fout << "   double  cumulativeDist["<<nvar<<"]["<<numDist<<"]["<<nBins+1<<"];"<<std::endl;
00628       fout << "   double xMin["<<nvar<<"]["<<numDist<<"];"<<std::endl;
00629       fout << "   double xMax["<<nvar<<"]["<<numDist<<"];"<<std::endl;
00630    }
00631    if (part==2) {
00632       fout << std::endl;
00633       fout << "#include \"math.h\"" << std::endl;
00634       fout << std::endl;
00635       fout << "//_______________________________________________________________________" << std::endl;
00636       fout << "inline void " << fcncName << "::InitTransform_"<<trCounter<<"()" << std::endl;
00637       fout << "{" << std::endl;
00638       // fill meat here
00639       // loop over nvar , cls, loop over nBins
00640       // fill cumulativeDist with fCumulativePDF[ivar][cls])->GetValue(vec(ivar)
00641       for (UInt_t icls=0; icls<numDist; icls++) {
00642          for (UInt_t ivar=0; ivar<nvar; ivar++) {
00643             Double_t xmn=(fCumulativePDF[ivar][icls])->GetXmin();
00644             Double_t xmx=(fCumulativePDF[ivar][icls])->GetXmax();
00645             fout << "    xMin["<<ivar<<"]["<<icls<<"]="<<xmn<<";"<<std::endl;
00646             fout << "    xMax["<<ivar<<"]["<<icls<<"]="<<xmx<<";"<<std::endl;
00647             for (Int_t ibin=0; ibin<=nBins; ibin++) {
00648                Double_t xval = xmn + (xmx-xmn) / nBins * (ibin+0.5);
00649                // ibin = (xval -xmin) / (xmax-xmin) *1000 
00650                fout << "  cumulativeDist[" << ivar << "]["<< icls<< "]["<<ibin<<"]="<< (fCumulativePDF[ivar][icls])->GetVal(xval)<< ";"<<std::endl;
00651             }
00652          }
00653       }
00654       fout << "}" << std::endl;
00655       fout << std::endl;
00656       fout << "//_______________________________________________________________________" << std::endl;
00657       fout << "inline void " << fcncName << "::Transform_"<<trCounter<<"( std::vector<double>& iv, int cls) const" << std::endl;
00658       fout << "{" << std::endl;
00659       fout << "   if (cls < 0 || cls > "<<GetNClasses()<<") {"<< std::endl;
00660       fout << "       if ("<<GetNClasses()<<" > 1 ) cls = "<<GetNClasses()<<";"<< std::endl;
00661       fout << "       else cls = "<<(fCumulativePDF[0].size()==1?0:2)<<";"<< std::endl;
00662       fout << "   }"<< std::endl;
00663       
00664       fout << "   bool FlatNotGauss = "<< (fFlatNotGauss? "true": "false") <<";"<< std::endl;
00665       fout << "   double cumulant;"<< std::endl;
00666       fout << "   const int nvar = "<<GetNVariables()<<";"<< std::endl;
00667       fout << "   for (int ivar=0; ivar<nvar; ivar++) {"<< std::endl;
00668       // ibin = (xval -xmin) / (xmax-xmin) *1000 
00669       fout << "   int ibin1 = (int) ((iv[ivar]-xMin[ivar][cls])/(xMax[ivar][cls]-xMin[ivar][cls])*"<<nBins<<");"<<std::endl;
00670       fout << "   if (ibin1<=0) { cumulant = cumulativeDist[ivar][cls][0];}"<<std::endl;
00671       fout << "   else if (ibin1>="<<nBins<<") { cumulant = cumulativeDist[ivar][cls]["<<nBins<<"];}"<<std::endl;
00672       fout << "   else {"<<std::endl;
00673       fout << "           int ibin2 = ibin1+1;" << std::endl;
00674       fout << "           double dx = iv[ivar]-(xMin[ivar][cls]+"<< (1./nBins)
00675            << "           * ibin1* (xMax[ivar][cls]-xMin[ivar][cls]));"  
00676            << std::endl;
00677       fout << "           double eps=dx/(xMax[ivar][cls]-xMin[ivar][cls])*"<<nBins<<";"<<std::endl;
00678       fout << "           cumulant = eps*cumulativeDist[ivar][cls][ibin1] + (1-eps)*cumulativeDist[ivar][cls][ibin2];" << std::endl;
00679       fout << "           if (cumulant>1.-10e-10) cumulant = 1.-10e-10;"<< std::endl;
00680       fout << "           if (cumulant<10e-10)    cumulant = 10e-10;"<< std::endl;
00681       fout << "           if (FlatNotGauss) iv[ivar] = cumulant;"<< std::endl;
00682       fout << "           else {"<< std::endl;
00683       fout << "              double maxErfInvArgRange = 0.99999999;"<< std::endl;
00684       fout << "              double arg = 2.0*cumulant - 1.0;"<< std::endl;
00685       fout << "              if (arg >  maxErfInvArgRange) arg= maxErfInvArgRange;"<< std::endl;
00686       fout << "              if (arg < -maxErfInvArgRange) arg=-maxErfInvArgRange;"<< std::endl;
00687       fout << "              double inverf=0., stp=1. ;"<<std::endl;
00688       fout << "              while (stp >1.e-10){;"<<std::endl;
00689       fout << "                if (erf(inverf)>arg) inverf -=stp ;"<<std::endl;
00690       fout << "                else if (erf(inverf)<=arg && erf(inverf+stp)>=arg) stp=stp/5. ;"<<std::endl;
00691       fout << "                else inverf += stp;"<<std::endl;
00692       fout << "              } ;"<<std::endl;
00693       fout << "              //iv[ivar] = 1.414213562*TMath::ErfInverse(arg);"<< std::endl;
00694       fout << "              iv[ivar] = 1.414213562* inverf;"<< std::endl;
00695       fout << "           }"<< std::endl;
00696       fout << "       }"<< std::endl;
00697       fout << "   }"<< std::endl;
00698       fout << "}" << std::endl;
00699    }
00700 }