MethodFDA.cxx

Go to the documentation of this file.
00001 // @(#)root/tmva $Id: MethodFDA.cxx 36966 2010-11-26 09:50:13Z evt $
00002 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer
00003 
00004 /**********************************************************************************
00005  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
00006  * Package: TMVA                                                                  *
00007  * Class  : MethodFDA                                                             *
00008  * Web    : http://tmva.sourceforge.net                                           *
00009  *                                                                                *
00010  * Description:                                                                   *
00011  *      Implementation                                                            *
00012  *                                                                                *
00013  * Authors (alphabetical):                                                        *
00014  *      Andreas Hoecker  <Andreas.Hocker@cern.ch> - CERN, Switzerland             *
00015  *      Peter Speckmayer <speckmay@mail.cern.ch>  - CERN, Switzerland             *
00016  *      Joerg Stelzer    <stelzer@cern.ch>        - DESY, Germany                 *
00017  *      Maciej Kruk      <mkruk@cern.ch>          - IFJ PAN & AGH, Poland         *
00018  *                                                                                *
00019  * Copyright (c) 2005-2006:                                                       *
00020  *      CERN, Switzerland                                                         *
00021  *      MPI-K Heidelberg, Germany                                                 *
00022  *                                                                                *
00023  * Redistribution and use in source and binary forms, with or without             *
00024  * modification, are permitted according to the terms listed in LICENSE           *
00025  * (http://tmva.sourceforge.net/LICENSE)                                          *
00026  **********************************************************************************/
00027 
00028 //_______________________________________________________________________
00029 //
00030 // Function discriminant analysis (FDA). This simple classifier         //
00031 // fits any user-defined TFormula (via option configuration string) to  //
00032 // the training data by requiring a formula response of 1 (0) to signal //
00033 // (background) events. The parameter fitting is done via the abstract  //
00034 // class FitterBase, featuring Monte Carlo sampling, Genetic            //
00035 // Algorithm, Simulated Annealing, MINUIT and combinations of these.    //
00036 //                                                                      //
00037 // Can compute regression value for one dimensional output              //
00038 //_______________________________________________________________________
00039 
00040 #include "Riostream.h"
00041 #include "TList.h"
00042 #include "TFormula.h"
00043 #include "TString.h"
00044 #include "TObjString.h"
00045 #include "TRandom3.h"
00046 #include "TMath.h"
00047 #include <sstream>
00048 
00049 #include <algorithm>
00050 #include <iterator>
00051 #include <stdexcept>
00052 
00053 #include "TMVA/ClassifierFactory.h"
00054 #include "TMVA/MethodFDA.h"
00055 #include "TMVA/Tools.h"
00056 #include "TMVA/Interval.h"
00057 #include "TMVA/Timer.h"
00058 #include "TMVA/GeneticFitter.h"
00059 #include "TMVA/SimulatedAnnealingFitter.h"
00060 #include "TMVA/MinuitFitter.h"
00061 #include "TMVA/MCFitter.h"
00062 #include "TMVA/Config.h"
00063 
00064 REGISTER_METHOD(FDA)
00065 
00066 ClassImp(TMVA::MethodFDA)
00067 
00068 //_______________________________________________________________________
00069 TMVA::MethodFDA::MethodFDA( const TString& jobName,
00070                             const TString& methodTitle,
00071                             DataSetInfo& theData,
00072                             const TString& theOption,
00073                             TDirectory* theTargetDir )
00074    : MethodBase( jobName, Types::kFDA, methodTitle, theData, theOption, theTargetDir ),
00075      IFitterTarget   (),
00076      fFormula        ( 0 ),
00077      fNPars          ( 0 ),
00078      fFitter         ( 0 ),
00079      fConvergerFitter( 0 ),
00080      fSumOfWeightsSig( 0 ),
00081      fSumOfWeightsBkg( 0 ),
00082      fSumOfWeights   ( 0 ),
00083      fOutputDimensions( 0 )
00084 {
00085    // standard constructor
00086 }
00087 
00088 //_______________________________________________________________________
00089 TMVA::MethodFDA::MethodFDA( DataSetInfo& theData,
00090                             const TString& theWeightFile,
00091                             TDirectory* theTargetDir )
00092    : MethodBase( Types::kFDA, theData, theWeightFile, theTargetDir ),
00093      IFitterTarget   (),
00094      fFormula        ( 0 ),
00095      fNPars          ( 0 ),
00096      fFitter         ( 0 ),
00097      fConvergerFitter( 0 ),
00098      fSumOfWeightsSig( 0 ),
00099      fSumOfWeightsBkg( 0 ),
00100      fSumOfWeights   ( 0 ),
00101      fOutputDimensions( 0 )
00102 {
00103    // constructor from weight file
00104 }
00105 
00106 //_______________________________________________________________________
00107 void TMVA::MethodFDA::Init( void )
00108 {
00109    // default initialisation
00110    fNPars    = 0;
00111 
00112    fBestPars.clear();
00113 
00114    fSumOfWeights    = 0;
00115    fSumOfWeightsSig = 0;
00116    fSumOfWeightsBkg = 0;
00117 
00118    fFormulaStringP  = "";
00119    fParRangeStringP = "";
00120    fFormulaStringT  = "";
00121    fParRangeStringT = "";
00122 
00123    fFitMethod       = "";
00124    fConverger       = "";
00125 
00126    if( DoMulticlass() )
00127       if (fMulticlassReturnVal == NULL) fMulticlassReturnVal = new std::vector<Float_t>();
00128 
00129 }
00130 
00131 //_______________________________________________________________________
00132 void TMVA::MethodFDA::DeclareOptions()
00133 {
00134    // define the options (their key words) that can be set in the option string
00135    //
00136    // format of function string:
00137    //    "x0*(0)+((1)/x1)**(2)..."
00138    // where "[i]" are the parameters, and "xi" the input variables
00139    //
00140    // format of parameter string:
00141    //    "(-1.2,3.4);(-2.3,4.55);..."
00142    // where the numbers in "(a,b)" correspond to the a=min, b=max parameter ranges;
00143    // each parameter defined in the function string must have a corresponding range
00144    //
00145    DeclareOptionRef( fFormulaStringP  = "(0)", "Formula",   "The discrimination formula" );
00146    DeclareOptionRef( fParRangeStringP = "()", "ParRanges", "Parameter ranges" );
00147 
00148    // fitter
00149    DeclareOptionRef( fFitMethod = "MINUIT", "FitMethod", "Optimisation Method");
00150    AddPreDefVal(TString("MC"));
00151    AddPreDefVal(TString("GA"));
00152    AddPreDefVal(TString("SA"));
00153    AddPreDefVal(TString("MINUIT"));
00154 
00155    DeclareOptionRef( fConverger = "None", "Converger", "FitMethod uses Converger to improve result");
00156    AddPreDefVal(TString("None"));
00157    AddPreDefVal(TString("MINUIT"));
00158 }
00159 
00160 //_______________________________________________________________________
00161 void TMVA::MethodFDA::CreateFormula()
00162 {
00163    // translate formula string into TFormula, and parameter string into par ranges
00164 
00165    // process transient strings
00166    fFormulaStringT  = fFormulaStringP;
00167 
00168    // intepret formula string
00169 
00170    // replace the parameters "(i)" by the TFormula style "[i]"
00171    for (UInt_t ipar=0; ipar<fNPars; ipar++) {
00172       fFormulaStringT.ReplaceAll( Form("(%i)",ipar), Form("[%i]",ipar) );
00173    }
00174 
00175    // sanity check, there should be no "(i)", with 'i' a number anymore
00176    for (Int_t ipar=fNPars; ipar<1000; ipar++) {
00177       if (fFormulaStringT.Contains( Form("(%i)",ipar) ))
00178          Log() << kFATAL
00179                  << "<CreateFormula> Formula contains expression: \"" << Form("(%i)",ipar) << "\", "
00180                << "which cannot be attributed to a parameter; "
00181                  << "it may be that the number of variable ranges given via \"ParRanges\" "
00182                  << "does not match the number of parameters in the formula expression, please verify!"
00183                  << Endl;
00184    }
00185 
00186    // write the variables "xi" as additional parameters "[npar+i]"
00187    for (Int_t ivar=GetNvar()-1; ivar >= 0; ivar--) {
00188       fFormulaStringT.ReplaceAll( Form("x%i",ivar), Form("[%i]",ivar+fNPars) );
00189    }
00190 
00191    // sanity check, there should be no "xi", with 'i' a number anymore
00192    for (UInt_t ivar=GetNvar(); ivar<1000; ivar++) {
00193       if (fFormulaStringT.Contains( Form("x%i",ivar) ))
00194          Log() << kFATAL
00195                  << "<CreateFormula> Formula contains expression: \"" << Form("x%i",ivar) << "\", "
00196                  << "which cannot be attributed to an input variable" << Endl;
00197    }
00198 
00199    Log() << "User-defined formula string       : \"" << fFormulaStringP << "\"" << Endl;
00200    Log() << "TFormula-compatible formula string: \"" << fFormulaStringT << "\"" << Endl;
00201    Log() << "Creating and compiling formula" << Endl;
00202 
00203    // create TF1
00204    if (fFormula) delete fFormula;
00205    fFormula = new TFormula( "FDA_Formula", fFormulaStringT );
00206 
00207 #if ROOT_VERSION_CODE >= ROOT_VERSION(5,2,0)
00208    fFormula->Optimize();
00209 #endif
00210 
00211    // is formula correct ?
00212    if (fFormula->Compile() != 0)
00213       Log() << kFATAL << "<ProcessOptions> Formula expression could not be properly compiled" << Endl;
00214 
00215    // other sanity checks
00216    if (fFormula->GetNpar() > (Int_t)(fNPars + GetNvar()))
00217       Log() << kFATAL << "<ProcessOptions> Dubious number of parameters in formula expression: "
00218               << fFormula->GetNpar() << " - compared to maximum allowed: " << fNPars + GetNvar() << Endl;
00219 }
00220 
00221 //_______________________________________________________________________
00222 void TMVA::MethodFDA::ProcessOptions()
00223 {
00224    // the option string is decoded, for availabel options see "DeclareOptions"
00225 
00226    // process transient strings
00227    fParRangeStringT = fParRangeStringP;
00228 
00229    // interpret parameter string
00230    fParRangeStringT.ReplaceAll( " ", "" );
00231    fNPars = fParRangeStringT.CountChar( ')' );
00232 
00233    TList* parList = gTools().ParseFormatLine( fParRangeStringT, ";" );
00234    if ((UInt_t)parList->GetSize() != fNPars) {
00235       Log() << kFATAL << "<ProcessOptions> Mismatch in parameter string: "
00236             << "the number of parameters: " << fNPars << " != ranges defined: "
00237             << parList->GetSize() << "; the format of the \"ParRanges\" string "
00238             << "must be: \"(-1.2,3.4);(-2.3,4.55);...\", "
00239             << "where the numbers in \"(a,b)\" correspond to the a=min, b=max parameter ranges; "
00240             << "each parameter defined in the function string must have a corresponding rang."
00241             << Endl;
00242    }
00243 
00244    fParRange.resize( fNPars );
00245    for (UInt_t ipar=0; ipar<fNPars; ipar++) fParRange[ipar] = 0;
00246 
00247    for (UInt_t ipar=0; ipar<fNPars; ipar++) {
00248       // parse (a,b)
00249       TString str = ((TObjString*)parList->At(ipar))->GetString();
00250       Ssiz_t istr = str.First( ',' );
00251       TString pminS(str(1,istr-1));
00252       TString pmaxS(str(istr+1,str.Length()-2-istr));
00253 
00254       stringstream stmin; Float_t pmin; stmin << pminS.Data(); stmin >> pmin;
00255       stringstream stmax; Float_t pmax; stmax << pmaxS.Data(); stmax >> pmax;
00256 
00257       // sanity check
00258       if (TMath::Abs(pmax-pmin) < 1.e-30) pmax = pmin;
00259       if (pmin > pmax) Log() << kFATAL << "<ProcessOptions> max > min in interval for parameter: ["
00260                                << ipar << "] : [" << pmin  << ", " << pmax << "] " << Endl;
00261 
00262       Log() << kINFO << "Create parameter interval for parameter " << ipar << " : [" << pmin << "," << pmax << "]" << Endl;
00263       fParRange[ipar] = new Interval( pmin, pmax );
00264    }
00265    delete parList;
00266 
00267    // create formula
00268    CreateFormula();
00269 
00270 
00271    // copy parameter ranges for each output dimension ==================
00272    fOutputDimensions = 1;
00273    if( DoRegression() )
00274       fOutputDimensions = DataInfo().GetNTargets();
00275    if( DoMulticlass() )
00276       fOutputDimensions = DataInfo().GetNClasses();
00277 
00278    for( Int_t dim = 1; dim < fOutputDimensions; ++dim ){
00279       for( UInt_t par = 0; par < fNPars; ++par ){
00280          fParRange.push_back( fParRange.at(par) );
00281       }
00282    }
00283    // ====================
00284 
00285    // create minimiser
00286    fConvergerFitter = (IFitterTarget*)this;
00287    if (fConverger == "MINUIT") {
00288       fConvergerFitter = new MinuitFitter( *this, Form("%s_Converger_Minuit", GetName()), fParRange, GetOptions() );
00289       SetOptions(dynamic_cast<Configurable*>(fConvergerFitter)->GetOptions());
00290    }
00291 
00292    if(fFitMethod == "MC")
00293       fFitter = new MCFitter( *fConvergerFitter, Form("%s_Fitter_MC", GetName()), fParRange, GetOptions() );
00294    else if (fFitMethod == "GA")
00295       fFitter = new GeneticFitter( *fConvergerFitter, Form("%s_Fitter_GA", GetName()), fParRange, GetOptions() );
00296    else if (fFitMethod == "SA")
00297       fFitter = new SimulatedAnnealingFitter( *fConvergerFitter, Form("%s_Fitter_SA", GetName()), fParRange, GetOptions() );
00298    else if (fFitMethod == "MINUIT")
00299       fFitter = new MinuitFitter( *fConvergerFitter, Form("%s_Fitter_Minuit", GetName()), fParRange, GetOptions() );
00300    else {
00301       Log() << kFATAL << "<Train> Do not understand fit method:" << fFitMethod << Endl;
00302    }
00303 
00304    fFitter->CheckForUnusedOptions();
00305 }
00306 
00307 //_______________________________________________________________________
00308 TMVA::MethodFDA::~MethodFDA( void )
00309 {
00310    // destructor
00311    ClearAll();
00312 }
00313 
00314 //_______________________________________________________________________
00315 Bool_t TMVA::MethodFDA::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ )
00316 {
00317    // FDA can handle classification with 2 classes and regression with one regression-target
00318    if (type == Types::kClassification && numberClasses == 2) return kTRUE;
00319    if (type == Types::kMulticlass ) return kTRUE;
00320    if (type == Types::kRegression ) return kTRUE;
00321    return kFALSE;
00322 }
00323 
00324 
00325 //_______________________________________________________________________
00326 void TMVA::MethodFDA::ClearAll( void )
00327 {
00328    // delete and clear all class members
00329    
00330    // if there is more than one output dimension, the paramater ranges are the same again (object has been copied).
00331    // hence, ... erase the copied pointers to assure, that they are deleted only once.
00332 //   fParRange.erase( fParRange.begin()+(fNPars), fParRange.end() );
00333    for (UInt_t ipar=0; ipar<fParRange.size() && ipar<fNPars; ipar++) {
00334       if (fParRange[ipar] != 0) { delete fParRange[ipar]; fParRange[ipar] = 0; }
00335    }
00336    fParRange.clear(); 
00337    
00338    if (fFormula  != 0) { delete fFormula; fFormula = 0; }
00339    fBestPars.clear();
00340 }
00341 
00342 //_______________________________________________________________________
00343 void TMVA::MethodFDA::Train( void )
00344 {
00345    // FDA training 
00346 
00347    // cache training events
00348    fSumOfWeights    = 0;
00349    fSumOfWeightsSig = 0;
00350    fSumOfWeightsBkg = 0;
00351 
00352    for (UInt_t ievt=0; ievt<GetNEvents(); ievt++) {
00353 
00354       // read the training event 
00355       const Event* ev = GetEvent(ievt);
00356 
00357       // true event copy
00358       Float_t w  = GetTWeight(ev);
00359 
00360       if (!DoRegression()) {
00361          if (DataInfo().IsSignal(ev)) { fSumOfWeightsSig += w; }
00362          else                { fSumOfWeightsBkg += w; }
00363       }
00364       fSumOfWeights += w;
00365    }
00366 
00367    // sanity check
00368    if (!DoRegression()) {
00369       if (fSumOfWeightsSig <= 0 || fSumOfWeightsBkg <= 0) {
00370          Log() << kFATAL << "<Train> Troubles in sum of weights: " 
00371                  << fSumOfWeightsSig << " (S) : " << fSumOfWeightsBkg << " (B)" << Endl;
00372       }
00373    }
00374    else if (fSumOfWeights <= 0) {
00375       Log() << kFATAL << "<Train> Troubles in sum of weights: " 
00376               << fSumOfWeights << Endl;
00377    }
00378 
00379    // starting values (not used by all fitters)
00380    fBestPars.clear();
00381    for (std::vector<Interval*>::const_iterator parIt = fParRange.begin(); parIt != fParRange.end(); parIt++) {
00382       fBestPars.push_back( (*parIt)->GetMean() );
00383    }
00384 
00385    // execute the fit
00386    Double_t estimator = fFitter->Run( fBestPars );
00387       
00388    // print results
00389    PrintResults( fFitMethod, fBestPars, estimator );
00390 
00391    delete fFitter; fFitter = 0;
00392    if (fConvergerFitter!=0 && fConvergerFitter!=(IFitterTarget*)this) {
00393       delete fConvergerFitter;
00394       fConvergerFitter = 0;
00395    }
00396 }
00397 
00398 //_______________________________________________________________________
00399 void TMVA::MethodFDA::PrintResults( const TString& fitter, std::vector<Double_t>& pars, const Double_t estimator ) const
00400 {
00401    // display fit parameters
00402    // check maximum length of variable name
00403    Log() << kINFO;
00404    Log() << "Results for parameter fit using \"" << fitter << "\" fitter:" << Endl;
00405    vector<TString>  parNames;
00406    for (UInt_t ipar=0; ipar<pars.size(); ipar++) parNames.push_back( Form("Par(%i)",ipar ) );
00407    gTools().FormattedOutput( pars, parNames, "Parameter" , "Fit result", Log(), "%g" );   
00408    Log() << "Discriminator expression: \"" << fFormulaStringP << "\"" << Endl;
00409    Log() << "Value of estimator at minimum: " << estimator << Endl;
00410 }
00411 
00412 
00413 //_______________________________________________________________________
00414 Double_t TMVA::MethodFDA::EstimatorFunction( std::vector<Double_t>& pars )
00415 {
00416    // compute estimator for given parameter set (to be minimised)
00417    //   const Double_t sumOfWeights[]                = { fSumOfWeightsSig, fSumOfWeightsBkg, fSumOfWeights };
00418    const Double_t sumOfWeights[]                = { fSumOfWeightsBkg, fSumOfWeightsSig, fSumOfWeights };
00419    Double_t estimator[]                         = { 0, 0, 0 };
00420 
00421    Double_t result, deviation;
00422    Double_t desired = 0.0;
00423 
00424    // calculate the deviation from the desired value
00425    if( DoRegression() ){
00426       for (UInt_t ievt=0; ievt<GetNEvents(); ievt++) {
00427          // read the training event 
00428          const TMVA::Event* ev = GetEvent(ievt);
00429 
00430          for( Int_t dim = 0; dim < fOutputDimensions; ++dim ){
00431             desired = ev->GetTarget( dim );
00432             result    = InterpretFormula( ev, pars.begin(), pars.end() );
00433             deviation = TMath::Power(result - desired, 2);
00434             estimator[2]  += deviation * ev->GetWeight();
00435          }
00436       }
00437       estimator[2] /= sumOfWeights[2];
00438       // return value is sum over normalised signal and background contributions
00439       return estimator[2];
00440 
00441    }else if( DoMulticlass() ){
00442       for (UInt_t ievt=0; ievt<GetNEvents(); ievt++) {
00443          // read the training event 
00444          const TMVA::Event* ev = GetEvent(ievt);
00445 
00446          CalculateMulticlassValues( ev, pars, *fMulticlassReturnVal );
00447 
00448          Double_t crossEntropy = 0.0;
00449          for( Int_t dim = 0; dim < fOutputDimensions; ++dim ){
00450             Double_t y = fMulticlassReturnVal->at(dim);
00451             Double_t t = (ev->GetClass() == static_cast<UInt_t>(dim) ? 1.0 : 0.0 );
00452             crossEntropy += t*log(y);
00453          }
00454          estimator[2] += ev->GetWeight()*crossEntropy; 
00455       }
00456       estimator[2] /= sumOfWeights[2];
00457       // return value is sum over normalised signal and background contributions
00458       return estimator[2];
00459 
00460    }else{
00461       for (UInt_t ievt=0; ievt<GetNEvents(); ievt++) {
00462          // read the training event 
00463          const TMVA::Event* ev = GetEvent(ievt);
00464 
00465          desired = (DataInfo().IsSignal(ev) ? 1.0 : 0.0);
00466          result    = InterpretFormula( ev, pars.begin(), pars.end() );
00467          deviation = TMath::Power(result - desired, 2);
00468          estimator[Int_t(desired)] += deviation * ev->GetWeight();
00469       }
00470       estimator[0] /= sumOfWeights[0];
00471       estimator[1] /= sumOfWeights[1];
00472       // return value is sum over normalised signal and background contributions
00473       return estimator[0] + estimator[1];
00474    }
00475 }
00476 
00477 //_______________________________________________________________________
00478 Double_t TMVA::MethodFDA::InterpretFormula( const Event* event, std::vector<Double_t>::iterator parBegin, std::vector<Double_t>::iterator parEnd )
00479 {
00480    // formula interpretation
00481    Int_t ipar = 0;
00482 //    std::cout << "pars ";
00483    for( std::vector<Double_t>::iterator it = parBegin; it != parEnd; ++it ){
00484 //       std::cout << " i" << ipar << " val" << (*it);
00485       fFormula->SetParameter( ipar, (*it) );
00486       ++ipar;
00487    }
00488    for (UInt_t ivar=0;  ivar<GetNvar();  ivar++) fFormula->SetParameter( ivar+ipar, event->GetValue(ivar) );
00489 
00490    Double_t result = fFormula->Eval( 0 );
00491 //    std::cout << "  result " << result << std::endl;
00492    return result;
00493 }
00494 
00495 //_______________________________________________________________________
00496 Double_t TMVA::MethodFDA::GetMvaValue( Double_t* err, Double_t* errUpper )
00497 {
00498    // returns MVA value for given event
00499    const Event* ev = GetEvent();
00500 
00501    // cannot determine error
00502    NoErrorCalc(err, errUpper);
00503    
00504    return InterpretFormula( ev, fBestPars.begin(), fBestPars.end() );
00505 }
00506 
00507 //_______________________________________________________________________
00508 const std::vector<Float_t>& TMVA::MethodFDA::GetRegressionValues()
00509 {
00510    if (fRegressionReturnVal == NULL) fRegressionReturnVal = new std::vector<Float_t>();
00511    fRegressionReturnVal->clear();
00512 
00513    const Event* ev = GetEvent();
00514 
00515    Event* evT = new Event(*ev);
00516 
00517    for( Int_t dim = 0; dim < fOutputDimensions; ++dim ){
00518       Int_t offset = dim*fNPars;
00519       evT->SetTarget(dim,InterpretFormula( ev, fBestPars.begin()+offset, fBestPars.begin()+offset+fNPars ) ); 
00520    }
00521    const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
00522    fRegressionReturnVal->push_back(evT2->GetTarget(0));
00523 
00524    delete evT;
00525 
00526    return (*fRegressionReturnVal);
00527 }
00528   
00529 
00530 //_______________________________________________________________________
00531 const std::vector<Float_t>& TMVA::MethodFDA::GetMulticlassValues()
00532 {
00533    if (fMulticlassReturnVal == NULL) fMulticlassReturnVal = new std::vector<Float_t>();
00534    fMulticlassReturnVal->clear();
00535    std::vector<Float_t> temp;
00536 
00537    // returns MVA value for given event
00538    const TMVA::Event* evt = GetEvent();
00539 
00540    CalculateMulticlassValues( evt, fBestPars, temp );
00541 
00542    UInt_t nClasses = DataInfo().GetNClasses();
00543    for(UInt_t iClass=0; iClass<nClasses; iClass++){
00544       Double_t norm = 0.0;
00545       for(UInt_t j=0;j<nClasses;j++){
00546          if(iClass!=j)
00547             norm+=exp(temp[j]-temp[iClass]);
00548       }
00549       (*fMulticlassReturnVal).push_back(1.0/(1.0+norm));
00550    }
00551 
00552    return (*fMulticlassReturnVal);
00553 }
00554 
00555 
00556 //_______________________________________________________________________
00557 void TMVA::MethodFDA::CalculateMulticlassValues( const TMVA::Event*& evt, std::vector<Double_t>& parameters, std::vector<Float_t>& values)
00558 {
00559    // calculate the values for multiclass
00560    values.clear();
00561 
00562 //    std::copy( parameters.begin(), parameters.end(), std::ostream_iterator<double>( std::cout, " " ) );
00563 //    std::cout << std::endl;
00564 
00565 //    char inp;
00566 //    std::cin >> inp;
00567 
00568    Double_t sum=0;
00569    for( Int_t dim = 0; dim < fOutputDimensions; ++dim ){ // check for all other dimensions (=classes)
00570       Int_t offset = dim*fNPars;
00571       Double_t value = InterpretFormula( evt, parameters.begin()+offset, parameters.begin()+offset+fNPars );
00572 //       std::cout << "dim : " << dim << " value " << value << "    offset " << offset << std::endl;
00573       values.push_back( value );
00574       sum += value;
00575    }
00576 
00577 //    // normalize to sum of value (commented out, .. have to think of how to treat negative classifier values)
00578 //    std::transform( fMulticlassReturnVal.begin(), fMulticlassReturnVal.end(), fMulticlassReturnVal.begin(), bind2nd( std::divides<float>(), sum) );
00579 }
00580 
00581 
00582 
00583 //_______________________________________________________________________
00584 void  TMVA::MethodFDA::ReadWeightsFromStream( istream& istr )
00585 {
00586    // read back the training results from a file (stream)
00587 
00588    // retrieve best function parameters
00589    // coverity[tainted_data_argument]
00590    istr >> fNPars;
00591 
00592    fBestPars.clear();
00593    fBestPars.resize( fNPars );
00594    for (UInt_t ipar=0; ipar<fNPars; ipar++) istr >> fBestPars[ipar];
00595 }
00596 
00597 //_______________________________________________________________________
00598 void TMVA::MethodFDA::AddWeightsXMLTo( void* parent ) const
00599 {
00600    // create XML description for LD classification and regression
00601    // (for arbitrary number of output classes/targets)
00602 
00603    void* wght = gTools().AddChild(parent, "Weights");
00604    gTools().AddAttr( wght, "NPars",  fNPars );
00605    gTools().AddAttr( wght, "NDim",   fOutputDimensions );
00606    for (UInt_t ipar=0; ipar<fNPars*fOutputDimensions; ipar++) {
00607       void* coeffxml = gTools().AddChild( wght, "Parameter" );
00608       gTools().AddAttr( coeffxml, "Index", ipar   );
00609       gTools().AddAttr( coeffxml, "Value", fBestPars[ipar] );
00610    }
00611 
00612    // write formula
00613    gTools().AddAttr( wght, "Formula", fFormulaStringP );
00614 }
00615 
00616 //_______________________________________________________________________
00617 void TMVA::MethodFDA::ReadWeightsFromXML( void* wghtnode )
00618 {
00619    // read coefficients from xml weight file
00620    gTools().ReadAttr( wghtnode, "NPars", fNPars );
00621 
00622    if(gTools().HasAttr( wghtnode, "NDim")) {
00623       gTools().ReadAttr( wghtnode, "NDim" , fOutputDimensions );
00624    } else {
00625       // older weight files don't have this attribute
00626       fOutputDimensions = 1;
00627    }
00628 
00629    fBestPars.clear();
00630    fBestPars.resize( fNPars*fOutputDimensions );
00631 
00632    void* ch = gTools().GetChild(wghtnode);
00633    Double_t par;
00634    UInt_t    ipar;
00635    while (ch) {
00636       gTools().ReadAttr( ch, "Index", ipar );
00637       gTools().ReadAttr( ch, "Value", par  );
00638 
00639       // sanity check
00640       if (ipar >= fNPars*fOutputDimensions) Log() << kFATAL << "<ReadWeightsFromXML> index out of range: "
00641                                   << ipar << " >= " << fNPars << Endl;
00642       fBestPars[ipar] = par;
00643 
00644       ch = gTools().GetNextChild(ch);
00645    }
00646 
00647    // read formula
00648    gTools().ReadAttr( wghtnode, "Formula", fFormulaStringP );
00649 
00650    // create the TFormula
00651    CreateFormula();
00652 }
00653 
00654 //_______________________________________________________________________
00655 void TMVA::MethodFDA::MakeClassSpecific( std::ostream& fout, const TString& className ) const
00656 {
00657    // write FDA-specific classifier response
00658    fout << "   double              fParameter[" << fNPars << "];" << endl;
00659    fout << "};" << endl;
00660    fout << "" << endl;
00661    fout << "inline void " << className << "::Initialize() " << endl;
00662    fout << "{" << endl;
00663    for(UInt_t ipar=0; ipar<fNPars; ipar++) {
00664       fout << "   fParameter[" << ipar << "] = " << fBestPars[ipar] << ";" << endl;
00665    }
00666    fout << "}" << endl;
00667    fout << endl;
00668    fout << "inline double " << className << "::GetMvaValue__( const std::vector<double>& inputValues ) const" << endl;
00669    fout << "{" << endl;
00670    fout << "   // interpret the formula" << endl;
00671 
00672    // replace parameters
00673    TString str = fFormulaStringT;
00674    for (UInt_t ipar=0; ipar<fNPars; ipar++) {
00675       str.ReplaceAll( Form("[%i]", ipar), Form("fParameter[%i]", ipar) );
00676    }
00677 
00678    // replace input variables
00679    for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
00680       str.ReplaceAll( Form("[%i]", ivar+fNPars), Form("inputValues[%i]", ivar) );
00681    }
00682 
00683    fout << "   double retval = " << str << ";" << endl;
00684    fout << endl;
00685    fout << "   return retval; " << endl;
00686    fout << "}" << endl;
00687    fout << endl;
00688    fout << "// Clean up" << endl;
00689    fout << "inline void " << className << "::Clear() " << endl;
00690    fout << "{" << endl;
00691    fout << "   // nothing to clear" << endl;
00692    fout << "}" << endl;
00693 }
00694 
00695 //_______________________________________________________________________
00696 void TMVA::MethodFDA::GetHelpMessage() const
00697 {
00698    // get help message text
00699    //
00700    // typical length of text line:
00701    //         "|--------------------------------------------------------------|"
00702    Log() << Endl;
00703    Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
00704    Log() << Endl;
00705    Log() << "The function discriminant analysis (FDA) is a classifier suitable " << Endl;
00706    Log() << "to solve linear or simple nonlinear discrimination problems." << Endl;
00707    Log() << Endl;
00708    Log() << "The user provides the desired function with adjustable parameters" << Endl;
00709    Log() << "via the configuration option string, and FDA fits the parameters to" << Endl;
00710    Log() << "it, requiring the signal (background) function value to be as close" << Endl;
00711    Log() << "as possible to 1 (0). Its advantage over the more involved and" << Endl;
00712    Log() << "automatic nonlinear discriminators is the simplicity and transparency " << Endl;
00713    Log() << "of the discrimination expression. A shortcoming is that FDA will" << Endl;
00714    Log() << "underperform for involved problems with complicated, phase space" << Endl;
00715    Log() << "dependent nonlinear correlations." << Endl;
00716    Log() << Endl;
00717    Log() << "Please consult the Users Guide for the format of the formula string" << Endl;
00718    Log() << "and the allowed parameter ranges:" << Endl;
00719    if (gConfig().WriteOptionsReference()) {
00720       Log() << "<a href=\"http://tmva.sourceforge.net/docu/TMVAUsersGuide.pdf\">"
00721             << "http://tmva.sourceforge.net/docu/TMVAUsersGuide.pdf</a>" << Endl;
00722    }
00723    else Log() << "http://tmva.sourceforge.net/docu/TMVAUsersGuide.pdf" << Endl;
00724    Log() << Endl;
00725    Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
00726    Log() << Endl;
00727    Log() << "The FDA performance depends on the complexity and fidelity of the" << Endl;
00728    Log() << "user-defined discriminator function. As a general rule, it should" << Endl;
00729    Log() << "be able to reproduce the discrimination power of any linear" << Endl;
00730    Log() << "discriminant analysis. To reach into the nonlinear domain, it is" << Endl;
00731    Log() << "useful to inspect the correlation profiles of the input variables," << Endl;
00732    Log() << "and add quadratic and higher polynomial terms between variables as" << Endl;
00733    Log() << "necessary. Comparison with more involved nonlinear classifiers can" << Endl;
00734    Log() << "be used as a guide." << Endl;
00735    Log() << Endl;
00736    Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
00737    Log() << Endl;
00738    Log() << "Depending on the function used, the choice of \"FitMethod\" is" << Endl;
00739    Log() << "crucial for getting valuable solutions with FDA. As a guideline it" << Endl;
00740    Log() << "is recommended to start with \"FitMethod=MINUIT\". When more complex" << Endl;
00741    Log() << "functions are used where MINUIT does not converge to reasonable" << Endl;
00742    Log() << "results, the user should switch to non-gradient FitMethods such" << Endl;
00743    Log() << "as GeneticAlgorithm (GA) or Monte Carlo (MC). It might prove to be" << Endl;
00744    Log() << "useful to combine GA (or MC) with MINUIT by setting the option" << Endl;
00745    Log() << "\"Converger=MINUIT\". GA (MC) will then set the starting parameters" << Endl;
00746    Log() << "for MINUIT such that the basic quality of GA (MC) of finding global" << Endl;
00747    Log() << "minima is combined with the efficacy of MINUIT of finding local" << Endl;
00748    Log() << "minima." << Endl;
00749 }

Generated on Tue Jul 5 15:25:02 2011 for ROOT_528-00b_version by  doxygen 1.5.1