PDEFoam.h

Go to the documentation of this file.
00001 
00002 /**********************************************************************************
00003  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
00004  * Package: TMVA                                                                  *
00005  * Classes: PDEFoam                                                               *
00006  * Web    : http://tmva.sourceforge.net                                           *
00007  *                                                                                *
00008  * Description:                                                                   *
00009  *      Class for PDEFoam object                                                  *
00010  *                                                                                *
00011  * Authors (alphabetical):                                                        *
00012  *      S. Jadach        - Institute of Nuclear Physics, Cracow, Poland           *
00013  *      Tancredi Carli   - CERN, Switzerland                                      *
00014  *      Dominik Dannheim - CERN, Switzerland                                      *
00015  *      Alexander Voigt  - CERN, Switzerland                                      *
00016  *                                                                                *
00017  * Copyright (c) 2008:                                                            *
00018  *      CERN, Switzerland                                                         *
00019  *      MPI-K Heidelberg, Germany                                                 *
00020  *                                                                                *
00021  * Redistribution and use in source and binary forms, with or without             *
00022  * modification, are permitted according to the terms listed in LICENSE           *
00023  * (http://tmva.sourceforge.net/LICENSE)                                          *
00024  **********************************************************************************/
00025 
00026 #ifndef ROOT_TMVA_PDEFoam
00027 #define ROOT_TMVA_PDEFoam
00028 
00029 #include <iosfwd>
00030 #include <cassert>
00031 
00032 #ifndef ROOT_TH2D
00033 #include "TH2D.h"
00034 #endif
00035 #ifndef ROOT_TObjArray
00036 #include "TObjArray.h"
00037 #endif
00038 #ifndef ROOT_TObjString
00039 #include "TObjString.h"
00040 #endif
00041 #ifndef ROOT_TVectorT
00042 #include "TVectorT.h"
00043 #endif
00044 #ifndef ROOT_TString
00045 #include "TString.h"
00046 #endif
00047 #ifndef ROOT_TMVA_VariableInfo
00048 #include "TMVA/VariableInfo.h"
00049 #endif
00050 #ifndef ROOT_TMVA_Timer
00051 #include "TMVA/Timer.h"
00052 #endif
00053 #ifndef ROOT_TObject
00054 #include "TObject.h"
00055 #endif
00056 #ifndef ROOT_TRandom3
00057 #include "TRandom3.h"
00058 #endif
00059 
00060 namespace TMVA { // declarations made visible before the TMVA/PDEFoam*.h headers are included below
00061    class PDEFoamCell;
00062    class PDEFoamVect;
00063    class PDEFoamDistr;
00064    class PDEFoam;
00065 
00066    enum EFoamType { kSeparate, kDiscr, kMonoTarget, kMultiTarget }; // foam type (stored in PDEFoam::fFoamType)
00067 }
00068 
00069 #ifndef ROOT_TMVA_PDEFoamDistr
00070 #include "TMVA/PDEFoamDistr.h"
00071 #endif
00072 #ifndef ROOT_TMVA_PDEFoamVect
00073 #include "TMVA/PDEFoamVect.h"
00074 #endif
00075 #ifndef ROOT_TMVA_PDEFoamCell
00076 #include "TMVA/PDEFoamCell.h"
00077 #endif
00078 
00079 namespace TMVA {
00080    enum EKernel { kNone=0, kGaus=1, kLinN=2 }; // kernel option of the PDEFoam evaluation functions (default there: kNone)
00081    enum ETargetSelection { kMean=0, kMpv=1 }; // target selection (mean / mpv) used by GetCellTargets() and GetProjectedRegValue()
00082    enum ECellType { kAll, kActive, kInActive }; // cell selection: all, active or inactive cells
00083 
00084    // enum type for possible foam cell values
00085    // kNev           : number of events (saved in cell element 0)
00086    // kDiscriminator : discriminator (saved in cell element 0)
00087    // kDiscriminatorError : error on discriminator (saved in cell element 1)
00088    // kTarget0       : target 0 (saved in cell element 0)
00089    // kTarget0Error  : error on target 0 (saved in cell element 1)
00090    // kMeanValue     : mean sampling value (saved in fIntegral)
00091    // kRms           : rms of sampling distribution (saved in fDriver)
00092    // kRmsOvMean     : rms/mean of sampling distribution (saved in
00093    //                  fDriver and fIntegral)
00094    // kDensity       : number of events/cell volume
00095    enum ECellValue { kNev, kDiscriminator, kDiscriminatorError, kTarget0,
00096                      kTarget0Error, kMeanValue, kRms, kRmsOvMean, kDensity };
00097    // separation quantity to use (kFoam: use PDEFoam algorithm)
00098    enum EDTSeparation { kFoam, kGiniIndex, kMisClassificationError, 
00099                         kCrossEntropy };
00100 }
00101 
00102 namespace TMVA {
00103 
00104    std::ostream& operator<< ( std::ostream& os, const PDEFoam& pdefoam ); // stream output of a PDEFoam (friend of PDEFoam; not defined in this header)
00105    std::istream& operator>> ( std::istream& istr,     PDEFoam& pdefoam ); // stream input of a PDEFoam (friend of PDEFoam; not defined in this header)
00106 
00107    class PDEFoam : public TObject { // foam built from a tree of PDEFoamCells; ROOT class (see ClassDef at the end)
00108    protected:
00109       // COMPONENTS // (comments on data members are read by ROOT I/O: '!' = not streamed, '[n]' = array length; keep them intact)
00110       //-------------- Input parameters
00111       TString fName;             // Name of a given instance of the FOAM class
00112       Int_t   fDim;              // Dimension of the integration/simulation space
00113       Int_t   fNCells;           // Maximum number of cells
00114       //-------------------
00115       Int_t   fNBin;             // No. of bins in the edge histogram for cell MC exploration
00116       Int_t   fNSampl;           // No. of MC events, when dividing (exploring) cell
00117       Int_t   fEvPerBin;         // Maximum number of effective (wt=1) events per bin
00118       //-------------------  MULTI-BRANCHING ---------------------
00119       Int_t  *fMaskDiv;          //! [fDim] Dynamic Mask for cell division
00120       Int_t  *fInhiDiv;          //! [fDim] Flags for inhibiting cell division
00121       //-------------------  GEOMETRY ----------------------------
00122       Int_t   fNoAct;            // Number of active cells
00123       Int_t   fLastCe;           // Index of the last cell
00124       PDEFoamCell **fCells;      // [fNCells] Array of ALL cells
00125       //------------------ M.C. generation----------------------------
00126       TObjArray *fHistEdg;       // Histograms of wt, one for each cell edge
00127       Double_t *fRvec;           // [fDim] random number vector from r.n. generator fDim+1 maximum elements
00128       //----------- Procedures
00129       TRandom3        *fPseRan;  // Pointer to user-defined generator of pseudorandom numbers
00130       //----------  working space for CELL exploration -------------
00131       Double_t *fAlpha;          // [fDim] Internal parameters of the hyperrectangle
00132       // ---------  PDE-Foam specific variables
00133       EFoamType fFoamType;     // type of foam
00134       Double_t *fXmin;         // [fDim] minimum for variable transform
00135       Double_t *fXmax;         // [fDim] maximum for variable transform
00136       UInt_t fNElements;       // number of variables in every cell
00137       UInt_t fNmin;            // minimal number of events in cell to split cell
00138       UInt_t fMaxDepth;        // maximum depth of cell tree
00139       Float_t fVolFrac;        // volume fraction (with respect to total phase space)
00140       Bool_t fFillFoamWithOrigWeights; // fill the foam with boost or orig. weights
00141       EDTSeparation fDTSeparation; // split cells according to decision tree logic
00142       Bool_t fPeekMax;         // peek up cell with max. driver integral for split
00143       PDEFoamDistr *fDistr;    //! distribution of training events
00144       Timer *fTimer;           // timer for graphical output
00145       TObjArray *fVariableNames;// collection of all variable names
00146       mutable MsgLogger* fLogger;                     //! message logger
00147 
00148       /////////////////////////////////////////////////////////////////
00149       //                            METHODS                          //
00150       /////////////////////////////////////////////////////////////////
00151    private:
00152       // Square function (fastest implementation)
00153       template<typename T> T Sqr(T x) const { return x*x; }
00154       PDEFoamDistr* GetDistr() const { assert(fDistr); return fDistr; } // asserts that fDistr is set before handing it out
00155 
00156    protected:
00157       // ---------- TMVA console output
00158 
00159       void OutputGrow(Bool_t finished = false ); // nice TMVA console output
00160 
00161       // ---------- Weighting functions for kernels
00162 
00163       Float_t WeightGaus(PDEFoamCell*, std::vector<Float_t>&, UInt_t dim=0);
00164 
00165       Double_t WeightLinNeighbors( std::vector<Float_t> &txvec, ECellValue cv, 
00166                                    Int_t dim1=-1, Int_t dim2=-1, 
00167                                    Bool_t TreatEmptyCells=kFALSE );
00168       
00169       // ---------- Foam build-up functions
00170 
00171       // Internal foam initialization functions
00172       void InitCells();                   // Initialisation of all foam cells
00173       Int_t CellFill(Int_t, PDEFoamCell*);// Allocates new empty cell and return its index
00174       void Explore(PDEFoamCell *Cell);    // Exploration of the new cell, determine <wt>, wtMax etc.
00175       void DTExplore(PDEFoamCell *Cell);  // Exploration of the new cell according to decision tree logic
00176       void Varedu(Double_t [], Int_t&, Double_t&,Double_t&); // Determines the best edge, variance reduction
00177       void MakeAlpha();             // Provides random point inside hyperrectangle
00178       void Grow();                  // build up foam
00179       Long_t PeekMax();             // peek cell with max. driver integral
00180       Long_t PeekLast();            // peek last created cell
00181       Int_t  Divide(PDEFoamCell *); // Divide iCell into two daughters; iCell retained, tagged as inactive
00182       Double_t Eval(Double_t *xRand, Double_t &event_density); // evaluate distribution on point 'xRand'
00183       Float_t GetSeparation(Float_t s, Float_t b); // calculate separation
00184 
00185       // ---------- Cell value access functions
00186 
00187       // low level functions to access a certain cell value
00188       Double_t GetCellElement(PDEFoamCell *cell, UInt_t i);  // get Element 'i' in cell 'cell'
00189       void SetCellElement(PDEFoamCell *cell, UInt_t i, Double_t value); // set Element 'i' in cell 'cell' to value 'value'
00190 
00191       // helper functions to access cell data
00192       Double_t GetCellValue(PDEFoamCell*, ECellValue);
00193 
00194       // specific function used during evaluation; determines, whether a cell value is undefined
00195       Bool_t   CellValueIsUndefined( PDEFoamCell* );
00196 
00197       // finds cell according to given event variables
00198       PDEFoamCell* FindCell(std::vector<Float_t>&); //!
00199       std::vector<TMVA::PDEFoamCell*> FindCells(std::vector<Float_t>&); //!
00200 
00201       // find cells, which fit a given event vector
00202       void FindCellsRecursive(std::vector<Float_t>&, PDEFoamCell*, 
00203                               std::vector<PDEFoamCell*> &);
00204       
00205       // calculates the mean/ mpv target values for a given event 'tvals'
00206       std::vector<Float_t> GetCellTargets( std::vector<Float_t> &tvals, ETargetSelection ts );
00207       // get number of events in cell during foam build-up
00208       Double_t GetBuildUpCellEvents(PDEFoamCell* cell);
00209       
00210       PDEFoam(const PDEFoam&);    // Copy Constructor  NOT USED
00211 
00212       // ---------- Public functions ----------------------------------
00213    public:
00214       PDEFoam();                  // Default constructor (used only by ROOT streamer)
00215       PDEFoam(const TString&);    // Principal user-defined constructor
00216       virtual ~PDEFoam();         // Destructor
00217 
00218       // ---------- Foam creation functions
00219 
00220       void Init();                    // initialize PDEFoamDistr
00221       void FillBinarySearchTree( const Event* ev, Bool_t NoNegWeights=kFALSE );
00222       void Create();              // build-up foam
00223 
00224       // function to fill created cell with given value
00225       void FillFoamCells(const Event* ev, Bool_t NoNegWeights=kFALSE);
00226 
00227       // functions to calc discriminators/ mean targets for every cell
00228       // using filled cell values
00229       void CalcCellDiscr();
00230       void CalcCellTarget();
00231 
00232       // init TObject pointer on cells
00233       void ResetCellElements(Bool_t allcells = false);
00234 
00235       // ---------- Getters and Setters
00236 
00237       void SetDim(Int_t kDim); // Sets dimension of cubical space
00238       void SetnCells(Long_t nCells){fNCells =nCells;}  // Sets maximum number of cells
00239       void SetnSampl(Long_t nSampl){fNSampl =nSampl;}  // Sets no of MC events in cell exploration
00240       void SetnBin(Int_t nBin){fNBin = nBin;}          // Sets no of bins in histograms in cell exploration
00241       void SetEvPerBin(Int_t EvPerBin){fEvPerBin =EvPerBin;} // Sets max. no. of effective events per bin
00242       void SetInhiDiv(Int_t, Int_t ); // Set inhibition of cell division along certain edge
00243       void SetNElements(UInt_t numb){fNElements = numb;} // set number of elements saved on every cell
00244       void SetVolumeFraction(Float_t vfr){fVolFrac = vfr;} // set VolFrac
00245       void SetFoamType(EFoamType ft);   // set foam type
00246       void SetFillFoamWithOrigWeights(Bool_t new_val){fFillFoamWithOrigWeights=new_val;}
00247       void SetDTSeparation(EDTSeparation new_val){fDTSeparation=new_val;}
00248       void SetPeekMax(Bool_t new_val){ fPeekMax = new_val; }
00249 
00250       // coverity[ -tainted_data_return ]
00251       Int_t    GetTotDim()    const {return fDim;  } // Get total dimension
00252       TString  GetFoamName()  const {return fName; } // Get name of foam
00253       UInt_t   GetNElements() const {return fNElements; } // returns number of elements, saved on every cell
00254       Float_t  GetVolumeFraction() const {return fVolFrac;} // get VolFrac from PDEFoam
00255       EFoamType GetFoamType()      const {return fFoamType;}; // get foam type
00256       UInt_t   GetNActiveCells()   const {return fNoAct;}; // returns number of active cells
00257       UInt_t   GetNInActiveCells() const {return GetNCells()-GetNActiveCells();}; // returns number of not active cells
00258       UInt_t   GetNCells()         const {return fNCells;};   // returns number of cells
00259       PDEFoamCell* GetRootCell()   const {return fCells[0];}; // get pointer to root cell
00260 
00261       // Getters and Setters for user cut options
00262       void     SetNmin(UInt_t val)     { fNmin=val;      }
00263       UInt_t   GetNmin() const         { return fNmin;   }
00264       Bool_t   GetFillFoamWithOrigWeights() const { return fFillFoamWithOrigWeights; }
00265       void     SetMaxDepth(UInt_t maxdepth) { fMaxDepth = maxdepth; }
00266       UInt_t   GetMaxDepth() const { return fMaxDepth; }
00267 
00268       // Getters and Setters for foam boundaries
00269       void SetXmin(Int_t idim, Double_t wmin);
00270       void SetXmax(Int_t idim, Double_t wmax);
00271       Double_t GetXmin(Int_t idim) const {return fXmin[idim];} // no range check on idim
00272       Double_t GetXmax(Int_t idim) const {return fXmax[idim];} // no range check on idim
00273 
00274       // Getters and Setters for variable names
00275       void AddVariableName(const char *s) { AddVariableName(new TObjString(s)); }
00276       void AddVariableName(TObjString *s) { fVariableNames->Add(s); }
00277       TObjString* GetVariableName(Int_t idx) {return dynamic_cast<TObjString*>(fVariableNames->At(idx));}
00278 
00279       // Delete the fDistr object, which contains the binary search
00280       // tree
00281       void DeleteBinarySearchTree();
00282 
00283       // ---------- Transformation functions for event variables into foam boundaries
00284       // reason: foam always has boundaries [0, 1]
00285 
00286       Float_t VarTransform(Int_t idim, Float_t x) const; // transform [xmin, xmax] --> [0, 1]
00287       std::vector<Float_t> VarTransform(std::vector<Float_t> &invec) const;
00288       Float_t VarTransformInvers(Int_t idim, Float_t x) const; // transform [0, 1] --> [xmin, xmax]
00289       std::vector<Float_t> VarTransformInvers(std::vector<Float_t> &invec) const;
00290 
00291       // ---------- Debug functions
00292 
00293       void     CheckAll(Int_t);  // Checks correctness of the entire data structure in the FOAM object
00294       void     PrintCell(Long_t iCell=0); // Print content of cell
00295       void     PrintCells();     // Prints content of all cells
00296       void     CheckCells(Bool_t remove_empty_cells=false);   // check all cells with respect to critical values
00297       void     RemoveEmptyCell(Int_t iCell); // removes iCell if its volume is zero
00298       void     PrintCellElements();          // print all cells with its elements
00299 
00300       // Message logger
00301       MsgLogger& Log() const { return *fLogger; }
00302 
00303       // ---------- Foam output
00304 
00305       friend std::ostream& operator<< ( std::ostream& os, const PDEFoam& pdefoam );
00306       friend std::istream& operator>> ( std::istream& istr,     PDEFoam& pdefoam );
00307       // stream types are std::-qualified so this header does not rely on using-declarations leaking from other headers
00308       void ReadStream(std::istream &);         // read  foam from stream
00309       void PrintStream(std::ostream  &) const; // write foam to stream
00310       void ReadXML( void* parent );       // read  foam variables from xml
00311       void AddXMLTo( void* parent );      // write foam variables to xml
00312 
00313       // ---------- Foam projection methods
00314 
00315       // project foam to two-dimensional histogram
00316       TH2D* Project2(Int_t idim1, Int_t idim2, const char *opt="cell_value", 
00317                      const char *ker="kNone", UInt_t maxbins=50);
00318 
00319       // helper function for Project2()
00320       Double_t GetProjectionCellValue( PDEFoamCell* cell,
00321                                        Int_t idim1, Int_t idim2, ECellValue cv );
00322 
00323       // Project one-dimensional foam to a 1-dim histogram
00324       TH1D* Draw1Dim(const char *opt, Int_t nbin);
00325 
00326       // Generates C++ code (root macro) for drawing foam with boxes (only 2-dim!)
00327       void RootPlot2dim( const TString& filename, TString opt,
00328                          Bool_t CreateCanvas = kTRUE, Bool_t colors = kTRUE,
00329                          Bool_t log_colors = kFALSE  );
00330 
00331       // ---------- Foam evaluation functions
00332 
00333       // get cell value for a given event
00334       Double_t GetCellValue(std::vector<Float_t>&, ECellValue);
00335 
00336       // helper functions to access cell data with kernel
00337       Double_t GetCellDiscr(std::vector<Float_t> &xvec, EKernel kernel=kNone);
00338       Double_t GetCellDensity(std::vector<Float_t> &xvec, EKernel kernel=kNone);
00339 
00340       // calc mean cell value of neighbor cells
00341       Double_t GetAverageNeighborsValue(std::vector<Float_t> &txvec, ECellValue cv);
00342 
00343       // returns regression value (mono target regression)
00344       Double_t GetCellRegValue0(std::vector<Float_t>&, EKernel kernel=kNone);
00345 
00346       // returns regression value i, given all variables (multi target regression)
00347       std::vector<Float_t> GetProjectedRegValue(std::vector<Float_t> &vals, EKernel kernel=kNone, ETargetSelection ts=kMean);
00348 
00349       // ---------- ROOT class definition
00350       ClassDef(PDEFoam,5) // Tree of PDEFoamCells
00351    }; // end of PDEFoam
00352 
00353 }  // namespace TMVA
00354 
00355 // ---------- Inline functions
00356 
00357 //_____________________________________________________________________
00358 inline Float_t TMVA::PDEFoam::VarTransform(Int_t idim, Float_t x) const
00359 {
00360    // linearly map x from the range [fXmin[idim], fXmax[idim]] of variable idim onto [0, 1]
00361    return (x-fXmin[idim])/(fXmax[idim]-fXmin[idim]); // no check of idim and no guard against fXmax[idim]==fXmin[idim]
00362 }
00363 
00364 //_____________________________________________________________________
00365 inline std::vector<Float_t> TMVA::PDEFoam::VarTransform(std::vector<Float_t> &invec) const
00366 {
00367    // transform vector invec from [xmin, xmax] --> [0, 1]; returns a new vector, invec itself is not modified
00368    std::vector<Float_t> outvec;
00369    for(UInt_t i=0; i<invec.size(); i++)
00370       outvec.push_back(VarTransform(i, invec.at(i))); // position i in invec is used as the dimension index
00371    return outvec;
00372 }
00373 
00374 //_____________________________________________________________________
00375 inline Float_t TMVA::PDEFoam::VarTransformInvers(Int_t idim, Float_t x) const
00376 {
00377    // linearly map x from [0, 1] back onto the range [fXmin[idim], fXmax[idim]] of variable idim
00378    return x*(fXmax[idim]-fXmin[idim]) + fXmin[idim]; // no check of idim
00379 }
00380 
00381 //_____________________________________________________________________
00382 inline std::vector<Float_t> TMVA::PDEFoam::VarTransformInvers(std::vector<Float_t> &invec) const
00383 {
00384    // transform vector invec from [0, 1] --> [xmin, xmax]; returns a new vector, invec itself is not modified
00385    std::vector<Float_t> outvec;
00386    for(UInt_t i=0; i<invec.size(); i++)
00387       outvec.push_back(VarTransformInvers(i, invec.at(i))); // position i in invec is used as the dimension index
00388    return outvec;
00389 }
00390 
00391 #endif

Generated on Tue Jul 5 14:27:33 2011 for ROOT_528-00b_version by  doxygen 1.5.1