TRobustEstimator.h

Go to the documentation of this file.
00001 // @(#)root/physics:$Id: TRobustEstimator.h 22727 2008-03-19 09:54:16Z pcanal $
00002 // Author: Anna Kreshuk  08/10/2004
00003 
00004 
00005 //////////////////////////////////////////////////////////////////////////////
00006 //
00007 //  TRobustEstimator
00008 //
00009 // Minimum Covariance Determinant Estimator - a Fast Algorithm
00010 // invented by Peter J. Rousseeuw and Katrien Van Driessen
00011 // "A Fast Algorithm for the Minimum Covariance Determinant Estimator"
00012 // Technometrics, August 1999, Vol. 41, No. 3
00013 //
00014 //////////////////////////////////////////////////////////////////////////////
00015 
00016 #ifndef ROOT_TRobustEstimator
00017 #define ROOT_TRobustEstimator
00018 
00019 #include "TArrayI.h"
00020 #include "TMatrixDSym.h"
00021 #include "TMatrixDSymEigen.h"
00022 
00023 class TRobustEstimator : public TObject {
00024 // Minimum Covariance Determinant (MCD) estimator: declares the data members, internal helpers and public accessors.
00025 protected:
00026 
00027    Int_t        fNvar;          //number of variables
00028    Int_t        fH;             //algorithm parameter, determining the subsample size
00029    Int_t        fN;             //number of observations
00030 
00031    Int_t        fVarTemp;       //number of variables already added to the data matrix
00032    Int_t        fVecTemp;       //number of observations already added to the data matrix
00033 
00034    Int_t        fExact;         //if there was an exact fit, stores the number of points on a hyperplane 
00035 
00036    TVectorD     fMean;          //location estimate (mean values)
00037    TMatrixDSym  fCovariance;    //covariance matrix estimate
00038    TMatrixDSym  fInvcovariance; //inverse of the covariance matrix
00039    TMatrixDSym  fCorrelation;   //correlation matrix
00040    TVectorD     fRd;            //array of robust distances, size n
00041    TVectorD     fSd;            //array of standard deviations
00042    TArrayI      fOut;           //array of indexes of outliers, size <0.5*n
00043    TVectorD     fHyperplane;    //in case more than fH observations lie on a hyperplane
00044                                //the equation of this hyperplane is stored here
00045  
00046    TMatrixD fData;              //the original data
00047 
00048    //internal helper functions needed for evaluation (declared only; bodies are not in this header)
00049    //NOTE(review): the exact semantics of these helpers are not visible here - check the implementation file
00050    void     AddToSscp(TMatrixD &sscp, TVectorD &vec);
00051    void     ClearSscp(TMatrixD &sscp); 
00052 
00053    void     Classic();
00054    void     Covar(TMatrixD &sscp, TVectorD &m, TMatrixDSym &cov, TVectorD &sd, Int_t nvec); 
00055    void     Correl();
00056 
00057    void     CreateSubset(Int_t ntotal, Int_t htotal, Int_t p, Int_t *index, TMatrixD &data, 
00058                     TMatrixD &sscp, Double_t *ndist);
00059    void     CreateOrtSubset(TMatrixD &dat, Int_t *index, Int_t hmerged, Int_t nmerged, TMatrixD &sscp, Double_t *ndist);
00060 
00061    Double_t CStep(Int_t ntotal, Int_t htotal, Int_t *index, TMatrixD &data, TMatrixD &sscp, Double_t *ndist);
00062 
00063    Int_t    Exact(Double_t *ndist); 
00064    Int_t    Exact2(TMatrixD &mstockbig, TMatrixD &cstockbig, TMatrixD &hyperplane,
00065                Double_t *deti, Int_t nbest,Int_t kgroup, 
00066                TMatrixD &sscp, Double_t *ndist);
00067 
00068    Int_t    Partition(Int_t nmini, Int_t *indsubdat); 
00069    Int_t    RDist(TMatrixD &sscp);
00070    void     RDraw(Int_t *subdat, Int_t ngroup, Int_t *indsubdat);
00071 
00072    Double_t KOrdStat(Int_t ntotal, Double_t *arr, Int_t k, Int_t *work);
00073 
00074 public:
00075 
00076    TRobustEstimator();               //default constructor
00077    TRobustEstimator(Int_t nvectors, Int_t nvariables, Int_t hh=0); //hh defaults to 0; NOTE(review): presumably nvectors=observations, nvariables=variables - confirm
00078    virtual ~TRobustEstimator(){;}    //virtual destructor with an empty body
00079 
00080    void    AddColumn(Double_t *col);         //adds a column to the data matrix (NOTE(review): expected array length is not stated here - confirm)
00081    void    AddRow(Double_t *row);            //adds a row to the data matrix (NOTE(review): expected array length is not stated here - confirm)
00082 
00083    void    Evaluate();                       //NOTE(review): presumably runs the estimation on the stored data - confirm in the implementation
00084    void    EvaluateUni(Int_t nvectors, Double_t *data, Double_t &mean, Double_t &sigma, Int_t hh=0); //mean and sigma are non-const references (presumably outputs); hh defaults to 0
00085 
00086    Int_t   GetBDPoint();                     //returns the breakdown point of the algorithm
00087 
00088    void    GetCovariance(TMatrixDSym &matr); //returns robust covariance matrix estimate
00089    const   TMatrixDSym* GetCovariance() const{return &fCovariance;} //returns a pointer to the internal fCovariance member (no copy)
00090    void    GetCorrelation(TMatrixDSym &matr); //returns robust correlation matrix estimate
00091    const   TMatrixDSym* GetCorrelation() const{return &fCorrelation;} //returns a pointer to the internal fCorrelation member (no copy)
00092    void    GetHyperplane(TVectorD &vec);      //if the data lies on a hyperplane, returns this hyperplane
00093    const   TVectorD* GetHyperplane() const;   //if the data lies on a hyperplane, returns this hyperplane
00094    Int_t   GetNHyp() {return fExact;}         //returns fExact, the number of points on a hyperplane (not declared const)
00095    void    GetMean(TVectorD &means);                        //returns robust mean vector estimate
00096    const   TVectorD* GetMean() const {return &fMean;}       //returns a pointer to the internal robust mean vector estimate
00097    void    GetRDistances(TVectorD &rdist);                  //returns robust distances of all observations
00098    const   TVectorD* GetRDistances() const {return &fRd;}   //returns a pointer to the internal robust distances of all observations
00099    Int_t   GetNumberObservations() const {return fN;}       //returns fN, the number of observations
00100    Int_t   GetNvar() const {return fNvar;}                  //returns fNvar, the number of variables
00101    const   TArrayI* GetOuliers() const{return &fOut;}       //returns a pointer to the array of outlier indexes (method name is spelled "GetOuliers")
00102    Int_t   GetNOut(); //returns the number of points outside the tolerance ellipsoid.
00103                       //ONLY those with robust distances significantly larger than the
00104                       //cutoff value should be considered outliers!
00105    Double_t GetChiQuant(Int_t i) const;      //NOTE(review): presumably returns a chi-square quantile selected by i - confirm in the implementation
00106    
00107    ClassDef(TRobustEstimator,1)  //Minimum Covariance Determinant Estimator
00108  
00109 };
00110 
00111 
00112 #endif
00113 

Generated on Tue Jul 5 14:28:08 2011 for ROOT_528-00b_version by  doxygen 1.5.1