ROOT_528-00b_version: include/Fit/BinData.h Source File

00001 // @(#)root/mathcore:$Id: BinData.h 33180 2010-04-25 10:14:07Z moneta $
00002 // Author: L. Moneta Wed Aug 30 11:15:23 2006
00003 
00004 /**********************************************************************
00005  *                                                                    *
00006  * Copyright (c) 2006  LCG ROOT Math Team, CERN/PH-SFT                *
00007  *                                                                    *
00008  *                                                                    *
00009  **********************************************************************/
00010 
00011 // Header file for class BinData
00012 
00013 #ifndef ROOT_Fit_BinData
00014 #define ROOT_Fit_BinData
00015 
00016 #ifndef ROOT_Fit_DataVector
00017 #include "Fit/DataVector.h"
00018 #endif
00019 
00020 
00021 #ifdef USE_BINPOINT_CLASS
00022 
00023 #ifndef ROOT_Fit_BinPoint
00024 #include "Fit/BinPoint.h"
00025 #endif
00026 
00027 #endif
00028 
00029 
00030 namespace ROOT { 
00031 
00032    namespace Fit { 
00033 
00034 
00035 
00036 //___________________________________________________________________________________
00037 /** 
00038    Class describing the binned data sets : 
00039               vectors of  x coordinates, y values and optionally error on y values and error on coordinates 
00040               The dimension of the coordinate is free
00041               There are 4 different options: 
00042               - only coordinates and values  (for binned likelihood fits)  : kNoError 
00043               - coordinate, values and error on  values (for normal least square fits)  : kValueError
00044               - coordinate, values, error on values and coordinates (for effective least square fits) : kCoordError
00045               - corrdinate, values, error on coordinates and asymmettric error on valyes : kAsymError
00046 
00047               In addition there is the option to construct Bindata copying the data in (using the DataVector class) 
00048               or using pointer to external data (DataWrapper) class. 
00049               In general is found to be more efficient to copy the data. 
00050               In case of really large data sets for limiting memory consumption then the other option can be used
00051               Specialized constructor exists for data up to 3 dimensions. 
00052 
00053               When the data are copying in the number of points can be set later (or re-set) using Initialize and 
00054               the data are inserted one by one using the Add method. 
00055               It is mandatory to set the size before using the Add method.  
00056 
00057              @ingroup  FitData  
00058 */ 
00059 
00060 
00061 class BinData  : public FitData  { 
00062 
00063 public : 
00064 
00065    enum ErrorType { kNoError, kValueError, kCoordError, kAsymError };
00066 
00067    static unsigned int GetPointSize(ErrorType err, unsigned int dim) { 
00068       if (dim == 0 || dim > MaxSize() ) return 0;
00069       if (err == kNoError) return dim + 1;   // no errors
00070       if (err == kValueError) return dim + 2;  // error only on the value
00071       if (err == kCoordError) return 2 * dim + 2 ;  // error on value and coordinate
00072       return 2 * dim + 3;   // error on value (low and high)  and error on coordinate
00073     }
00074 
00075    ErrorType GetErrorType() const { 
00076       if (fPointSize == fDim + 1) return kNoError; 
00077       if (fPointSize == fDim + 2) return kValueError; 
00078       if (fPointSize == 2 * fDim + 2) return kCoordError; 
00079       assert( fPointSize == 2 * fDim + 3 ) ; 
00080       return kAsymError; 
00081    }
00082 
00083       
00084 
00085    /**
00086       constructor from dimension of point  and max number of points (to pre-allocate vector)
00087       Give a zero value and then use Initialize later one if the size is not known
00088     */
00089 
00090    explicit BinData(unsigned int maxpoints = 0, unsigned int dim = 1, ErrorType err = kValueError); 
00091 
00092    /**
00093       constructor from option and default range
00094     */
00095    explicit BinData (const DataOptions & opt, unsigned int maxpoints = 0, unsigned int dim = 1, ErrorType err = kValueError);
00096 
00097    /**
00098       constructor from options and range
00099       efault is 1D and value errors
00100     */
00101    BinData (const DataOptions & opt, const DataRange & range, unsigned int maxpoints = 0, unsigned int dim = 1, ErrorType err = kValueError ); 
00102 
00103    /** constructurs using external data */
00104    
00105    /**
00106       constructor from external data for 1D with errors on  coordinate and value
00107     */
00108    BinData(unsigned int n, const double * dataX, const double * val, const double * ex , const double * eval ); 
00109    
00110    /**
00111       constructor from external data for 2D with errors on  coordinate and value
00112     */
00113    BinData(unsigned int n, const double * dataX, const double * dataY, const double * val, const double * ex , const double * ey, const double * eval  ); 
00114 
00115    /**
00116       constructor from external data for 3D with errors on  coordinate and value
00117     */
00118    BinData(unsigned int n, const double * dataX, const double * dataY, const double * dataZ, const double * val, const double * ex , const double * ey , const double * ez , const double * eval   );
00119 
00120    /**
00121       copy constructor  
00122    */
00123    BinData(const BinData &);
00124 
00125    /** 
00126        assignment operator 
00127    */ 
00128    BinData & operator= (const BinData &);
00129 
00130 
00131    /**
00132       destructor
00133    */
00134    virtual ~BinData(); 
00135 
00136    /**
00137       preallocate a data set with given size ,  dimension and error type (to get the full point size)
00138       If the data set already exists and it is having the compatible point size space for the new points 
00139       is created in the data sets, while if not compatible the old data are erased and new space of 
00140       new size is allocated. 
00141       (i.e if exists initialize is equivalent to a resize( NPoints() + maxpoints) 
00142     */
00143    void Initialize(unsigned int maxpoints, unsigned int dim = 1, ErrorType err = kValueError ); 
00144 
00145 
00146    /**
00147       return the size of a fit point (is the coordinate dimension + 1 for the value and eventually 
00148       the number of all errors
00149     */
00150    unsigned int PointSize() const { 
00151       return fPointSize; 
00152    }
00153 
00154    /**
00155       return the size of internal data  (number of fit points)
00156       if data are not copied in but used externally the size is 0
00157     */
00158    unsigned int DataSize() const { 
00159       if (fDataVector) return fDataVector->Size(); 
00160       return 0; 
00161    }
00162 
00163    /**
00164       flag to control if data provides error on the coordinates
00165     */
00166    bool HaveCoordErrors() const { 
00167       if (fPointSize > fDim +2) return true; 
00168       return false;
00169    }
00170 
00171    /**
00172       flag to control if data provides asymmetric errors on the value
00173     */
00174    bool HaveAsymErrors() const { 
00175       if (fPointSize > 2 * fDim +2) return true; 
00176       return false;
00177    }
00178 
00179 
00180    /**
00181       add one dim data with only coordinate and values
00182    */
00183    void Add(double x, double y ); 
00184 
00185    /**
00186       add one dim data with no error in the coordinate (x)
00187       in this case store the inverse of the error in the value (y)
00188    */
00189    void Add(double x, double y, double ey);
00190 
00191    /**
00192       add one dim data with  error in the coordinate (x)
00193       in this case store the value (y)  error and not the inverse 
00194    */
00195    void Add(double x, double y, double ex, double ey);
00196 
00197    /**
00198       add one dim data with  error in the coordinate (x) and asymmetric errors in the value (y)
00199       in this case store the y errors and not the inverse 
00200    */
00201    void Add(double x, double y, double ex, double eyl , double eyh);
00202 
00203    /**
00204       add multi-dim coordinate data with only value (no errors)
00205    */
00206    void Add(const double *x, double val); 
00207 
00208    /**
00209       add multi-dim coordinate data with only error in value 
00210    */
00211    void Add(const double *x, double val, double  eval); 
00212 
00213    /**
00214       add multi-dim coordinate data with both error in coordinates and value 
00215    */
00216    void Add(const double *x, double val, const double * ex, double  eval); 
00217 
00218    /**
00219       add multi-dim coordinate data with both error in coordinates and value 
00220    */
00221    void Add(const double *x, double val, const double * ex, double  elval, double  ehval); 
00222 
00223    /**
00224       return a pointer to the coordinates data for the given fit point 
00225     */
00226    const double * Coords(unsigned int ipoint) const { 
00227       if (fDataVector) 
00228          return &((fDataVector->Data())[ ipoint*fPointSize ] );
00229       
00230       return fDataWrapper->Coords(ipoint);
00231    }
00232 
00233    /**
00234       return the value for the given fit point
00235     */
00236    double Value(unsigned int ipoint) const { 
00237       if (fDataVector)       
00238          return (fDataVector->Data())[ ipoint*fPointSize + fDim ];
00239      
00240       return fDataWrapper->Value(ipoint);
00241    }
00242 
00243 
00244    /**
00245       return error on the value for the given fit point
00246       Safe (but slower) method returning correctly the error on the value 
00247       in case of asymm errors return the average 0.5(eu + el)
00248     */ 
00249    double Error(unsigned int ipoint) const { 
00250       if (fDataVector) { 
00251          ErrorType type = GetErrorType(); 
00252          if (type == kNoError ) return 1; 
00253          // error on the value is the last element in the point structure
00254          double eval =  (fDataVector->Data())[ (ipoint+1)*fPointSize - 1];
00255          if (type == kValueError ) // need to invert (inverror is stored) 
00256             return eval != 0 ? 1.0/eval : 0; 
00257          else if (type == kAsymError) {  // return 1/2(el + eh) 
00258             double el = (fDataVector->Data())[ (ipoint+1)*fPointSize - 2];
00259             return 0.5 * (el+eval); 
00260          }
00261          return eval; // case of coord errors
00262       }
00263 
00264       return fDataWrapper->Error(ipoint);
00265    } 
00266 
00267    /**
00268       Return the inverse of error on the value for the given fit point
00269       useful when error in the coordinates are not stored and then this is used directly this as the weight in 
00270       the least square function
00271     */
00272    double InvError(unsigned int ipoint) const {
00273       if (fDataVector) { 
00274          // error on the value is the last element in the point structure
00275          double eval =  (fDataVector->Data())[ (ipoint+1)*fPointSize - 1];
00276          return eval; 
00277 //          if (!fWithCoordError) return eval; 
00278 //          // when error in the coordinate is stored, need to invert it 
00279 //          return eval != 0 ? 1.0/eval : 0; 
00280       }
00281       //case data wrapper 
00282 
00283       double eval = fDataWrapper->Error(ipoint);
00284       return eval != 0 ? 1.0/eval : 0; 
00285    }
00286 
00287 
00288    /**
00289       Return a pointer to the errors in the coordinates for the given fit point
00290     */
00291    const double * CoordErrors(unsigned int ipoint) const {
00292       if (fDataVector) { 
00293          // error on the value is the last element in the point structure
00294          return  &(fDataVector->Data())[ (ipoint)*fPointSize + fDim + 1];
00295       }
00296 
00297       return fDataWrapper->CoordErrors(ipoint);
00298    }
00299 
00300    /**
00301       retrieve at the same time a  pointer to the coordinate data and the fit value
00302       More efficient than calling Coords(i) and Value(i)
00303     */
00304    const double * GetPoint(unsigned int ipoint, double & value) const {
00305       if (fDataVector) { 
00306          unsigned int j = ipoint*fPointSize;
00307          const std::vector<double> & v = (fDataVector->Data());
00308          const double * x = &v[j];
00309          value = v[j+fDim];
00310          return x;
00311       } 
00312       value = fDataWrapper->Value(ipoint);
00313       return fDataWrapper->Coords(ipoint);
00314    }
00315 
00316    /**
00317       retrieve in a single call a pointer to the coordinate data, value and inverse error for 
00318       the given fit point. 
00319       To be used only when type is kValueError or kNoError. In the last case the value 1 is returned 
00320       for the error. 
00321    */
00322    const double * GetPoint(unsigned int ipoint, double & value, double & invError) const {
00323       if (fDataVector) { 
00324          const std::vector<double> & v = (fDataVector->Data());
00325          unsigned int j = ipoint*fPointSize;
00326          const double * x = &v[j];
00327          j += fDim;
00328          value = v[j];
00329          if (fPointSize == fDim +1) // value error (type=kNoError)
00330             invError = 1;
00331          else if (fPointSize == fDim +2) // value error (type=kNoError)
00332             invError = v[j+1];
00333          else 
00334             assert(0); // cannot be here
00335 
00336          return x;
00337       } 
00338       value = fDataWrapper->Value(ipoint);
00339       double e = fDataWrapper->Error(ipoint);
00340       invError = ( e > 0 ) ? 1.0/e : 1.0; 
00341       return fDataWrapper->Coords(ipoint);
00342    }
00343 
00344    /**
00345       Retrieve the errors on the point (coordinate and value) for the given fit point
00346       It must be called only when the coordinate errors are stored otherwise it will produce an 
00347       assert.
00348    */
00349    const double * GetPointError(unsigned int ipoint, double & errvalue) const {
00350       if (fDataVector) { 
00351          assert(fPointSize > fDim + 2); 
00352          unsigned int j = ipoint*fPointSize;
00353          const std::vector<double> & v = (fDataVector->Data());
00354          const double * ex = &v[j+fDim+1];
00355          errvalue = v[j + 2*fDim +1];
00356          return ex;
00357       } 
00358       errvalue = fDataWrapper->Error(ipoint);
00359       return fDataWrapper->CoordErrors(ipoint);
00360    }
00361 
00362    /**
00363       Get errors on the point (coordinate errors and asymmetric value errors) for the 
00364       given fit point. 
00365       It must be called only when the coordinate errors and asymmetric errors are stored 
00366       otherwise it will produce an assert.
00367    */
00368    const double * GetPointError(unsigned int ipoint, double & errlow, double & errhigh) const {
00369       // external data is not supported for asymmetric errors
00370       assert(fDataVector); 
00371 
00372       assert(fPointSize > 2 * fDim + 2); 
00373       unsigned int j = ipoint*fPointSize;
00374       const std::vector<double> & v = (fDataVector->Data());
00375       const double * ex = &v[j+fDim+1];
00376       errlow  = v[j + 2*fDim +1];
00377       errhigh = v[j + 2*fDim +2];
00378       return ex;
00379    }
00380 
00381 
00382 #ifdef USE_BINPOINT_CLASS
00383    const BinPoint & GetPoint(unsigned int ipoint) const { 
00384       if (fDataVector) { 
00385          unsigned int j = ipoint*fPointSize;
00386          const std::vector<double> & v = (fDataVector->Data());
00387          const double * x = &v[j];
00388          double value = v[j+fDim];
00389          if (fPointSize > fDim + 2) {
00390             const double * ex = &v[j+fDim+1];
00391             double err = v[j + 2*fDim +1];
00392             fPoint.Set(x,value,ex,err);
00393          } 
00394          else {
00395             double invError = v[j+fDim+1];
00396             fPoint.Set(x,value,invError);
00397          }
00398 
00399       } 
00400       else { 
00401          double value = fDataWrapper->Value(ipoint);
00402          double e = fDataWrapper->Error(ipoint);
00403          if (fPointSize > fDim + 2) {
00404             fPoint.Set(fDataWrapper->Coords(ipoint), value, fDataWrapper->CoordErrors(ipoint), e);
00405          } else { 
00406             double invError = ( e != 0 ) ? 1.0/e : 0; 
00407             fPoint.Set(fDataWrapper->Coords(ipoint), value, invError);
00408          }
00409       }
00410       return fPoint; 
00411    }      
00412 
00413 
00414    const BinPoint & GetPointError(unsigned int ipoint) const { 
00415       if (fDataVector) { 
00416          unsigned int j = ipoint*fPointSize;
00417          const std::vector<double> & v = (fDataVector->Data());
00418          const double * x = &v[j];
00419          double value = v[j+fDim];
00420          double invError = v[j+fDim+1];
00421          fPoint.Set(x,value,invError);
00422       } 
00423       else { 
00424          double value = fDataWrapper->Value(ipoint);
00425          double e = fDataWrapper->Error(ipoint);
00426          double invError = ( e != 0 ) ? 1.0/e : 0; 
00427          fPoint.Set(fDataWrapper->Coords(ipoint), value, invError);
00428       }
00429       return fPoint; 
00430    }      
00431 #endif
00432 
00433    /**
00434       resize the vector to the new given npoints
00435       if vector does not exists is created using existing point size
00436     */
00437    void Resize (unsigned int npoints);  
00438 
00439    /**
00440       return number of fit points
00441     */
00442    unsigned int NPoints() const { return fNPoints; } 
00443 
00444    /**
00445       return number of fit points 
00446     */ 
00447    unsigned int Size() const { return fNPoints; }
00448 
00449    /**
00450       return coordinate data dimension
00451     */
00452    unsigned int NDim() const { return fDim; } 
00453 
00454    /**
00455       apply a Log transformation of the data values 
00456       can be used for example when fitting an exponential or gaussian
00457       Transform the data in place need to copy if want to preserve original data
00458       The data sets must not contain negative values. IN case it does, 
00459       an empty data set is returned
00460     */
00461    BinData & LogTransform();
00462 
00463 
00464    /** 
00465        return an array containing the upper edge of the bin for coordinate i
00466        In case of empty bin they could be merged in a single larger bin
00467        Return a NULL pointer  if the bin width  is not stored 
00468    */
00469    const double * BinUpEdge(unsigned int icoord) const { 
00470       if (fBinEdge.size() == 0 || icoord*fDim > fBinEdge.size() ) return 0; 
00471       return &fBinEdge[ icoord * fDim];
00472    }
00473    
00474    /**
00475       query if the data store the bin edges instead of the center
00476    */
00477    bool HasBinEdges() const {
00478       return fBinEdge.size() > 0 && fBinEdge.size() == fDim*fNPoints;
00479    }
00480 
00481    /** 
00482        add the bin width data, a pointer to an array with the bin upper edge information.
00483        This is needed when fitting with integral options
00484        The information is added for the previously inserted point. 
00485        BinData::Add  must be called before
00486    */
00487    void AddBinUpEdge(const double * xup); 
00488 
00489    /** 
00490        retrieve the reference volume used to normalize the data when the option bin volume is set
00491     */ 
00492    double RefVolume() const { return fRefVolume; }
00493 
00494    /**
00495       set the reference volume used to normalize the data when the option bin volume is set
00496     */
00497    void SetRefVolume(double value) { fRefVolume = value; }
00498 
00499 protected: 
00500 
00501    void SetNPoints(unsigned int n) { fNPoints = n; }
00502 
00503 private: 
00504 
00505 
00506    unsigned int fDim;       // coordinate dimension
00507    unsigned int fPointSize; // total point size including value and errors (= fDim + 2 for error in only Y ) 
00508    unsigned int fNPoints;   // number of contained points in the data set (can be different than size of vector)
00509    double fRefVolume;  // reference bin volume - used to normalize the bins in case of variable bins data
00510 
00511    DataVector * fDataVector;  // pointer to the copied in data vector
00512    DataWrapper * fDataWrapper;  // pointer to the external data wrapper structure
00513 
00514    std::vector<double> fBinEdge;  // vector containing the bin upper edge (coordinate will contain low edge) 
00515 
00516 
00517 #ifdef USE_BINPOINT_CLASS
00518    mutable BinPoint fPoint; 
00519 #endif
00520 
00521 }; 
00522 
00523   
00524    } // end namespace Fit
00525 
00526 } // end namespace ROOT
00527 
00528 
00529 
00530 #endif /* ROOT_Fit_BinData */
00531 
00532