TPrincipal.cxx

Go to the documentation of this file.
00001 // @(#)root/hist:$Id: TPrincipal.cxx 35247 2010-09-13 14:10:41Z brun $
00002 // Author: Christian Holm Christensen    1/8/2000
00003 
00004 /*************************************************************************
00005  * Copyright (C) 1995-2004, Rene Brun and Fons Rademakers.               *
00006  * All rights reserved.                                                  *
00007  *                                                                       *
00008  * For the licensing terms see $ROOTSYS/LICENSE.                         *
00009  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
00010  *************************************************************************/
00011 
00012 //____________________________________________________________________
00013 //Begin_Html <!--
00014 /* -->
00015    </pre>
00016 <H1><A NAME="SECTION00010000000000000000"></A>
00017 <A NAME="sec:lintra"></A>
00018 <BR>
00019 Principal Components Analysis (PCA)
00020 </H1>
00021 
00022 <P>
00023 The current implementation is based on the LINTRA package from CERNLIB
00024 by R. Brun, H. Hansroul, and J. Kubler.
00025 The class has been implemented by Christian Holm Christensen in August 2000.
00026 
00027 <P>
00028 
00029 <H2><A NAME="SECTION00011000000000000000"></A>
00030 <A NAME="sec:intro1"></A>
00031 <BR>
00032 Introduction
00033 </H2>
00034 
00035 <P>
00036 In many applications of various fields of research, the treatment of
00037 large amounts of data requires powerful techniques capable of rapid
00038 data reduction and analysis. Usually, the quantities most
00039 conveniently measured by the experimentalist, are not necessarily the
00040 most significant for classification and analysis of the data. It is
00041 then useful to have a way of selecting an optimal set of variables
00042 necessary for the recognition process and reducing the dimensionality
00043 of the problem, resulting in an easier classification procedure.
00044 
00045 <P>
00046 This paper describes the implementation of one such method of
00047 feature selection, namely the principal components analysis. This
00048 multidimensional technique is well known in the field of pattern
00049 recognition and its use in Particle Physics has been documented
00050 elsewhere (cf. H. Wind, <I>Function Parameterization</I>, CERN
00051 72-21).
00052 
00053 <P>
00054 
00055 <H2><A NAME="SECTION00012000000000000000"></A>
00056 <A NAME="sec:overview"></A>
00057 <BR>
00058 Overview
00059 </H2>
00060 
00061 <P>
00062 Suppose we have prototypes which are trajectories of particles,
00063 passing through a spectrometer. If one measures the passage of the
00064 particle at say 8 fixed planes, the trajectory is described by an
00065 8-component vector:
00066 <BR><P></P>
00067 <DIV ALIGN="CENTER">
00068 
00069 <!-- MATH
00070  \begin{displaymath}
00071 \mathbf{x} = \left(x_0, x_1, \ldots, x_7\right)
00072 \end{displaymath}
00073  -->
00074 
00075 
00076 <IMG
00077  WIDTH="145" HEIGHT="31" BORDER="0"
00078  SRC="gif/principal_img1.gif"
00079  ALT="\begin{displaymath}
00080 \mathbf{x} = \left(x_0, x_1, \ldots, x_7\right)
00081 \end{displaymath}">
00082 </DIV>
00083 <BR CLEAR="ALL">
00084 <P></P>
00085 in 8-dimensional pattern space.
00086 
00087 <P>
00088 One proceeds by generating a representative track sample and
00089 building up the covariance matrix <IMG
00090  WIDTH="16" HEIGHT="16" ALIGN="BOTTOM" BORDER="0"
00091  SRC="gif/principal_img2.gif"
00092  ALT="$\mathsf{C}$">. Its eigenvectors and
00093 eigenvalues are computed by standard methods, and thus a new basis is
00094 obtained for the original 8-dimensional space the expansion of the
00095 prototypes,
00096 <BR><P></P>
00097 <DIV ALIGN="CENTER">
00098 
00099 <!-- MATH
00100  \begin{displaymath}
00101 \mathbf{x}_m = \sum^7_{i=0} a_{m_i} \mathbf{e}_i
00102 \quad
00103 \mbox{where}
00104 \quad
00105 a_{m_i} = \mathbf{x}^T\bullet\mathbf{e}_i
00106 \end{displaymath}
00107  -->
00108 
00109 
00110 <IMG
00111  WIDTH="295" HEIGHT="58" BORDER="0"
00112  SRC="gif/principal_img3.gif"
00113  ALT="\begin{displaymath}
00114 \mathbf{x}_m = \sum^7_{i=0} a_{m_i} \mathbf{e}_i
00115 \quad
00116 \mbox{where}
00117 \quad
00118 a_{m_i} = \mathbf{x}^T\bullet\mathbf{e}_i
00119 \end{displaymath}">
00120 </DIV>
00121 <BR CLEAR="ALL">
00122 <P></P>
00123 
00124 <P>
00125 allows the study of the behavior of the coefficients <IMG
00126  WIDTH="31" HEIGHT="30" ALIGN="MIDDLE" BORDER="0"
00127  SRC="gif/principal_img4.gif"
00128  ALT="$a_{m_i}$"> for all
00129 the tracks of the sample. The eigenvectors which are insignificant for
00130 the trajectory description in the expansion will have their
00131 corresponding coefficients <IMG
00132  WIDTH="31" HEIGHT="30" ALIGN="MIDDLE" BORDER="0"
00133  SRC="gif/principal_img4.gif"
00134  ALT="$a_{m_i}$"> close to zero for all the
00135 prototypes.
00136 
00137 <P>
00138 On one hand, a reduction of the dimensionality is then obtained by
00139 omitting these least significant vectors in the subsequent analysis.
00140 
00141 <P>
00142 On the other hand, in the analysis of real data, these least
00143 significant variables(?) can be used for the pattern
00144 recognition problem of extracting the valid combinations of
00145 coordinates describing a true trajectory from the set of all possible
00146 wrong combinations.
00147 
00148 <P>
00149 The program described here performs this principal components analysis
00150 on a sample of data provided by the user. It computes the covariance
00151 matrix, its eigenvalues and corresponding eigenvectors and exhibits
00152 the behavior of the principal components (<IMG
00153  WIDTH="31" HEIGHT="30" ALIGN="MIDDLE" BORDER="0"
00154  SRC="gif/principal_img4.gif"
00155  ALT="$a_{m_i}$">), thus providing
00156 to the user all the means of understanding his data.
00157 
00158 <P>
00159 
00160 <H2><A NAME="SECTION00013000000000000000"></A>
00161 <A NAME="sec:method"></A>
00162 <BR>
00163 Principal Components Method
00164 </H2>
00165 
00166 <P>
00167 Let's consider a sample of <IMG
00168  WIDTH="23" HEIGHT="15" ALIGN="BOTTOM" BORDER="0"
00169  SRC="gif/principal_img5.gif"
00170  ALT="$M$"> prototypes each being characterized by
00171 <IMG
00172  WIDTH="18" HEIGHT="15" ALIGN="BOTTOM" BORDER="0"
00173  SRC="gif/principal_img6.gif"
00174  ALT="$P$"> variables
00175 <!-- MATH
00176  $x_0, x_1, \ldots, x_{P-1}$
00177  -->
00178 <IMG
00179  WIDTH="107" HEIGHT="30" ALIGN="MIDDLE" BORDER="0"
00180  SRC="gif/principal_img7.gif"
00181  ALT="$x_0, x_1, \ldots, x_{P-1}$">. Each prototype is a point, or a
00182 column vector, in a <IMG
00183  WIDTH="18" HEIGHT="15" ALIGN="BOTTOM" BORDER="0"
00184  SRC="gif/principal_img6.gif"
00185  ALT="$P$">-dimensional <I>pattern space</I>.
00186 <BR>
00187 <DIV ALIGN="RIGHT">
00188 
00189 
00190 <!-- MATH
00191  \begin{equation}
00192 \mathbf{x} = \left[\begin{array}{c}
00193     x_0\\x_1\\\vdots\\x_{P-1}\end{array}\right]\,,
00194 \end{equation}
00195  -->
00196 
00197 <TABLE WIDTH="100%" ALIGN="CENTER">
00198 <TR VALIGN="MIDDLE"><TD ALIGN="CENTER" NOWRAP><IMG
00199  WIDTH="102" HEIGHT="102" BORDER="0"
00200  SRC="gif/principal_img8.gif"
00201  ALT="\begin{displaymath}
00202 \mathbf{x} = \left[\begin{array}{c}
00203 x_0\\ x_1\\ \vdots\\ x_{P-1}\end{array}\right]\,,
00204 \end{displaymath}"></TD>
00205 <TD WIDTH=10 ALIGN="RIGHT">
00206 (1)</TD></TR>
00207 </TABLE>
00208 <BR CLEAR="ALL"></DIV><P></P>
00209 where each <IMG
00210  WIDTH="23" HEIGHT="30" ALIGN="MIDDLE" BORDER="0"
00211  SRC="gif/principal_img9.gif"
00212  ALT="$x_n$"> represents the particular value associated with the
00213 <IMG
00214  WIDTH="15" HEIGHT="16" ALIGN="BOTTOM" BORDER="0"
00215  SRC="gif/principal_img10.gif"
00216  ALT="$n$">-dimension.
00217 
00218 <P>
00219 Those <IMG
00220  WIDTH="18" HEIGHT="15" ALIGN="BOTTOM" BORDER="0"
00221  SRC="gif/principal_img6.gif"
00222  ALT="$P$"> variables are the quantities accessible to the
00223 experimentalist, but are not necessarily the most significant for the
00224 classification purpose.
00225 
00226 <P>
00227 The <I>Principal Components Method</I> consists of applying a
00228 <I>linear</I> transformation to the original variables. This
00229 transformation is described by an orthogonal matrix and is equivalent
00230 to a rotation of the original pattern space into a new set of
00231 coordinate vectors, which hopefully provide easier feature
00232 identification and dimensionality reduction.
00233 
00234 <P>
00235 Let's define the covariance matrix:
00236 <BR>
00237 <DIV ALIGN="RIGHT">
00238 
00239 
00240 <!-- MATH
00241  \begin{equation}
00242 \mathsf{C} = \left\langle\mathbf{y}\mathbf{y}^T\right\rangle
00243   \quad\mbox{where}\quad
00244   \mathbf{y} = \mathbf{x} - \left\langle\mathbf{x}\right\rangle\,,
00245 \end{equation}
00246  -->
00247 
00248 <TABLE WIDTH="100%" ALIGN="CENTER">
00249 <TR VALIGN="MIDDLE"><TD ALIGN="CENTER" NOWRAP><A NAME="eq:C"></A><IMG
00250  WIDTH="267" HEIGHT="37" BORDER="0"
00251  SRC="gif/principal_img11.gif"
00252  ALT="\begin{displaymath}
00253 \mathsf{C} = \left\langle\mathbf{y}\mathbf{y}^T\right\rangl...
00254 ...athbf{y} = \mathbf{x} - \left\langle\mathbf{x}\right\rangle\,,
00255 \end{displaymath}"></TD>
00256 <TD WIDTH=10 ALIGN="RIGHT">
00257 (2)</TD></TR>
00258 </TABLE>
00259 <BR CLEAR="ALL"></DIV><P></P>
00260 and the brackets indicate mean value over the sample of <IMG
00261  WIDTH="23" HEIGHT="15" ALIGN="BOTTOM" BORDER="0"
00262  SRC="gif/principal_img5.gif"
00263  ALT="$M$">
00264 prototypes.
00265 
00266 <P>
00267 This matrix <IMG
00268  WIDTH="16" HEIGHT="16" ALIGN="BOTTOM" BORDER="0"
00269  SRC="gif/principal_img2.gif"
00270  ALT="$\mathsf{C}$"> is real, positive definite, symmetric, and will
00271 have all its eigenvalues greater than zero. It will now be shown that
00272 among the family of all the complete orthonormal bases of the pattern
00273 space, the base formed by the eigenvectors of the covariance matrix
00274 and belonging to the largest eigenvalues, corresponds to the most
00275 significant features of the description of the original prototypes.
00276 
00277 <P>
00278 Let the prototypes be expanded into a set of <IMG
00279  WIDTH="20" HEIGHT="15" ALIGN="BOTTOM" BORDER="0"
00280  SRC="gif/principal_img12.gif"
00281  ALT="$N$"> basis vectors
00282 
00283 <!-- MATH
00284  $\mathbf{e}_n, n=0,\ldots,N,N+1, \ldots, P-1$
00285  -->
00286 <IMG
00287  WIDTH="233" HEIGHT="32" ALIGN="MIDDLE" BORDER="0"
00288  SRC="gif/principal_img13.gif"
00289  ALT="$\mathbf{e}_n, n=0,\ldots,N,N+1, \ldots, P-1$">,
00290 <BR>
00291 <DIV ALIGN="RIGHT">
00292 
00293 
00294 <!-- MATH
00295  \begin{equation}
00296 \mathbf{y}_i = \sum^N_{i=0} a_{i_n} \mathbf{e}_n,
00297   \quad
00298   i = 1, \ldots, M,
00299   \quad
00300   N < P-1
00301 \end{equation}
00302  -->
00303 
00304 <TABLE WIDTH="100%" ALIGN="CENTER">
00305 <TR VALIGN="MIDDLE"><TD ALIGN="CENTER" NOWRAP><A NAME="eq:yi"></A><IMG
00306  WIDTH="303" HEIGHT="58" BORDER="0"
00307  SRC="gif/principal_img14.gif"
00308  ALT="\begin{displaymath}
00309 \mathbf{y}_i = \sum^N_{i=0} a_{i_n} \mathbf{e}_n,
00310 \quad
00311 i = 0, \ldots, M,
00312 \quad
00313 N &lt; P-1
00314 \end{displaymath}"></TD>
00315 <TD WIDTH=10 ALIGN="RIGHT">
00316 (3)</TD></TR>
00317 </TABLE>
00318 <BR CLEAR="ALL"></DIV><P></P>
00319 
00320 <P>
00321 The `best' feature coordinates <IMG
00322  WIDTH="23" HEIGHT="30" ALIGN="MIDDLE" BORDER="0"
00323  SRC="gif/principal_img15.gif"
00324  ALT="$\mathbf{e}_n$">, spanning a <I>feature
00325   space</I>,  will be obtained by minimizing the error due to this
00326 truncated expansion, i.e.,
00327 <BR>
00328 <DIV ALIGN="RIGHT">
00329 
00330 
00331 <!-- MATH
00332  \begin{equation}
00333 \min\left(E_N\right) =
00334   \min\left[\left\langle\left(\mathbf{y}_i - \sum^N_{i=0} a_{i_n} \mathbf{e}_n\right)^2\right\rangle\right]
00335 \end{equation}
00336  -->
00337 
00338 <TABLE WIDTH="100%" ALIGN="CENTER">
00339 <TR VALIGN="MIDDLE"><TD ALIGN="CENTER" NOWRAP><A NAME="eq:mini"></A><IMG
00340  WIDTH="306" HEIGHT="65" BORDER="0"
00341  SRC="gif/principal_img16.gif"
00342  ALT="\begin{displaymath}
00343 \min\left(E_N\right) =
00344 \min\left[\left\langle\left(\mathb...
00345 ...\sum^N_{i=0} a_{i_n} \mathbf{e}_n\right)^2\right\rangle\right]
00346 \end{displaymath}"></TD>
00347 <TD WIDTH=10 ALIGN="RIGHT">
00348 (4)</TD></TR>
00349 </TABLE>
00350 <BR CLEAR="ALL"></DIV><P></P>
00351 with the conditions:
00352 <BR>
00353 <DIV ALIGN="RIGHT">
00354 
00355 
00356 <!-- MATH
00357  \begin{equation}
00358 \mathbf{e}_k\bullet\mathbf{e}_j = \delta_{jk} =
00359   \left\{\begin{array}{rcl}
00360     1 & \mbox{for} & k = j\\
00361     0 & \mbox{for} & k \neq j
00362   \end{array}\right.
00363 \end{equation}
00364  -->
00365 
00366 <TABLE WIDTH="100%" ALIGN="CENTER">
00367 <TR VALIGN="MIDDLE"><TD ALIGN="CENTER" NOWRAP><A NAME="eq:ortocond"></A><IMG
00368  WIDTH="240" HEIGHT="54" BORDER="0"
00369  SRC="gif/principal_img17.gif"
00370  ALT="\begin{displaymath}
00371 \mathbf{e}_k\bullet\mathbf{e}_j = \delta_{jk} =
00372 \left\{\b...
00373 ...for} &amp; k = j\\
00374 0 &amp; \mbox{for} &amp; k \neq j
00375 \end{array}\right.
00376 \end{displaymath}"></TD>
00377 <TD WIDTH=10 ALIGN="RIGHT">
00378 (5)</TD></TR>
00379 </TABLE>
00380 <BR CLEAR="ALL"></DIV><P></P>
00381 
00382 <P>
00383 Multiplying (<A HREF="prin_node1.html#eq:yi">3</A>) by
00384 <!-- MATH
00385  $\mathbf{e}^T_n$
00386  -->
00387 <IMG
00388  WIDTH="24" HEIGHT="38" ALIGN="MIDDLE" BORDER="0"
00389  SRC="gif/principal_img18.gif"
00390  ALT="$\mathbf{e}^T_n$"> using (<A HREF="prin_node1.html#eq:ortocond">5</A>),
00391 we get
00392 <BR>
00393 <DIV ALIGN="RIGHT">
00394 
00395 
00396 <!-- MATH
00397  \begin{equation}
00398 a_{i_n} = \mathbf{y}_i^T\bullet\mathbf{e}_n\,,
00399 \end{equation}
00400  -->
00401 
00402 <TABLE WIDTH="100%" ALIGN="CENTER">
00403 <TR VALIGN="MIDDLE"><TD ALIGN="CENTER" NOWRAP><A NAME="eq:ai"></A><IMG
00404  WIDTH="108" HEIGHT="31" BORDER="0"
00405  SRC="gif/principal_img19.gif"
00406  ALT="\begin{displaymath}
00407 a_{i_n} = \mathbf{y}_i^T\bullet\mathbf{e}_n\,,
00408 \end{displaymath}"></TD>
00409 <TD WIDTH=10 ALIGN="RIGHT">
00410 (6)</TD></TR>
00411 </TABLE>
00412 <BR CLEAR="ALL"></DIV><P></P>
00413 so the error becomes
00414 <BR>
00415 <DIV ALIGN="CENTER"><A NAME="eq:error"></A>
00416 
00417 <!-- MATH
00418  \begin{eqnarray}
00419 E_N &=&
00420   \left\langle\left[\sum_{n=N+1}^{P-1}  a_{i_n}\mathbf{e}_n\right]^2\right\rangle\nonumber\\
00421   &=&
00422   \left\langle\left[\sum_{n=N+1}^{P-1}  \mathbf{y}_i^T\bullet\mathbf{e}_n\mathbf{e}_n\right]^2\right\rangle\nonumber\\
00423   &=&
00424   \left\langle\sum_{n=N+1}^{P-1}  \mathbf{e}_n^T\mathbf{y}_i\mathbf{y}_i^T\mathbf{e}_n\right\rangle\nonumber\\
00425   &=&
00426   \sum_{n=N+1}^{P-1}  \mathbf{e}_n^T\mathsf{C}\mathbf{e}_n
00427 \end{eqnarray}
00428  -->
00429 
00430 <TABLE ALIGN="CENTER" CELLPADDING="0" WIDTH="100%">
00431 <TR VALIGN="MIDDLE"><TD NOWRAP ALIGN="RIGHT"><IMG
00432  WIDTH="30" HEIGHT="32" ALIGN="MIDDLE" BORDER="0"
00433  SRC="gif/principal_img20.gif"
00434  ALT="$\displaystyle E_N$"></TD>
00435 <TD ALIGN="CENTER" NOWRAP><IMG
00436  WIDTH="18" HEIGHT="30" ALIGN="MIDDLE" BORDER="0"
00437  SRC="gif/principal_img21.gif"
00438  ALT="$\textstyle =$"></TD>
00439 <TD ALIGN="LEFT" NOWRAP><IMG
00440  WIDTH="151" HEIGHT="80" ALIGN="MIDDLE" BORDER="0"
00441  SRC="gif/principal_img22.gif"
00442  ALT="$\displaystyle \left\langle\left[\sum_{n=N+1}^{P-1} a_{i_n}\mathbf{e}_n\right]^2\right\rangle$"></TD>
00443 <TD WIDTH=10 ALIGN="RIGHT">
00444 &nbsp;</TD></TR>
00445 <TR VALIGN="MIDDLE"><TD NOWRAP ALIGN="RIGHT">&nbsp;</TD>
00446 <TD ALIGN="CENTER" NOWRAP><IMG
00447  WIDTH="18" HEIGHT="30" ALIGN="MIDDLE" BORDER="0"
00448  SRC="gif/principal_img21.gif"
00449  ALT="$\textstyle =$"></TD>
00450 <TD ALIGN="LEFT" NOWRAP><IMG
00451  WIDTH="184" HEIGHT="80" ALIGN="MIDDLE" BORDER="0"
00452  SRC="gif/principal_img23.gif"
00453  ALT="$\displaystyle \left\langle\left[\sum_{n=N+1}^{P-1} \mathbf{y}_i^T\bullet\mathbf{e}_n\mathbf{e}_n\right]^2\right\rangle$"></TD>
00454 <TD WIDTH=10 ALIGN="RIGHT">
00455 &nbsp;</TD></TR>
00456 <TR VALIGN="MIDDLE"><TD NOWRAP ALIGN="RIGHT">&nbsp;</TD>
00457 <TD ALIGN="CENTER" NOWRAP><IMG
00458  WIDTH="18" HEIGHT="30" ALIGN="MIDDLE" BORDER="0"
00459  SRC="gif/principal_img21.gif"
00460  ALT="$\textstyle =$"></TD>
00461 <TD ALIGN="LEFT" NOWRAP><IMG
00462  WIDTH="156" HEIGHT="69" ALIGN="MIDDLE" BORDER="0"
00463  SRC="gif/principal_img24.gif"
00464  ALT="$\displaystyle \left\langle\sum_{n=N+1}^{P-1} \mathbf{e}_n^T\mathbf{y}_i\mathbf{y}_i^T\mathbf{e}_n\right\rangle$"></TD>
00465 <TD WIDTH=10 ALIGN="RIGHT">
00466 &nbsp;</TD></TR>
00467 <TR VALIGN="MIDDLE"><TD NOWRAP ALIGN="RIGHT">&nbsp;</TD>
00468 <TD ALIGN="CENTER" NOWRAP><IMG
00469  WIDTH="18" HEIGHT="30" ALIGN="MIDDLE" BORDER="0"
00470  SRC="gif/principal_img21.gif"
00471  ALT="$\textstyle =$"></TD>
00472 <TD ALIGN="LEFT" NOWRAP><IMG
00473  WIDTH="104" HEIGHT="69" ALIGN="MIDDLE" BORDER="0"
00474  SRC="gif/principal_img25.gif"
00475  ALT="$\displaystyle \sum_{n=N+1}^{P-1} \mathbf{e}_n^T\mathsf{C}\mathbf{e}_n$"></TD>
00476 <TD WIDTH=10 ALIGN="RIGHT">
00477 (7)</TD></TR>
00478 </TABLE></DIV>
00479 <BR CLEAR="ALL"><P></P>
00480 
00481 <P>
00482 The minimization of the sum in (<A HREF="prin_node1.html#eq:error">7</A>) is obtained when each
00483 term
00484 <!-- MATH
00485  $\mathbf{e}_n^\mathsf{C}\mathbf{e}_n$
00486  -->
00487 <IMG
00488  WIDTH="41" HEIGHT="38" ALIGN="MIDDLE" BORDER="0"
00489  SRC="gif/principal_img26.gif"
00490  ALT="$\mathbf{e}_n^\mathsf{C}\mathbf{e}_n$"> is minimum, since <IMG
00491  WIDTH="16" HEIGHT="16" ALIGN="BOTTOM" BORDER="0"
00492  SRC="gif/principal_img2.gif"
00493  ALT="$\mathsf{C}$"> is
00494 positive definite. By the method of Lagrange multipliers, and the
00495 condition&nbsp;(<A HREF="prin_node1.html#eq:ortocond">5</A>), we get
00496 
00497 <P>
00498 
00499 <BR>
00500 <DIV ALIGN="RIGHT">
00501 
00502 
00503 <!-- MATH
00504  \begin{equation}
00505 E_N = \sum^{P-1}_{n=N+1} \left(\mathbf{e}_n^T\mathsf{C}\mathbf{e}_n -
00506     l_n\mathbf{e}_n^T\bullet\mathbf{e}_n + l_n\right)
00507 \end{equation}
00508  -->
00509 
00510 <TABLE WIDTH="100%" ALIGN="CENTER">
00511 <TR VALIGN="MIDDLE"><TD ALIGN="CENTER" NOWRAP><A NAME="eq:minerror"></A><IMG
00512  WIDTH="291" HEIGHT="60" BORDER="0"
00513  SRC="gif/principal_img27.gif"
00514  ALT="\begin{displaymath}
00515 E_N = \sum^{P-1}_{n=N+1} \left(\mathbf{e}_n^T\mathsf{C}\mathbf{e}_n -
00516 l_n\mathbf{e}_n^T\bullet\mathbf{e}_n + l_n\right)
00517 \end{displaymath}"></TD>
00518 <TD WIDTH=10 ALIGN="RIGHT">
00519 (8)</TD></TR>
00520 </TABLE>
00521 <BR CLEAR="ALL"></DIV><P></P>
00522 The minimum condition
00523 <!-- MATH
00524  $\frac{dE_N}{d\mathbf{e}^T_n} = 0$
00525  -->
00526 <IMG
00527  WIDTH="68" HEIGHT="40" ALIGN="MIDDLE" BORDER="0"
00528  SRC="gif/principal_img28.gif"
00529  ALT="$\frac{dE_N}{d\mathbf{e}^T_n} = 0$"> leads to the
00530 equation
00531 <BR>
00532 <DIV ALIGN="RIGHT">
00533 
00534 
00535 <!-- MATH
00536  \begin{equation}
00537 \mathsf{C}\mathbf{e}_n = l_n\mathbf{e}_n\,,
00538 \end{equation}
00539  -->
00540 
00541 <TABLE WIDTH="100%" ALIGN="CENTER">
00542 <TR VALIGN="MIDDLE"><TD ALIGN="CENTER" NOWRAP><A NAME="eq:Ce"></A><IMG
00543  WIDTH="91" HEIGHT="30" BORDER="0"
00544  SRC="gif/principal_img29.gif"
00545  ALT="\begin{displaymath}
00546 \mathsf{C}\mathbf{e}_n = l_n\mathbf{e}_n\,,
00547 \end{displaymath}"></TD>
00548 <TD WIDTH=10 ALIGN="RIGHT">
00549 (9)</TD></TR>
00550 </TABLE>
00551 <BR CLEAR="ALL"></DIV><P></P>
00552 which shows that <IMG
00553  WIDTH="23" HEIGHT="30" ALIGN="MIDDLE" BORDER="0"
00554  SRC="gif/principal_img15.gif"
00555  ALT="$\mathbf{e}_n$"> is an eigenvector of the covariance
00556 matrix <IMG
00557  WIDTH="16" HEIGHT="16" ALIGN="BOTTOM" BORDER="0"
00558  SRC="gif/principal_img2.gif"
00559  ALT="$\mathsf{C}$"> with eigenvalue <IMG
00560  WIDTH="19" HEIGHT="32" ALIGN="MIDDLE" BORDER="0"
00561  SRC="gif/principal_img30.gif"
00562  ALT="$l_n$">. The estimated minimum error is
00563 then given by
00564 <BR>
00565 <DIV ALIGN="RIGHT">
00566 
00567 
00568 <!-- MATH
00569  \begin{equation}
00570 E_N \sim \sum^{P-1}_{n=N+1} \mathbf{e}_n^T\bullet l_n\mathbf{e}_n
00571       = \sum^{P-1}_{n=N+1}  l_n\,,
00572 \end{equation}
00573  -->
00574 
00575 <TABLE WIDTH="100%" ALIGN="CENTER">
00576 <TR VALIGN="MIDDLE"><TD ALIGN="CENTER" NOWRAP><A NAME="eq:esterror"></A><IMG
00577  WIDTH="264" HEIGHT="60" BORDER="0"
00578  SRC="gif/principal_img31.gif"
00579  ALT="\begin{displaymath}
00580 E_N \sim \sum^{P-1}_{n=N+1} \mathbf{e}_n^T\bullet l_n\mathbf{e}_n
00581 = \sum^{P-1}_{n=N+1} l_n\,,
00582 \end{displaymath}"></TD>
00583 <TD WIDTH=10 ALIGN="RIGHT">
00584 (10)</TD></TR>
00585 </TABLE>
00586 <BR CLEAR="ALL"></DIV><P></P>
00587 where
00588 <!-- MATH
00589  $l_n,\,n=N+1,\ldots,P$
00590  -->
00591 <IMG
00592  WIDTH="161" HEIGHT="32" ALIGN="MIDDLE" BORDER="0"
00593  SRC="gif/principal_img32.gif"
00594  ALT="$l_n,\,n=N+1,\ldots,P-1$"> are the eigenvalues associated with the
00595 omitted eigenvectors in the expansion&nbsp;(<A HREF="prin_node1.html#eq:yi">3</A>). Thus, by choosing
00596 the <IMG
00597  WIDTH="20" HEIGHT="15" ALIGN="BOTTOM" BORDER="0"
00598  SRC="gif/principal_img12.gif"
00599  ALT="$N$"> largest eigenvalues, and their associated eigenvectors, the
00600 error <IMG
00601  WIDTH="30" HEIGHT="32" ALIGN="MIDDLE" BORDER="0"
00602  SRC="gif/principal_img33.gif"
00603  ALT="$E_N$"> is minimized.
00604 
00605 <P>
00606 The transformation matrix to go from the pattern space to the feature
00607 space consists of the ordered eigenvectors
00608 
00609 <!-- MATH
00610  $\mathbf{e}_1,\ldots,\mathbf{e}_P$
00611  -->
00612 <IMG
00613  WIDTH="80" HEIGHT="30" ALIGN="MIDDLE" BORDER="0"
00614  SRC="gif/principal_img34.gif"
00615  ALT="$\mathbf{e}_0,\ldots,\mathbf{e}_{P-1}$"> for its columns
00616 <BR>
00617 <DIV ALIGN="RIGHT">
00618 
00619 
00620 <!-- MATH
00621  \begin{equation}
00622 \mathsf{T} = \left[
00623     \begin{array}{cccc}
00624       \mathbf{e}_0 &
00625       \mathbf{e}_1 &
00626       \vdots &
00627       \mathbf{e}_{P-1}
00628     \end{array}\right]
00629   = \left[
00630     \begin{array}{cccc}
00631       \mathbf{e}_{0_0} &  \mathbf{e}_{1_0} & \cdots &  \mathbf{e}_{{P-1}_0}\\
00632       \mathbf{e}_{0_1} &  \mathbf{e}_{1_1} & \cdots &  \mathbf{e}_{{P-1}_1}\\
00633       \vdots        &  \vdots        & \ddots &  \vdots \\
00634       \mathbf{e}_{0_{P-1}} &  \mathbf{e}_{1_{P-1}} & \cdots &  \mathbf{e}_{{P-1}_{P-1}}\\
00635     \end{array}\right]
00636 \end{equation}
00637  -->
00638 
00639 <TABLE WIDTH="100%" ALIGN="CENTER">
00640 <TR VALIGN="MIDDLE"><TD ALIGN="CENTER" NOWRAP><A NAME="eq:trans"></A><IMG
00641  WIDTH="378" HEIGHT="102" BORDER="0"
00642  SRC="gif/principal_img35.gif"
00643  ALT="\begin{displaymath}
00644 \mathsf{T} = \left[
00645 \begin{array}{cccc}
00646 \mathbf{e}_0 &amp;
00647 \...
00648 ...bf{e}_{1_{P-1}} &amp; \cdots &amp; \mathbf{e}_{{P-1}_{P-1}}\\
00649 \end{array}\right]
00650 \end{displaymath}"></TD>
00651 <TD WIDTH=10 ALIGN="RIGHT">
00652 (11)</TD></TR>
00653 </TABLE>
00654 <BR CLEAR="ALL"></DIV><P></P>
00655 This is an orthogonal transformation, or rotation, of the pattern
00656 space and feature selection results in ignoring certain coordinates
00657 in the transformed space.
00658    <p>
00659    <DIV ALIGN="RIGHT">
00660    Christian Holm<br>
00661    August 2000, CERN
00662    </DIV>
00663 <!--*/
00664 // -->End_Html
00665 
00666 // $Id: TPrincipal.cxx 35247 2010-09-13 14:10:41Z brun $
00667 // $Date: 2006/05/24 14:55:26 $
00668 // $Author: brun $
00669 
00670 #include "TPrincipal.h"
00671 
00672 #include "TVectorD.h"
00673 #include "TMatrixD.h"
00674 #include "TMatrixDSymEigen.h"
00675 #include "TMath.h"
00676 #include "TList.h"
00677 #include "TH2.h"
00678 #include "TDatime.h"
00679 #include "TBrowser.h"
00680 #include "TROOT.h"
00681 #include "Riostream.h"
00682 
00683 
00684 ClassImp(TPrincipal);
00685 
00686 //____________________________________________________________________
00687 TPrincipal::TPrincipal()
00688   : fMeanValues(0),
00689     fSigmas(0),
00690     fCovarianceMatrix(1,1),
00691     fEigenVectors(1,1),
00692     fEigenValues(0),
00693     fOffDiagonal(0),
00694     fStoreData(kFALSE)
00695 {
00696   // Empty CTOR, Do not use.
00697 
00698    fTrace              = 0;
00699    fHistograms         = 0;
00700    fIsNormalised       = kFALSE;
00701    fNumberOfDataPoints = 0;
00702    fNumberOfVariables  = 0;
00703 }
00704 
00705 //____________________________________________________________________
00706 TPrincipal::TPrincipal(Int_t nVariables, Option_t *opt)
00707   : fMeanValues(nVariables),
00708     fSigmas(nVariables),
00709     fCovarianceMatrix(nVariables,nVariables),
00710     fEigenVectors(nVariables,nVariables),
00711     fEigenValues(nVariables),
00712     fOffDiagonal(nVariables),
00713     fStoreData(kFALSE)
00714 {
00715    // Ctor. Argument is number of variables in the sample of data
00716    // Options are:
00717    //   N       Normalize the covariance matrix (default)
00718    //   D       Store input data (default)
00719    //
00720    // The created object is  named "principal" by default.
00721    if (nVariables <= 1) {
00722       Error("TPrincipal", "You can't be serious - nVariables == 1!!!");
00723       return;
00724    }
00725 
00726    SetName("principal");
00727 
00728    fTrace              = 0;
00729    fHistograms         = 0;
00730    fIsNormalised       = kFALSE;
00731    fNumberOfDataPoints = 0;
00732    fNumberOfVariables  = nVariables;
00733    while (strlen(opt) > 0) {
00734       switch(*opt++) {
00735          case 'N':
00736          case 'n':
00737             fIsNormalised = kTRUE;
00738             break;
00739          case 'D':
00740          case 'd':
00741             fStoreData    = kTRUE;
00742             break;
00743          default:
00744             break;
00745       }
00746    }
00747 
00748    if (!fMeanValues.IsValid())
00749       Error("TPrincipal","Couldn't create vector mean values");
00750    if (!fSigmas.IsValid())
00751       Error("TPrincipal","Couldn't create vector sigmas");
00752    if (!fCovarianceMatrix.IsValid())
00753       Error("TPrincipal","Couldn't create covariance matrix");
00754    if (!fEigenVectors.IsValid())
00755       Error("TPrincipal","Couldn't create eigenvector matrix");
00756    if (!fEigenValues.IsValid())
00757       Error("TPrincipal","Couldn't create eigenvalue vector");
00758    if (!fOffDiagonal.IsValid())
00759       Error("TPrincipal","Couldn't create offdiagonal vector");
00760    if (fStoreData) {
00761       fUserData.ResizeTo(nVariables*1000);
00762       fUserData.Zero();
00763       if (!fUserData.IsValid())
00764          Error("TPrincipal","Couldn't create user data vector");
00765    }
00766 }
00767 
//____________________________________________________________________
TPrincipal::TPrincipal(const TPrincipal& pr) : 
  TNamed(pr),
  fNumberOfDataPoints(pr.fNumberOfDataPoints),
  fNumberOfVariables(pr.fNumberOfVariables),
  fMeanValues(pr.fMeanValues),
  fSigmas(pr.fSigmas),
  fCovarianceMatrix(pr.fCovarianceMatrix),
  fEigenVectors(pr.fEigenVectors),
  fEigenValues(pr.fEigenValues),
  fOffDiagonal(pr.fOffDiagonal),
  fUserData(pr.fUserData),
  fTrace(pr.fTrace),
  fHistograms(pr.fHistograms),
  fIsNormalised(pr.fIsNormalised),
  fStoreData(pr.fStoreData)
{ 
   // Copy constructor: member-wise copy of pr.
   // NOTE(review): fHistograms is copied as a raw pointer, so both
   // objects share one histogram list while the destructor deletes
   // it -- copying a TPrincipal that owns histograms risks a double
   // delete; verify the intended ownership model.
}
00787 
00788 //____________________________________________________________________
00789 TPrincipal& TPrincipal::operator=(const TPrincipal& pr)
00790 {
00791    //assignement operator
00792    if(this!=&pr) {
00793       TNamed::operator=(pr);
00794       fNumberOfDataPoints=pr.fNumberOfDataPoints;
00795       fNumberOfVariables=pr.fNumberOfVariables;
00796       fMeanValues=pr.fMeanValues;
00797       fSigmas=pr.fSigmas;
00798       fCovarianceMatrix=pr.fCovarianceMatrix;
00799       fEigenVectors=pr.fEigenVectors;
00800       fEigenValues=pr.fEigenValues;
00801       fOffDiagonal=pr.fOffDiagonal;
00802       fUserData=pr.fUserData;
00803       fTrace=pr.fTrace;
00804       fHistograms=pr.fHistograms;
00805       fIsNormalised=pr.fIsNormalised;
00806       fStoreData=pr.fStoreData;
00807    } 
00808    return *this;
00809 }
00810 
00811 //____________________________________________________________________
00812 TPrincipal::~TPrincipal()
00813 {
00814    // destructor
00815 
00816    if (fHistograms) {
00817       fHistograms->Delete();
00818       delete fHistograms;
00819    }
00820 }
00821 
//____________________________________________________________________
void TPrincipal::AddRow(const Double_t *p)
{
  // Begin_Html
  /*
     </PRE>
Add a data point and update the covariance matrix. The input
array must be <TT>fNumberOfVariables</TT> long.

<P>
The Covariance matrix and mean values of the input data is calculated
on the fly by the following equations:
<BR><P></P>
<DIV ALIGN="CENTER">

<!-- MATH
 \begin{displaymath}
\left<x_i\right>^{(0)}  = x_{i0}
\end{displaymath}
 -->


<IMG
 WIDTH="90" HEIGHT="31" BORDER="0"
 SRC="gif/principal_img36.gif"
 ALT="\begin{displaymath}
\left&lt;x_i\right&gt;^{(0)} = x_{i0}
\end{displaymath}">
</DIV>
<BR CLEAR="ALL">
<P></P>
<BR><P></P>
<DIV ALIGN="CENTER">

<!-- MATH
 \begin{displaymath}
\left<x_i\right>^{(n)} = \left<x_i\right>^{(n-1)}
+ \frac1n \left(x_{in} - \left<x_i\right>^{(n-1)}\right)
\end{displaymath}
 -->


<IMG
 WIDTH="302" HEIGHT="42" BORDER="0"
 SRC="gif/principal_img37.gif"
 ALT="\begin{displaymath}
\left&lt;x_i\right&gt;^{(n)} = \left&lt;x_i\right&gt;^{(n-1)}
+ \frac1n \left(x_{in} - \left&lt;x_i\right&gt;^{(n-1)}\right)
\end{displaymath}">
</DIV>
<BR CLEAR="ALL">
<P></P>
<BR><P></P>
<DIV ALIGN="CENTER">

<!-- MATH
 \begin{displaymath}
C_{ij}^{(0)} = 0
\end{displaymath}
 -->


<IMG
 WIDTH="62" HEIGHT="34" BORDER="0"
 SRC="gif/principal_img38.gif"
 ALT="\begin{displaymath}
C_{ij}^{(0)} = 0
\end{displaymath}">
</DIV>
<BR CLEAR="ALL">
<P></P>
<BR><P></P>
<DIV ALIGN="CENTER">

<!-- MATH
 \begin{displaymath}
C_{ij}^{(n)} = C_{ij}^{(n-1)}
+ \frac1{n-1}\left[\left(x_{in} - \left<x_i\right>^{(n)}\right)
  \left(x_{jn} - \left<x_j\right>^{(n)}\right)\right]
- \frac1n C_{ij}^{(n-1)}
\end{displaymath}
 -->


<IMG
 WIDTH="504" HEIGHT="43" BORDER="0"
 SRC="gif/principal_img39.gif"
 ALT="\begin{displaymath}
C_{ij}^{(n)} = C_{ij}^{(n-1)}
+ \frac1{n-1}\left[\left(x_{i...
...\left&lt;x_j\right&gt;^{(n)}\right)\right]
- \frac1n C_{ij}^{(n-1)}
\end{displaymath}">
</DIV>
<BR CLEAR="ALL">
<P></P>
since this is a really fast method, with no rounding errors (please
refer to CERN 72-21 pp. 54-106).

<P>
The data is stored internally in a <TT>TVectorD</TT>, in the following
way:
<BR><P></P>
<DIV ALIGN="CENTER">

<!-- MATH
 \begin{displaymath}
\mathbf{x} = \left[\left(x_{0_0},\ldots,x_{{P-1}_0}\right),\ldots,
    \left(x_{0_i},\ldots,x_{{P-1}_i}\right), \ldots\right]
\end{displaymath}
 -->


<IMG
 WIDTH="319" HEIGHT="31" BORDER="0"
 SRC="gif/principal_img40.gif"
 ALT="\begin{displaymath}
\mathbf{x} = \left[\left(x_{0_0},\ldots,x_{{P-1}_0}\right),\ldots,
\left(x_{0_i},\ldots,x_{{P-1}_i}\right), \ldots\right]
\end{displaymath}">
</DIV>
<BR CLEAR="ALL">
<P></P>
With <IMG
 WIDTH="18" HEIGHT="15" ALIGN="BOTTOM" BORDER="0"
 SRC="gif/principal_img6.gif"
 ALT="$P$"> as defined in the class description.
     <PRE>
  */
  // End_Html
   if (!p)
      return;

   // Increment the data point counter
   Int_t i,j;
   if (++fNumberOfDataPoints == 1) {
      // First row: the running mean is the row itself; the covariance
      // matrix stays at its initial zero.
      for (i = 0; i < fNumberOfVariables; i++)
         fMeanValues(i) = p[i];
   }
   else {

      // Single-pass incremental update of mean and covariance per the
      // equations in the header comment; n = fNumberOfDataPoints.
      // cor = (1 - 1/n) rescales the previous estimates.
      Double_t cor = 1 - 1./Double_t(fNumberOfDataPoints);
      for (i = 0; i < fNumberOfVariables; i++) {

         // <x_i>^(n) = (1 - 1/n) <x_i>^(n-1) + x_in / n
         // NOTE: the mean must be updated BEFORE t1 is formed below;
         // the covariance recurrence uses the NEW mean.
         fMeanValues(i) *= cor;
         fMeanValues(i) += p[i] / Double_t(fNumberOfDataPoints);
         Double_t t1 = (p[i] - fMeanValues(i)) / (fNumberOfDataPoints - 1);

         // Setting Matrix (lower triangle) elements
         for (j = 0; j < i + 1; j++) {
            fCovarianceMatrix(i,j) *= cor;
            fCovarianceMatrix(i,j) += t1 * (p[j] - fMeanValues(j));
         }
      }
   }

   // Store data point in internal vector
   // If the vector isn't big enough to hold the new data, then
   // expand the vector by half its size.
   if (!fStoreData)
      return;
   Int_t size = fUserData.GetNrows();
   if (fNumberOfDataPoints * fNumberOfVariables > size)
      fUserData.ResizeTo(size + size/2);

   for (i = 0; i < fNumberOfVariables; i++) {
      // Rows are stored back to back: row r occupies elements
      // [r*fNumberOfVariables, (r+1)*fNumberOfVariables).
      j = (fNumberOfDataPoints-1) * fNumberOfVariables + i;
      fUserData(j) = p[i];
   }

}
00993 
00994 //____________________________________________________________________
00995 void TPrincipal::Browse(TBrowser *b)
00996 {
00997    // Browse the TPrincipal object in the TBrowser.
00998    if (fHistograms) {
00999       TIter next(fHistograms);
01000       TH1* h = 0;
01001       while ((h = (TH1*)next()))
01002          b->Add(h,h->GetName());
01003    }
01004 
01005    if (fStoreData)
01006       b->Add(&fUserData,"User Data");
01007    b->Add(&fCovarianceMatrix,"Covariance Matrix");
01008    b->Add(&fMeanValues,"Mean value vector");
01009    b->Add(&fSigmas,"Sigma value vector");
01010    b->Add(&fEigenValues,"Eigenvalue vector");
01011    b->Add(&fEigenVectors,"Eigenvector Matrix");
01012 
01013 }
01014 
01015 //____________________________________________________________________
01016 void TPrincipal::Clear(Option_t *opt)
01017 {
01018    // Clear the data in Object. Notice, that's not possible to change
01019    // the dimension of the original data.
01020    if (fHistograms) {
01021       fHistograms->Delete(opt);
01022    }
01023 
01024    fNumberOfDataPoints = 0;
01025    fTrace              = 0;
01026    fCovarianceMatrix.Zero();
01027    fEigenVectors.Zero();
01028    fEigenValues.Zero();
01029    fMeanValues.Zero();
01030    fSigmas.Zero();
01031    fOffDiagonal.Zero();
01032 
01033    if (fStoreData) {
01034       fUserData.ResizeTo(fNumberOfVariables * 1000);
01035       fUserData.Zero();
01036    }
01037 }
01038 
01039 //____________________________________________________________________
01040 const Double_t *TPrincipal::GetRow(Int_t row)
01041 {
01042    // Return a row of the user supplied data.
01043    // If row is out of bounds, 0 is returned.
01044    // It's up to the user to delete the returned array.
01045    // Row 0 is the first row;
01046    if (row >= fNumberOfDataPoints)
01047       return 0;
01048 
01049    if (!fStoreData)
01050       return 0;
01051 
01052    Int_t index   = row  * fNumberOfVariables;
01053    return &fUserData(index);
01054 }
01055 
01056 
01057 //____________________________________________________________________
01058 void TPrincipal::MakeCode(const char *filename, Option_t *opt)
01059 {
01060    // Generates the file <filename>, with .C appended if it does
01061    // argument doesn't end in .cxx or .C.
01062    //
01063    // The file contains the implementation of two functions
01064    //
01065    //    void X2P(Double_t *x, Double *p)
01066    //    void P2X(Double_t *p, Double *x, Int_t nTest)
01067    //
01068    // which does the same as  TPrincipal::X2P and TPrincipal::P2X
01069    // respectively. Please refer to these methods.
01070    //
01071    // Further, the static variables:
01072    //
01073    //    Int_t    gNVariables
01074    //    Double_t gEigenValues[]
01075    //    Double_t gEigenVectors[]
01076    //    Double_t gMeanValues[]
01077    //    Double_t gSigmaValues[]
01078    //
01079    // are initialized. The only ROOT header file needed is Rtypes.h
01080    //
01081    // See TPrincipal::MakeRealCode for a list of options
01082 
01083    TString outName(filename);
01084    if (!outName.EndsWith(".C") && !outName.EndsWith(".cxx"))
01085       outName += ".C";
01086 
01087    MakeRealCode(outName.Data(),"",opt);
01088 }
01089 
01090 //____________________________________________________________________
void TPrincipal::MakeHistograms(const char *name, Option_t *opt)
{
   // Make histograms of the result of the analysis.
   // The option string says which histograms to create
   //      X         Histogram original data
   //      P         Histogram principal components corresponding to
   //                original data
   //      D         Histogram the difference between the original data
   //                and the projection of principal unto a lower
   //                dimensional subspace (2D histograms)
   //      E         Histogram the eigenvalues
   //      S         Histogram the square of the residues
   //                (see TPrincipal::SumOfSquareResidues)
   // The histograms will be named <name>_<type><number>, where <name>
   // is the first argument, <type> is one of X,P,D,E,S, and <number>
   // is the variable.
   //
   // Note: X, D, P and S need the original data, so they are silently
   // ignored unless fStoreData is set; E works in either case.
   Bool_t makeX  = kFALSE;
   Bool_t makeD  = kFALSE;
   Bool_t makeP  = kFALSE;
   Bool_t makeE  = kFALSE;
   Bool_t makeS  = kFALSE;

   // Parse the (case-insensitive) option string.
   Int_t len     = strlen(opt);
   Int_t i,j,k;
   for (i = 0; i < len; i++) {
      switch (opt[i]) {
         case 'X':
         case 'x':
            if (fStoreData)
               makeX = kTRUE;
            break;
         case 'd':
         case 'D':
            if (fStoreData)
               makeD = kTRUE;
            break;
         case 'P':
         case 'p':
            if (fStoreData)
               makeP = kTRUE;
            break;
         case 'E':
         case 'e':
            makeE = kTRUE;
            break;
         case 's':
         case 'S':
            if (fStoreData)
               makeS = kTRUE;
            break;
         default:
            Warning("MakeHistograms","Unknown option: %c",opt[i]);
      }
   }

   // If no option was given, then exit gracefully
   if (!makeX && !makeD && !makeP && !makeE && !makeS)
      return;

   // If the list of histograms doesn't exist, create it.
   if (!fHistograms)
      fHistograms = new TList;

   // Don't create the histograms if they are already in the TList.
   if (makeX && fHistograms->FindObject(Form("%s_x000",name)))
      makeX = kFALSE;
   if (makeD && fHistograms->FindObject(Form("%s_d000",name)))
      makeD = kFALSE;
   if (makeP && fHistograms->FindObject(Form("%s_p000",name)))
      makeP = kFALSE;
   if (makeE && fHistograms->FindObject(Form("%s_e",name)))
      makeE = kFALSE;
   if (makeS && fHistograms->FindObject(Form("%s_s",name)))
      makeS = kFALSE;

   TH1F **hX  = 0;   // per-variable pattern-space histograms
   TH2F **hD  = 0;   // per-variable distance-vs-N histograms
   TH1F **hP  = 0;   // per-variable feature-space histograms
   TH1F *hE   = 0;   // eigenvalue spectrum
   TH1F *hS   = 0;   // sum-of-square-residues vs N

   // Initialize the arrays of histograms needed
   if (makeX)
      hX = new TH1F * [fNumberOfVariables];

   if (makeD)
      hD = new TH2F * [fNumberOfVariables];

   if (makeP)
      hP = new TH1F * [fNumberOfVariables];

   if (makeE){
      hE = new TH1F(Form("%s_e",name), "Eigenvalues of Covariance matrix",
         fNumberOfVariables,0,fNumberOfVariables);
      hE->SetXTitle("Eigenvalue");
      fHistograms->Add(hE);
   }

   if (makeS) {
      hS = new TH1F(Form("%s_s",name),"E_{N}",
         fNumberOfVariables-1,1,fNumberOfVariables);
      hS->SetXTitle("N");
      hS->SetYTitle("#sum_{i=1}^{M} (x_{i} - x'_{N,i})^{2}");
      fHistograms->Add(hS);
   }

   // Initialize sub elements of the histogram arrays
   for (i = 0; i < fNumberOfVariables; i++) {
      if (makeX) {
         // We allow 4 sigma spread in the original data in our
         // histogram.
         Double_t xlowb  = fMeanValues(i) - 4 * fSigmas(i);
         Double_t xhighb = fMeanValues(i) + 4 * fSigmas(i);
         Int_t    xbins  = fNumberOfDataPoints/100;
         hX[i]           = new TH1F(Form("%s_x%03d", name, i),
            Form("Pattern space, variable %d", i),
            xbins,xlowb,xhighb);
         hX[i]->SetXTitle(Form("x_{%d}",i));
         fHistograms->Add(hX[i]);
      }

      if(makeD) {
         // The upper limit below is arbitrary!!!
         Double_t dlowb  = 0;
         Double_t dhighb = 20;
         Int_t    dbins  = fNumberOfDataPoints/100;
         hD[i]           = new TH2F(Form("%s_d%03d", name, i),
            Form("Distance from pattern to "
            "feature space, variable %d", i),
            dbins,dlowb,dhighb,
            fNumberOfVariables-1,
            1,
            fNumberOfVariables);
         hD[i]->SetXTitle(Form("|x_{%d} - x'_{%d,N}|/#sigma_{%d}",i,i,i));
         hD[i]->SetYTitle("N");
         fHistograms->Add(hD[i]);
      }

      if(makeP) {
         // For some reason, the trace of the none-scaled matrix
         // (see TPrincipal::MakeNormalised) should enter here. Taken
         // from LINTRA code.
         Double_t et = TMath::Abs(fEigenValues(i) * fTrace);
         Double_t plowb   = -10 * TMath::Sqrt(et);
         Double_t phighb  = -plowb;
         Int_t    pbins   = 100;
         hP[i]            = new TH1F(Form("%s_p%03d", name, i),
            Form("Feature space, variable %d", i),
            pbins,plowb,phighb);
         hP[i]->SetXTitle(Form("p_{%d}",i));
         fHistograms->Add(hP[i]);
      }

      if (makeE)
         // The Eigenvector histogram is easy
         hE->Fill(i,fEigenValues(i));

   }
   // Only the E histogram can be filled without looping over the
   // stored data; bail out here if nothing else was requested.
   if (!makeX && !makeP && !makeD && !makeS)
      return;

   Double_t *x = 0;
   Double_t *p = new Double_t[fNumberOfVariables];
   Double_t *d = new Double_t[fNumberOfVariables];
   for (i = 0; i < fNumberOfDataPoints; i++) {

      // Zero arrays
      for (j = 0; j < fNumberOfVariables; j++)
         p[j] = d[j] = 0;

      // update the original data histogram
      // Note: GetRow cannot return 0 here, since i < fNumberOfDataPoints
      // and the X/D/P/S options are only honoured when fStoreData is
      // set. The const is cast away but x is only read below.
      x  = (Double_t*)(GetRow(i));

      if (makeP||makeD||makeS)
         // calculate the corresponding principal component
         X2P(x,p);

      if (makeD || makeS) {
         // Calculate the difference between the original data, and the
         // same project onto principal components, and then to a lower
         // dimensional sub-space
         for (j = fNumberOfVariables; j > 0; j--) {
            // Reconstruct x from the first j principal components only.
            P2X(p,d,j);

            for (k = 0; k < fNumberOfVariables; k++) {
               // We use the absolute value of the difference!
               d[k] = x[k] - d[k];

               if (makeS)
                  hS->Fill(j,d[k]*d[k]);

               if (makeD) {
                  d[k] = TMath::Abs(d[k]) / (fIsNormalised ? fSigmas(k) : 1);
                  (hD[k])->Fill(d[k],j);
               }
            }
         }
      }

      if (makeX||makeP) {
         // If we are asked to make any of these histograms, we have to
         // go here
         for (j = 0; j < fNumberOfVariables; j++) {
            if (makeX)
               (hX[j])->Fill(x[j]);

            if (makeP)
               (hP[j])->Fill(p[j]);
         }
      }
   }
   // Clean up (only the pointer arrays are owned here; the histograms
   // themselves now belong to fHistograms).
   if (hX)
      delete [] hX;
   if (hD)
      delete [] hD;
   if (hP)
      delete [] hP;
   if (d)
      delete [] d;
   if (p)
      delete [] p;

   // Normalize the residues
   if (makeS)
      hS->Scale(Double_t(1.)/fNumberOfDataPoints);
}
01318 
01319 //____________________________________________________________________
void TPrincipal::MakeNormalised()
{
   // PRIVATE METHOD: Normalize the covariance matrix
   //
   // Extracts the per-variable sigma (sqrt of the covariance-matrix
   // diagonal) into fSigmas, optionally rescales the lower triangle by
   // sigma_i*sigma_j when fIsNormalised is set, accumulates the trace
   // into fTrace, and finally mirrors the lower triangle into the
   // upper one while dividing every element by the trace.
   //
   // NOTE(review): fTrace is accumulated with "+=" and not reset here;
   // a second call without an intervening Clear() would double-count
   // the trace -- confirm this is only ever invoked once per analysis
   // (via MakePrincipals).

   Int_t i,j;
   for (i = 0; i < fNumberOfVariables; i++) {
      // Sigma is taken from the diagonal BEFORE any rescaling.
      fSigmas(i) = TMath::Sqrt(fCovarianceMatrix(i,i));
      if (fIsNormalised)
         for (j = 0; j <= i; j++)
            fCovarianceMatrix(i,j) /= (fSigmas(i) * fSigmas(j));

      fTrace += fCovarianceMatrix(i,i);
   }

   // Fill remaining parts of matrix, and scale.
   for (i = 0; i < fNumberOfVariables; i++)
      for (j = 0; j <= i; j++) {
         fCovarianceMatrix(i,j) /= fTrace;
         fCovarianceMatrix(j,i) = fCovarianceMatrix(i,j);
      }

}
01342 
01343 //____________________________________________________________________
01344 void TPrincipal::MakeMethods(const char *classname, Option_t *opt)
01345 {
01346    // Generate the file <classname>PCA.cxx which contains the
01347    // implementation of two methods:
01348    //
01349    //    void <classname>::X2P(Double_t *x, Double *p)
01350    //    void <classname>::P2X(Double_t *p, Double *x, Int_t nTest)
01351    //
01352    // which does the same as  TPrincipal::X2P and TPrincipal::P2X
01353    // respectivly. Please refer to these methods.
01354    //
01355    // Further, the public static members:
01356    //
01357    //    Int_t    <classname>::fgNVariables
01358    //    Double_t <classname>::fgEigenValues[]
01359    //    Double_t <classname>::fgEigenVectors[]
01360    //    Double_t <classname>::fgMeanValues[]
01361    //    Double_t <classname>::fgSigmaValues[]
01362    //
01363    // are initialized, and assumed to exist. The class declaration is
01364    // assumed to be in <classname>.h and assumed to be provided by the
01365    // user.
01366    //
01367    // See TPrincipal::MakeRealCode for a list of options
01368    //
01369    // The minimal class definition is:
01370    //
01371    //   class <classname> {
01372    //   public:
01373    //     static Int_t    fgNVariables;
01374    //     static Double_t fgEigenVectors[];
01375    //     static Double_t fgEigenValues[];
01376    //     static Double_t fgMeanValues[];
01377    //     static Double_t fgSigmaValues[];
01378    //
01379    //     void X2P(Double_t *x, Double_t *p);
01380    //     void P2X(Double_t *p, Double_t *x, Int_t nTest);
01381    //   };
01382    //
01383    // Whether the methods <classname>::X2P and <classname>::P2X should
01384    // be static or not, is up to the user.
01385 
01386 
01387    MakeRealCode(Form("%sPCA.cxx", classname), classname, opt);
01388 }
01389 
01390 
01391 //____________________________________________________________________
01392 void TPrincipal::MakePrincipals()
01393 {
01394    // Perform the principal components analysis.
01395    // This is done in several stages in the TMatrix::EigenVectors method:
01396    // * Transform the covariance matrix into a tridiagonal matrix.
01397    // * Find the eigenvalues and vectors of the tridiagonal matrix.
01398 
01399    // Normalize covariance matrix
01400    MakeNormalised();
01401 
01402    TMatrixDSym sym; sym.Use(fCovarianceMatrix.GetNrows(),fCovarianceMatrix.GetMatrixArray());
01403    TMatrixDSymEigen eigen(sym);
01404    fEigenVectors = eigen.GetEigenVectors();
01405    fEigenValues  = eigen.GetEigenValues();
01406    //make sure that eigenvalues are positive
01407    for (Int_t i = 0; i < fNumberOfVariables; i++) {
01408       if (fEigenValues[i] < 0) fEigenValues[i] = -fEigenValues[i];
01409    }
01410 }
01411 
01412 //____________________________________________________________________
void TPrincipal::MakeRealCode(const char *filename, const char *classname,
                              Option_t *)
{
   // PRIVATE METHOD:
   // This is the method that actually generates the code for the
   // transformations to and from feature space and pattern space
   // It's called by TPrincipal::MakeCode and TPrincipal::MakeMethods.
   //
   // The options are: NONE so far

   // An empty class name selects free-function mode: the generated
   // data variables get file-scope "static" linkage and no <class>::
   // prefix; otherwise they are emitted as <classname>:: members.
   Bool_t  isMethod = (classname[0] == '\0' ? kFALSE : kTRUE);
   const char *prefix   = (isMethod ? Form("%s::", classname) : "");
   const char *cv_qual  = (isMethod ? "" : "static ");

   ofstream outFile(filename,ios::out|ios::trunc);
   if (!outFile) {
      Error("MakeRealCode","couldn't open output file '%s'",filename);
      return;
   }

   cout << "Writing on file \"" << filename << "\" ... " << flush;
   //
   // Write header of file
   //
   // Emacs mode line ;-)
   outFile << "// -*- mode: c++ -*-" << endl;
   // Info about creator
   outFile << "// " << endl
      << "// File " << filename
      << " generated by TPrincipal::MakeCode" << endl;
   // Time stamp
   TDatime date;
   outFile << "// on " << date.AsString() << endl;
   // ROOT version info
   outFile << "// ROOT version " << gROOT->GetVersion()
      << endl << "//" << endl;
   // General information on the code
   outFile << "// This file contains the functions " << endl
      << "//" << endl
      << "//    void  " << prefix
      << "X2P(Double_t *x, Double_t *p); " << endl
      << "//    void  " << prefix
      << "P2X(Double_t *p, Double_t *x, Int_t nTest);"
      << endl << "//" << endl
      << "// The first for transforming original data x in " << endl
      << "// pattern space, to principal components p in " << endl
      << "// feature space. The second function is for the" << endl
      << "// inverse transformation, but using only nTest" << endl
      << "// of the principal components in the expansion" << endl
      << "// " << endl
      << "// See TPrincipal class documentation for more "
      << "information " << endl << "// " << endl;
   // Header files
   outFile << "#ifndef __CINT__" << endl;
   if (isMethod)
      // If these are methods, we need the class header
      outFile << "#include \"" << classname << ".h\"" << endl;
   else
      // otherwise, we need the typedefs of Int_t and Double_t
      outFile << "#include <Rtypes.h> // needed for Double_t etc" << endl;
   // Finish the preprocessor block
   outFile << "#endif" << endl << endl;

   //
   // Now for the data
   //
   // We make the Eigenvector matrix, Eigenvalue vector, Sigma vector,
   // and Mean value vector static, since all are needed in both
   // functions. Also, the number of variables is stored in a static
   // variable.
   outFile << "//" << endl
      << "// Static data variables"  << endl
      << "//" << endl;
   outFile << cv_qual << "Int_t    " << prefix << "gNVariables = "
      << fNumberOfVariables << ";" << endl;

   // Assign the values to the Eigenvector matrix. The elements are
   // stored row-wise, that is element
   //    M[i][j] = e[i * nVariables + j]
   // where i and j are zero-based.
   outFile << endl << "// Assignment of eigenvector matrix." << endl
      << "// Elements are stored row-wise, that is" << endl
      << "//    M[i][j] = e[i * nVariables + j] " << endl
      << "// where i and j are zero-based" << endl;
   outFile << cv_qual << "Double_t " << prefix
      << "gEigenVectors[] = {" << flush;
   Int_t i,j;
   for (i = 0; i < fNumberOfVariables; i++) {
      for (j = 0; j < fNumberOfVariables; j++) {
         // Only the very first element omits the leading comma.
         Int_t index = i * fNumberOfVariables + j;
         outFile << (index != 0 ? "," : "" ) << endl
            << "  "  << fEigenVectors(i,j) << flush;
      }
   }
   outFile << "};" << endl << endl;

   // Assignment to eigenvalue vector. Zero-based.
   outFile << "// Assignment to eigen value vector. Zero-based." << endl;
   outFile << cv_qual << "Double_t " << prefix
      << "gEigenValues[] = {" << flush;
   for (i = 0; i < fNumberOfVariables; i++)
      outFile << (i != 0 ? "," : "") << endl
      << "  " << fEigenValues(i) << flush;
   outFile << endl << "};" << endl << endl;

   // Assignment to mean value vector. Zero-based.
   outFile << "// Assignment to mean value vector. Zero-based." << endl;
   outFile << cv_qual << "Double_t " << prefix
      << "gMeanValues[] = {" << flush;
   for (i = 0; i < fNumberOfVariables; i++)
      outFile << (i != 0 ? "," : "") << endl
      << "  " << fMeanValues(i) << flush;
   outFile << endl << "};" << endl << endl;

   // Assignment to sigma value vector. Zero-based.
   // When the analysis is not normalised, all sigmas are written as 1
   // so the generated X2P/P2X divide/multiply by unity.
   outFile << "// Assignment to sigma value vector. Zero-based." << endl;
   outFile << cv_qual << "Double_t " << prefix
      << "gSigmaValues[] = {" << flush;
   for (i = 0; i < fNumberOfVariables; i++)
      outFile << (i != 0 ? "," : "") << endl
      << "  " << (fIsNormalised ? fSigmas(i) : 1) << flush;
   outFile << endl << "};" << endl << endl;

   //
   // Finally we reach the functions themselves
   //
   // First: void x2p(Double_t *x, Double_t *p);
   //
   outFile << "// " << endl
      << "// The "
      << (isMethod ? "method " : "function ")
      << "  void " << prefix
      << "X2P(Double_t *x, Double_t *p)"
      << endl << "// " << endl;
   outFile << "void " << prefix
      << "X2P(Double_t *x, Double_t *p) {" << endl
      << "  for (Int_t i = 0; i < gNVariables; i++) {" << endl
      << "    p[i] = 0;" << endl
      << "    for (Int_t j = 0; j < gNVariables; j++)" << endl
      << "      p[i] += (x[j] - gMeanValues[j]) " << endl
      << "        * gEigenVectors[j *  gNVariables + i] "
      << "/ gSigmaValues[j];" << endl << endl << "  }"
      << endl << "}" << endl << endl;
   //
   // Now: void p2x(Double_t *p, Double_t *x, Int_t nTest);
   //
   outFile << "// " << endl << "// The "
      << (isMethod ? "method " : "function ")
      << "  void " << prefix
      << "P2X(Double_t *p, Double_t *x, Int_t nTest)"
      << endl << "// " << endl;
   outFile << "void " << prefix
      << "P2X(Double_t *p, Double_t *x, Int_t nTest) {" << endl
      << "  for (Int_t i = 0; i < gNVariables; i++) {" << endl
      << "    x[i] = gMeanValues[i];" << endl
      << "    for (Int_t j = 0; j < nTest; j++)" << endl
      << "      x[i] += p[j] * gSigmaValues[i] " << endl
      << "        * gEigenVectors[i *  gNVariables + j];" << endl
      << "  }" << endl << "}" << endl << endl;

   // EOF
   outFile << "// EOF for " << filename << endl;

   // Close the file
   outFile.close();

   cout << "done" << endl;
}
01582 
01583 //____________________________________________________________________
01584 void TPrincipal::P2X(const Double_t *p, Double_t *x, Int_t nTest)
01585 {
01586    // Calculate x as a function of nTest of the most significant
01587    // principal components p, and return it in x.
01588    // It's the users responsibility to make sure that both x and p are
01589    // of the right size (i.e., memory must be allocated for x).
01590 
01591    for (Int_t i = 0; i < fNumberOfVariables; i++){
01592       x[i] = fMeanValues(i);
01593       for (Int_t j = 0; j < nTest; j++)
01594          x[i] += p[j] * (fIsNormalised ? fSigmas(i) : 1)
01595          * fEigenVectors(i,j);
01596    }
01597 
01598 }
01599 
01600 //____________________________________________________________________
01601 void TPrincipal::Print(Option_t *opt) const
01602 {
01603    // Print the statistics
01604    // Options are
01605    //      M            Print mean values of original data
01606    //      S            Print sigma values of original data
01607    //      E            Print eigenvalues of covariance matrix
01608    //      V            Print eigenvectors of covariance matrix
01609    // Default is MSE
01610 
01611    Bool_t printV = kFALSE;
01612    Bool_t printM = kFALSE;
01613    Bool_t printS = kFALSE;
01614    Bool_t printE = kFALSE;
01615 
01616    Int_t len     = strlen(opt);
01617    for (Int_t i = 0; i < len; i++) {
01618       switch (opt[i]) {
01619          case 'V':
01620          case 'v':
01621             printV = kTRUE;
01622             break;
01623          case 'M':
01624          case 'm':
01625             printM = kTRUE;
01626             break;
01627          case 'S':
01628          case 's':
01629             printS = kTRUE;
01630             break;
01631          case 'E':
01632          case 'e':
01633             printE = kTRUE;
01634             break;
01635          default:
01636             Warning("Print", "Unknown option '%c'",opt[i]);
01637             break;
01638       }
01639    }
01640 
01641    if (printM||printS||printE) {
01642       cout << " Variable #  " << flush;
01643       if (printM)
01644          cout << "| Mean Value " << flush;
01645       if (printS)
01646          cout << "|   Sigma    " << flush;
01647       if (printE)
01648          cout << "| Eigenvalue" << flush;
01649       cout << endl;
01650 
01651       cout << "-------------" << flush;
01652       if (printM)
01653          cout << "+------------" << flush;
01654       if (printS)
01655          cout << "+------------" << flush;
01656       if (printE)
01657          cout << "+------------" << flush;
01658       cout << endl;
01659 
01660       for (Int_t i = 0; i < fNumberOfVariables; i++) {
01661          cout << setw(12) << i << " " << flush;
01662          if (printM)
01663             cout << "| " << setw(10) << setprecision(4)
01664             << fMeanValues(i) << " " << flush;
01665          if (printS)
01666             cout << "| " << setw(10) << setprecision(4)
01667             << fSigmas(i) << " " << flush;
01668          if (printE)
01669             cout << "| " << setw(10) << setprecision(4)
01670             << fEigenValues(i) << " " << flush;
01671          cout << endl;
01672       }
01673       cout << endl;
01674    }
01675 
01676    if(printV) {
01677       for (Int_t i = 0; i < fNumberOfVariables; i++) {
01678          cout << "Eigenvector # " << i << flush;
01679          TVectorD v(fNumberOfVariables);
01680          v = TMatrixDColumn_const(fEigenVectors,i);
01681          v.Print();
01682       }
01683    }
01684 }
01685 
01686 //____________________________________________________________________
01687 void TPrincipal::SumOfSquareResiduals(const Double_t *x, Double_t *s)
01688 {
01689    // PRIVATE METHOD:
01690    // Begin_html
01691    /*
01692     </PRE>
01693     Calculates the sum of the square residuals, that is
01694     <BR><P></P>
01695     <DIV ALIGN="CENTER">
01696 
01697     <!-- MATH
01698     \begin{displaymath}
01699     E_N = \sum_{i=0}^{P-1} \left(x_i - x^\prime_i\right)^2
01700     \end{displaymath}
01701     -->
01702 
01703 
01704     <IMG
01705     WIDTH="147" HEIGHT="58" BORDER="0"
01706     SRC="gif/principal_img52.gif"
01707     ALT="\begin{displaymath}
01708     E_N = \sum_{i=0}^{P-1} \left(x_i - x^\prime_i\right)^2
01709     \end{displaymath}">
01710     </DIV>
01711     <BR CLEAR="ALL">
01712     <P></P>
01713     where
01714     <!-- MATH
01715     $x^\prime_i = \sum_{j=i}^N p_i e_{n_j}$
01716     -->
01717     <IMG
01718     WIDTH="122" HEIGHT="40" ALIGN="MIDDLE" BORDER="0"
01719     SRC="gif/principal_img53.gif"
01720     ALT="$x^\prime_i = \sum_{j=i}^N p_i e_{n_j}$">, <IMG
01721     WIDTH="19" HEIGHT="30" ALIGN="MIDDLE" BORDER="0"
01722     SRC="gif/principal_img54.gif"
01723     ALT="$p_i$"> is the
01724     <IMG
01725     WIDTH="28" HEIGHT="23" ALIGN="BOTTOM" BORDER="0"
01726     SRC="gif/principal_img55.gif"
01727     ALT="$i^{\mbox{th}}$"> component of the principal vector, corresponding to
01728     <IMG
01729     WIDTH="20" HEIGHT="30" ALIGN="MIDDLE" BORDER="0"
01730     SRC="gif/principal_img56.gif"
01731     ALT="$x_i$">, the original data; I.e., the square distance to the space
01732     spanned by <IMG
01733     WIDTH="20" HEIGHT="15" ALIGN="BOTTOM" BORDER="0"
01734     SRC="gif/principal_img12.gif"
01735     ALT="$N$"> eigenvectors.
01736     <BR>
01737     <PRE>
01738    */
01739    // End_Html
01740    if (!x)
01741       return;
01742 
01743    Double_t p[100];
01744    Double_t xp[100];
01745 
01746    X2P(x,p);
01747    for (Int_t i = fNumberOfVariables-1; i >= 0; i--) {
01748       P2X(p,xp,i);
01749       for (Int_t j = 0; j < fNumberOfVariables; j++) {
01750          s[i] += (x[j] - xp[j])*(x[j] - xp[j]);
01751       }
01752    }
01753 }
01754 
01755 //____________________________________________________________________
01756 void TPrincipal::Test(Option_t *)
01757 {
01758    // Test the PCA, bye calculating the sum square of residuals
01759    // (see method SumOfSquareResiduals), and display the histogram
01760    MakeHistograms("pca","S");
01761 
01762    if (!fStoreData)
01763       return;
01764 
01765    TH1 *pca_s = 0;
01766    if (fHistograms) pca_s = (TH1*)fHistograms->FindObject("pca_s");
01767    if (!pca_s) {
01768       Warning("Test", "Couldn't get histogram of square residuals");
01769       return;
01770    }
01771 
01772    pca_s->Draw();
01773 }
01774 
01775 //____________________________________________________________________
01776 void TPrincipal::X2P(const Double_t *x, Double_t *p)
01777 {
01778    // Calculate the principal components from the original data vector
01779    // x, and return it in p.
01780    // It's the users responsibility to make sure that both x and p are
01781    // of the right size (i.e., memory must be allocated for p).
01782    for (Int_t i = 0; i < fNumberOfVariables; i++){
01783       p[i] = 0;
01784       for (Int_t j = 0; j < fNumberOfVariables; j++)
01785          p[i] += (x[j] - fMeanValues(j)) * fEigenVectors(j,i) /
01786          (fIsNormalised ? fSigmas(j) : 1);
01787    }
01788 
01789 }

Generated on Tue Jul 5 14:24:21 2011 for ROOT_528-00b_version by  doxygen 1.5.1