00001 // @(#)root/hist:$Id: TPrincipal.cxx 35247 2010-09-13 14:10:41Z brun $ 00002 // Author: Christian Holm Christensen 1/8/2000 00003 00004 /************************************************************************* 00005 * Copyright (C) 1995-2004, Rene Brun and Fons Rademakers. * 00006 * All rights reserved. * 00007 * * 00008 * For the licensing terms see $ROOTSYS/LICENSE. * 00009 * For the list of contributors see $ROOTSYS/README/CREDITS. * 00010 *************************************************************************/ 00011 00012 //____________________________________________________________________ 00013 //Begin_Html <!-- 00014 /* --> 00015 </pre> 00016 <H1><A NAME="SECTION00010000000000000000"></A> 00017 <A NAME="sec:lintra"></A> 00018 <BR> 00019 Principal Components Analysis (PCA) 00020 </H1> 00021 00022 <P> 00023 The current implementation is based on the LINTRA package from CERNLIB 00024 by R. Brun, H. Hansroul, and J. Kubler. 00025 The class has been implemented by Christian Holm Christensen in August 2000. 00026 00027 <P> 00028 00029 <H2><A NAME="SECTION00011000000000000000"></A> 00030 <A NAME="sec:intro1"></A> 00031 <BR> 00032 Introduction 00033 </H2> 00034 00035 <P> 00036 In many applications of various fields of research, the treatment of 00037 large amounts of data requires powerful techniques capable of rapid 00038 data reduction and analysis. Usually, the quantities most 00039 conveniently measured by the experimentalist, are not necessarily the 00040 most significant for classification and analysis of the data. It is 00041 then useful to have a way of selecting an optimal set of variables 00042 necessary for the recognition process and reducing the dimensionality 00043 of the problem, resulting in an easier classification procedure. 00044 00045 <P> 00046 This paper describes the implementation of one such method of 00047 feature selection, namely the principal components analysis. 
This
multidimensional technique is well known in the field of pattern
recognition and its use in Particle Physics has been documented
elsewhere (cf. H. Wind, <I>Function Parameterization</I>, CERN
72-21).

<P>

<H2><A NAME="SECTION00012000000000000000"></A>
<A NAME="sec:overview"></A>
<BR>
Overview
</H2>

<P>
Suppose we have prototypes which are trajectories of particles,
passing through a spectrometer. If one measures the passage of the
particle at say 8 fixed planes, the trajectory is described by an
8-component vector:
<BR><P></P>
<DIV ALIGN="CENTER">

<!-- MATH
\begin{displaymath}
\mathbf{x} = \left(x_0, x_1, \ldots, x_7\right)
\end{displaymath}
-->


<IMG
WIDTH="145" HEIGHT="31" BORDER="0"
SRC="gif/principal_img1.gif"
ALT="\begin{displaymath}
\mathbf{x} = \left(x_0, x_1, \ldots, x_7\right)
\end{displaymath}">
</DIV>
<BR CLEAR="ALL">
<P></P>
in 8-dimensional pattern space.

<P>
One proceeds by generating a representative track sample and
building up the covariance matrix <IMG
WIDTH="16" HEIGHT="16" ALIGN="BOTTOM" BORDER="0"
SRC="gif/principal_img2.gif"
ALT="$\mathsf{C}$">.
Its eigenvectors and 00093 eigenvalues are computed by standard methods, and thus a new basis is 00094 obtained for the original 8-dimensional space the expansion of the 00095 prototypes, 00096 <BR><P></P> 00097 <DIV ALIGN="CENTER"> 00098 00099 <!-- MATH 00100 \begin{displaymath} 00101 \mathbf{x}_m = \sum^7_{i=0} a_{m_i} \mathbf{e}_i 00102 \quad 00103 \mbox{where} 00104 \quad 00105 a_{m_i} = \mathbf{x}^T\bullet\mathbf{e}_i 00106 \end{displaymath} 00107 --> 00108 00109 00110 <IMG 00111 WIDTH="295" HEIGHT="58" BORDER="0" 00112 SRC="gif/principal_img3.gif" 00113 ALT="\begin{displaymath} 00114 \mathbf{x}_m = \sum^7_{i=0} a_{m_i} \mathbf{e}_i 00115 \quad 00116 \mbox{where} 00117 \quad 00118 a_{m_i} = \mathbf{x}^T\bullet\mathbf{e}_i 00119 \end{displaymath}"> 00120 </DIV> 00121 <BR CLEAR="ALL"> 00122 <P></P> 00123 00124 <P> 00125 allows the study of the behavior of the coefficients <IMG 00126 WIDTH="31" HEIGHT="30" ALIGN="MIDDLE" BORDER="0" 00127 SRC="gif/principal_img4.gif" 00128 ALT="$a_{m_i}$"> for all 00129 the tracks of the sample. The eigenvectors which are insignificant for 00130 the trajectory description in the expansion will have their 00131 corresponding coefficients <IMG 00132 WIDTH="31" HEIGHT="30" ALIGN="MIDDLE" BORDER="0" 00133 SRC="gif/principal_img4.gif" 00134 ALT="$a_{m_i}$"> close to zero for all the 00135 prototypes. 00136 00137 <P> 00138 On one hand, a reduction of the dimensionality is then obtained by 00139 omitting these least significant vectors in the subsequent analysis. 00140 00141 <P> 00142 On the other hand, in the analysis of real data, these least 00143 significant variables(?) can be used for the pattern 00144 recognition problem of extracting the valid combinations of 00145 coordinates describing a true trajectory from the set of all possible 00146 wrong combinations. 00147 00148 <P> 00149 The program described here performs this principal components analysis 00150 on a sample of data provided by the user. 
It computes the covariance
matrix, its eigenvalues and corresponding eigenvectors, and exhibits
the behavior of the principal components (<IMG
WIDTH="31" HEIGHT="30" ALIGN="MIDDLE" BORDER="0"
SRC="gif/principal_img4.gif"
ALT="$a_{m_i}$">), thus providing
to the user all the means of understanding his data.

<P>

<H2><A NAME="SECTION00013000000000000000"></A>
<A NAME="sec:method"></A>
<BR>
Principal Components Method
</H2>

<P>
Let's consider a sample of <IMG
WIDTH="23" HEIGHT="15" ALIGN="BOTTOM" BORDER="0"
SRC="gif/principal_img5.gif"
ALT="$M$"> prototypes each being characterized by
<IMG
WIDTH="18" HEIGHT="15" ALIGN="BOTTOM" BORDER="0"
SRC="gif/principal_img6.gif"
ALT="$P$"> variables
<!-- MATH
$x_0, x_1, \ldots, x_{P-1}$
-->
<IMG
WIDTH="107" HEIGHT="30" ALIGN="MIDDLE" BORDER="0"
SRC="gif/principal_img7.gif"
ALT="$x_0, x_1, \ldots, x_{P-1}$">. Each prototype is a point, or a
column vector, in a <IMG
WIDTH="18" HEIGHT="15" ALIGN="BOTTOM" BORDER="0"
SRC="gif/principal_img6.gif"
ALT="$P$">-dimensional <I>pattern space</I>.
00186 <BR> 00187 <DIV ALIGN="RIGHT"> 00188 00189 00190 <!-- MATH 00191 \begin{equation} 00192 \mathbf{x} = \left[\begin{array}{c} 00193 x_0\\x_1\\\vdots\\x_{P-1}\end{array}\right]\,, 00194 \end{equation} 00195 --> 00196 00197 <TABLE WIDTH="100%" ALIGN="CENTER"> 00198 <TR VALIGN="MIDDLE"><TD ALIGN="CENTER" NOWRAP><IMG 00199 WIDTH="102" HEIGHT="102" BORDER="0" 00200 SRC="gif/principal_img8.gif" 00201 ALT="\begin{displaymath} 00202 \mathbf{x} = \left[\begin{array}{c} 00203 x_0\\ x_1\\ \vdots\\ x_{P-1}\end{array}\right]\,, 00204 \end{displaymath}"></TD> 00205 <TD WIDTH=10 ALIGN="RIGHT"> 00206 (1)</TD></TR> 00207 </TABLE> 00208 <BR CLEAR="ALL"></DIV><P></P> 00209 where each <IMG 00210 WIDTH="23" HEIGHT="30" ALIGN="MIDDLE" BORDER="0" 00211 SRC="gif/principal_img9.gif" 00212 ALT="$x_n$"> represents the particular value associated with the 00213 <IMG 00214 WIDTH="15" HEIGHT="16" ALIGN="BOTTOM" BORDER="0" 00215 SRC="gif/principal_img10.gif" 00216 ALT="$n$">-dimension. 00217 00218 <P> 00219 Those <IMG 00220 WIDTH="18" HEIGHT="15" ALIGN="BOTTOM" BORDER="0" 00221 SRC="gif/principal_img6.gif" 00222 ALT="$P$"> variables are the quantities accessible to the 00223 experimentalist, but are not necessarily the most significant for the 00224 classification purpose. 00225 00226 <P> 00227 The <I>Principal Components Method</I> consists of applying a 00228 <I>linear</I> transformation to the original variables. This 00229 transformation is described by an orthogonal matrix and is equivalent 00230 to a rotation of the original pattern space into a new set of 00231 coordinate vectors, which hopefully provide easier feature 00232 identification and dimensionality reduction. 

<P>
Let's define the covariance matrix:
<BR>
<DIV ALIGN="RIGHT">


<!-- MATH
\begin{equation}
\mathsf{C} = \left\langle\mathbf{y}\mathbf{y}^T\right\rangle
\quad\mbox{where}\quad
\mathbf{y} = \mathbf{x} - \left\langle\mathbf{x}\right\rangle\,,
\end{equation}
-->

<TABLE WIDTH="100%" ALIGN="CENTER">
<TR VALIGN="MIDDLE"><TD ALIGN="CENTER" NOWRAP><A NAME="eq:C"></A><IMG
WIDTH="267" HEIGHT="37" BORDER="0"
SRC="gif/principal_img11.gif"
ALT="\begin{displaymath}
\mathsf{C} = \left\langle\mathbf{y}\mathbf{y}^T\right\rangl...
...athbf{y} = \mathbf{x} - \left\langle\mathbf{x}\right\rangle\,,
\end{displaymath}"></TD>
<TD WIDTH=10 ALIGN="RIGHT">
(2)</TD></TR>
</TABLE>
<BR CLEAR="ALL"></DIV><P></P>
and the brackets indicate mean value over the sample of <IMG
WIDTH="23" HEIGHT="15" ALIGN="BOTTOM" BORDER="0"
SRC="gif/principal_img5.gif"
ALT="$M$">
prototypes.

<P>
This matrix <IMG
WIDTH="16" HEIGHT="16" ALIGN="BOTTOM" BORDER="0"
SRC="gif/principal_img2.gif"
ALT="$\mathsf{C}$"> is real, positive definite, symmetric, and will
have all its eigenvalues greater than zero. It will now be shown that
among the family of all the complete orthonormal bases of the pattern
space, the base formed by the eigenvectors of the covariance matrix
and belonging to the largest eigenvalues, corresponds to the most
significant features of the description of the original prototypes.
00276 00277 <P> 00278 let the prototypes be expanded on into a set of <IMG 00279 WIDTH="20" HEIGHT="15" ALIGN="BOTTOM" BORDER="0" 00280 SRC="gif/principal_img12.gif" 00281 ALT="$N$"> basis vectors 00282 00283 <!-- MATH 00284 $\mathbf{e}_n, n=0,\ldots,N,N+1, \ldots, P-1$ 00285 --> 00286 <IMG 00287 WIDTH="233" HEIGHT="32" ALIGN="MIDDLE" BORDER="0" 00288 SRC="gif/principal_img13.gif" 00289 ALT="$\mathbf{e}_n, n=0,\ldots,N,N+1, \ldots, P-1$">, 00290 <BR> 00291 <DIV ALIGN="RIGHT"> 00292 00293 00294 <!-- MATH 00295 \begin{equation} 00296 \mathbf{y}_i = \sum^N_{i=0} a_{i_n} \mathbf{e}_n, 00297 \quad 00298 i = 1, \ldots, M, 00299 \quad 00300 N < P-1 00301 \end{equation} 00302 --> 00303 00304 <TABLE WIDTH="100%" ALIGN="CENTER"> 00305 <TR VALIGN="MIDDLE"><TD ALIGN="CENTER" NOWRAP><A NAME="eq:yi"></A><IMG 00306 WIDTH="303" HEIGHT="58" BORDER="0" 00307 SRC="gif/principal_img14.gif" 00308 ALT="\begin{displaymath} 00309 \mathbf{y}_i = \sum^N_{i=0} a_{i_n} \mathbf{e}_n, 00310 \quad 00311 i = 0, \ldots, M, 00312 \quad 00313 N < P-1 00314 \end{displaymath}"></TD> 00315 <TD WIDTH=10 ALIGN="RIGHT"> 00316 (3)</TD></TR> 00317 </TABLE> 00318 <BR CLEAR="ALL"></DIV><P></P> 00319 00320 <P> 00321 The `best' feature coordinates <IMG 00322 WIDTH="23" HEIGHT="30" ALIGN="MIDDLE" BORDER="0" 00323 SRC="gif/principal_img15.gif" 00324 ALT="$\mathbf{e}_n$">, spanning a <I>feature 00325 space</I>, will be obtained by minimizing the error due to this 00326 truncated expansion, i.e., 00327 <BR> 00328 <DIV ALIGN="RIGHT"> 00329 00330 00331 <!-- MATH 00332 \begin{equation} 00333 \min\left(E_N\right) = 00334 \min\left[\left\langle\left(\mathbf{y}_i - \sum^N_{i=0} a_{i_n} \mathbf{e}_n\right)^2\right\rangle\right] 00335 \end{equation} 00336 --> 00337 00338 <TABLE WIDTH="100%" ALIGN="CENTER"> 00339 <TR VALIGN="MIDDLE"><TD ALIGN="CENTER" NOWRAP><A NAME="eq:mini"></A><IMG 00340 WIDTH="306" HEIGHT="65" BORDER="0" 00341 SRC="gif/principal_img16.gif" 00342 ALT="\begin{displaymath} 00343 \min\left(E_N\right) = 
00344 \min\left[\left\langle\left(\mathb... 00345 ...\sum^N_{i=0} a_{i_n} \mathbf{e}_n\right)^2\right\rangle\right] 00346 \end{displaymath}"></TD> 00347 <TD WIDTH=10 ALIGN="RIGHT"> 00348 (4)</TD></TR> 00349 </TABLE> 00350 <BR CLEAR="ALL"></DIV><P></P> 00351 with the conditions: 00352 <BR> 00353 <DIV ALIGN="RIGHT"> 00354 00355 00356 <!-- MATH 00357 \begin{equation} 00358 \mathbf{e}_k\bullet\mathbf{e}_j = \delta_{jk} = 00359 \left\{\begin{array}{rcl} 00360 1 & \mbox{for} & k = j\\ 00361 0 & \mbox{for} & k \neq j 00362 \end{array}\right. 00363 \end{equation} 00364 --> 00365 00366 <TABLE WIDTH="100%" ALIGN="CENTER"> 00367 <TR VALIGN="MIDDLE"><TD ALIGN="CENTER" NOWRAP><A NAME="eq:ortocond"></A><IMG 00368 WIDTH="240" HEIGHT="54" BORDER="0" 00369 SRC="gif/principal_img17.gif" 00370 ALT="\begin{displaymath} 00371 \mathbf{e}_k\bullet\mathbf{e}_j = \delta_{jk} = 00372 \left\{\b... 00373 ...for} & k = j\\ 00374 0 & \mbox{for} & k \neq j 00375 \end{array}\right. 00376 \end{displaymath}"></TD> 00377 <TD WIDTH=10 ALIGN="RIGHT"> 00378 (5)</TD></TR> 00379 </TABLE> 00380 <BR CLEAR="ALL"></DIV><P></P> 00381 00382 <P> 00383 Multiplying (<A HREF="prin_node1.html#eq:yi">3</A>) by 00384 <!-- MATH 00385 $\mathbf{e}^T_n$ 00386 --> 00387 <IMG 00388 WIDTH="24" HEIGHT="38" ALIGN="MIDDLE" BORDER="0" 00389 SRC="gif/principal_img18.gif" 00390 ALT="$\mathbf{e}^T_n$"> using (<A HREF="prin_node1.html#eq:ortocond">5</A>), 00391 we get 00392 <BR> 00393 <DIV ALIGN="RIGHT"> 00394 00395 00396 <!-- MATH 00397 \begin{equation} 00398 a_{i_n} = \mathbf{y}_i^T\bullet\mathbf{e}_n\,, 00399 \end{equation} 00400 --> 00401 00402 <TABLE WIDTH="100%" ALIGN="CENTER"> 00403 <TR VALIGN="MIDDLE"><TD ALIGN="CENTER" NOWRAP><A NAME="eq:ai"></A><IMG 00404 WIDTH="108" HEIGHT="31" BORDER="0" 00405 SRC="gif/principal_img19.gif" 00406 ALT="\begin{displaymath} 00407 a_{i_n} = \mathbf{y}_i^T\bullet\mathbf{e}_n\,, 00408 \end{displaymath}"></TD> 00409 <TD WIDTH=10 ALIGN="RIGHT"> 00410 (6)</TD></TR> 00411 </TABLE> 00412 <BR 
CLEAR="ALL"></DIV><P></P> 00413 so the error becomes 00414 <BR> 00415 <DIV ALIGN="CENTER"><A NAME="eq:error"></A> 00416 00417 <!-- MATH 00418 \begin{eqnarray} 00419 E_N &=& 00420 \left\langle\left[\sum_{n=N+1}^{P-1} a_{i_n}\mathbf{e}_n\right]^2\right\rangle\nonumber\\ 00421 &=& 00422 \left\langle\left[\sum_{n=N+1}^{P-1} \mathbf{y}_i^T\bullet\mathbf{e}_n\mathbf{e}_n\right]^2\right\rangle\nonumber\\ 00423 &=& 00424 \left\langle\sum_{n=N+1}^{P-1} \mathbf{e}_n^T\mathbf{y}_i\mathbf{y}_i^T\mathbf{e}_n\right\rangle\nonumber\\ 00425 &=& 00426 \sum_{n=N+1}^{P-1} \mathbf{e}_n^T\mathsf{C}\mathbf{e}_n 00427 \end{eqnarray} 00428 --> 00429 00430 <TABLE ALIGN="CENTER" CELLPADDING="0" WIDTH="100%"> 00431 <TR VALIGN="MIDDLE"><TD NOWRAP ALIGN="RIGHT"><IMG 00432 WIDTH="30" HEIGHT="32" ALIGN="MIDDLE" BORDER="0" 00433 SRC="gif/principal_img20.gif" 00434 ALT="$\displaystyle E_N$"></TD> 00435 <TD ALIGN="CENTER" NOWRAP><IMG 00436 WIDTH="18" HEIGHT="30" ALIGN="MIDDLE" BORDER="0" 00437 SRC="gif/principal_img21.gif" 00438 ALT="$\textstyle =$"></TD> 00439 <TD ALIGN="LEFT" NOWRAP><IMG 00440 WIDTH="151" HEIGHT="80" ALIGN="MIDDLE" BORDER="0" 00441 SRC="gif/principal_img22.gif" 00442 ALT="$\displaystyle \left\langle\left[\sum_{n=N+1}^{P-1} a_{i_n}\mathbf{e}_n\right]^2\right\rangle$"></TD> 00443 <TD WIDTH=10 ALIGN="RIGHT"> 00444 </TD></TR> 00445 <TR VALIGN="MIDDLE"><TD NOWRAP ALIGN="RIGHT"> </TD> 00446 <TD ALIGN="CENTER" NOWRAP><IMG 00447 WIDTH="18" HEIGHT="30" ALIGN="MIDDLE" BORDER="0" 00448 SRC="gif/principal_img21.gif" 00449 ALT="$\textstyle =$"></TD> 00450 <TD ALIGN="LEFT" NOWRAP><IMG 00451 WIDTH="184" HEIGHT="80" ALIGN="MIDDLE" BORDER="0" 00452 SRC="gif/principal_img23.gif" 00453 ALT="$\displaystyle \left\langle\left[\sum_{n=N+1}^{P-1} \mathbf{y}_i^T\bullet\mathbf{e}_n\mathbf{e}_n\right]^2\right\rangle$"></TD> 00454 <TD WIDTH=10 ALIGN="RIGHT"> 00455 </TD></TR> 00456 <TR VALIGN="MIDDLE"><TD NOWRAP ALIGN="RIGHT"> </TD> 00457 <TD ALIGN="CENTER" NOWRAP><IMG 00458 WIDTH="18" HEIGHT="30" 
ALIGN="MIDDLE" BORDER="0" 00459 SRC="gif/principal_img21.gif" 00460 ALT="$\textstyle =$"></TD> 00461 <TD ALIGN="LEFT" NOWRAP><IMG 00462 WIDTH="156" HEIGHT="69" ALIGN="MIDDLE" BORDER="0" 00463 SRC="gif/principal_img24.gif" 00464 ALT="$\displaystyle \left\langle\sum_{n=N+1}^{P-1} \mathbf{e}_n^T\mathbf{y}_i\mathbf{y}_i^T\mathbf{e}_n\right\rangle$"></TD> 00465 <TD WIDTH=10 ALIGN="RIGHT"> 00466 </TD></TR> 00467 <TR VALIGN="MIDDLE"><TD NOWRAP ALIGN="RIGHT"> </TD> 00468 <TD ALIGN="CENTER" NOWRAP><IMG 00469 WIDTH="18" HEIGHT="30" ALIGN="MIDDLE" BORDER="0" 00470 SRC="gif/principal_img21.gif" 00471 ALT="$\textstyle =$"></TD> 00472 <TD ALIGN="LEFT" NOWRAP><IMG 00473 WIDTH="104" HEIGHT="69" ALIGN="MIDDLE" BORDER="0" 00474 SRC="gif/principal_img25.gif" 00475 ALT="$\displaystyle \sum_{n=N+1}^{P-1} \mathbf{e}_n^T\mathsf{C}\mathbf{e}_n$"></TD> 00476 <TD WIDTH=10 ALIGN="RIGHT"> 00477 (7)</TD></TR> 00478 </TABLE></DIV> 00479 <BR CLEAR="ALL"><P></P> 00480 00481 <P> 00482 The minimization of the sum in (<A HREF="prin_node1.html#eq:error">7</A>) is obtained when each 00483 term 00484 <!-- MATH 00485 $\mathbf{e}_n^\mathsf{C}\mathbf{e}_n$ 00486 --> 00487 <IMG 00488 WIDTH="41" HEIGHT="38" ALIGN="MIDDLE" BORDER="0" 00489 SRC="gif/principal_img26.gif" 00490 ALT="$\mathbf{e}_n^\mathsf{C}\mathbf{e}_n$"> is minimum, since <IMG 00491 WIDTH="16" HEIGHT="16" ALIGN="BOTTOM" BORDER="0" 00492 SRC="gif/principal_img2.gif" 00493 ALT="$\mathsf{C}$"> is 00494 positive definite. 
By the method of Lagrange multipliers, and the 00495 condition (<A HREF="prin_node1.html#eq:ortocond">5</A>), we get 00496 00497 <P> 00498 00499 <BR> 00500 <DIV ALIGN="RIGHT"> 00501 00502 00503 <!-- MATH 00504 \begin{equation} 00505 E_N = \sum^{P-1}_{n=N+1} \left(\mathbf{e}_n^T\mathsf{C}\mathbf{e}_n - 00506 l_n\mathbf{e}_n^T\bullet\mathbf{e}_n + l_n\right) 00507 \end{equation} 00508 --> 00509 00510 <TABLE WIDTH="100%" ALIGN="CENTER"> 00511 <TR VALIGN="MIDDLE"><TD ALIGN="CENTER" NOWRAP><A NAME="eq:minerror"></A><IMG 00512 WIDTH="291" HEIGHT="60" BORDER="0" 00513 SRC="gif/principal_img27.gif" 00514 ALT="\begin{displaymath} 00515 E_N = \sum^{P-1}_{n=N+1} \left(\mathbf{e}_n^T\mathsf{C}\mathbf{e}_n - 00516 l_n\mathbf{e}_n^T\bullet\mathbf{e}_n + l_n\right) 00517 \end{displaymath}"></TD> 00518 <TD WIDTH=10 ALIGN="RIGHT"> 00519 (8)</TD></TR> 00520 </TABLE> 00521 <BR CLEAR="ALL"></DIV><P></P> 00522 The minimum condition 00523 <!-- MATH 00524 $\frac{dE_N}{d\mathbf{e}^T_n} = 0$ 00525 --> 00526 <IMG 00527 WIDTH="68" HEIGHT="40" ALIGN="MIDDLE" BORDER="0" 00528 SRC="gif/principal_img28.gif" 00529 ALT="$\frac{dE_N}{d\mathbf{e}^T_n} = 0$"> leads to the 00530 equation 00531 <BR> 00532 <DIV ALIGN="RIGHT"> 00533 00534 00535 <!-- MATH 00536 \begin{equation} 00537 \mathsf{C}\mathbf{e}_n = l_n\mathbf{e}_n\,, 00538 \end{equation} 00539 --> 00540 00541 <TABLE WIDTH="100%" ALIGN="CENTER"> 00542 <TR VALIGN="MIDDLE"><TD ALIGN="CENTER" NOWRAP><A NAME="eq:Ce"></A><IMG 00543 WIDTH="91" HEIGHT="30" BORDER="0" 00544 SRC="gif/principal_img29.gif" 00545 ALT="\begin{displaymath} 00546 \mathsf{C}\mathbf{e}_n = l_n\mathbf{e}_n\,, 00547 \end{displaymath}"></TD> 00548 <TD WIDTH=10 ALIGN="RIGHT"> 00549 (9)</TD></TR> 00550 </TABLE> 00551 <BR CLEAR="ALL"></DIV><P></P> 00552 which shows that <IMG 00553 WIDTH="23" HEIGHT="30" ALIGN="MIDDLE" BORDER="0" 00554 SRC="gif/principal_img15.gif" 00555 ALT="$\mathbf{e}_n$"> is an eigenvector of the covariance 00556 matrix <IMG 00557 WIDTH="16" HEIGHT="16" 
ALIGN="BOTTOM" BORDER="0" 00558 SRC="gif/principal_img2.gif" 00559 ALT="$\mathsf{C}$"> with eigenvalue <IMG 00560 WIDTH="19" HEIGHT="32" ALIGN="MIDDLE" BORDER="0" 00561 SRC="gif/principal_img30.gif" 00562 ALT="$l_n$">. The estimated minimum error is 00563 then given by 00564 <BR> 00565 <DIV ALIGN="RIGHT"> 00566 00567 00568 <!-- MATH 00569 \begin{equation} 00570 E_N \sim \sum^{P-1}_{n=N+1} \mathbf{e}_n^T\bullet l_n\mathbf{e}_n 00571 = \sum^{P-1}_{n=N+1} l_n\,, 00572 \end{equation} 00573 --> 00574 00575 <TABLE WIDTH="100%" ALIGN="CENTER"> 00576 <TR VALIGN="MIDDLE"><TD ALIGN="CENTER" NOWRAP><A NAME="eq:esterror"></A><IMG 00577 WIDTH="264" HEIGHT="60" BORDER="0" 00578 SRC="gif/principal_img31.gif" 00579 ALT="\begin{displaymath} 00580 E_N \sim \sum^{P-1}_{n=N+1} \mathbf{e}_n^T\bullet l_n\mathbf{e}_n 00581 = \sum^{P-1}_{n=N+1} l_n\,, 00582 \end{displaymath}"></TD> 00583 <TD WIDTH=10 ALIGN="RIGHT"> 00584 (10)</TD></TR> 00585 </TABLE> 00586 <BR CLEAR="ALL"></DIV><P></P> 00587 where 00588 <!-- MATH 00589 $l_n,\,n=N+1,\ldots,P$ 00590 --> 00591 <IMG 00592 WIDTH="161" HEIGHT="32" ALIGN="MIDDLE" BORDER="0" 00593 SRC="gif/principal_img32.gif" 00594 ALT="$l_n,\,n=N+1,\ldots,P-1$"> are the eigenvalues associated with the 00595 omitted eigenvectors in the expansion (<A HREF="prin_node1.html#eq:yi">3</A>). Thus, by choosing 00596 the <IMG 00597 WIDTH="20" HEIGHT="15" ALIGN="BOTTOM" BORDER="0" 00598 SRC="gif/principal_img12.gif" 00599 ALT="$N$"> largest eigenvalues, and their associated eigenvectors, the 00600 error <IMG 00601 WIDTH="30" HEIGHT="32" ALIGN="MIDDLE" BORDER="0" 00602 SRC="gif/principal_img33.gif" 00603 ALT="$E_N$"> is minimized. 
00604 00605 <P> 00606 The transformation matrix to go from the pattern space to the feature 00607 space consists of the ordered eigenvectors 00608 00609 <!-- MATH 00610 $\mathbf{e}_1,\ldots,\mathbf{e}_P$ 00611 --> 00612 <IMG 00613 WIDTH="80" HEIGHT="30" ALIGN="MIDDLE" BORDER="0" 00614 SRC="gif/principal_img34.gif" 00615 ALT="$\mathbf{e}_0,\ldots,\mathbf{e}_{P-1}$"> for its columns 00616 <BR> 00617 <DIV ALIGN="RIGHT"> 00618 00619 00620 <!-- MATH 00621 \begin{equation} 00622 \mathsf{T} = \left[ 00623 \begin{array}{cccc} 00624 \mathbf{e}_0 & 00625 \mathbf{e}_1 & 00626 \vdots & 00627 \mathbf{e}_{P-1} 00628 \end{array}\right] 00629 = \left[ 00630 \begin{array}{cccc} 00631 \mathbf{e}_{0_0} & \mathbf{e}_{1_0} & \cdots & \mathbf{e}_{{P-1}_0}\\ 00632 \mathbf{e}_{0_1} & \mathbf{e}_{1_1} & \cdots & \mathbf{e}_{{P-1}_1}\\ 00633 \vdots & \vdots & \ddots & \vdots \\ 00634 \mathbf{e}_{0_{P-1}} & \mathbf{e}_{1_{P-1}} & \cdots & \mathbf{e}_{{P-1}_{P-1}}\\ 00635 \end{array}\right] 00636 \end{equation} 00637 --> 00638 00639 <TABLE WIDTH="100%" ALIGN="CENTER"> 00640 <TR VALIGN="MIDDLE"><TD ALIGN="CENTER" NOWRAP><A NAME="eq:trans"></A><IMG 00641 WIDTH="378" HEIGHT="102" BORDER="0" 00642 SRC="gif/principal_img35.gif" 00643 ALT="\begin{displaymath} 00644 \mathsf{T} = \left[ 00645 \begin{array}{cccc} 00646 \mathbf{e}_0 & 00647 \... 00648 ...bf{e}_{1_{P-1}} & \cdots & \mathbf{e}_{{P-1}_{P-1}}\\ 00649 \end{array}\right] 00650 \end{displaymath}"></TD> 00651 <TD WIDTH=10 ALIGN="RIGHT"> 00652 (11)</TD></TR> 00653 </TABLE> 00654 <BR CLEAR="ALL"></DIV><P></P> 00655 This is an orthogonal transformation, or rotation, of the pattern 00656 space and feature selection results in ignoring certain coordinates 00657 in the transformed space. 
00658 <p> 00659 <DIV ALIGN="RIGHT"> 00660 Christian Holm<br> 00661 August 2000, CERN 00662 </DIV> 00663 <!--*/ 00664 // -->End_Html 00665 00666 // $Id: TPrincipal.cxx 35247 2010-09-13 14:10:41Z brun $ 00667 // $Date: 2006/05/24 14:55:26 $ 00668 // $Author: brun $ 00669 00670 #include "TPrincipal.h" 00671 00672 #include "TVectorD.h" 00673 #include "TMatrixD.h" 00674 #include "TMatrixDSymEigen.h" 00675 #include "TMath.h" 00676 #include "TList.h" 00677 #include "TH2.h" 00678 #include "TDatime.h" 00679 #include "TBrowser.h" 00680 #include "TROOT.h" 00681 #include "Riostream.h" 00682 00683 00684 ClassImp(TPrincipal); 00685 00686 //____________________________________________________________________ 00687 TPrincipal::TPrincipal() 00688 : fMeanValues(0), 00689 fSigmas(0), 00690 fCovarianceMatrix(1,1), 00691 fEigenVectors(1,1), 00692 fEigenValues(0), 00693 fOffDiagonal(0), 00694 fStoreData(kFALSE) 00695 { 00696 // Empty CTOR, Do not use. 00697 00698 fTrace = 0; 00699 fHistograms = 0; 00700 fIsNormalised = kFALSE; 00701 fNumberOfDataPoints = 0; 00702 fNumberOfVariables = 0; 00703 } 00704 00705 //____________________________________________________________________ 00706 TPrincipal::TPrincipal(Int_t nVariables, Option_t *opt) 00707 : fMeanValues(nVariables), 00708 fSigmas(nVariables), 00709 fCovarianceMatrix(nVariables,nVariables), 00710 fEigenVectors(nVariables,nVariables), 00711 fEigenValues(nVariables), 00712 fOffDiagonal(nVariables), 00713 fStoreData(kFALSE) 00714 { 00715 // Ctor. Argument is number of variables in the sample of data 00716 // Options are: 00717 // N Normalize the covariance matrix (default) 00718 // D Store input data (default) 00719 // 00720 // The created object is named "principal" by default. 
00721 if (nVariables <= 1) { 00722 Error("TPrincipal", "You can't be serious - nVariables == 1!!!"); 00723 return; 00724 } 00725 00726 SetName("principal"); 00727 00728 fTrace = 0; 00729 fHistograms = 0; 00730 fIsNormalised = kFALSE; 00731 fNumberOfDataPoints = 0; 00732 fNumberOfVariables = nVariables; 00733 while (strlen(opt) > 0) { 00734 switch(*opt++) { 00735 case 'N': 00736 case 'n': 00737 fIsNormalised = kTRUE; 00738 break; 00739 case 'D': 00740 case 'd': 00741 fStoreData = kTRUE; 00742 break; 00743 default: 00744 break; 00745 } 00746 } 00747 00748 if (!fMeanValues.IsValid()) 00749 Error("TPrincipal","Couldn't create vector mean values"); 00750 if (!fSigmas.IsValid()) 00751 Error("TPrincipal","Couldn't create vector sigmas"); 00752 if (!fCovarianceMatrix.IsValid()) 00753 Error("TPrincipal","Couldn't create covariance matrix"); 00754 if (!fEigenVectors.IsValid()) 00755 Error("TPrincipal","Couldn't create eigenvector matrix"); 00756 if (!fEigenValues.IsValid()) 00757 Error("TPrincipal","Couldn't create eigenvalue vector"); 00758 if (!fOffDiagonal.IsValid()) 00759 Error("TPrincipal","Couldn't create offdiagonal vector"); 00760 if (fStoreData) { 00761 fUserData.ResizeTo(nVariables*1000); 00762 fUserData.Zero(); 00763 if (!fUserData.IsValid()) 00764 Error("TPrincipal","Couldn't create user data vector"); 00765 } 00766 } 00767 00768 //____________________________________________________________________ 00769 TPrincipal::TPrincipal(const TPrincipal& pr) : 00770 TNamed(pr), 00771 fNumberOfDataPoints(pr.fNumberOfDataPoints), 00772 fNumberOfVariables(pr.fNumberOfVariables), 00773 fMeanValues(pr.fMeanValues), 00774 fSigmas(pr.fSigmas), 00775 fCovarianceMatrix(pr.fCovarianceMatrix), 00776 fEigenVectors(pr.fEigenVectors), 00777 fEigenValues(pr.fEigenValues), 00778 fOffDiagonal(pr.fOffDiagonal), 00779 fUserData(pr.fUserData), 00780 fTrace(pr.fTrace), 00781 fHistograms(pr.fHistograms), 00782 fIsNormalised(pr.fIsNormalised), 00783 fStoreData(pr.fStoreData) 00784 { 00785 
//copy constructor 00786 } 00787 00788 //____________________________________________________________________ 00789 TPrincipal& TPrincipal::operator=(const TPrincipal& pr) 00790 { 00791 //assignement operator 00792 if(this!=&pr) { 00793 TNamed::operator=(pr); 00794 fNumberOfDataPoints=pr.fNumberOfDataPoints; 00795 fNumberOfVariables=pr.fNumberOfVariables; 00796 fMeanValues=pr.fMeanValues; 00797 fSigmas=pr.fSigmas; 00798 fCovarianceMatrix=pr.fCovarianceMatrix; 00799 fEigenVectors=pr.fEigenVectors; 00800 fEigenValues=pr.fEigenValues; 00801 fOffDiagonal=pr.fOffDiagonal; 00802 fUserData=pr.fUserData; 00803 fTrace=pr.fTrace; 00804 fHistograms=pr.fHistograms; 00805 fIsNormalised=pr.fIsNormalised; 00806 fStoreData=pr.fStoreData; 00807 } 00808 return *this; 00809 } 00810 00811 //____________________________________________________________________ 00812 TPrincipal::~TPrincipal() 00813 { 00814 // destructor 00815 00816 if (fHistograms) { 00817 fHistograms->Delete(); 00818 delete fHistograms; 00819 } 00820 } 00821 00822 //____________________________________________________________________ 00823 void TPrincipal::AddRow(const Double_t *p) 00824 { 00825 // Begin_Html 00826 /* 00827 </PRE> 00828 Add a data point and update the covariance matrix. The input 00829 array must be <TT>fNumberOfVariables</TT> long. 
00830 00831 <P> 00832 The Covariance matrix and mean values of the input data is caculated 00833 on the fly by the following equations: 00834 <BR><P></P> 00835 <DIV ALIGN="CENTER"> 00836 00837 <!-- MATH 00838 \begin{displaymath} 00839 \left<x_i\right>^{(0)} = x_{i0} 00840 \end{displaymath} 00841 --> 00842 00843 00844 <IMG 00845 WIDTH="90" HEIGHT="31" BORDER="0" 00846 SRC="gif/principal_img36.gif" 00847 ALT="\begin{displaymath} 00848 \left<x_i\right>^{(0)} = x_{i0} 00849 \end{displaymath}"> 00850 </DIV> 00851 <BR CLEAR="ALL"> 00852 <P></P> 00853 <BR><P></P> 00854 <DIV ALIGN="CENTER"> 00855 00856 <!-- MATH 00857 \begin{displaymath} 00858 \left<x_i\right>^{(n)} = \left<x_i\right>^{(n-1)} 00859 + \frac1n \left(x_{in} - \left<x_i\right>^{(n-1)}\right) 00860 \end{displaymath} 00861 --> 00862 00863 00864 <IMG 00865 WIDTH="302" HEIGHT="42" BORDER="0" 00866 SRC="gif/principal_img37.gif" 00867 ALT="\begin{displaymath} 00868 \left<x_i\right>^{(n)} = \left<x_i\right>^{(n-1)} 00869 + \frac1n \left(x_{in} - \left<x_i\right>^{(n-1)}\right) 00870 \end{displaymath}"> 00871 </DIV> 00872 <BR CLEAR="ALL"> 00873 <P></P> 00874 <BR><P></P> 00875 <DIV ALIGN="CENTER"> 00876 00877 <!-- MATH 00878 \begin{displaymath} 00879 C_{ij}^{(0)} = 0 00880 \end{displaymath} 00881 --> 00882 00883 00884 <IMG 00885 WIDTH="62" HEIGHT="34" BORDER="0" 00886 SRC="gif/principal_img38.gif" 00887 ALT="\begin{displaymath} 00888 C_{ij}^{(0)} = 0 00889 \end{displaymath}"> 00890 </DIV> 00891 <BR CLEAR="ALL"> 00892 <P></P> 00893 <BR><P></P> 00894 <DIV ALIGN="CENTER"> 00895 00896 <!-- MATH 00897 \begin{displaymath} 00898 C_{ij}^{(n)} = C_{ij}^{(n-1)} 00899 + \frac1{n-1}\left[\left(x_{in} - \left<x_i\right>^{(n)}\right) 00900 \left(x_{jn} - \left<x_j\right>^{(n)}\right)\right] 00901 - \frac1n C_{ij}^{(n-1)} 00902 \end{displaymath} 00903 --> 00904 00905 00906 <IMG 00907 WIDTH="504" HEIGHT="43" BORDER="0" 00908 SRC="gif/principal_img39.gif" 00909 ALT="\begin{displaymath} 00910 C_{ij}^{(n)} = C_{ij}^{(n-1)} 00911 + 
\frac1{n-1}\left[\left(x_{i... 00912 ...\left<x_j\right>^{(n)}\right)\right] 00913 - \frac1n C_{ij}^{(n-1)} 00914 \end{displaymath}"> 00915 </DIV> 00916 <BR CLEAR="ALL"> 00917 <P></P> 00918 since this is a really fast method, with no rounding errors (please 00919 refer to CERN 72-21 pp. 54-106). 00920 00921 <P> 00922 The data is stored internally in a <TT>TVectorD</TT>, in the following 00923 way: 00924 <BR><P></P> 00925 <DIV ALIGN="CENTER"> 00926 00927 <!-- MATH 00928 \begin{displaymath} 00929 \mathbf{x} = \left[\left(x_{0_0},\ldots,x_{{P-1}_0}\right),\ldots, 00930 \left(x_{0_i},\ldots,x_{{P-1}_i}\right), \ldots\right] 00931 \end{displaymath} 00932 --> 00933 00934 00935 <IMG 00936 WIDTH="319" HEIGHT="31" BORDER="0" 00937 SRC="gif/principal_img40.gif" 00938 ALT="\begin{displaymath} 00939 \mathbf{x} = \left[\left(x_{0_0},\ldots,x_{{P-1}_0}\right),\ldots, 00940 \left(x_{0_i},\ldots,x_{{P-1}_i}\right), \ldots\right] 00941 \end{displaymath}"> 00942 </DIV> 00943 <BR CLEAR="ALL"> 00944 <P></P> 00945 With <IMG 00946 WIDTH="18" HEIGHT="15" ALIGN="BOTTOM" BORDER="0" 00947 SRC="gif/principal_img6.gif" 00948 ALT="$P$"> as defined in the class description. 
00949 <PRE> 00950 */ 00951 // End_Html 00952 if (!p) 00953 return; 00954 00955 // Increment the data point counter 00956 Int_t i,j; 00957 if (++fNumberOfDataPoints == 1) { 00958 for (i = 0; i < fNumberOfVariables; i++) 00959 fMeanValues(i) = p[i]; 00960 } 00961 else { 00962 00963 Double_t cor = 1 - 1./Double_t(fNumberOfDataPoints); 00964 for (i = 0; i < fNumberOfVariables; i++) { 00965 00966 fMeanValues(i) *= cor; 00967 fMeanValues(i) += p[i] / Double_t(fNumberOfDataPoints); 00968 Double_t t1 = (p[i] - fMeanValues(i)) / (fNumberOfDataPoints - 1); 00969 00970 // Setting Matrix (lower triangle) elements 00971 for (j = 0; j < i + 1; j++) { 00972 fCovarianceMatrix(i,j) *= cor; 00973 fCovarianceMatrix(i,j) += t1 * (p[j] - fMeanValues(j)); 00974 } 00975 } 00976 } 00977 00978 // Store data point in internal vector 00979 // If the vector isn't big enough to hold the new data, then 00980 // expand the vector by half it's size. 00981 if (!fStoreData) 00982 return; 00983 Int_t size = fUserData.GetNrows(); 00984 if (fNumberOfDataPoints * fNumberOfVariables > size) 00985 fUserData.ResizeTo(size + size/2); 00986 00987 for (i = 0; i < fNumberOfVariables; i++) { 00988 j = (fNumberOfDataPoints-1) * fNumberOfVariables + i; 00989 fUserData(j) = p[i]; 00990 } 00991 00992 } 00993 00994 //____________________________________________________________________ 00995 void TPrincipal::Browse(TBrowser *b) 00996 { 00997 // Browse the TPrincipal object in the TBrowser. 
00998 if (fHistograms) { 00999 TIter next(fHistograms); 01000 TH1* h = 0; 01001 while ((h = (TH1*)next())) 01002 b->Add(h,h->GetName()); 01003 } 01004 01005 if (fStoreData) 01006 b->Add(&fUserData,"User Data"); 01007 b->Add(&fCovarianceMatrix,"Covariance Matrix"); 01008 b->Add(&fMeanValues,"Mean value vector"); 01009 b->Add(&fSigmas,"Sigma value vector"); 01010 b->Add(&fEigenValues,"Eigenvalue vector"); 01011 b->Add(&fEigenVectors,"Eigenvector Matrix"); 01012 01013 } 01014 01015 //____________________________________________________________________ 01016 void TPrincipal::Clear(Option_t *opt) 01017 { 01018 // Clear the data in Object. Notice, that's not possible to change 01019 // the dimension of the original data. 01020 if (fHistograms) { 01021 fHistograms->Delete(opt); 01022 } 01023 01024 fNumberOfDataPoints = 0; 01025 fTrace = 0; 01026 fCovarianceMatrix.Zero(); 01027 fEigenVectors.Zero(); 01028 fEigenValues.Zero(); 01029 fMeanValues.Zero(); 01030 fSigmas.Zero(); 01031 fOffDiagonal.Zero(); 01032 01033 if (fStoreData) { 01034 fUserData.ResizeTo(fNumberOfVariables * 1000); 01035 fUserData.Zero(); 01036 } 01037 } 01038 01039 //____________________________________________________________________ 01040 const Double_t *TPrincipal::GetRow(Int_t row) 01041 { 01042 // Return a row of the user supplied data. 01043 // If row is out of bounds, 0 is returned. 01044 // It's up to the user to delete the returned array. 01045 // Row 0 is the first row; 01046 if (row >= fNumberOfDataPoints) 01047 return 0; 01048 01049 if (!fStoreData) 01050 return 0; 01051 01052 Int_t index = row * fNumberOfVariables; 01053 return &fUserData(index); 01054 } 01055 01056 01057 //____________________________________________________________________ 01058 void TPrincipal::MakeCode(const char *filename, Option_t *opt) 01059 { 01060 // Generates the file <filename>, with .C appended if it does 01061 // argument doesn't end in .cxx or .C. 
01062 // 01063 // The file contains the implementation of two functions 01064 // 01065 // void X2P(Double_t *x, Double *p) 01066 // void P2X(Double_t *p, Double *x, Int_t nTest) 01067 // 01068 // which does the same as TPrincipal::X2P and TPrincipal::P2X 01069 // respectively. Please refer to these methods. 01070 // 01071 // Further, the static variables: 01072 // 01073 // Int_t gNVariables 01074 // Double_t gEigenValues[] 01075 // Double_t gEigenVectors[] 01076 // Double_t gMeanValues[] 01077 // Double_t gSigmaValues[] 01078 // 01079 // are initialized. The only ROOT header file needed is Rtypes.h 01080 // 01081 // See TPrincipal::MakeRealCode for a list of options 01082 01083 TString outName(filename); 01084 if (!outName.EndsWith(".C") && !outName.EndsWith(".cxx")) 01085 outName += ".C"; 01086 01087 MakeRealCode(outName.Data(),"",opt); 01088 } 01089 01090 //____________________________________________________________________ 01091 void TPrincipal::MakeHistograms(const char *name, Option_t *opt) 01092 { 01093 // Make histograms of the result of the analysis. 01094 // The option string say which histograms to create 01095 // X Histogram original data 01096 // P Histogram principal components corresponding to 01097 // original data 01098 // D Histogram the difference between the original data 01099 // and the projection of principal unto a lower 01100 // dimensional subspace (2D histograms) 01101 // E Histogram the eigenvalues 01102 // S Histogram the square of the residues 01103 // (see TPrincipal::SumOfSquareResidues) 01104 // The histograms will be named <name>_<type><number>, where <name> 01105 // is the first argument, <type> is one of X,P,D,E,S, and <number> 01106 // is the variable. 
01107 Bool_t makeX = kFALSE; 01108 Bool_t makeD = kFALSE; 01109 Bool_t makeP = kFALSE; 01110 Bool_t makeE = kFALSE; 01111 Bool_t makeS = kFALSE; 01112 01113 Int_t len = strlen(opt); 01114 Int_t i,j,k; 01115 for (i = 0; i < len; i++) { 01116 switch (opt[i]) { 01117 case 'X': 01118 case 'x': 01119 if (fStoreData) 01120 makeX = kTRUE; 01121 break; 01122 case 'd': 01123 case 'D': 01124 if (fStoreData) 01125 makeD = kTRUE; 01126 break; 01127 case 'P': 01128 case 'p': 01129 if (fStoreData) 01130 makeP = kTRUE; 01131 break; 01132 case 'E': 01133 case 'e': 01134 makeE = kTRUE; 01135 break; 01136 case 's': 01137 case 'S': 01138 if (fStoreData) 01139 makeS = kTRUE; 01140 break; 01141 default: 01142 Warning("MakeHistograms","Unknown option: %c",opt[i]); 01143 } 01144 } 01145 01146 // If no option was given, then exit gracefully 01147 if (!makeX && !makeD && !makeP && !makeE && !makeS) 01148 return; 01149 01150 // If the list of histograms doesn't exist, create it. 01151 if (!fHistograms) 01152 fHistograms = new TList; 01153 01154 // Don't create the histograms if they are already in the TList. 
01155 if (makeX && fHistograms->FindObject(Form("%s_x000",name))) 01156 makeX = kFALSE; 01157 if (makeD && fHistograms->FindObject(Form("%s_d000",name))) 01158 makeD = kFALSE; 01159 if (makeP && fHistograms->FindObject(Form("%s_p000",name))) 01160 makeP = kFALSE; 01161 if (makeE && fHistograms->FindObject(Form("%s_e",name))) 01162 makeE = kFALSE; 01163 if (makeS && fHistograms->FindObject(Form("%s_s",name))) 01164 makeS = kFALSE; 01165 01166 TH1F **hX = 0; 01167 TH2F **hD = 0; 01168 TH1F **hP = 0; 01169 TH1F *hE = 0; 01170 TH1F *hS = 0; 01171 01172 // Initialize the arrays of histograms needed 01173 if (makeX) 01174 hX = new TH1F * [fNumberOfVariables]; 01175 01176 if (makeD) 01177 hD = new TH2F * [fNumberOfVariables]; 01178 01179 if (makeP) 01180 hP = new TH1F * [fNumberOfVariables]; 01181 01182 if (makeE){ 01183 hE = new TH1F(Form("%s_e",name), "Eigenvalues of Covariance matrix", 01184 fNumberOfVariables,0,fNumberOfVariables); 01185 hE->SetXTitle("Eigenvalue"); 01186 fHistograms->Add(hE); 01187 } 01188 01189 if (makeS) { 01190 hS = new TH1F(Form("%s_s",name),"E_{N}", 01191 fNumberOfVariables-1,1,fNumberOfVariables); 01192 hS->SetXTitle("N"); 01193 hS->SetYTitle("#sum_{i=1}^{M} (x_{i} - x'_{N,i})^{2}"); 01194 fHistograms->Add(hS); 01195 } 01196 01197 // Initialize sub elements of the histogram arrays 01198 for (i = 0; i < fNumberOfVariables; i++) { 01199 if (makeX) { 01200 // We allow 4 sigma spread in the original data in our 01201 // histogram. 01202 Double_t xlowb = fMeanValues(i) - 4 * fSigmas(i); 01203 Double_t xhighb = fMeanValues(i) + 4 * fSigmas(i); 01204 Int_t xbins = fNumberOfDataPoints/100; 01205 hX[i] = new TH1F(Form("%s_x%03d", name, i), 01206 Form("Pattern space, variable %d", i), 01207 xbins,xlowb,xhighb); 01208 hX[i]->SetXTitle(Form("x_{%d}",i)); 01209 fHistograms->Add(hX[i]); 01210 } 01211 01212 if(makeD) { 01213 // The upper limit below is arbitrary!!! 
01214 Double_t dlowb = 0; 01215 Double_t dhighb = 20; 01216 Int_t dbins = fNumberOfDataPoints/100; 01217 hD[i] = new TH2F(Form("%s_d%03d", name, i), 01218 Form("Distance from pattern to " 01219 "feature space, variable %d", i), 01220 dbins,dlowb,dhighb, 01221 fNumberOfVariables-1, 01222 1, 01223 fNumberOfVariables); 01224 hD[i]->SetXTitle(Form("|x_{%d} - x'_{%d,N}|/#sigma_{%d}",i,i,i)); 01225 hD[i]->SetYTitle("N"); 01226 fHistograms->Add(hD[i]); 01227 } 01228 01229 if(makeP) { 01230 // For some reason, the trace of the none-scaled matrix 01231 // (see TPrincipal::MakeNormalised) should enter here. Taken 01232 // from LINTRA code. 01233 Double_t et = TMath::Abs(fEigenValues(i) * fTrace); 01234 Double_t plowb = -10 * TMath::Sqrt(et); 01235 Double_t phighb = -plowb; 01236 Int_t pbins = 100; 01237 hP[i] = new TH1F(Form("%s_p%03d", name, i), 01238 Form("Feature space, variable %d", i), 01239 pbins,plowb,phighb); 01240 hP[i]->SetXTitle(Form("p_{%d}",i)); 01241 fHistograms->Add(hP[i]); 01242 } 01243 01244 if (makeE) 01245 // The Eigenvector histogram is easy 01246 hE->Fill(i,fEigenValues(i)); 01247 01248 } 01249 if (!makeX && !makeP && !makeD && !makeS) 01250 return; 01251 01252 Double_t *x = 0; 01253 Double_t *p = new Double_t[fNumberOfVariables]; 01254 Double_t *d = new Double_t[fNumberOfVariables]; 01255 for (i = 0; i < fNumberOfDataPoints; i++) { 01256 01257 // Zero arrays 01258 for (j = 0; j < fNumberOfVariables; j++) 01259 p[j] = d[j] = 0; 01260 01261 // update the original data histogram 01262 x = (Double_t*)(GetRow(i)); 01263 01264 if (makeP||makeD||makeS) 01265 // calculate the corresponding principal component 01266 X2P(x,p); 01267 01268 if (makeD || makeS) { 01269 // Calculate the difference between the original data, and the 01270 // same project onto principal components, and then to a lower 01271 // dimensional sub-space 01272 for (j = fNumberOfVariables; j > 0; j--) { 01273 P2X(p,d,j); 01274 01275 for (k = 0; k < fNumberOfVariables; k++) { 01276 // We use 
the absolute value of the difference! 01277 d[k] = x[k] - d[k]; 01278 01279 if (makeS) 01280 hS->Fill(j,d[k]*d[k]); 01281 01282 if (makeD) { 01283 d[k] = TMath::Abs(d[k]) / (fIsNormalised ? fSigmas(k) : 1); 01284 (hD[k])->Fill(d[k],j); 01285 } 01286 } 01287 } 01288 } 01289 01290 if (makeX||makeP) { 01291 // If we are asked to make any of these histograms, we have to 01292 // go here 01293 for (j = 0; j < fNumberOfVariables; j++) { 01294 if (makeX) 01295 (hX[j])->Fill(x[j]); 01296 01297 if (makeP) 01298 (hP[j])->Fill(p[j]); 01299 } 01300 } 01301 } 01302 // Clean up 01303 if (hX) 01304 delete [] hX; 01305 if (hD) 01306 delete [] hD; 01307 if (hP) 01308 delete [] hP; 01309 if (d) 01310 delete [] d; 01311 if (p) 01312 delete [] p; 01313 01314 // Normalize the residues 01315 if (makeS) 01316 hS->Scale(Double_t(1.)/fNumberOfDataPoints); 01317 } 01318 01319 //____________________________________________________________________ 01320 void TPrincipal::MakeNormalised() 01321 { 01322 // PRIVATE METHOD: Normalize the covariance matrix 01323 01324 Int_t i,j; 01325 for (i = 0; i < fNumberOfVariables; i++) { 01326 fSigmas(i) = TMath::Sqrt(fCovarianceMatrix(i,i)); 01327 if (fIsNormalised) 01328 for (j = 0; j <= i; j++) 01329 fCovarianceMatrix(i,j) /= (fSigmas(i) * fSigmas(j)); 01330 01331 fTrace += fCovarianceMatrix(i,i); 01332 } 01333 01334 // Fill remaining parts of matrix, and scale. 
01335 for (i = 0; i < fNumberOfVariables; i++) 01336 for (j = 0; j <= i; j++) { 01337 fCovarianceMatrix(i,j) /= fTrace; 01338 fCovarianceMatrix(j,i) = fCovarianceMatrix(i,j); 01339 } 01340 01341 } 01342 01343 //____________________________________________________________________ 01344 void TPrincipal::MakeMethods(const char *classname, Option_t *opt) 01345 { 01346 // Generate the file <classname>PCA.cxx which contains the 01347 // implementation of two methods: 01348 // 01349 // void <classname>::X2P(Double_t *x, Double *p) 01350 // void <classname>::P2X(Double_t *p, Double *x, Int_t nTest) 01351 // 01352 // which does the same as TPrincipal::X2P and TPrincipal::P2X 01353 // respectivly. Please refer to these methods. 01354 // 01355 // Further, the public static members: 01356 // 01357 // Int_t <classname>::fgNVariables 01358 // Double_t <classname>::fgEigenValues[] 01359 // Double_t <classname>::fgEigenVectors[] 01360 // Double_t <classname>::fgMeanValues[] 01361 // Double_t <classname>::fgSigmaValues[] 01362 // 01363 // are initialized, and assumed to exist. The class declaration is 01364 // assumed to be in <classname>.h and assumed to be provided by the 01365 // user. 01366 // 01367 // See TPrincipal::MakeRealCode for a list of options 01368 // 01369 // The minimal class definition is: 01370 // 01371 // class <classname> { 01372 // public: 01373 // static Int_t fgNVariables; 01374 // static Double_t fgEigenVectors[]; 01375 // static Double_t fgEigenValues[]; 01376 // static Double_t fgMeanValues[]; 01377 // static Double_t fgSigmaValues[]; 01378 // 01379 // void X2P(Double_t *x, Double_t *p); 01380 // void P2X(Double_t *p, Double_t *x, Int_t nTest); 01381 // }; 01382 // 01383 // Whether the methods <classname>::X2P and <classname>::P2X should 01384 // be static or not, is up to the user. 
01385 01386 01387 MakeRealCode(Form("%sPCA.cxx", classname), classname, opt); 01388 } 01389 01390 01391 //____________________________________________________________________ 01392 void TPrincipal::MakePrincipals() 01393 { 01394 // Perform the principal components analysis. 01395 // This is done in several stages in the TMatrix::EigenVectors method: 01396 // * Transform the covariance matrix into a tridiagonal matrix. 01397 // * Find the eigenvalues and vectors of the tridiagonal matrix. 01398 01399 // Normalize covariance matrix 01400 MakeNormalised(); 01401 01402 TMatrixDSym sym; sym.Use(fCovarianceMatrix.GetNrows(),fCovarianceMatrix.GetMatrixArray()); 01403 TMatrixDSymEigen eigen(sym); 01404 fEigenVectors = eigen.GetEigenVectors(); 01405 fEigenValues = eigen.GetEigenValues(); 01406 //make sure that eigenvalues are positive 01407 for (Int_t i = 0; i < fNumberOfVariables; i++) { 01408 if (fEigenValues[i] < 0) fEigenValues[i] = -fEigenValues[i]; 01409 } 01410 } 01411 01412 //____________________________________________________________________ 01413 void TPrincipal::MakeRealCode(const char *filename, const char *classname, 01414 Option_t *) 01415 { 01416 // PRIVATE METHOD: 01417 // This is the method that actually generates the code for the 01418 // transformations to and from feature space and pattern space 01419 // It's called by TPrincipal::MakeCode and TPrincipal::MakeMethods. 01420 // 01421 // The options are: NONE so far 01422 01423 Bool_t isMethod = (classname[0] == '\0' ? kFALSE : kTRUE); 01424 const char *prefix = (isMethod ? Form("%s::", classname) : ""); 01425 const char *cv_qual = (isMethod ? "" : "static "); 01426 01427 ofstream outFile(filename,ios::out|ios::trunc); 01428 if (!outFile) { 01429 Error("MakeRealCode","couldn't open output file '%s'",filename); 01430 return; 01431 } 01432 01433 cout << "Writing on file \"" << filename << "\" ... 
" << flush; 01434 // 01435 // Write header of file 01436 // 01437 // Emacs mode line ;-) 01438 outFile << "// -*- mode: c++ -*-" << endl; 01439 // Info about creator 01440 outFile << "// " << endl 01441 << "// File " << filename 01442 << " generated by TPrincipal::MakeCode" << endl; 01443 // Time stamp 01444 TDatime date; 01445 outFile << "// on " << date.AsString() << endl; 01446 // ROOT version info 01447 outFile << "// ROOT version " << gROOT->GetVersion() 01448 << endl << "//" << endl; 01449 // General information on the code 01450 outFile << "// This file contains the functions " << endl 01451 << "//" << endl 01452 << "// void " << prefix 01453 << "X2P(Double_t *x, Double_t *p); " << endl 01454 << "// void " << prefix 01455 << "P2X(Double_t *p, Double_t *x, Int_t nTest);" 01456 << endl << "//" << endl 01457 << "// The first for transforming original data x in " << endl 01458 << "// pattern space, to principal components p in " << endl 01459 << "// feature space. The second function is for the" << endl 01460 << "// inverse transformation, but using only nTest" << endl 01461 << "// of the principal components in the expansion" << endl 01462 << "// " << endl 01463 << "// See TPrincipal class documentation for more " 01464 << "information " << endl << "// " << endl; 01465 // Header files 01466 outFile << "#ifndef __CINT__" << endl; 01467 if (isMethod) 01468 // If these are methods, we need the class header 01469 outFile << "#include \"" << classname << ".h\"" << endl; 01470 else 01471 // otherwise, we need the typedefs of Int_t and Double_t 01472 outFile << "#include <Rtypes.h> // needed for Double_t etc" << endl; 01473 // Finish the preprocessor block 01474 outFile << "#endif" << endl << endl; 01475 01476 // 01477 // Now for the data 01478 // 01479 // We make the Eigenvector matrix, Eigenvalue vector, Sigma vector, 01480 // and Mean value vector static, since all are needed in both 01481 // functions. 
Also ,the number of variables are stored in a static 01482 // variable. 01483 outFile << "//" << endl 01484 << "// Static data variables" << endl 01485 << "//" << endl; 01486 outFile << cv_qual << "Int_t " << prefix << "gNVariables = " 01487 << fNumberOfVariables << ";" << endl; 01488 01489 // Assign the values to the Eigenvector matrix. The elements are 01490 // stored row-wise, that is element 01491 // M[i][j] = e[i * nVariables + j] 01492 // where i and j are zero-based. 01493 outFile << endl << "// Assignment of eigenvector matrix." << endl 01494 << "// Elements are stored row-wise, that is" << endl 01495 << "// M[i][j] = e[i * nVariables + j] " << endl 01496 << "// where i and j are zero-based" << endl; 01497 outFile << cv_qual << "Double_t " << prefix 01498 << "gEigenVectors[] = {" << flush; 01499 Int_t i,j; 01500 for (i = 0; i < fNumberOfVariables; i++) { 01501 for (j = 0; j < fNumberOfVariables; j++) { 01502 Int_t index = i * fNumberOfVariables + j; 01503 outFile << (index != 0 ? "," : "" ) << endl 01504 << " " << fEigenVectors(i,j) << flush; 01505 } 01506 } 01507 outFile << "};" << endl << endl; 01508 01509 // Assignment to eigenvalue vector. Zero-based. 01510 outFile << "// Assignment to eigen value vector. Zero-based." << endl; 01511 outFile << cv_qual << "Double_t " << prefix 01512 << "gEigenValues[] = {" << flush; 01513 for (i = 0; i < fNumberOfVariables; i++) 01514 outFile << (i != 0 ? "," : "") << endl 01515 << " " << fEigenValues(i) << flush; 01516 outFile << endl << "};" << endl << endl; 01517 01518 // Assignment to mean Values vector. Zero-based. 01519 outFile << "// Assignment to mean value vector. Zero-based." << endl; 01520 outFile << cv_qual << "Double_t " << prefix 01521 << "gMeanValues[] = {" << flush; 01522 for (i = 0; i < fNumberOfVariables; i++) 01523 outFile << (i != 0 ? "," : "") << endl 01524 << " " << fMeanValues(i) << flush; 01525 outFile << endl << "};" << endl << endl; 01526 01527 // Assignment to mean Values vector. Zero-based. 
01528 outFile << "// Assignment to sigma value vector. Zero-based." << endl; 01529 outFile << cv_qual << "Double_t " << prefix 01530 << "gSigmaValues[] = {" << flush; 01531 for (i = 0; i < fNumberOfVariables; i++) 01532 outFile << (i != 0 ? "," : "") << endl 01533 << " " << (fIsNormalised ? fSigmas(i) : 1) << flush; 01534 // << " " << fSigmas(i) << flush; 01535 outFile << endl << "};" << endl << endl; 01536 01537 // 01538 // Finally we reach the functions themselves 01539 // 01540 // First: void x2p(Double_t *x, Double_t *p); 01541 // 01542 outFile << "// " << endl 01543 << "// The " 01544 << (isMethod ? "method " : "function ") 01545 << " void " << prefix 01546 << "X2P(Double_t *x, Double_t *p)" 01547 << endl << "// " << endl; 01548 outFile << "void " << prefix 01549 << "X2P(Double_t *x, Double_t *p) {" << endl 01550 << " for (Int_t i = 0; i < gNVariables; i++) {" << endl 01551 << " p[i] = 0;" << endl 01552 << " for (Int_t j = 0; j < gNVariables; j++)" << endl 01553 << " p[i] += (x[j] - gMeanValues[j]) " << endl 01554 << " * gEigenVectors[j * gNVariables + i] " 01555 << "/ gSigmaValues[j];" << endl << endl << " }" 01556 << endl << "}" << endl << endl; 01557 // 01558 // Now: void p2x(Double_t *p, Double_t *x, Int_t nTest); 01559 // 01560 outFile << "// " << endl << "// The " 01561 << (isMethod ? 
"method " : "function ") 01562 << " void " << prefix 01563 << "P2X(Double_t *p, Double_t *x, Int_t nTest)" 01564 << endl << "// " << endl; 01565 outFile << "void " << prefix 01566 << "P2X(Double_t *p, Double_t *x, Int_t nTest) {" << endl 01567 << " for (Int_t i = 0; i < gNVariables; i++) {" << endl 01568 << " x[i] = gMeanValues[i];" << endl 01569 << " for (Int_t j = 0; j < nTest; j++)" << endl 01570 << " x[i] += p[j] * gSigmaValues[i] " << endl 01571 << " * gEigenVectors[i * gNVariables + j];" << endl 01572 << " }" << endl << "}" << endl << endl; 01573 01574 // EOF 01575 outFile << "// EOF for " << filename << endl; 01576 01577 // Close the file 01578 outFile.close(); 01579 01580 cout << "done" << endl; 01581 } 01582 01583 //____________________________________________________________________ 01584 void TPrincipal::P2X(const Double_t *p, Double_t *x, Int_t nTest) 01585 { 01586 // Calculate x as a function of nTest of the most significant 01587 // principal components p, and return it in x. 01588 // It's the users responsibility to make sure that both x and p are 01589 // of the right size (i.e., memory must be allocated for x). 01590 01591 for (Int_t i = 0; i < fNumberOfVariables; i++){ 01592 x[i] = fMeanValues(i); 01593 for (Int_t j = 0; j < nTest; j++) 01594 x[i] += p[j] * (fIsNormalised ? 
fSigmas(i) : 1) 01595 * fEigenVectors(i,j); 01596 } 01597 01598 } 01599 01600 //____________________________________________________________________ 01601 void TPrincipal::Print(Option_t *opt) const 01602 { 01603 // Print the statistics 01604 // Options are 01605 // M Print mean values of original data 01606 // S Print sigma values of original data 01607 // E Print eigenvalues of covariance matrix 01608 // V Print eigenvectors of covariance matrix 01609 // Default is MSE 01610 01611 Bool_t printV = kFALSE; 01612 Bool_t printM = kFALSE; 01613 Bool_t printS = kFALSE; 01614 Bool_t printE = kFALSE; 01615 01616 Int_t len = strlen(opt); 01617 for (Int_t i = 0; i < len; i++) { 01618 switch (opt[i]) { 01619 case 'V': 01620 case 'v': 01621 printV = kTRUE; 01622 break; 01623 case 'M': 01624 case 'm': 01625 printM = kTRUE; 01626 break; 01627 case 'S': 01628 case 's': 01629 printS = kTRUE; 01630 break; 01631 case 'E': 01632 case 'e': 01633 printE = kTRUE; 01634 break; 01635 default: 01636 Warning("Print", "Unknown option '%c'",opt[i]); 01637 break; 01638 } 01639 } 01640 01641 if (printM||printS||printE) { 01642 cout << " Variable # " << flush; 01643 if (printM) 01644 cout << "| Mean Value " << flush; 01645 if (printS) 01646 cout << "| Sigma " << flush; 01647 if (printE) 01648 cout << "| Eigenvalue" << flush; 01649 cout << endl; 01650 01651 cout << "-------------" << flush; 01652 if (printM) 01653 cout << "+------------" << flush; 01654 if (printS) 01655 cout << "+------------" << flush; 01656 if (printE) 01657 cout << "+------------" << flush; 01658 cout << endl; 01659 01660 for (Int_t i = 0; i < fNumberOfVariables; i++) { 01661 cout << setw(12) << i << " " << flush; 01662 if (printM) 01663 cout << "| " << setw(10) << setprecision(4) 01664 << fMeanValues(i) << " " << flush; 01665 if (printS) 01666 cout << "| " << setw(10) << setprecision(4) 01667 << fSigmas(i) << " " << flush; 01668 if (printE) 01669 cout << "| " << setw(10) << setprecision(4) 01670 << fEigenValues(i) << " " 
<< flush; 01671 cout << endl; 01672 } 01673 cout << endl; 01674 } 01675 01676 if(printV) { 01677 for (Int_t i = 0; i < fNumberOfVariables; i++) { 01678 cout << "Eigenvector # " << i << flush; 01679 TVectorD v(fNumberOfVariables); 01680 v = TMatrixDColumn_const(fEigenVectors,i); 01681 v.Print(); 01682 } 01683 } 01684 } 01685 01686 //____________________________________________________________________ 01687 void TPrincipal::SumOfSquareResiduals(const Double_t *x, Double_t *s) 01688 { 01689 // PRIVATE METHOD: 01690 // Begin_html 01691 /* 01692 </PRE> 01693 Calculates the sum of the square residuals, that is 01694 <BR><P></P> 01695 <DIV ALIGN="CENTER"> 01696 01697 <!-- MATH 01698 \begin{displaymath} 01699 E_N = \sum_{i=0}^{P-1} \left(x_i - x^\prime_i\right)^2 01700 \end{displaymath} 01701 --> 01702 01703 01704 <IMG 01705 WIDTH="147" HEIGHT="58" BORDER="0" 01706 SRC="gif/principal_img52.gif" 01707 ALT="\begin{displaymath} 01708 E_N = \sum_{i=0}^{P-1} \left(x_i - x^\prime_i\right)^2 01709 \end{displaymath}"> 01710 </DIV> 01711 <BR CLEAR="ALL"> 01712 <P></P> 01713 where 01714 <!-- MATH 01715 $x^\prime_i = \sum_{j=i}^N p_i e_{n_j}$ 01716 --> 01717 <IMG 01718 WIDTH="122" HEIGHT="40" ALIGN="MIDDLE" BORDER="0" 01719 SRC="gif/principal_img53.gif" 01720 ALT="$x^\prime_i = \sum_{j=i}^N p_i e_{n_j}$">, <IMG 01721 WIDTH="19" HEIGHT="30" ALIGN="MIDDLE" BORDER="0" 01722 SRC="gif/principal_img54.gif" 01723 ALT="$p_i$"> is the 01724 <IMG 01725 WIDTH="28" HEIGHT="23" ALIGN="BOTTOM" BORDER="0" 01726 SRC="gif/principal_img55.gif" 01727 ALT="$i^{\mbox{th}}$"> component of the principal vector, corresponding to 01728 <IMG 01729 WIDTH="20" HEIGHT="30" ALIGN="MIDDLE" BORDER="0" 01730 SRC="gif/principal_img56.gif" 01731 ALT="$x_i$">, the original data; I.e., the square distance to the space 01732 spanned by <IMG 01733 WIDTH="20" HEIGHT="15" ALIGN="BOTTOM" BORDER="0" 01734 SRC="gif/principal_img12.gif" 01735 ALT="$N$"> eigenvectors. 
01736 <BR> 01737 <PRE> 01738 */ 01739 // End_Html 01740 if (!x) 01741 return; 01742 01743 Double_t p[100]; 01744 Double_t xp[100]; 01745 01746 X2P(x,p); 01747 for (Int_t i = fNumberOfVariables-1; i >= 0; i--) { 01748 P2X(p,xp,i); 01749 for (Int_t j = 0; j < fNumberOfVariables; j++) { 01750 s[i] += (x[j] - xp[j])*(x[j] - xp[j]); 01751 } 01752 } 01753 } 01754 01755 //____________________________________________________________________ 01756 void TPrincipal::Test(Option_t *) 01757 { 01758 // Test the PCA, bye calculating the sum square of residuals 01759 // (see method SumOfSquareResiduals), and display the histogram 01760 MakeHistograms("pca","S"); 01761 01762 if (!fStoreData) 01763 return; 01764 01765 TH1 *pca_s = 0; 01766 if (fHistograms) pca_s = (TH1*)fHistograms->FindObject("pca_s"); 01767 if (!pca_s) { 01768 Warning("Test", "Couldn't get histogram of square residuals"); 01769 return; 01770 } 01771 01772 pca_s->Draw(); 01773 } 01774 01775 //____________________________________________________________________ 01776 void TPrincipal::X2P(const Double_t *x, Double_t *p) 01777 { 01778 // Calculate the principal components from the original data vector 01779 // x, and return it in p. 01780 // It's the users responsibility to make sure that both x and p are 01781 // of the right size (i.e., memory must be allocated for p). 01782 for (Int_t i = 0; i < fNumberOfVariables; i++){ 01783 p[i] = 0; 01784 for (Int_t j = 0; j < fNumberOfVariables; j++) 01785 p[i] += (x[j] - fMeanValues(j)) * fEigenVectors(j,i) / 01786 (fIsNormalised ? fSigmas(j) : 1); 01787 } 01788 01789 }