principal.C

Go to the documentation of this file.
00001 #include "TPrincipal.h"
00002 #include "iomanip.h"
00003    
00004 void principal(Int_t n=10, Int_t m=10000) 
00005 {
00006   // 
00007   // Principal Components Analysis (PCA) example
00008   // 
00009   // Example of using TPrincipal as a stand alone class. 
00010   // 
00011   // We create n-dimensional data points, where c = trunc(n / 5) + 1
00012   // are  correlated with the rest n - c randomly distributed variables. 
00013   //
00014   // Here's the plot of the eigenvalues Begin_Html
00015   // <IMG SRC="gif/principal_eigen.gif">
00016   // End_Html
00017   //Authors: Rene Brun, Christian Holm Christensen
00018     
00019   Int_t c = n / 5 + 1;
00020 
00021   cout << "*************************************************" << endl; 
00022   cout << "*         Principal Component Analysis          *" << endl;
00023   cout << "*                                               *" << endl;
00024   cout << "*  Number of variables:           " << setw(4) << n 
00025        << "          *" << endl;
00026   cout << "*  Number of data points:         " << setw(8) << m
00027        << "      *" << endl;
00028   cout << "*  Number of dependent variables: " << setw(4) << c
00029        << "          *" << endl;
00030   cout << "*                                               *" << endl;
00031   cout << "*************************************************" << endl; 
00032   
00033       
00034   // Initilase the TPrincipal object. Use the empty string for the
00035   // final argument, if you don't wan't the covariance
00036   // matrix. Normalising the covariance matrix is a good idea if your
00037   // variables have different orders of magnitude. 
00038   TPrincipal* principal = new TPrincipal(n,"ND");
00039   
00040   // Use a pseudo-random number generator
00041   TRandom* random = new TRandom;
00042   
00043   // Make the m data-points
00044   // Make a variable to hold our data
00045   // Allocate memory for the data point
00046   Double_t* data = new Double_t[n];
00047   for (Int_t i = 0; i < m; i++) {
00048 
00049     // First we create the un-correlated, random variables, according
00050     // to one of three distributions 
00051     for (Int_t j = 0; j < n - c; j++) {
00052       if (j % 3 == 0)
00053         data[j] = random->Gaus(5,1);
00054       else if (j % 3 == 1)
00055         data[j] = random->Poisson(8);
00056       else 
00057         data[j] = random->Exp(2);
00058     }
00059 
00060     // Then we create the correlated variables
00061     for (Int_t j = 0 ; j < c; j++) {
00062       data[n - c + j] = 0;
00063       for (Int_t k = 0; k < n - c - j; k++)
00064         data[n - c + j] += data[k];
00065     }
00066     
00067     // Finally we're ready to add this datapoint to the PCA
00068     principal->AddRow(data);
00069   }
00070     
00071   // We delete the data after use, since TPrincipal got it by now. 
00072   delete [] data;
00073   
00074   // Do the actual analysis
00075   principal->MakePrincipals();
00076   
00077   // Print out the result on
00078   principal->Print();
00079 
00080   // Test the PCA 
00081   principal->Test();
00082 
00083   // Make some histograms of the orginal, principal, residue, etc data 
00084   principal->MakeHistograms();
00085   
00086   // Make two functions to map between feature and pattern space 
00087   principal->MakeCode();
00088 
00089   // Start a browser, so that we may browse the histograms generated
00090   // above 
00091   TBrowser* b = new TBrowser("principalBrowser", principal);
00092   
00093 }

Generated on Tue Jul 5 15:44:50 2011 for ROOT_528-00b_version by  doxygen 1.5.1