TMVARegression.cxx

Go to the documentation of this file.
00001 // @(#)root/tmva $Id: TMVARegression.cxx 37399 2010-12-08 15:22:07Z evt $
00002 /**********************************************************************************
00003  * Project   : TMVA - a Root-integrated toolkit for multivariate data analysis    *
00004  * Package   : TMVA                                                               *
00005  * Executable: TMVARegression                                                     *
00006  *                                                                                *
00007  * This executable provides examples for the training and testing of the          *
00008  * TMVA regression methods.                                                       *
00009  *                                                                                *
00010  * The input data is a toy-MC sample consisting of four Gaussian-distributed      *
00011  * and linearly correlated input variables.                                       *
00012  *                                                                                *
00013  * The methods to be used can be switched on and off by means of booleans.        *
00014  *                                                                                *
00015  * Compile and run the example with the following commands                        *
00016  *                                                                                *
00017  *    make                                                                        *
00018  *    ./TMVARegression <Methods>                                                  *
00019  *                                                                                *
00020  * where: <Methods> = "method1 method2"                                           *
00021  *        are the TMVA classifier names                                           *
00022  *                                                                                *
00023  * example:                                                                       *
00024  *    ./TMVARegression LD MLP                                                     *
00025  *                                                                                *
00026  * If no method given, a default set is used.                                     *
00027  *                                                                                *
00028  * The output file "TMVAReg.root" can be analysed with the use of dedicated       *
00029  * macros (simply say: root -l <../macros/macro.C>), which can be conveniently    *
00030  * invoked through a GUI launched by the command                                  *
00031  *                                                                                *
00032  *    root -l ../macros/TMVAGui.C                                                 *
00033  **********************************************************************************/
00034 
00035 #include <cstdlib>
00036 #include <iostream> 
00037 #include <map>
00038 #include <string>
00039 
00040 #include "TChain.h"
00041 #include "TFile.h"
00042 #include "TTree.h"
00043 #include "TString.h"
00044 #include "TObjString.h"
00045 #include "TSystem.h"
00046 #include "TROOT.h"
00047 
00048 #include "TMVA/Factory.h"
00049 #include "TMVA/Tools.h"
00050 
00051 int main( int argc, char** argv ) 
00052 {
00053    // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
00054    // if you use your private .rootrc, or run from a different directory, please copy the 
00055    // corresponding lines from .rootrc
00056 
00057    // methods to be processed can be given as an argument; use format:
00058    //
00059    // mylinux~> root -l TMVARegression.C\(\"myMethod1,myMethod2,myMethod3\"\)
00060    //
00061 
00062    //---------------------------------------------------------------
00063    // Default MVA methods to be trained + tested
00064    std::map<std::string,int> Use;
00065 
00066    // --- Mutidimensional likelihood and Nearest-Neighbour methods
00067    Use["PDERS"]           = 0;
00068    Use["PDEFoam"]         = 1; 
00069    Use["KNN"]             = 1;
00070    // 
00071    // --- Linear Discriminant Analysis
00072    Use["LD"]                    = 1;
00073    // 
00074    // --- Function Discriminant analysis
00075    Use["FDA_GA"]          = 1;
00076    Use["FDA_MC"]          = 0;
00077    Use["FDA_MT"]          = 0;
00078    Use["FDA_GAMT"]        = 0;
00079    // 
00080    // --- Neural Network
00081    Use["MLP"]             = 1; 
00082    // 
00083    // --- Support Vector Machine 
00084    Use["SVM"]             = 0;
00085    // 
00086    // --- Boosted Decision Trees
00087    Use["BDT"]             = 1;
00088    Use["BDTG"]            = 0;
00089    // ---------------------------------------------------------------
00090 
00091    std::cout << std::endl;
00092    std::cout << "==> Start TMVARegression" << std::endl;
00093 
00094    // Select methods (don't look at this code - not of interest)
00095    if (argc>1) for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;
00096    for (int i=1; i<argc; i++) {
00097       std::string regMethod(argv[i]);
00098       if (Use.find(regMethod) == Use.end()) {
00099          std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
00100          for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
00101          std::cout << std::endl;
00102          return 1;
00103       }
00104       Use[regMethod] = 1;
00105    }
00106 
00107    // --------------------------------------------------------------------------------------------------
00108 
00109    // --- Here the preparation phase begins
00110 
00111    // Create a new root output file
00112    TString outfileName( "TMVAReg.root" );
00113    TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
00114 
00115    // Create the factory object. Later you can choose the methods
00116    // whose performance you'd like to investigate. The factory will
00117    // then run the performance analysis for you.
00118    //
00119    // The first argument is the base of the name of all the
00120    // weightfiles in the directory weight/ 
00121    //
00122    // The second argument is the output file for the training results
00123    // All TMVA output can be suppressed by removing the "!" (not) in 
00124    // front of the "Silent" argument in the option string
00125    TMVA::Factory *factory = new TMVA::Factory( "TMVARegression", outputFile, 
00126                                                "!V:!Silent:Color:DrawProgressBar" );
00127 
00128    // If you wish to modify default settings 
00129    // (please check "src/Config.h" to see all available global options)
00130    //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
00131    //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";
00132 
00133    // Define the input variables that shall be used for the MVA training
00134    // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
00135    // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
00136    factory->AddVariable( "var1", "Variable 1", "units", 'F' );
00137    factory->AddVariable( "var2", "Variable 2", "units", 'F' );
00138 
00139    // You can add so-called "Spectator variables", which are not used in the MVA training, 
00140    // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
00141    // input variables, the response values of all trained MVAs, and the spectator variables
00142    factory->AddSpectator( "spec1:=var1*2",  "Spectator 1", "units", 'F' );
00143    factory->AddSpectator( "spec2:=var1*3",  "Spectator 2", "units", 'F' );
00144 
00145    // Add the variable carrying the regression target
00146    factory->AddTarget( "fvalue" ); 
00147 
00148    // It is also possible to declare additional targets for multi-dimensional regression, ie:
00149    // -- factory->AddTarget( "fvalue2" );
00150    // BUT: this is currently ONLY implemented for MLP
00151 
00152    // Read training and test data (see TMVAClassification for reading ASCII files)
00153    // load the signal and background event samples from ROOT trees
00154    TFile *input(0);
00155    TString fname = "./tmva_reg_example.root";
00156    if (!gSystem->AccessPathName( fname )) 
00157       input = TFile::Open( fname ); // check if file in local directory exists
00158    else 
00159       input = TFile::Open( "http://root.cern.ch/files/tmva_reg_example.root" ); // if not: download from ROOT server
00160    
00161    if (!input) {
00162       std::cout << "ERROR: could not open data file" << std::endl;
00163       exit(1);
00164    }
00165    std::cout << "--- TMVARegression           : Using input file: " << input->GetName() << std::endl;
00166 
00167    // --- Register the regression tree
00168 
00169    TTree *regTree = (TTree*)input->Get("TreeR");
00170 
00171    // global event weights per tree (see below for setting event-wise weights)
00172    Double_t regWeight  = 1.0;   
00173 
00174    // You can add an arbitrary number of regression trees
00175    factory->AddRegressionTree( regTree, regWeight );
00176 
00177    // This would set individual event weights (the variables defined in the 
00178    // expression need to exist in the original TTree)
00179    factory->SetWeightExpression( "var1", "Regression" );
00180 
00181    // Apply additional cuts on the signal and background samples (can be different)
00182    TCut mycut = ""; // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1";
00183 
00184    // tell the factory to use all remaining events in the trees after training for testing:
00185    factory->PrepareTrainingAndTestTree( mycut, 
00186                                         "nTrain_Regression=0:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents:!V" );
00187 
00188    // If no numbers of events are given, half of the events in the tree are used 
00189    // for training, and the other half for testing:
00190    //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );  
00191 
00192    // ---- Book MVA methods
00193    //
00194    // please lookup the various method configuration options in the corresponding cxx files, eg:
00195    // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
00196    // it is possible to preset ranges in the option string in which the cut optimisation should be done:
00197    // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable
00198 
00199    // PDE - RS method
00200    if (Use["PDERS"])
00201       factory->BookMethod( TMVA::Types::kPDERS, "PDERS", 
00202                            "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" );
00203    // And the options strings for the MinMax and RMS methods, respectively:
00204    //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );   
00205    //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );   
00206 
00207    if (Use["PDEFoam"])
00208        factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", 
00209                             "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" );
00210 
00211    // K-Nearest Neighbour classifier (KNN)
00212    if (Use["KNN"])
00213       factory->BookMethod( TMVA::Types::kKNN, "KNN", 
00214                            "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );
00215 
00216    // Linear discriminant
00217    if (Use["LD"])
00218       factory->BookMethod( TMVA::Types::kLD, "LD", 
00219                            "!H:!V:VarTransform=None" );
00220 
00221         // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
00222    if (Use["FDA_MC"]) 
00223       factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
00224                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" );
00225    
00226    if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options) .. the formula of this example is good for parabolas
00227       factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
00228                            "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" );
00229 
00230    if (Use["FDA_MT"]) 
00231       factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
00232                            "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );
00233 
00234    if (Use["FDA_GAMT"]) 
00235       factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
00236                            "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );
00237 
00238    // Neural network (MLP)
00239    if (Use["MLP"])
00240       factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" );
00241 
00242    // Support Vector Machine
00243    if (Use["SVM"])
00244       factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );
00245 
00246    // Boosted Decision Trees
00247    if (Use["BDT"])
00248      factory->BookMethod( TMVA::Types::kBDT, "BDT",
00249                            "!H:!V:NTrees=100:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" );
00250 
00251    if (Use["BDTG"])
00252      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
00253                            "!H:!V:NTrees=2000::BoostType=Grad:Shrinkage=0.1:UseBaggedGrad:GradBaggingFraction=0.5nCuts=20:MaxDepth=3:NNodesMax=15" );
00254    // --------------------------------------------------------------------------------------------------
00255 
00256    // ---- Now you can tell the factory to train, test, and evaluate the MVAs
00257 
00258    // Train MVAs using the set of training events
00259    factory->TrainAllMethods();
00260 
00261    // ---- Evaluate all MVAs using the set of test events
00262    factory->TestAllMethods();
00263 
00264    // ----- Evaluate and compare performance of all configured MVAs
00265    factory->EvaluateAllMethods();    
00266 
00267    // --------------------------------------------------------------
00268    
00269    // Save the output
00270    outputFile->Close();
00271 
00272    std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
00273    std::cout << "==> TMVARegression is done!" << std::endl;      
00274 
00275    delete factory;
00276 
00277    std::cout << std::endl;
00278    std::cout << "==> Too view the results, launch the GUI: \"root -l TMVARegGui.C\"" << std::endl;
00279    std::cout << std::endl;
00280 }
00281 

Generated on Tue Jul 5 15:26:37 2011 for ROOT_528-00b_version by  doxygen 1.5.1