TMLPAnalyzer.cxx

// @(#)root/mlp:$Id: TMLPAnalyzer.cxx 35902 2010-09-30 10:37:36Z brun $
// Author: Christophe.Delaere@cern.ch   25/04/04

/*************************************************************************
 * Copyright (C) 1995-2003, Rene Brun and Fons Rademakers.               *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

///////////////////////////////////////////////////////////////////////////
//
// TMLPAnalyzer
//
// This utility class contains a set of tests useful when developing
// a neural network.
// It allows you to check for unneeded variables, and to control
// the network structure.
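//
// A minimal usage sketch (assuming an existing, trained
// TMultiLayerPerceptron *mlp; the names below are illustrative):
//
//    TMLPAnalyzer ana(mlp);
//    ana.GatherInformations();  // must be called before any analysis
//    ana.CheckNetwork();        // prints a summary to the terminal
//    ana.DrawDInputs();         // impact of each input on the output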
//
///////////////////////////////////////////////////////////////////////////

#include "TROOT.h"
#include "TSynapse.h"
#include "TNeuron.h"
#include "TMultiLayerPerceptron.h"
#include "TMLPAnalyzer.h"
#include "TTree.h"
#include "TTreeFormula.h"
#include "TEventList.h"
#include "TH1D.h"
#include "TProfile.h"
#include "THStack.h"
#include "TLegend.h"
#include "TPad.h"
#include "TCanvas.h"
#include "TGaxis.h"
#include "TRegexp.h"
#include "TMath.h"
#include "Riostream.h"
#include <stdlib.h>
#include <float.h>   // for FLT_MAX, used in GatherInformations()

ClassImp(TMLPAnalyzer)

//______________________________________________________________________________
TMLPAnalyzer::~TMLPAnalyzer()
{
   // Destructor
   delete fAnalysisTree;
   delete fIOTree;
}

//______________________________________________________________________________
Int_t TMLPAnalyzer::GetLayers()
{
   // Returns the number of layers.

   TString fStructure = fNetwork->GetStructure();
   return fStructure.CountChar(':')+1;
}

//______________________________________________________________________________
Int_t TMLPAnalyzer::GetNeurons(Int_t layer)
{
   // Returns the number of neurons in the given layer.
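   // The network structure string has the form "in1,in2,...:h1:h2:...:out"
   // (e.g. "x,y:10:5:type" for a hypothetical network with two inputs,
   // hidden layers of 10 and 5 neurons, and one output), so the count is
   // parsed from the corresponding field.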

   if(layer==1) {
      TString fStructure = fNetwork->GetStructure();
      TString input      = TString(fStructure(0, fStructure.First(':')));
      return input.CountChar(',')+1;
   }
   else if(layer==GetLayers()) {
      TString fStructure = fNetwork->GetStructure();
      TString output = TString(fStructure(fStructure.Last(':') + 1,
                               fStructure.Length() - fStructure.Last(':')));
      return output.CountChar(',')+1;
   }
   else {
      Int_t cnt=1;
      TString fStructure = fNetwork->GetStructure();
      TString hidden = TString(fStructure(fStructure.First(':') + 1,
                               fStructure.Last(':') - fStructure.First(':') - 1));
      Int_t beg = 0;
      Int_t end = hidden.Index(":", beg + 1);
      Int_t num = 0;
      while (end != -1) {
         num = atoi(TString(hidden(beg, end - beg)).Data());
         cnt++;
         beg = end + 1;
         end = hidden.Index(":", beg + 1);
         if(layer==cnt) return num;
      }
      num = atoi(TString(hidden(beg, hidden.Length() - beg)).Data());
      cnt++;
      if(layer==cnt) return num;
   }
   return -1;
}

//______________________________________________________________________________
TString TMLPAnalyzer::GetNeuronFormula(Int_t idx)
{
   // Returns the formula used as input for neuron (idx) in
   // the first layer.
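   // A leading '@' (which marks a normalized input in the network
   // structure string) is stripped from the returned formula.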

   TString fStructure = fNetwork->GetStructure();
   TString input      = TString(fStructure(0, fStructure.First(':')));
   Int_t beg = 0;
   Int_t end = input.Index(",", beg + 1);
   TString brName;
   Int_t cnt = 0;
   while (end != -1) {
      brName = TString(input(beg, end - beg));
      if (brName[0]=='@')
         brName = brName(1,brName.Length()-1);
      beg = end + 1;
      end = input.Index(",", beg + 1);
      if(cnt==idx) return brName;
      cnt++;
   }
   brName = TString(input(beg, input.Length() - beg));
   if (brName[0]=='@')
      brName = brName(1,brName.Length()-1);
   return brName;
}

//______________________________________________________________________________
const char* TMLPAnalyzer::GetInputNeuronTitle(Int_t in)
{
   // Returns the name of any neuron from the input layer
   TNeuron* neuron=(TNeuron*)fNetwork->fFirstLayer[in];
   return neuron ? neuron->GetName() : "NO SUCH NEURON";
}

//______________________________________________________________________________
const char* TMLPAnalyzer::GetOutputNeuronTitle(Int_t out)
{
   // Returns the name of any neuron from the output layer
   TNeuron* neuron=(TNeuron*)fNetwork->fLastLayer[out];
   return neuron ? neuron->GetName() : "NO SUCH NEURON";
}

//______________________________________________________________________________
void TMLPAnalyzer::CheckNetwork()
{
   // Prints some information about the network to the terminal.

   TString fStructure = fNetwork->GetStructure();
   cout << "Network with structure: " << fStructure.Data() << endl;
   cout << "inputs with low values in the differences plot may not be needed" << endl;
   // Checks whether some input variable is unneeded
   char var[64], sel[64];
   for (Int_t i = 0; i < GetNeurons(1); i++) {
      snprintf(var,64,"diff>>tmp%d",i);
      snprintf(sel,64,"inNeuron==%d",i);
      fAnalysisTree->Draw(var, sel, "goff");
      TH1F* tmp = (TH1F*)gDirectory->Get(Form("tmp%d",i));
      if (!tmp) continue;
      cout << GetInputNeuronTitle(i)
           << " -> " << tmp->GetMean()
           << " +/- " << tmp->GetRMS() << endl;
   }
}

//______________________________________________________________________________
void TMLPAnalyzer::GatherInformations()
{
   // Collects information about what is useful in the network.
   // This method has to be called first when analyzing a network.
   // Fills the two analysis trees.

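   // Each input is varied by +/- shift*RMS of that input; the induced change
   // of every network output is summed in quadrature (see the loops below).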
   Double_t shift = 0.1;
   TTree* data = fNetwork->fData;
   TEventList* test = fNetwork->fTest;
   Int_t nEvents = test->GetN();
   Int_t nn = GetNeurons(1);
   Double_t* params = new Double_t[nn];
   Double_t* rms    = new Double_t[nn];
   TTreeFormula** formulas = new TTreeFormula*[nn];
   Int_t* index = new Int_t[nn];
   TString formula;
   TRegexp re("{[0-9]+}$");
   Ssiz_t len = formula.Length();
   Ssiz_t pos = -1;
   Int_t i(0), j(0), k(0), l(0);
   for(i=0; i<nn; i++){
      formula = GetNeuronFormula(i);
      pos = re.Index(formula,&len);
      if(pos==-1 || len<3) {
         formulas[i] = new TTreeFormula(Form("NF%lu",(ULong_t)this),formula,data);
         index[i] = 0;
      }
      else {
         TString newformula(formula,pos);
         TString val = formula(pos+1,len-2);
         formulas[i] = new TTreeFormula(Form("NF%lu",(ULong_t)this),newformula,data);
         formula = newformula;
         index[i] = val.Atoi();
      }
      TH1D tmp("tmpb", "tmpb", 1, -FLT_MAX, FLT_MAX);
      data->Draw(Form("%s>>tmpb",formula.Data()),"","goff");
      rms[i]  = tmp.GetRMS();
   }
   Int_t inNeuron = 0;
   Double_t diff = 0.;
   if(fAnalysisTree) delete fAnalysisTree;
   fAnalysisTree = new TTree("result","analysis");
   fAnalysisTree->SetDirectory(0);
   fAnalysisTree->Branch("inNeuron",&inNeuron,"inNeuron/I");
   fAnalysisTree->Branch("diff",&diff,"diff/D");
   Int_t numOutNodes=GetNeurons(GetLayers());
   Double_t *outVal=new Double_t[numOutNodes];
   Double_t *trueVal=new Double_t[numOutNodes];

   delete fIOTree;
   fIOTree=new TTree("MLP_iotree","MLP_iotree");
   fIOTree->SetDirectory(0);
   TString leaflist;
   for (i=0; i<nn; i++)
      leaflist+=Form("In%d/D:",i);
   leaflist.Remove(leaflist.Length()-1);
   fIOTree->Branch("In", params, leaflist);

   leaflist="";
   for (i=0; i<numOutNodes; i++)
      leaflist+=Form("Out%d/D:",i);
   leaflist.Remove(leaflist.Length()-1);
   fIOTree->Branch("Out", outVal, leaflist);

   leaflist="";
   for (i=0; i<numOutNodes; i++)
      leaflist+=Form("True%d/D:",i);
   leaflist.Remove(leaflist.Length()-1);
   fIOTree->Branch("True", trueVal, leaflist);
   Double_t v1 = 0.;
   Double_t v2 = 0.;
   // Loop on the events in the test sample
   for(j=0; j< nEvents; j++) {
      fNetwork->GetEntry(test->GetEntry(j));
      // Loop on the neurons to evaluate
      for(k=0; k<GetNeurons(1); k++) {
         params[k] = formulas[k]->EvalInstance(index[k]);
      }
      for(k=0; k<GetNeurons(GetLayers()); k++) {
         outVal[k] = fNetwork->Evaluate(k,params);
         trueVal[k] = ((TNeuron*)fNetwork->fLastLayer[k])->GetBranch();
      }
      fIOTree->Fill();

      // Loop on the input neurons
      for (i = 0; i < GetNeurons(1); i++) {
         inNeuron = i;
         diff = 0;
         // Loop on the neurons in the output layer
         for(l=0; l<GetNeurons(GetLayers()); l++){
            params[i] += shift*rms[i];
            v1 = fNetwork->Evaluate(l,params);
            params[i] -= 2*shift*rms[i];
            v2 = fNetwork->Evaluate(l,params);
            diff += (v1-v2)*(v1-v2);
            // reset to the original value
            params[i] += shift*rms[i];
         }
         diff = TMath::Sqrt(diff);
         fAnalysisTree->Fill();
      }
   }
   delete[] params;
   delete[] rms;
   delete[] outVal;
   delete[] trueVal;
   delete[] index;
   for(i=0; i<GetNeurons(1); i++) delete formulas[i];
   delete [] formulas;
   fAnalysisTree->ResetBranchAddresses();
   fIOTree->ResetBranchAddresses();
}

//______________________________________________________________________________
void TMLPAnalyzer::DrawDInput(Int_t i)
{
   // Draws the distribution (on the test sample) of the
   // impact on the network output of a small variation of
   // the ith input.

   char sel[64];
   snprintf(sel,64, "inNeuron==%d", i);
   fAnalysisTree->Draw("diff", sel);
}

//______________________________________________________________________________
void TMLPAnalyzer::DrawDInputs()
{
   // Draws the distribution (on the test sample) of the
   // impact on the network output of a small variation of
   // each input.
   // DrawDInputs() draws something that approximates the distribution of the
   // derivative of the NN w.r.t. each input. This quantity is a common
   // measure of each input's importance in the network.
   //
   // What is done is to vary one input around its nominal value and to see
   // how the NN output changes. This is done for each entry in the sample and
   // produces a distribution.
   //
   // What you can learn from that is:
   // - is a given variable really useful, or is the network insensitive to it?
   // - is there any risk of a large systematic effect? Is the network
   //   extremely sensitive to small variations of any of the inputs?
   //
   // As you might understand, this is to be considered with care and can serve
   // as input for an "educated guess" when optimizing the network.
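   //
   // Concretely, for each test event and each input i, the quantity plotted is
   //    diff = sqrt( sum_l ( NN_l(x_i + 0.1*RMS_i) - NN_l(x_i - 0.1*RMS_i) )^2 )
   // with the sum running over the output neurons l (the shift of 0.1*RMS
   // is the one applied in GatherInformations()).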

   THStack* stack  = new THStack("differences","differences (impact of variables on ANN)");
   TLegend* legend = new TLegend(0.75,0.75,0.95,0.95);
   TH1F* tmp = 0;
   char var[64], sel[64];
   for(Int_t i = 0; i < GetNeurons(1); i++) {
      snprintf(var,64, "diff>>tmp%d", i);
      snprintf(sel,64, "inNeuron==%d", i);
      fAnalysisTree->Draw(var, sel, "goff");
      tmp = (TH1F*)gDirectory->Get(Form("tmp%d",i));
      if (!tmp) continue;
      tmp->SetDirectory(0);
      tmp->SetLineColor(i+1);
      stack->Add(tmp);
      legend->AddEntry(tmp,GetInputNeuronTitle(i),"l");
   }
   stack->Draw("nostack");
   legend->Draw();
   gPad->SetLogy();
}

//______________________________________________________________________________
void TMLPAnalyzer::DrawNetwork(Int_t neuron, const char* signal, const char* bg)
{
   // Draws the distribution of the neural network output for the given
   // output neuron. Two distributions are drawn, for events passing
   // respectively the "signal" and "background" cuts. Only the test
   // sample is used.
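   //
   // Example (a sketch with hypothetical cut strings; adapt them to the
   // branches of your own data tree):
   //    ana.DrawNetwork(0, "type==1", "type==0");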

   TTree* data = fNetwork->fData;
   TEventList* test = fNetwork->fTest;
   TEventList* current = data->GetEventList();
   data->SetEventList(test);
   THStack* stack = new THStack("__NNout_TMLPA",Form("Neural net output (neuron %d)",neuron));
   TH1F *bgh  = new TH1F("__bgh_TMLPA", "NN output", 50, -0.5, 1.5);
   TH1F *sigh = new TH1F("__sigh_TMLPA", "NN output", 50, -0.5, 1.5);
   bgh->SetDirectory(0);
   sigh->SetDirectory(0);
   Int_t nEvents = 0;
   Int_t j=0;
   // build event lists for signal and background
   TEventList* signal_list = new TEventList("__tmpSig_MLPA");
   TEventList* bg_list     = new TEventList("__tmpBkg_MLPA");
   data->Draw(">>__tmpSig_MLPA",signal,"goff");
   data->Draw(">>__tmpBkg_MLPA",bg,"goff");

   // fill the background
   nEvents = bg_list->GetN();
   for(j=0; j< nEvents; j++) {
      bgh->Fill(fNetwork->Result(bg_list->GetEntry(j),neuron));
   }
   // fill the signal
   nEvents = signal_list->GetN();
   for(j=0; j< nEvents; j++) {
      sigh->Fill(fNetwork->Result(signal_list->GetEntry(j),neuron));
   }
   // draw the result
   bgh->SetLineColor(kBlue);
   bgh->SetFillStyle(3008);
   bgh->SetFillColor(kBlue);
   sigh->SetLineColor(kRed);
   sigh->SetFillStyle(3003);
   sigh->SetFillColor(kRed);
   bgh->SetStats(0);
   sigh->SetStats(0);
   stack->Add(bgh);
   stack->Add(sigh);
   TLegend *legend = new TLegend(.75, .80, .95, .95);
   legend->AddEntry(bgh, "Background");
   legend->AddEntry(sigh,"Signal");
   stack->Draw("nostack");
   legend->Draw();
   // restore the default event list
   data->SetEventList(current);
   delete signal_list;
   delete bg_list;
}

//______________________________________________________________________________
TProfile* TMLPAnalyzer::DrawTruthDeviation(Int_t outnode /*=0*/,
                                           Option_t *option /*=""*/)
{
   // Create a profile of the difference of the MLP output minus the
   // true value for a given output node outnode, vs the true value for
   // outnode, for all test data events. This method is mainly useful
   // when doing regression analysis with the MLP (i.e. not classification,
   // but continuous truth values).
   // The resulting TProfile histogram is returned.
   // It is not drawn if option "goff" is specified.
   // Options are passed to TProfile::Draw
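   //
   // Example (a sketch; 0 denotes the first output node):
   //    TProfile* prof = ana.DrawTruthDeviation(0);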

   if (!fIOTree) GatherInformations();
   TString pipehist=Form("MLP_truthdev_%d",outnode);
   TString drawline;
   drawline.Form("Out.Out%d-True.True%d:True.True%d>>",
                 outnode, outnode, outnode);
   fIOTree->Draw(drawline+pipehist+"(20)", "", "goff prof");
   TProfile* h=(TProfile*)gDirectory->Get(pipehist);
   h->SetDirectory(0);
   const char* title=GetOutputNeuronTitle(outnode);
   if (title) {
      h->SetTitle(Form("#Delta(output - truth) vs. truth for %s",
                      title));
      h->GetXaxis()->SetTitle(title);
      h->GetYaxis()->SetTitle(Form("#Delta(output - truth) for %s", title));
   }
   if (!strstr(option,"goff"))
      h->Draw();
   return h;
}

//______________________________________________________________________________
THStack* TMLPAnalyzer::DrawTruthDeviations(Option_t *option /*=""*/)
{
   // Creates TProfiles of the difference of the MLP output minus the
   // true value vs the true value, one for each output, filled with the
   // test data events. This method is mainly useful when doing regression
   // analysis with the MLP (i.e. not classification, but continuous truth
   // values).
   // The returned THStack contains all the TProfiles. It is drawn unless
   // the option "goff" is specified.
   // Options are passed to TProfile::Draw.
   THStack *hs=new THStack("MLP_TruthDeviation",
                           "Deviation of MLP output from truth");

   // leg!=0 means we're drawing
   TLegend *leg=0;
   if (!option || !strstr(option,"goff"))
      leg=new TLegend(.4,.85,.95,.95,"#Delta(output - truth) vs. truth for:");

   const char* xAxisTitle=0;

   // create a profile for each output neuron,
   // adding them into the THStack and the TLegend
   for (Int_t outnode=0; outnode<GetNeurons(GetLayers()); outnode++) {
      TProfile* h=DrawTruthDeviation(outnode, "goff");
      h->SetLineColor(1+outnode);
      hs->Add(h, option);
      if (leg) leg->AddEntry(h,GetOutputNeuronTitle(outnode));
      if (!outnode)
         // X-axis title is the same for all, extract it from the first one.
         xAxisTitle=h->GetXaxis()->GetTitle();
   }

   if (leg) {
      hs->Draw("nostack");
      leg->Draw();
      // gotta draw before accessing the axes
      hs->GetXaxis()->SetTitle(xAxisTitle);
      hs->GetYaxis()->SetTitle("#Delta(output - truth)");
   }

   return hs;
}

//______________________________________________________________________________
TProfile* TMLPAnalyzer::DrawTruthDeviationInOut(Int_t innode,
                                                Int_t outnode /*=0*/,
                                                Option_t *option /*=""*/)
{
   // Creates a profile of the difference of the MLP output outnode minus
   // the true value of outnode vs the input value innode, for all test
   // data events.
   // The resulting TProfile histogram is returned.
   // It is not drawn if option "goff" is specified.
   // Options are passed to TProfile::Draw

   if (!fIOTree) GatherInformations();
   TString pipehist=Form("MLP_truthdev_i%d_o%d", innode, outnode);
   TString drawline;
   drawline.Form("Out.Out%d-True.True%d:In.In%d>>",
                 outnode, outnode, innode);
   fIOTree->Draw(drawline+pipehist+"(50)", "", "goff prof");
   TProfile* h=(TProfile*)gROOT->FindObject(pipehist);
   h->SetDirectory(0);
   const char* titleInNeuron=GetInputNeuronTitle(innode);
   const char* titleOutNeuron=GetOutputNeuronTitle(outnode);
   h->SetTitle(Form("#Delta(output - truth) of %s vs. input %s",
                    titleOutNeuron, titleInNeuron));
   h->GetXaxis()->SetTitle(Form("%s", titleInNeuron));
   h->GetYaxis()->SetTitle(Form("#Delta(output - truth) for %s",
                                titleOutNeuron));
   if (!strstr(option,"goff"))
      h->Draw(option);
   return h;
}

//______________________________________________________________________________
THStack* TMLPAnalyzer::DrawTruthDeviationInsOut(Int_t outnode /*=0*/,
                                                Option_t *option /*=""*/)
{
   // Creates a profile of the difference of the MLP output outnode minus the
   // true value of outnode vs the input value, stacked for all inputs, for
   // all test data events.
   // The returned THStack contains all the TProfiles. It is drawn unless
   // the option "goff" is specified.
   // Options are passed to TProfile::Draw.
   TString sName;
   sName.Form("MLP_TruthDeviationIO_%d", outnode);
   const char* outputNodeTitle=GetOutputNeuronTitle(outnode);
   THStack *hs=new THStack(sName,
                           Form("Deviation of MLP output %s from truth",
                                outputNodeTitle));

   // leg!=0 means we're drawing.
   TLegend *leg=0;
   if (!option || !strstr(option,"goff"))
      leg=new TLegend(.4,.75,.95,.95,
                      Form("#Delta(output - truth) of %s vs. input for:",
                           outputNodeTitle));

   // create a profile for each input neuron,
   // adding them into the THStack and the TLegend
   Int_t numInNodes=GetNeurons(1);
   Int_t innode=0;
   for (innode=0; innode<numInNodes; innode++) {
      TProfile* h=DrawTruthDeviationInOut(innode, outnode, "goff");
      h->SetLineColor(1+innode);
      hs->Add(h, option);
      if (leg) leg->AddEntry(h,h->GetXaxis()->GetTitle());
   }

   if (leg) {
      hs->Draw("nostack");
      leg->Draw();
      // gotta draw before accessing the axes
      hs->GetXaxis()->SetTitle("Input value");
      hs->GetYaxis()->SetTitle(Form("#Delta(output - truth) for %s",
                                 outputNodeTitle));
   }

   return hs;
}
