regression_averagedevs.C

Go to the documentation of this file.
00001 #include "tmvaglob.C"
00002 
00003 /*
00004 This macro plots the quadratic deviation of the estimate from the target value, averaged over the first Nevt events in the test sample (all events if Nevt=-1):
00005 a) normal average
00006 b) truncated average, using best 90%
00007  created January 2009, Eckhard von Toerne, University of Bonn, Germany
00008 */
00009 
00010 void regression_averagedevs(TString fin, Int_t Nevt=-1, Bool_t useTMVAStyle = kTRUE )
00011 {
00012    bool debug=false;
00013    if (Nevt <0)  Nevt=1000000; 
00014    Int_t type = 2;
00015    TMVAGlob::Initialize( useTMVAStyle );
00016    // checks if file with name "fin" is already open, and if not opens one
00017    TFile* file = TMVAGlob::OpenFile( fin );  
00018    TList jobDirList;
00019    TMVAGlob::GetListOfJobs(file,jobDirList);
00020    if (jobDirList.GetSize()==0) {
00021      cout << "error could not find jobs" << endl;
00022      return;
00023    }
00024    
00025    Bool_t __PLOT_LOGO__  = kTRUE;
00026    Bool_t __SAVE_IMAGE__ = kTRUE;
00027 
00028    //TDirectory* dir0 = (TDirectory*) (jobDirList.At(0));
00029    TDirectory* dir0 = (TDirectory*) (file->Get("InputVariables_Id"));   
00030    Int_t nTargets = TMVAGlob::GetNumberOfTargets( dir0);
00031 
00032    if (debug) cout << "found targets " << nTargets<<endl;
00033    TCanvas* c=0;
00034    for (Int_t itrgt = 0 ; itrgt < nTargets; itrgt++){
00035      if (debug) cout << "loop targets " << itrgt<<endl;
00036      TString xtit = "Method";
00037      TString ytit = "Average Quadratic Deviation";  
00038      TString ftit = ytit + " versus " + xtit + Form(" for target %d",itrgt);
00039      c = new TCanvas( Form("c%d",itrgt), ftit , 50+20*itrgt, 10*itrgt, 750, 650 );
00040      
00041      // global style settings
00042      c->SetGrid();
00043      c->SetTickx(1);
00044      c->SetTicky(0);
00045      c->SetTopMargin(0.28);
00046      c->SetBottomMargin(0.1);
00047      
00048      TString hNameRef(Form("regression_average_devs_target%d",itrgt));
00049      
00050      const Int_t maxMethods = 100;
00051      const Int_t maxTargets = 100;
00052      Float_t m[4][maxMethods]; // h0 train-all, h1 train-90%, h2 test-all, h3 test-90%
00053      Float_t em[4][maxMethods];
00054      Float_t x[4][maxMethods];
00055      Float_t ex[4][maxMethods];
00056 
00057      TIter next(&jobDirList);
00058      Float_t mymax=0., mymin=1.e40;
00059      TString mvaNames[maxMethods];
00060      TDirectory *jobDir;
00061      Int_t nMethods = 0;
00062      // loop over all methods
00063      while (jobDir = (TDirectory*)next()) {     
00064        TString methodTitle;
00065        TMVAGlob::GetMethodTitle(methodTitle,jobDir);
00066        mvaNames[nMethods]=methodTitle;
00067        if (debug) cout << "--- Found directory for method: " << methodTitle << endl;
00068        TIter keyIt(jobDir->GetListOfKeys());
00069        TKey *histKey;
00070        while ((histKey = (TKey*)keyIt())) {
00071          if (histKey->ReadObj()->InheritsFrom("TH1F") ){
00072            TString s(histKey->ReadObj()->GetName());
00073            if( !s.Contains("Quadr_Dev") ) continue;
00074            if( !s.Contains(Form("target_%d_",itrgt))) continue;
00075            Int_t ihist = 0 ;
00076            if( !s.Contains("best90perc") && s.Contains("train")) ihist=0;
00077            if( s.Contains("best90perc") && s.Contains("train")) ihist=1;
00078            if( !s.Contains("best90perc") && s.Contains("test")) ihist=2;
00079            if( s.Contains("best90perc") && s.Contains("test")) ihist=3; 
00080            if (debug) cout <<"using histogram" << s << ", ihist="<<ihist<<endl;
00081            TH1F* h = (TH1F*) (histKey->ReadObj());
00082            m[ihist][nMethods] = sqrt(h->GetMean());
00083            em[ihist][nMethods] = h->GetRMS()/(sqrt(h->GetEntries())*2.*h->GetMean());
00084            x[ihist][nMethods] = nMethods+0.44+0.12*ihist;
00085            ex[ihist][nMethods] = 0.001;
00086            mymax=  m[ihist][nMethods] > mymax ? m[ihist][nMethods] : mymax;
00087            mymin=  m[ihist][nMethods] < mymin ? m[ihist][nMethods] : mymin;
00088            if (debug) cout << "m"<< ihist << "="<<m[ihist][nMethods]<<endl;
00089          }
00090        }
00091        nMethods++;
00092      }
00093      TH1F* haveragedevs= new TH1F(Form("haveragedevs%d",itrgt),ftit,nMethods,0.,nMethods);
00094      for (int i=0;i<nMethods;i++) haveragedevs->GetXaxis()->SetBinLabel(i+1, mvaNames[i]);
00095      haveragedevs->SetStats(0);
00096      TGraphErrors* graphTrainAv= new TGraphErrors(nMethods,x[0],m[0],ex[0],em[0]);
00097      TGraphErrors* graphTruncTrainAv= new TGraphErrors(nMethods,x[1],m[1],ex[1],em[1]);
00098      TGraphErrors* graphTestAv= new TGraphErrors(nMethods,x[2],m[2],ex[2],em[2]);
00099      TGraphErrors* graphTruncTestAv= new TGraphErrors(nMethods,x[3],m[3],ex[3],em[3]);
00100      
00101      Double_t xmax = 1.2 * mymax;
00102      Double_t xmin = 0.8 * mymin - (mymax - mymin)*0.05;
00103      Double_t xheader = 0.2;
00104      Double_t yheader = xmax*0.92;
00105      xmin = xmin > 0.? xmin : 0.;
00106      if (mymin > 1.e-20 && log10(mymax/mymin)>1.5){
00107        c->SetLogy();
00108        cout << "--- result differ significantly using log scale for display of regression results"<< endl;
00109        xmax = 1.5 * xmax;
00110        xmin = 0.75 * mymin;
00111        yheader = xmax*0.78;
00112      }
00113      Float_t x0L = 0.03,     y0H = 0.91;
00114      Float_t dxL = 0.457-x0L, dyH = 0.14;
00115      //     TLegend *legend = new TLegend( x0L, y0H-dyH, x0L+dxL, y0H , "Average Deviation = (#sum_{evts} (f_{MVA} - f_{target})^{2} )^{1/2}");
00116      TLegend *legend = new TLegend( x0L, y0H-dyH, x0L+dxL, y0H );
00117      legend->SetTextSize( 0.035 );
00118      legend->SetTextAlign(12);
00119      legend->SetMargin( 0.1 );
00120 
00121      TH1F *hr = c->DrawFrame(-1.,0.,nMethods+1, xmax);
00122      cout << endl;
00123      cout << "Training: Average Deviation between target " << itrgt <<" and estimate" << endl;
00124      cout << Form("%-15s%-15s%-15s", "Method","Average Dev.","trunc. Aver.(90%)") <<endl;
00125      for (int i=0;i<nMethods;i++){
00126         cout << Form("%-15s:%#10.3g%#10.3g",
00127                      (const char*)mvaNames[i], m[0][i],m[1][i])<<endl;
00128         //       cout << mvaNames[i] << "  " << m[0][i]<< "  "<< m[1][i]<<endl;
00129        hr->GetXaxis()->SetBinLabel(i+1," ");
00130      }
00131      cout << endl;
00132      cout << "Testing: Average Deviation between target " << itrgt <<" and estimate" << endl;
00133      cout << Form("%-15s%-15s%-15s", "Method","Average Dev.","trunc. Aver.(90%)") <<endl;
00134      for (int i=0;i<nMethods;i++){
00135         cout << Form("%-15s:%#10.3g%#10.3g",
00136                      (const char*)mvaNames[i], m[2][i],m[3][i])<<endl;
00137         //cout << mvaNames[i] << "  " << m[2][i]<< "  "<< m[3][i]<<endl;
00138      }
00139 
00140      haveragedevs->SetMinimum(xmin);
00141      haveragedevs->SetMaximum(xmax);
00142      haveragedevs->SetXTitle("Method");
00143      haveragedevs->SetYTitle("Deviation from target");
00144      haveragedevs->Draw();
00145      c->GetFrame()->SetFillColor(21);
00146      c->GetFrame()->SetBorderSize(12);
00147      graphTrainAv->SetMarkerSize(1.);
00148      graphTrainAv->SetMarkerColor(kBlue);
00149      graphTrainAv->SetMarkerStyle(25);
00150      graphTrainAv->Draw("P");
00151      
00152      graphTruncTrainAv->SetMarkerSize(1.);
00153      graphTruncTrainAv->SetMarkerColor(kBlack);
00154      graphTruncTrainAv->SetMarkerStyle(25);
00155      graphTruncTrainAv->Draw("P");
00156 
00157      graphTestAv->SetMarkerSize(1.);
00158      graphTestAv->SetMarkerColor(kBlue);
00159      graphTestAv->SetMarkerStyle(21);
00160      graphTestAv->Draw("P");
00161      
00162      graphTruncTestAv->SetMarkerSize(1.);
00163      graphTruncTestAv->SetMarkerColor(kBlack);
00164      graphTruncTestAv->SetMarkerStyle(21);
00165      graphTruncTestAv->Draw("P");
00166      legend->AddEntry(graphTrainAv,TString("Training Sample, Average Deviation"),"p");
00167      legend->AddEntry(graphTruncTrainAv,TString("Training Sample, truncated Average Dev. (best 90%)"),"p");
00168      legend->AddEntry(graphTestAv,TString("Test Sample, Average Deviation"),"p");
00169      legend->AddEntry(graphTruncTestAv,TString("Test Sample, truncated Average Dev. (best 90%)"),"p");
00170 
00171      legend->Draw();
00172      TLatex legHeader;
00173      legHeader.SetTextSize(0.035);
00174      legHeader.SetTextAlign(12);
00175      //legHeader.DrawLatex(x0L, y0H+0.01, "Average Deviation = (#sum (_{ } f_{MVA} - f_{target})^{2} )^{1/2}");
00176      legHeader.DrawLatex(xheader, yheader, "Average Deviation = (#sum (_{ } f_{MVA} - f_{target})^{2} )^{1/2}");     
00177      // ============================================================
00178      
00179      if (__PLOT_LOGO__) TMVAGlob::plot_logo();
00180      // ============================================================
00181      
00182      c->Update();
00183      TString fname = "plots/" + hNameRef;
00184      if (__SAVE_IMAGE__) TMVAGlob::imgconv( c, fname );   
00185    } // end loop itrgt
00186    return;
00187 }
00188 

Generated on Tue Jul 5 15:26:36 2011 for ROOT_528-00b_version by  doxygen 1.5.1