00001 #include "tmvaglob.C"
00002
00003
00004
00005
00006
00007
00008
00009
00010 void regression_averagedevs(TString fin, Int_t Nevt=-1, Bool_t useTMVAStyle = kTRUE )
00011 {
00012 bool debug=false;
00013 if (Nevt <0) Nevt=1000000;
00014 Int_t type = 2;
00015 TMVAGlob::Initialize( useTMVAStyle );
00016
00017 TFile* file = TMVAGlob::OpenFile( fin );
00018 TList jobDirList;
00019 TMVAGlob::GetListOfJobs(file,jobDirList);
00020 if (jobDirList.GetSize()==0) {
00021 cout << "error could not find jobs" << endl;
00022 return;
00023 }
00024
00025 Bool_t __PLOT_LOGO__ = kTRUE;
00026 Bool_t __SAVE_IMAGE__ = kTRUE;
00027
00028
00029 TDirectory* dir0 = (TDirectory*) (file->Get("InputVariables_Id"));
00030 Int_t nTargets = TMVAGlob::GetNumberOfTargets( dir0);
00031
00032 if (debug) cout << "found targets " << nTargets<<endl;
00033 TCanvas* c=0;
00034 for (Int_t itrgt = 0 ; itrgt < nTargets; itrgt++){
00035 if (debug) cout << "loop targets " << itrgt<<endl;
00036 TString xtit = "Method";
00037 TString ytit = "Average Quadratic Deviation";
00038 TString ftit = ytit + " versus " + xtit + Form(" for target %d",itrgt);
00039 c = new TCanvas( Form("c%d",itrgt), ftit , 50+20*itrgt, 10*itrgt, 750, 650 );
00040
00041
00042 c->SetGrid();
00043 c->SetTickx(1);
00044 c->SetTicky(0);
00045 c->SetTopMargin(0.28);
00046 c->SetBottomMargin(0.1);
00047
00048 TString hNameRef(Form("regression_average_devs_target%d",itrgt));
00049
00050 const Int_t maxMethods = 100;
00051 const Int_t maxTargets = 100;
00052 Float_t m[4][maxMethods];
00053 Float_t em[4][maxMethods];
00054 Float_t x[4][maxMethods];
00055 Float_t ex[4][maxMethods];
00056
00057 TIter next(&jobDirList);
00058 Float_t mymax=0., mymin=1.e40;
00059 TString mvaNames[maxMethods];
00060 TDirectory *jobDir;
00061 Int_t nMethods = 0;
00062
00063 while (jobDir = (TDirectory*)next()) {
00064 TString methodTitle;
00065 TMVAGlob::GetMethodTitle(methodTitle,jobDir);
00066 mvaNames[nMethods]=methodTitle;
00067 if (debug) cout << "--- Found directory for method: " << methodTitle << endl;
00068 TIter keyIt(jobDir->GetListOfKeys());
00069 TKey *histKey;
00070 while ((histKey = (TKey*)keyIt())) {
00071 if (histKey->ReadObj()->InheritsFrom("TH1F") ){
00072 TString s(histKey->ReadObj()->GetName());
00073 if( !s.Contains("Quadr_Dev") ) continue;
00074 if( !s.Contains(Form("target_%d_",itrgt))) continue;
00075 Int_t ihist = 0 ;
00076 if( !s.Contains("best90perc") && s.Contains("train")) ihist=0;
00077 if( s.Contains("best90perc") && s.Contains("train")) ihist=1;
00078 if( !s.Contains("best90perc") && s.Contains("test")) ihist=2;
00079 if( s.Contains("best90perc") && s.Contains("test")) ihist=3;
00080 if (debug) cout <<"using histogram" << s << ", ihist="<<ihist<<endl;
00081 TH1F* h = (TH1F*) (histKey->ReadObj());
00082 m[ihist][nMethods] = sqrt(h->GetMean());
00083 em[ihist][nMethods] = h->GetRMS()/(sqrt(h->GetEntries())*2.*h->GetMean());
00084 x[ihist][nMethods] = nMethods+0.44+0.12*ihist;
00085 ex[ihist][nMethods] = 0.001;
00086 mymax= m[ihist][nMethods] > mymax ? m[ihist][nMethods] : mymax;
00087 mymin= m[ihist][nMethods] < mymin ? m[ihist][nMethods] : mymin;
00088 if (debug) cout << "m"<< ihist << "="<<m[ihist][nMethods]<<endl;
00089 }
00090 }
00091 nMethods++;
00092 }
00093 TH1F* haveragedevs= new TH1F(Form("haveragedevs%d",itrgt),ftit,nMethods,0.,nMethods);
00094 for (int i=0;i<nMethods;i++) haveragedevs->GetXaxis()->SetBinLabel(i+1, mvaNames[i]);
00095 haveragedevs->SetStats(0);
00096 TGraphErrors* graphTrainAv= new TGraphErrors(nMethods,x[0],m[0],ex[0],em[0]);
00097 TGraphErrors* graphTruncTrainAv= new TGraphErrors(nMethods,x[1],m[1],ex[1],em[1]);
00098 TGraphErrors* graphTestAv= new TGraphErrors(nMethods,x[2],m[2],ex[2],em[2]);
00099 TGraphErrors* graphTruncTestAv= new TGraphErrors(nMethods,x[3],m[3],ex[3],em[3]);
00100
00101 Double_t xmax = 1.2 * mymax;
00102 Double_t xmin = 0.8 * mymin - (mymax - mymin)*0.05;
00103 Double_t xheader = 0.2;
00104 Double_t yheader = xmax*0.92;
00105 xmin = xmin > 0.? xmin : 0.;
00106 if (mymin > 1.e-20 && log10(mymax/mymin)>1.5){
00107 c->SetLogy();
00108 cout << "--- result differ significantly using log scale for display of regression results"<< endl;
00109 xmax = 1.5 * xmax;
00110 xmin = 0.75 * mymin;
00111 yheader = xmax*0.78;
00112 }
00113 Float_t x0L = 0.03, y0H = 0.91;
00114 Float_t dxL = 0.457-x0L, dyH = 0.14;
00115
00116 TLegend *legend = new TLegend( x0L, y0H-dyH, x0L+dxL, y0H );
00117 legend->SetTextSize( 0.035 );
00118 legend->SetTextAlign(12);
00119 legend->SetMargin( 0.1 );
00120
00121 TH1F *hr = c->DrawFrame(-1.,0.,nMethods+1, xmax);
00122 cout << endl;
00123 cout << "Training: Average Deviation between target " << itrgt <<" and estimate" << endl;
00124 cout << Form("%-15s%-15s%-15s", "Method","Average Dev.","trunc. Aver.(90%)") <<endl;
00125 for (int i=0;i<nMethods;i++){
00126 cout << Form("%-15s:%#10.3g%#10.3g",
00127 (const char*)mvaNames[i], m[0][i],m[1][i])<<endl;
00128
00129 hr->GetXaxis()->SetBinLabel(i+1," ");
00130 }
00131 cout << endl;
00132 cout << "Testing: Average Deviation between target " << itrgt <<" and estimate" << endl;
00133 cout << Form("%-15s%-15s%-15s", "Method","Average Dev.","trunc. Aver.(90%)") <<endl;
00134 for (int i=0;i<nMethods;i++){
00135 cout << Form("%-15s:%#10.3g%#10.3g",
00136 (const char*)mvaNames[i], m[2][i],m[3][i])<<endl;
00137
00138 }
00139
00140 haveragedevs->SetMinimum(xmin);
00141 haveragedevs->SetMaximum(xmax);
00142 haveragedevs->SetXTitle("Method");
00143 haveragedevs->SetYTitle("Deviation from target");
00144 haveragedevs->Draw();
00145 c->GetFrame()->SetFillColor(21);
00146 c->GetFrame()->SetBorderSize(12);
00147 graphTrainAv->SetMarkerSize(1.);
00148 graphTrainAv->SetMarkerColor(kBlue);
00149 graphTrainAv->SetMarkerStyle(25);
00150 graphTrainAv->Draw("P");
00151
00152 graphTruncTrainAv->SetMarkerSize(1.);
00153 graphTruncTrainAv->SetMarkerColor(kBlack);
00154 graphTruncTrainAv->SetMarkerStyle(25);
00155 graphTruncTrainAv->Draw("P");
00156
00157 graphTestAv->SetMarkerSize(1.);
00158 graphTestAv->SetMarkerColor(kBlue);
00159 graphTestAv->SetMarkerStyle(21);
00160 graphTestAv->Draw("P");
00161
00162 graphTruncTestAv->SetMarkerSize(1.);
00163 graphTruncTestAv->SetMarkerColor(kBlack);
00164 graphTruncTestAv->SetMarkerStyle(21);
00165 graphTruncTestAv->Draw("P");
00166 legend->AddEntry(graphTrainAv,TString("Training Sample, Average Deviation"),"p");
00167 legend->AddEntry(graphTruncTrainAv,TString("Training Sample, truncated Average Dev. (best 90%)"),"p");
00168 legend->AddEntry(graphTestAv,TString("Test Sample, Average Deviation"),"p");
00169 legend->AddEntry(graphTruncTestAv,TString("Test Sample, truncated Average Dev. (best 90%)"),"p");
00170
00171 legend->Draw();
00172 TLatex legHeader;
00173 legHeader.SetTextSize(0.035);
00174 legHeader.SetTextAlign(12);
00175
00176 legHeader.DrawLatex(xheader, yheader, "Average Deviation = (#sum (_{ } f_{MVA} - f_{target})^{2} )^{1/2}");
00177
00178
00179 if (__PLOT_LOGO__) TMVAGlob::plot_logo();
00180
00181
00182 c->Update();
00183 TString fname = "plots/" + hNameRef;
00184 if (__SAVE_IMAGE__) TMVAGlob::imgconv( c, fname );
00185 }
00186 return;
00187 }
00188