sparsehist.C

Go to the documentation of this file.
00001 //*********************************************************************
00002 //+ Evaluate the performance of THnSparse vs THnF (or Float_t arrays)
00003 //  for different numbers of dimensions and bins per dimension.
00004 // 
00005 //  The script calculates the bandwidth for filling and retrieving
00006 //  bin contents (in million entries per second) for these two
00007 //  histogramming techniques, where "seconds" is CPU and real time.
00008 //
00009 //  The first line of the plots contains the bandwidth based on the 
00010 //  CPU time (THnSpase, THnF/Float_t*, ratio), the second line shows
00011 //  the plots for real time, and the third line shows the fraction of
00012 //  filled bins and memory used by THnSparse vs. THnF/Float_t.
00013 // 
00014 //  The timing depends on the distribution and the amount of entries
00015 //  in the histograms; here, a Gaussian distribution (center is
00016 //  contained in the histograms) is used to fill each histogram with
00017 //  1000 entries. The filling and reading is repeated until enough
00018 //  statistics have been collected.
00019 // 
00020 //  tutorials/tree/drawsparse.C shows an example for visualizing a
00021 //  THnSparse. It creates a TTree which is then drawn using
00022 //  TParallelCoord.
00023 // 
00024 //  This macro should be run in compiled mode due to the many nested
00025 //  loops that force CINT to disable its optimization. If run
00026 //  interpreted one would not benchmark THnSparse but CINT.
00027 // 
00028 //  Run as
00029 //    .L $ROOTSYS/tutorials/hist/sparsehist.C+
00030 // 
00031 //  Axel.Naumann@cern.ch (2007-09-14)
00032 // *********************************************************************
00033 
00034 #include "TH1.h"
00035 #include "TH2.h"
00036 #include "TH3.h"
00037 #include "THnSparse.h"
00038 #include "TStopwatch.h"
00039 #include "TRandom.h"
00040 #include "TCanvas.h"
00041 #include "TFile.h"
00042 #include "TStyle.h"
00043 #include "TSystem.h"
00044 
00045 class TTimeHists {
00046 public:
00047    enum EHist { kHist, kSparse, kNumHist };
00048    enum ETime { kReal, kCPU, kNumTime };
00049    TTimeHists(Int_t dim, Int_t bins, Long_t num):
00050       fValue(0), fDim(dim), fBins(bins), fNum(num),
00051       fSparse(0), fHist(0), fArray(0) {}
00052    ~TTimeHists();
00053    bool Run();
00054    Double_t GetTime(EHist hist, ETime time) const {
00055       if (time == kReal) return fTime[hist][0];
00056       return fTime[hist][1]; }
00057    static void SetDebug(Int_t lvl) { fgDebug = lvl; }
00058    THnSparse* GetSparse() const { return fSparse; }
00059 
00060 protected:
00061    void Fill(EHist hist);
00062    Double_t Check(EHist hist);
00063    void SetupHist(EHist hist);
00064    void NextValues();
00065    void SetupValues();
00066 
00067 private:
00068    Double_t* fValue;
00069    Int_t fDim;
00070    Int_t fBins;
00071    Long_t fNum;
00072    Double_t fTime[2][2];
00073    THnSparse* fSparse;
00074    TH1*       fHist;
00075    Float_t*  fArray;
00076    static Int_t fgDebug;
00077 };
00078 
00079 Int_t TTimeHists::fgDebug = 0;
00080 
00081 TTimeHists::~TTimeHists()
00082 {
00083    delete [] fValue;
00084    delete fSparse;
00085    delete fHist;
00086    delete [] fArray;
00087 }
00088 
00089 bool TTimeHists::Run()
00090 {
00091    // run all tests with current settings, and check for identity of content.
00092 
00093    Double_t check[2];
00094    Long64_t rep[2];
00095    for (int h = 0; h < 2; ++h) {
00096       rep[h] = 0;
00097       SetupValues();
00098       try {
00099          TStopwatch w;
00100          w.Start();
00101          SetupHist((EHist) h);
00102          w.Stop();
00103          do {
00104             w.Start(kFALSE);
00105             Fill((EHist) h);
00106             check[h] = Check((EHist) h);
00107             w.Stop();
00108             ++rep[h];
00109          } while (!h && w.RealTime() < 0.1
00110             || h && rep[0] > 0 && rep[1] < rep[0]);
00111 
00112          fTime[h][0] = (1.* fNum * rep[h]) / w.RealTime() / 1E6;
00113          fTime[h][1] = (1.* fNum * rep[h]) / w.CpuTime() / 1E6;
00114 
00115          if (h == 1 && (fTime[h][0] > 1E20 || fTime[h][1] > 1E20)) {
00116             do {
00117                // some more cycles:
00118                w.Start(kFALSE);
00119                Fill((EHist) h);
00120                Check((EHist) h);
00121                w.Stop();
00122                ++rep[h];
00123             } while (w.RealTime() < 0.1);
00124 
00125             fTime[h][0] = (1.* fNum * rep[h]) / w.RealTime() / 1E6;
00126             fTime[h][1] = (1.* fNum * rep[h]) / w.CpuTime() / 1E6;
00127          }
00128 
00129          if (fTime[h][0] > 1E20) fTime[h][0] = 1E20;
00130          if (fTime[h][1] > 1E20) fTime[h][1] = 1E20;
00131       }
00132       catch (std::exception&) {
00133          fTime[h][0] = fTime[h][1] = -1.;
00134          check[h] = -1.; // can never be < 1 without exception
00135          rep[h] = -1;
00136       }
00137    }
00138    if (check[0] != check[1])
00139       if (check[0] != -1.)
00140          printf("ERROR: mismatch of histogram / array (%g) and sparse histogram (%g) for dim=%d, bins=%d!\n",
00141                 check[0], check[1], fDim, fBins);
00142       // else
00143       //   printf("ERROR: cannot allocate histogram / array for dim=%d, bins=%d - out of memory!\n",
00144       //          fDim, fBins);
00145    return (check[0] == check[1]);
00146 }
00147 
00148 void TTimeHists::NextValues()
00149 {
00150    for (Int_t d = 0; d < fDim; ++d)
00151       fValue[d] = gRandom->Gaus() / 4.;
00152 }
00153 
00154 void TTimeHists::SetupValues()
00155 {
00156    // define fValue
00157    if (!fValue) fValue = new Double_t[fDim];
00158    gRandom->SetSeed(42);
00159 }
00160 
00161 void TTimeHists::Fill(EHist hist)
00162 {
00163    for (Long_t n = 0; n < fNum; ++n) {
00164       NextValues();
00165       if (fgDebug > 1) {
00166          printf("%ld: fill %s", n, hist == kHist? (fDim < 4 ? "hist" : "arr") : "sparse");
00167          for (Int_t d = 0; d < fDim; ++d)
00168             printf("[%g]", fValue[d]);
00169          printf("\n");
00170       }
00171       if (hist == kHist) {
00172          switch (fDim) {
00173          case 1: fHist->Fill(fValue[0]); break;
00174          case 2: ((TH2F*)fHist)->Fill(fValue[0], fValue[1]); break;
00175          case 3: ((TH3F*)fHist)->Fill(fValue[0], fValue[1], fValue[2]); break;
00176          default:
00177             {
00178                Long_t pos = 0;
00179                for (Int_t d = 0; d < fDim; ++d) {
00180                   pos *= (fBins + 2);
00181                   Int_t ibin = (Int_t)((fValue[d] + 1.) / 2. * fBins + 1);
00182                   if (ibin > fBins) ibin = fBins + 1;
00183                   if (ibin < 0) ibin = 0;
00184                   pos += ibin;
00185                }
00186                ++fArray[pos];
00187             }
00188          }
00189       } else {
00190          fSparse->Fill(fValue);
00191       }
00192    }
00193 }
00194 
00195 void TTimeHists::SetupHist(EHist hist)
00196 {
00197    if (hist == kHist) {
00198       switch (fDim) {
00199       case 1: fHist = new TH1F("h1", "h1", fBins, -1., 1.); break;
00200       case 2: fHist = new TH2F("h2", "h2", fBins, -1., 1., fBins, -1., 1.); break;
00201       case 3: fHist = new TH3F("h3", "h3", fBins, -1., 1., fBins, -1., 1., fBins, -1., 1.); break;
00202       default:
00203          {
00204             MemInfo_t meminfo;
00205             gSystem->GetMemInfo(&meminfo);
00206             Int_t size = 1;
00207             for (Int_t d = 0; d < fDim; ++d) {
00208                if ((Int_t)(size * sizeof(Float_t)) > INT_MAX / (fBins + 2)
00209                   || meminfo.fMemFree > 0 
00210                   && meminfo.fMemFree / 2 < (Int_t) (size * sizeof(Float_t)/1000/1000))
00211                   throw std::bad_alloc();
00212                size *= (fBins + 2);
00213             }
00214             if (meminfo.fMemFree > 0 
00215                && meminfo.fMemFree / 2 < (Int_t) (size * sizeof(Float_t)/1000/1000))
00216                throw std::bad_alloc();
00217             fArray = new Float_t[size];
00218             memset(fArray, 0, sizeof(Float_t)*size);
00219          }
00220       }
00221    } else {
00222       Int_t* bins = new Int_t[fDim];
00223       Double_t *xmin = new Double_t[fDim];
00224       Double_t *xmax = new Double_t[fDim];
00225       for (Int_t d = 0; d < fDim; ++d) {
00226          bins[d] = fBins;
00227          xmin[d] = -1.;
00228          xmax[d] =  1.;
00229       }
00230       fSparse = new THnSparseF("hs", "hs", fDim, bins, xmin, xmax);
00231    }
00232 }
00233 
00234 Double_t TTimeHists::Check(EHist hist)
00235 {
00236    // Check bin content of all bins
00237    Double_t check = 0.;
00238    Int_t* x = new Int_t[fDim];
00239    memset(x, 0, sizeof(Int_t) * fDim);
00240 
00241    if (hist == kHist) {
00242       Long_t idx = 0;
00243       Long_t size = 1;
00244       for (Int_t d = 0; d < fDim; ++d)
00245          size *= (fBins + 2);
00246       while (x[0] <= fBins + 1) {
00247          Double_t v = -1.;
00248          if (fDim < 4) {
00249             Long_t histidx = x[0];
00250             if (fDim == 2) histidx = fHist->GetBin(x[0], x[1]);
00251             else if (fDim == 3) histidx = fHist->GetBin(x[0], x[1], x[2]);
00252             v = fHist->GetBinContent(histidx);
00253          }
00254          else v = fArray[idx];
00255          Double_t checkx = 0.;
00256          if (v)
00257             for (Int_t d = 0; d < fDim; ++d)
00258                checkx += x[d];
00259          check += checkx * v;
00260 
00261          if (fgDebug > 2 || fgDebug > 1 && v) {
00262             printf("%s%d", fDim < 4 ? "hist" : "arr", fDim);
00263             for (Int_t d = 0; d < fDim; ++d)
00264                printf("[%d]", x[d]);
00265             printf(" = %g\n", v);
00266          }
00267 
00268          ++x[fDim - 1];
00269          // Adjust the bin idx
00270          // no wrapping for dim 0 - it's what we break on!
00271          for (Int_t d = fDim - 1; d > 0; --d) {
00272             if (x[d] > fBins + 1) {
00273                x[d] = 0;
00274                ++x[d - 1];
00275             }
00276          }
00277          ++idx;
00278       } // while next bin
00279    } else {
00280       for (Long64_t i = 0; i < fSparse->GetNbins(); ++i) {
00281          Double_t v = fSparse->GetBinContent(i, x);
00282          Double_t checkx = 0.;
00283          for (Int_t d = 0; d < fDim; ++d)
00284             checkx += x[d];
00285          check += checkx * v;
00286 
00287          if (fgDebug > 1) {
00288             printf("sparse%d", fDim);
00289             for (Int_t d = 0; d < fDim; ++d)
00290                printf("[%d]", x[d]);
00291             printf(" = %g\n", v);
00292          }
00293       }
00294    }
00295    check /= fNum;
00296    if (fgDebug > 0)
00297       printf("check %s%d = %g\n", hist == kHist ? (fDim < 4 ? "hist" : "arr") : "sparse", fDim, check);
00298    return check;
00299 }
00300 
00301 
00302 void sparsehist() {
00303 #ifdef __CINT__
00304    printf("Please run this script in compiled mode by running \".x sparsehist.C+\"\n");
00305    return;
00306 #endif
00307 
00308    TH2F* htime[TTimeHists::kNumHist][TTimeHists::kNumTime];
00309    for (int h = 0; h < TTimeHists::kNumHist; ++h)
00310       for (int t = 0; t < TTimeHists::kNumTime; ++t) {
00311          TString name("htime_");
00312          if (h == 0) name += "arr";
00313          else name += "sp";
00314          if (t == 0) name += "_r";
00315 
00316          TString title;
00317          title.Form("Throughput (fill,get) %s (%s, 1M entries/sec);dim;bins;1M entries/sec", h == 0 ? "THnF/Float_t[n]" : "THnSparseF", t == 0 ? "real" : "CPU");
00318          htime[h][t] = new TH2F(name, title, 6, 0.5, 6.5, 10, 5, 105);
00319       }
00320 
00321    TH2F* hsparse_mem = new TH2F("hsparse_mem", "Fractional memory usage;dim;bins;fraction of memory used", 6, 0.5, 6.5, 10, 5, 105);
00322    TH2F* hsparse_bins = new TH2F("hsparse_bins", "Fractional number of used bins;dim;bins;fraction of filled bins", 6, 0.5, 6.5, 10, 5, 105);
00323 
00324    // TTimeHists::SetDebug(2);
00325    Double_t max = -1.;
00326    for (Int_t dim = 1; dim < 7; ++dim) {
00327       printf("Processing dimension %d", dim);
00328       for (Int_t bins = 10; bins <= 100; bins += 10) {
00329          TTimeHists timer(dim, bins, /*num*/ 1000);
00330          timer.Run();
00331          for (int h = 0; h < TTimeHists::kNumHist; ++h)
00332             for (int t = 0; t < TTimeHists::kNumTime; ++t) {
00333                Double_t time = timer.GetTime((TTimeHists::EHist)h, (TTimeHists::ETime)t);
00334                if (time >= 0.)
00335                   htime[h][t]->Fill(dim, bins, time);
00336             }
00337 
00338          hsparse_mem->Fill(dim, bins, timer.GetSparse()->GetSparseFractionMem());
00339          hsparse_bins->Fill(dim, bins, timer.GetSparse()->GetSparseFractionBins());
00340 
00341          if (max < timer.GetTime(TTimeHists::kSparse, TTimeHists::kReal))
00342             max = timer.GetTime(TTimeHists::kSparse, TTimeHists::kReal);
00343          printf(".");
00344          fflush(stdout);
00345       }
00346       printf(" done\n");
00347    }
00348 
00349    Double_t markersize = 2.5;
00350    hsparse_mem->SetMarkerSize(markersize);
00351    hsparse_bins->SetMarkerSize(markersize);
00352 
00353    TH2F* htime_ratio[TTimeHists::kNumTime];
00354    for (int t = 0; t < TTimeHists::kNumTime; ++t) {
00355       const char* name = t ? "htime_ratio" : "htime_ratio_r";
00356       htime_ratio[t] = (TH2F*) htime[TTimeHists::kSparse][t]->Clone(name);
00357       TString title;
00358       title.Form("Relative speed improvement (%s, 1M entries/sec): sparse/hist;dim;bins;#Delta 1M entries/sec", t == 0 ? "real" : "CPU");
00359       htime_ratio[t]->SetTitle(title);
00360       htime_ratio[t]->Divide(htime[TTimeHists::kHist][t]);
00361       htime_ratio[t]->SetMinimum(0.1);
00362       htime_ratio[t]->SetMarkerSize(markersize);
00363    }
00364 
00365    TFile* f = new TFile("sparsehist.root","RECREATE");
00366 
00367    TCanvas* canv= new TCanvas("c","c");
00368    canv->Divide(3,3);
00369 
00370    gStyle->SetPalette(8,0);
00371    gStyle->SetPaintTextFormat(".2g");
00372    gStyle->SetOptStat(0);
00373    const char* opt = "TEXT COL";
00374 
00375    for (int t = 0; t < TTimeHists::kNumTime; ++t) {
00376       for (int h = 0; h < TTimeHists::kNumHist; ++h) {
00377          htime[h][t]->SetMaximum(max);
00378          htime[h][t]->SetMarkerSize(markersize);
00379          canv->cd(1 + h + 3 * t);
00380          htime[h][t]->Draw(opt);
00381          htime[h][t]->Write();
00382       }
00383       canv->cd(3 + t * 3);
00384       htime_ratio[t]->Draw(opt); gPad->SetLogz();
00385       htime_ratio[t]->Write();
00386    }
00387 
00388    canv->cd(7); hsparse_mem->Draw(opt);
00389    canv->cd(8); hsparse_bins->Draw(opt);
00390    hsparse_mem->Write();
00391    hsparse_bins->Write();
00392 
00393    canv->Write();
00394 
00395    delete f;
00396 }

Generated on Tue Jul 5 15:44:29 2011 for ROOT_528-00b_version by  doxygen 1.5.1