hadd.cxx

Go to the documentation of this file.
00001 /*
00002 
00003   This program will add histograms (see note) and Trees from a list of root files and write them
00004   to a target root file. The target file is newly created and must not be
00005   identical to one of the source files.
00006 
00007   Syntax:
00008 
00009        hadd targetfile source1 source2 ...
00010     or
00011        hadd -f targetfile source1 source2 ...
00012          (targetfile is overwritten if it exists)
00013 
00014   When the -f option is specified, one can also specify the compression
00015   level of the target file. By default the compression level is 1, but
00016   if "-f0" is specified, the target file will not be compressed.
00017   if "-f6" is specified, the compression level 6 will be used.
00018 
00019   For example assume 3 files f1, f2, f3 containing histograms hn and Trees Tn
00020     f1 with h1 h2 h3 T1
00021     f2 with h1 h4 T1 T2
00022     f3 with h5
00023    the result of
00024      hadd -f x.root f1.root f2.root f3.root
00025    will be a file x.root with h1 h2 h3 h4 h5 T1 T2
00026    where h1 will be the sum of the 2 histograms in f1 and f2
00027          T1 will be the merge of the Trees in f1 and f2
00028 
00029    The files may contain sub-directories.
00030 
00031   If the source files contain histograms and Trees, one can skip
00032   the Trees with
00033        hadd -T targetfile source1 source2 ...
00034 
00035   Wildcarding and indirect files are also supported
00036     hadd result.root  myfil*.root
00037    will merge all files in myfil*.root
00038     hadd result.root file1.root @list.txt file2.root myfil*.root
00039     will merge file1.root, file2.root, all files in myfil*.root
00040     and all files in the indirect text file list.txt ("@" as the first
00041     character of the file indicates an indirect file. An indirect file
00042     is a text file containing a list of other files, including other
00043     indirect files, one line per file).
00044 
00045   If the sources and target compression levels are identical (default),
00046   the program uses the TChain::Merge function with option "fast", ie
00047   the merge will be done without  unzipping or unstreaming the baskets
00048   (i.e. direct copy of the raw byte on disk). The "fast" mode is typically
00049   5 times faster than the mode unzipping and unstreaming the baskets.
00050 
00051   NOTE1: By default histograms are added. However hadd does not support the case where
00052          histograms have their bit TH1::kIsAverage set.
00053 
00054   NOTE2: hadd returns a status code: 0 if OK, non-zero otherwise
00054          (1 for usage or file-opening errors, -1 for errors while merging)
00055 
00056   Authors: Rene Brun, Dirk Geppert, Sven A. Schmidt, sven.schmidt@cern.ch
00057          : rewritten from scratch by Rene Brun (30 November 2005)
00058             to support files with nested directories.
00059            Toby Burnett implemented the possibility to use indirect files.
00060  */
00061 
00062 #include "RConfig.h"
00063 #include <string>
00064 #include "TChain.h"
00065 #include "TFile.h"
00066 #include "THashList.h"
00067 #include "TH1.h"
00068 #include "THStack.h"
00069 #include "TKey.h"
00070 #include "TObjString.h"
00071 #include "Riostream.h"
00072 #include "TClass.h"
00073 #include "TSystem.h"
00074 #include <stdlib.h>
00075 
00076 TList *FileList;
00077 TFile *Target, *Source;
00078 Bool_t noTrees;
00079 Bool_t fastMethod;
00080 Bool_t reoptimize;
00081 
00082 int AddFile(TList* sourcelist, std::string entry, int newcomp) ;
00083 int MergeRootfile( TDirectory *target, TList *sourcelist);
00084 
00085 //___________________________________________________________________________
00086 int main( int argc, char **argv )
00087 {
00088 
00089    if ( argc < 3 || "-h" == string(argv[1]) || "--help" == string(argv[1]) ) {
00090       cout << "Usage: " << argv[0] << " [-f[0-9]] [-T] [-O] targetfile source1 [source2 source3 ...]" << endl;
00091       cout << "This program will add histograms from a list of root files and write them" << endl;
00092       cout << "to a target root file. The target file is newly created and must not " << endl;
00093       cout << "exist, or if -f (\"force\") is given, must not be one of the source files." << endl;
00094       cout << "Supply at least two source files for this to make sense... ;-)" << endl;
00095       cout << "If the option -T is used, Trees are not merged" <<endl;
00096       cout << "If the option -O is used, when merging TTree, the basket size is re-optimized" <<endl;
00097       cout << "When -the -f option is specified, one can also specify the compression" <<endl;
00098       cout << "level of the target file. By default the compression level is 1, but" <<endl;
00099       cout << "if \"-f0\" is specified, the target file will not be compressed." <<endl;
00100       cout << "if \"-f6\" is specified, the compression level 6 will be used." <<endl;
00101       cout << "if Target and source files have different compression levels"<<endl;
00102       cout << " a slower method is used"<<endl;
00103       return 1;
00104    }
00105    FileList = new TList();
00106 
00107    Bool_t force = kFALSE;
00108    reoptimize = kFALSE;
00109    noTrees = kFALSE;
00110 
00111    int ffirst = 2;
00112    Int_t newcomp = 1;
00113    for( int a = 1; a < argc; ++a ) {
00114       if ( strcmp(argv[a],"-T") == 0 ) {
00115          noTrees = kTRUE;
00116          ++ffirst;
00117       } else if ( strcmp(argv[a],"-f") == 0 ) {
00118          force = kTRUE;
00119          ++ffirst;
00120       } else if ( strcmp(argv[a],"-O") == 0 ) {
00121          reoptimize = kTRUE;
00122          ++ffirst;
00123       } else if ( argv[a][0] == '-' ) {
00124          char ft[4];
00125          for( int j=0; j<=9; ++j ) {
00126             snprintf(ft,4,"-f%d",j);
00127             if (!strcmp(argv[a],ft)) {
00128                force = kTRUE;
00129                newcomp = j;
00130                ++ffirst;
00131                break;
00132             }
00133          }
00134       }
00135    }
00136 
00137    gSystem->Load("libTreePlayer");
00138 
00139    cout << "Target file: " << argv[ffirst-1] << endl;
00140 
00141    Target = TFile::Open( argv[ffirst-1], (force?"RECREATE":"CREATE") );
00142    if (!Target || Target->IsZombie()) {
00143       cerr << "Error opening target file (does " << argv[ffirst-1] << " exist?)." << endl;
00144       cerr << "Pass \"-f\" argument to force re-creation of output file." << endl;
00145       exit(1);
00146    }
00147    Target->SetCompressionLevel(newcomp);
00148 
00149    // by default hadd can merge Trees in a file that can go up to 100 Gbytes
00150    // No need to set this, as 100Gb is now the TTree default
00151    // Long64_t maxsize = 100000000; //100GB
00152    // maxsize *= 1000;  //to bypass some compiler limitations with big constants
00153    // TTree::SetMaxTreeSize(maxsize);
00154 
00155    fastMethod = kTRUE;
00156    for ( int i = ffirst; i < argc; i++ ) {
00157       if( AddFile(FileList, argv[i], newcomp) !=0 ) return 1;
00158    }
00159    if (!fastMethod && !reoptimize) {
00160       // Don't warn if the user any request re-optimization.
00161       cout <<"Sources and Target have different compression levels"<<endl;
00162       cout <<"Merging will be slower"<<endl;
00163    }
00164 
00165    int status = MergeRootfile( Target, FileList);
00166 
00167    //must delete Target to avoid a problem with dictionaries in~ TROOT
00168    delete Target;
00169 
00170    return status;
00171 }
00172 
00173 //___________________________________________________________________________
00174 int AddFile(TList* sourcelist, std::string entry, int newcomp)
00175 {
00176    // add a new file to the list of files
00177    static int count(0);
00178    if( entry.empty() ) return 0;
00179    size_t j =entry.find_first_not_of(' ');
00180    if( j==std::string::npos ) return 0;
00181    entry = entry.substr(j);
00182    if( entry.substr(0,1)=="@"){
00183       std::ifstream indirect_file(entry.substr(1).c_str() );
00184       if( ! indirect_file.is_open() ) {
00185          std::cerr<< "Could not open indirect file " << entry.substr(1) << std::endl;
00186          return 1;
00187       }
00188       while( indirect_file ){
00189          std::string line;
00190          std::getline(indirect_file, line);
00191          if( AddFile(sourcelist, line, newcomp)!=0 )return 1;;
00192       }
00193       return 0;
00194    }
00195    cout << "Source file " << (++count) << ": " << entry << endl;
00196 
00197    TFile* source = TFile::Open( entry.c_str());
00198    if( source==0 ){
00199       return 1;
00200    }
00201    sourcelist->Add(source);
00202    if (newcomp != source->GetCompressionLevel()) fastMethod = kFALSE;
00203    return 0;
00204 }
00205 
00206 
00207 //___________________________________________________________________________
00208 int MergeRootfile( TDirectory *target, TList *sourcelist)
00209 {
00210    // Merge all objects in a directory
00211    int status = 0;
00212    cout << "Target path: " << target->GetPath() << endl;
00213    TString path( (char*)strstr( target->GetPath(), ":" ) );
00214    path.Remove( 0, 2 );
00215 
00216    TDirectory *first_source = (TDirectory*)sourcelist->First();
00217    Int_t nguess = sourcelist->GetSize()+1000;
00218    THashList allNames(nguess);
00219    ((THashList*)target->GetList())->Rehash(nguess);
00220    ((THashList*)target->GetListOfKeys())->Rehash(nguess);
00221    TList listH;
00222    TString listHargs;
00223    listHargs.Form("((TCollection*)0x%lx)", (ULong_t)&listH);
00224    while(first_source) {
00225       TDirectory *current_sourcedir = first_source->GetDirectory(path);
00226       if (!current_sourcedir) {
00227          first_source = (TDirectory*)sourcelist->After(first_source);
00228          continue;
00229       }
00230 
00231       // loop over all keys in this directory
00232       TChain *globChain = 0;
00233       TIter nextkey( current_sourcedir->GetListOfKeys() );
00234       TKey *key, *oldkey=0;
00235       //gain time, do not add the objects in the list in memory
00236       TH1::AddDirectory(kFALSE);
00237 
00238       while ( (key = (TKey*)nextkey())) {
00239          if (current_sourcedir == target) break;
00240          //keep only the highest cycle number for each key
00241          if (oldkey && !strcmp(oldkey->GetName(),key->GetName())) continue;
00242          if (!strcmp(key->GetClassName(),"TProcessID")) {key->ReadObj(); continue;}
00243          if (allNames.FindObject(key->GetName())) continue;
00244          TClass *cl = TClass::GetClass(key->GetClassName());
00245          if (!cl || !cl->InheritsFrom(TObject::Class())) {
00246             cout << "Cannot merge object type, name: "
00247                  << key->GetName() << " title: " << key->GetTitle() << endl;
00248             continue;
00249          }
00250          allNames.Add(new TObjString(key->GetName()));
00251          // read object from first source file
00252          //current_sourcedir->cd();
00253          TObject *obj = key->ReadObj();
00254          //printf("keyname=%s, obj=%x\n",key->GetName(),obj);
00255 
00256          if ( obj->IsA()->InheritsFrom( TTree::Class() ) ) {
00257 
00258             // loop over all source files create a chain of Trees "globChain"
00259             if (!noTrees) {
00260                TString obj_name;
00261                if (path.Length()) {
00262                   obj_name = path + "/" + obj->GetName();
00263                } else {
00264                   obj_name = obj->GetName();
00265                }
00266                globChain = new TChain(obj_name);
00267                globChain->Add(first_source->GetName());
00268                TFile *nextsource = (TFile*)sourcelist->After( first_source );
00269                while ( nextsource ) {
00270                   //do not add to the list a file that does not contain this Tree
00271                   TFile *curf = TFile::Open(nextsource->GetName());
00272                   if (curf) {
00273                      Bool_t mustAdd = kFALSE;
00274                      if (curf->FindKey(obj_name)) {
00275                         mustAdd = kTRUE;
00276                      } else {
00277                         //we could be more clever here. No need to import the object
00278                         //we are missing a function in TDirectory
00279                         TObject *aobj = curf->Get(obj_name);
00280                         if (aobj) { mustAdd = kTRUE; delete aobj;}
00281                      }
00282                      if (mustAdd) {
00283                         globChain->Add(nextsource->GetName());
00284                      }
00285                   }
00286                   delete curf;
00287                   nextsource = (TFile*)sourcelist->After( nextsource );
00288                }
00289             }
00290          } else if ( obj->IsA()->InheritsFrom( TDirectory::Class() ) ) {
00291             // it's a subdirectory
00292 
00293             cout << "Found subdirectory " << obj->GetName() << endl;
00294             // create a new subdir of same name and title in the target file
00295             target->cd();
00296             TDirectory *newdir = target->mkdir( obj->GetName(), obj->GetTitle() );
00297 
00298             // newdir is now the starting point of another round of merging
00299             // newdir still knows its depth within the target file via
00300             // GetPath(), so we can still figure out where we are in the recursion
00301             status = MergeRootfile( newdir, sourcelist);
00302             if (status) return status;
00303 
00304          } else if ( obj->InheritsFrom(TObject::Class())
00305               && obj->IsA()->GetMethodWithPrototype("Merge", "TCollection*") ) {
00306             // object implements Merge(TCollection*)
00307 
00308             // loop over all source files and merge same-name object
00309             TFile *nextsource = (TFile*)sourcelist->After( first_source );
00310             while ( nextsource ) {
00311                // make sure we are at the correct directory level by cd'ing to path
00312                TDirectory *ndir = nextsource->GetDirectory(path);
00313                if (ndir) {
00314                   ndir->cd();
00315                   TKey *key2 = (TKey*)gDirectory->GetListOfKeys()->FindObject(key->GetName());
00316                   if (key2) {
00317                      TObject *hobj = key2->ReadObj();
00318                      hobj->ResetBit(kMustCleanup);
00319                      listH.Add(hobj);
00320                      Int_t error = 0;
00321                      obj->Execute("Merge", listHargs.Data(), &error);
00322                      if (error) {
00323                         cerr << "Error calling Merge() on " << obj->GetName()
00324                              << " with the corresponding object in " << nextsource->GetName() << endl;
00325                      }
00326                      listH.Delete();
00327                   }
00328                }
00329                nextsource = (TFile*)sourcelist->After( nextsource );
00330             }
00331          } else if ( obj->IsA()->InheritsFrom( THStack::Class() ) ) {
00332             THStack *hstack1 = (THStack*) obj;
00333             TList* l = new TList();
00334 
00335             // loop over all source files and merge the histos of the
00336             // corresponding THStacks with the one pointed to by "hstack1"
00337             TFile *nextsource = (TFile*)sourcelist->After( first_source );
00338             while ( nextsource ) {
00339                // make sure we are at the correct directory level by cd'ing to path
00340                TDirectory *ndir = nextsource->GetDirectory(path);
00341                if (ndir) {
00342                   ndir->cd();
00343                   TKey *key2 = (TKey*)gDirectory->GetListOfKeys()->FindObject(hstack1->GetName());
00344                   if (key2) {
00345                     THStack *hstack2 = (THStack*) key2->ReadObj();
00346                     l->Add(hstack2->GetHists()->Clone());
00347                     delete hstack2;
00348                   }
00349                }
00350 
00351                nextsource = (TFile*)sourcelist->After( nextsource );
00352             }
00353             hstack1->GetHists()->Merge(l);
00354             l->Delete();
00355          } else {
00356             // object is of no type that we can merge
00357             cout << "Cannot merge object type, name: "
00358                  << obj->GetName() << " title: " << obj->GetTitle() << endl;
00359 
00360             // loop over all source files and write similar objects directly to the output file
00361             TFile *nextsource = (TFile*)sourcelist->After( first_source );
00362             while ( nextsource ) {
00363                // make sure we are at the correct directory level by cd'ing to path
00364                TDirectory *ndir = nextsource->GetDirectory(path);
00365                if (ndir) {
00366                   ndir->cd();
00367                   TKey *key2 = (TKey*)gDirectory->GetListOfKeys()->FindObject(key->GetName());
00368                   if (key2) {
00369                      TObject *nobj = key2->ReadObj();
00370                      nobj->ResetBit(kMustCleanup);
00371                      int nbytes1 = target->WriteTObject(nobj, key2->GetName(), "SingleKey" );
00372                      if (nbytes1 <= 0) status = -1;
00373                      delete nobj;
00374                   }
00375                }
00376                nextsource = (TFile*)sourcelist->After( nextsource );
00377             }
00378          }
00379 
00380          // now write the merged histogram (which is "in" obj) to the target file
00381          // note that this will just store obj in the current directory level,
00382          // which is not persistent until the complete directory itself is stored
00383          // by "target->Write()" below
00384          target->cd();
00385 
00386          //!!if the object is a tree, it is stored in globChain...
00387          if(obj->IsA()->InheritsFrom( TDirectory::Class() )) {
00388             //printf("cas d'une directory\n");
00389          } else if(obj->IsA()->InheritsFrom( TTree::Class() )) {
00390             if (!noTrees) {
00391                globChain->ls();
00392                if (fastMethod && !reoptimize) globChain->Merge(target->GetFile(),0,"keep fast");
00393                else                           globChain->Merge(target->GetFile(),0,"keep");
00394                delete globChain;
00395             }
00396          } else {
00397             int nbytes2 = obj->Write( key->GetName(), TObject::kSingleKey );
00398             if (nbytes2 <= 0) status = -1;
00399          }
00400          oldkey = key;
00401          delete obj;
00402       } // while ( ( TKey *key = (TKey*)nextkey() ) )
00403       first_source = (TDirectory*)sourcelist->After(first_source);
00404    }
00405    // save modifications to target file
00406    target->SaveSelf(kTRUE);
00407    return status;
00408 }

Generated on Tue Jul 5 14:30:42 2011 for ROOT_528-00b_version by  doxygen 1.5.1