TZIPFile.cxx

Go to the documentation of this file.
00001 // @(#)root/io:$Id: TZIPFile.cxx 34257 2010-07-01 06:45:22Z brun $
00002 // Author: Fons Rademakers and Lassi Tuura  30/6/04
00003 
00004 /*************************************************************************
00005  * Copyright (C) 1995-2004, Rene Brun and Fons Rademakers.               *
00006  * All rights reserved.                                                  *
00007  *                                                                       *
00008  * For the licensing terms see $ROOTSYS/LICENSE.                         *
00009  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
00010  *************************************************************************/
00011 
00012 //////////////////////////////////////////////////////////////////////////
00013 //                                                                      //
00014 // TZIPFile                                                             //
00015 //                                                                      //
00016 // This class describes a ZIP archive file containing multiple          //
00017 // sub-files. Typically the sub-files are ROOT files. Notice that       //
00018 // the ROOT files should not be compressed when being added to the      //
00019 // ZIP file, since ROOT files are normally already compressed.          //
00020 // Such a ZIP file should be created like:                              //
00021 //                                                                      //
00022 //    zip -n root multi file1.root file2.root                           //
00023 //                                                                      //
00024 // which creates a ZIP file multi.zip.                                  //
00025 //                                                                      //
00026 // A ZIP archive consists of files compressed with the popular ZLIB     //
00027 // compression algorithm. The archive format is used among others by    //
00028 // PKZip and Info-ZIP. The compression algorithm is also used by        //
00029 // GZIP and the PNG graphics standard. The format of the archives is    //
00030 // explained briefly below. This class provides an interface to read    //
00031 // such archives.                                                       //
00032 //                                                                      //
00033 // A ZIP archive contains a prefix, series of archive members           //
00034 // (sub-files), and a central directory. In theory the archive could    //
00035 // span multiple disks (or files) with the central directory of the     //
00036 // whole archive on the last disk, but this class does not support      //
00037 // such multi-part archives. The prefix is only used in self-extracting //
00038 // executable archive files.                                            //
00039 //                                                                      //
00040 // The members are stored in the archive sequentially, each with a      //
00041 // local header followed by the (optionally) compressed data; the local //
00042 // header describes the member, including its file name and compressed  //
00043 // and real sizes. The central directory includes the member details    //
00044 // again, plus allows an extra member comment to be added. The last     //
00045 // member in the central directory is an end marker that can contain    //
00046 // a comment for the whole archive. Both the local header and the       //
00047 // central directory can also carry extra member-specific data; the     //
00048 // data in the local and global parts can be different.                 //
00049 // Because the archive has a global directory, access is efficient:     //
00050 // only the desired data needs to be read, and one does not have to     //
00051 // scan through the whole file to find the desired sub-file.            //
00052 //                                                                      //
00053 // Once the archive has been opened, the client can query the members   //
00054 // and read their contents by asking the archive for an offset where    //
00055 // the sub-file starts. The members can be accessed in any order.       //
00056 //                                                                      //
00057 //////////////////////////////////////////////////////////////////////////
00058 
00059 #include "TZIPFile.h"
00060 #include "TFile.h"
00061 #include "TObjArray.h"
00062 
00063 
00064 ClassImp(TZIPFile)
00065 
00066 //______________________________________________________________________________
00067 TZIPFile::TZIPFile() : TArchiveFile()
00068 {
00069    // Default ctor.
00070 
00071    fDirPos     = 0;
00072    fDirSize    = 0;
00073    fDirOffset  = 0;
00074 }
00075 
00076 //______________________________________________________________________________
00077 TZIPFile::TZIPFile(const char *archive, const char *member, TFile *file)
00078    : TArchiveFile(archive, member, file)
00079 {
00080    // Specify the archive name and member name. The member can be a decimal
00081    // number which allows to access the n-th member.
00082 
00083    fDirPos     = 0;
00084    fDirSize    = 0;
00085    fDirOffset  = 0;
00086 }
00087 
00088 //______________________________________________________________________________
00089 Int_t TZIPFile::OpenArchive()
00090 {
00091    // Open archive and read end-header and directory. Returns -1 in case
00092    // of error, 0 otherwise.
00093 
00094    if (ReadEndHeader(FindEndHeader()) == -1)
00095       return -1;
00096    return ReadDirectory();
00097 }
00098 
00099 //______________________________________________________________________________
00100 Long64_t TZIPFile::FindEndHeader()
00101 {
00102    // Find the end header of the ZIP archive. Returns 0 in case of error.
00103 
00104    const Int_t kBUFSIZE = 1024;
00105    Long64_t    size = fFile->GetSize();
00106    Long64_t    limit = TMath::Min(size, Long64_t(kMAX_VAR_LEN));
00107    char        buf[kBUFSIZE+4];
00108 
00109    // Note, this works correctly even if the signature straddles read
00110    // boundaries since we always read an overlapped area of four
00111    // bytes on the next read
00112    for (Long64_t offset = 4; offset < limit; ) {
00113       offset = TMath::Min(offset + kBUFSIZE, limit);
00114 
00115       Long64_t pos = size - offset;
00116       Int_t    n = TMath::Min(kBUFSIZE+4, Int_t(offset));
00117 
00118       fFile->Seek(pos);
00119       if (fFile->ReadBuffer(buf, n)) {
00120          Error("FindEndHeader", "error reading %d bytes at %lld", n, pos);
00121          return 0;
00122       }
00123 
00124       for (Int_t i = n - 4; i > 0; i--)
00125          if (buf[i]   == 0x50 && buf[i+1] == 0x4b &&
00126              buf[i+2] == 0x05 && buf[i+3] == 0x06) {
00127             return pos + i;
00128          }
00129    }
00130 
00131    Error("FindEndHeader", "did not find end header in %s", fArchiveName.Data());
00132 
00133    return 0;
00134 }
00135 
00136 //______________________________________________________________________________
00137 Int_t TZIPFile::ReadEndHeader(Long64_t pos)
00138 {
00139    // Read the end header of the ZIP archive including the archive comment
00140    // at the current file position. Check that it really was a single-disk
00141    // archive with all the entries as expected. Most importantly, figure
00142    // out where the central directory begins. Returns -1 in case of error,
00143    // 0 otherwise.
00144 
00145    char buf[kEND_HEADER_SIZE];
00146 
00147    // read and validate first the end header magic
00148    fFile->Seek(pos);
00149    if (fFile->ReadBuffer(buf, kZIP_MAGIC_LEN) ||
00150        Get(buf, kZIP_MAGIC_LEN) != kEND_HEADER_MAGIC) {
00151       Error("ReadEndHeader", "wrong end header magic in %s", fArchiveName.Data());
00152       return -1;
00153    }
00154 
00155    // read rest of the header
00156    if (fFile->ReadBuffer(buf + kZIP_MAGIC_LEN,  kEND_HEADER_SIZE - kZIP_MAGIC_LEN)) {
00157       Error("ReadEndHeader", "error reading %d end header bytes from %s",
00158             kEND_HEADER_SIZE - kZIP_MAGIC_LEN, fArchiveName.Data());
00159       return -1;
00160    }
00161 
00162    UInt_t   disk    = Get(buf + kEND_DISK_OFF,       kEND_DISK_LEN);
00163    UInt_t   dirdisk = Get(buf + kEND_DIR_DISK_OFF,   kEND_DIR_DISK_LEN);
00164    UInt_t   dhdrs   = Get(buf + kEND_DISK_HDRS_OFF,  kEND_DISK_HDRS_LEN);
00165    UInt_t   thdrs   = Get(buf + kEND_TOTAL_HDRS_OFF, kEND_TOTAL_HDRS_LEN);
00166    Long64_t dirsz   = Get(buf + kEND_DIR_SIZE_OFF,   kEND_DIR_SIZE_LEN);
00167    Long64_t diroff  = Get(buf + kEND_DIR_OFFSET_OFF, kEND_DIR_OFFSET_LEN);
00168    Int_t    commlen = Get(buf + kEND_COMMENTLEN_OFF, kEND_COMMENTLEN_LEN);
00169 
00170    if (disk != 0 || dirdisk != 0 || dhdrs != thdrs || diroff + dirsz != pos) {
00171       Error("ReadEndHeader", "inconsistency in end header data in %s",
00172             fArchiveName.Data());
00173       return -1;
00174    }
00175 
00176    char *comment = new char[commlen+1];
00177    if (fFile->ReadBuffer(comment, commlen)) {
00178       Error("ReadEndHeader", "error reading %d end header comment bytes from %s",
00179             commlen, fArchiveName.Data());
00180       delete [] comment;
00181       return -1;
00182    }
00183    comment[commlen] = '\0';
00184 
00185    fComment   = comment;
00186    fDirOffset = fDirPos = diroff;
00187    fDirSize   = dirsz;
00188 
00189    delete [] comment;
00190 
00191    return 0;
00192 }
00193 
00194 //______________________________________________________________________________
00195 Int_t TZIPFile::ReadDirectory()
00196 {
00197    // Read the directory of the ZIP archive. Returns -1 in case of error,
00198    // 0 otherwise.
00199 
00200    char   buf[kDIR_HEADER_SIZE];
00201    UInt_t n, i;
00202 
00203    // read and validate first the header magic
00204    fFile->Seek(fDirPos);
00205    if (fFile->ReadBuffer(buf, kZIP_MAGIC_LEN) ||
00206        (n = Get(buf, kZIP_MAGIC_LEN)) != kDIR_HEADER_MAGIC) {
00207       Error("ReadDirectory", "wrong directory header magic in %s",
00208             fArchiveName.Data());
00209       return -1;
00210    }
00211 
00212    // now read the full directory
00213    for (i = 0; n == kDIR_HEADER_MAGIC; i++) {
00214       // read the rest of the header
00215       if (fFile->ReadBuffer(buf + kZIP_MAGIC_LEN, kDIR_HEADER_SIZE - kZIP_MAGIC_LEN)) {
00216          Error("ReadDirectory", "error reading %d directory bytes from %s",
00217                kDIR_HEADER_SIZE - kZIP_MAGIC_LEN, fArchiveName.Data());
00218          return -1;
00219       }
00220 
00221       UInt_t   version = Get(buf + kDIR_VREQD_OFF,      kDIR_VREQD_LEN);
00222       UInt_t   flags   = Get(buf + kDIR_FLAG_OFF,       kDIR_FLAG_LEN);
00223       UInt_t   method  = Get(buf + kDIR_METHOD_OFF,     kDIR_METHOD_LEN);
00224       UInt_t   time    = Get(buf + kDIR_DATE_OFF,       kDIR_DATE_LEN);
00225       UInt_t   crc32   = Get(buf + kDIR_CRC32_OFF,      kDIR_CRC32_LEN);
00226       Long64_t csize   = Get(buf + kDIR_CSIZE_OFF,      kDIR_CSIZE_LEN);
00227       Long64_t usize   = Get(buf + kDIR_USIZE_OFF,      kDIR_USIZE_LEN);
00228       Int_t    namelen = Get(buf + kDIR_NAMELEN_OFF,    kDIR_NAMELEN_LEN);
00229       Int_t    extlen  = Get(buf + kDIR_EXTRALEN_OFF,   kDIR_EXTRALEN_LEN);
00230       Int_t    commlen = Get(buf + kDIR_COMMENTLEN_OFF, kDIR_COMMENTLEN_LEN);
00231       UInt_t   disk    = Get(buf + kDIR_DISK_START_OFF, kDIR_DISK_START_LEN);
00232       UInt_t   iattr   = Get(buf + kDIR_INT_ATTR_OFF,   kDIR_INT_ATTR_LEN);
00233       UInt_t   xattr   = Get(buf + kDIR_EXT_ATTR_OFF,   kDIR_EXT_ATTR_LEN);
00234       Long64_t offset  = Get(buf + kDIR_ENTRY_POS_OFF,  kDIR_ENTRY_POS_LEN);
00235 
00236       // check value sanity and the variable-length fields
00237       if (Get(buf + kDIR_MAGIC_OFF, kZIP_MAGIC_LEN) != kDIR_HEADER_MAGIC ||
00238           version > kARCHIVE_VERSION ||
00239           flags & 8 ||
00240           (method != kSTORED && method != kDEFLATED) ||
00241           disk != 0 ||
00242           csize < 0 ||
00243           usize < 0 ||
00244           csize > kMaxUInt ||
00245           usize > kMaxUInt) {
00246          Error("ReadDirectory", "inconsistency in directory data in %s",
00247                fArchiveName.Data());
00248          return -1;
00249       }
00250 
00251       char *name    = new char[namelen+1];
00252       char *extra   = new char[extlen+1];
00253       char *comment = new char[commlen+1];
00254       if (fFile->ReadBuffer(name, namelen) ||
00255           fFile->ReadBuffer(extra, extlen) ||
00256           fFile->ReadBuffer(comment, commlen)) {
00257          Error("ReadDirectory", "error reading additional directory data from %s",
00258                fArchiveName.Data());
00259          delete [] name;
00260          delete [] extra;
00261          delete [] comment;
00262          return -1;
00263       }
00264       name[namelen]    = '\0';
00265       extra[extlen]    = '\0';
00266       comment[commlen] = '\0';
00267 
00268       // create a new archive member and store the fields
00269       TZIPMember *m = new TZIPMember(name);
00270       fMembers->Add(m);
00271 
00272       m->fMethod = method;
00273       m->fLevel  = method == kSTORED ? 0
00274                                      : (flags & 6)/2 == 0 ? 3  // default (:N)
00275                                      : (flags & 6)/2 == 1 ? 9  // best (:X)
00276                                      : (flags & 6)/2 == 2 ? 2  // fast (:F)
00277                                      : (flags & 6)/2 == 3 ? 1  // fastest (:F)
00278                                      : 3;                      // unreached
00279       m->fCsize     = csize;
00280       m->fDsize     = usize;
00281       m->fCRC32     = crc32;
00282       m->fModTime.Set(time, kTRUE);   // DOS date/time format
00283       m->fGlobalLen = extlen;
00284       m->fGlobal    = extra;
00285       m->fComment   = comment;
00286       m->fAttrInt   = iattr;
00287       m->fAttrExt   = xattr;
00288       m->fPosition  = offset;
00289 
00290       delete [] name;
00291       delete [] comment;
00292       // extra is adopted be the TZIPMember
00293 
00294       if (gDebug)
00295          Info("ReadDirectory", "%lld  %lld  %s  %s",
00296               m->GetDecompressedSize(), m->GetCompressedSize(),
00297               m->GetModTime().AsSQLString(), m->GetName());
00298 
00299       // done, read the next magic
00300       if (fFile->ReadBuffer(buf, kZIP_MAGIC_LEN)) {
00301          Error("ReadDirectory", "error reading %d directory bytes from %s",
00302                kZIP_MAGIC_LEN, fArchiveName.Data());
00303          return -1;
00304       }
00305       n = Get(buf, kZIP_MAGIC_LEN);
00306    }
00307 
00308    // should now see end of archive
00309    if (n != kEND_HEADER_MAGIC) {
00310       Error("ReadDirectory", "wrong end header magic in %s", fArchiveName.Data());
00311       return -1;
00312    }
00313 
00314    return 0;
00315 }
00316 
00317 //______________________________________________________________________________
00318 Int_t TZIPFile::ReadMemberHeader(TZIPMember *member)
00319 {
00320    // Read the member header of the ZIP archive. Sets the position where
00321    // the data starts in the member object. Returns -1 in case of error,
00322    // 0 otherwise.
00323 
00324    // read file header to find start of data, since extra len might be
00325    // different we cannot take it from the directory data
00326    char buf[kENTRY_HEADER_SIZE];
00327 
00328    // read and validate first the entry header magic
00329    fFile->Seek(member->fPosition);
00330    if (fFile->ReadBuffer(buf, kZIP_MAGIC_LEN) ||
00331        Get(buf, kZIP_MAGIC_LEN) != kENTRY_HEADER_MAGIC) {
00332       Error("ReadMemberHeader", "wrong entry header magic in %s",
00333             fArchiveName.Data());
00334       return -1;
00335    }
00336 
00337    // read rest of the header
00338    if (fFile->ReadBuffer(buf + kZIP_MAGIC_LEN,  kENTRY_HEADER_SIZE - kZIP_MAGIC_LEN)) {
00339       Error("ReadMemberHeader", "error reading %d member header bytes from %s",
00340             kENTRY_HEADER_SIZE - kZIP_MAGIC_LEN, fArchiveName.Data());
00341       return -1;
00342    }
00343    Int_t namelen = Get(buf + kENTRY_NAMELEN_OFF,  kENTRY_NAMELEN_LEN);
00344    Int_t extlen  = Get(buf + kENTRY_EXTRALEN_OFF, kENTRY_EXTRALEN_LEN);
00345 
00346    member->fFilePosition = member->fPosition + kENTRY_HEADER_SIZE +
00347                            namelen + extlen;
00348 
00349    return 0;
00350 }
00351 
00352 //______________________________________________________________________________
00353 Int_t TZIPFile::SetCurrentMember()
00354 {
00355    // Find the desired member in the member array and make it the
00356    // current member. Returns -1 in case member is not found, 0 otherwise.
00357 
00358    fCurMember = 0;
00359 
00360    if (fMemberIndex > -1) {
00361       fCurMember = (TZIPMember *) fMembers->At(fMemberIndex);
00362       if (!fCurMember)
00363          return -1;
00364       fMemberName = fCurMember->GetName();
00365    } else {
00366       for (int i = 0; i < fMembers->GetEntriesFast(); i++) {
00367          TZIPMember *m = (TZIPMember *) fMembers->At(i);
00368          if (fMemberName == m->fName) {
00369             fCurMember   = m;
00370             fMemberIndex = i;
00371             break;
00372          }
00373       }
00374       if (!fCurMember)
00375          return -1;
00376    }
00377 
00378    return ReadMemberHeader((TZIPMember *)fCurMember);
00379 }
00380 
00381 //______________________________________________________________________________
00382 UInt_t TZIPFile::Get(const void *buffer, Int_t bytes)
00383 {
00384    // Read a "bytes" long little-endian integer value from "buffer".
00385 
00386    UInt_t value = 0;
00387 #ifdef R__BYTESWAP
00388    memcpy(&value, buffer, bytes);
00389 #else
00390    const UChar_t *buf = static_cast<const unsigned char *>(buffer);
00391    for (UInt_t shift = 0; bytes; shift += 8, --bytes, ++buf)
00392       value += *buf << shift;
00393 #endif
00394    return value;
00395 }
00396 
00397 //______________________________________________________________________________
00398 void TZIPFile::Print(Option_t *) const
00399 {
00400    // Pretty print ZIP archive members.
00401 
00402    if (fMembers)
00403       fMembers->Print();
00404 }
00405 
00406 
00407 ClassImp(TZIPMember)
00408 
00409 //______________________________________________________________________________
00410 TZIPMember::TZIPMember()
00411 {
00412    // Default ctor.
00413 
00414    fLocal     = 0;
00415    fLocalLen  = 0;
00416    fGlobal    = 0;
00417    fGlobalLen = 0;
00418    fCRC32     = 0;
00419    fAttrInt   = 0;
00420    fAttrExt   = 0;
00421    fMethod    = 0;
00422    fLevel     = 0;
00423 }
00424 
00425 //______________________________________________________________________________
00426 TZIPMember::TZIPMember(const char *name)
00427    : TArchiveMember(name)
00428 {
00429    // Create ZIP member file.
00430 
00431    fLocal     = 0;
00432    fLocalLen  = 0;
00433    fGlobal    = 0;
00434    fGlobalLen = 0;
00435    fCRC32     = 0;
00436    fAttrInt   = 0;
00437    fAttrExt   = 0;
00438    fMethod    = 0;
00439    fLevel     = 0;
00440 }
00441 
00442 //______________________________________________________________________________
00443 TZIPMember::TZIPMember(const TZIPMember &member)
00444    : TArchiveMember(member)
00445 {
00446    // Copy ctor.
00447 
00448    fLocal     = 0;
00449    fLocalLen  = member.fLocalLen;
00450    fGlobal    = 0;
00451    fGlobalLen = member.fGlobalLen;
00452    fCRC32     = member.fCRC32;
00453    fAttrInt   = member.fAttrInt;
00454    fAttrExt   = member.fAttrExt;
00455    fMethod    = member.fMethod;
00456    fLevel     = member.fLevel;
00457 
00458    if (member.fLocal) {
00459       fLocal = new char [fLocalLen];
00460       memcpy(fLocal, member.fLocal, fLocalLen);
00461    }
00462    if (member.fGlobal) {
00463       fGlobal = new char [fGlobalLen];
00464       memcpy(fGlobal, member.fGlobal, fGlobalLen);
00465    }
00466 }
00467 
00468 //______________________________________________________________________________
00469 TZIPMember &TZIPMember::operator=(const TZIPMember &rhs)
00470 {
00471    // Assignment operator.
00472 
00473    if (this != &rhs) {
00474       TArchiveMember::operator=(rhs);
00475 
00476       delete [] (char*) fLocal;
00477       delete [] (char*) fGlobal;
00478 
00479       fLocal     = 0;
00480       fLocalLen  = rhs.fLocalLen;
00481       fGlobal    = 0;
00482       fGlobalLen = rhs.fGlobalLen;
00483       fCRC32     = rhs.fCRC32;
00484       fAttrInt   = rhs.fAttrInt;
00485       fAttrExt   = rhs.fAttrExt;
00486       fMethod    = rhs.fMethod;
00487       fLevel     = rhs.fLevel;
00488 
00489       if (rhs.fLocal) {
00490          fLocal = new char [fLocalLen];
00491          memcpy(fLocal, rhs.fLocal, fLocalLen);
00492       }
00493       if (rhs.fGlobal) {
00494          fGlobal = new char [fGlobalLen];
00495          memcpy(fGlobal, rhs.fGlobal, fGlobalLen);
00496       }
00497    }
00498    return *this;
00499 }
00500 
00501 //______________________________________________________________________________
00502 TZIPMember::~TZIPMember()
00503 {
00504    // Cleanup.
00505 
00506    delete [] (char*) fLocal;
00507    delete [] (char*) fGlobal;
00508 }
00509 
00510 //______________________________________________________________________________
00511 void TZIPMember::Print(Option_t *) const
00512 {
00513    // Pretty print basic ZIP member info.
00514 
00515    //printf("%-20lld %s   %s\n", fDsize, fModTime.AsSQLString(), fName.Data());
00516    // above statement does not work with VC++7.1, spurious (null)
00517    printf("%-20lld", fDsize);
00518    printf(" %s   %s\n", fModTime.AsSQLString(), fName.Data());
00519 }

Generated on Tue Jul 5 14:30:21 2011 for ROOT_528-00b_version by  doxygen 1.5.1