ZDeflate.h

Go to the documentation of this file.
00001 /* @(#)root/zip:$Id: ZDeflate.h 20882 2007-11-19 11:31:26Z rdm $ */
00002 /* Author: */
00003 /*
00004 
00005  Copyright (C) 1990-1993 Mark Adler, Richard B. Wales, Jean-loup Gailly,
00006  Kai Uwe Rommel and Igor Mandrichenko.
00007  For conditions of distribution and use, see copyright notice in zlib.h
00008 
00009 */
00010 
00011 /*
00012  *  deflate.c by Jean-loup Gailly.
00013  *
00014  *  PURPOSE
00015  *
00016  *      Identify new text as repetitions of old text within a fixed-
00017  *      length sliding window trailing behind the new text.
00018  *
00019  *  DISCUSSION
00020  *
00021  *      The "deflation" process depends on being able to identify portions
00022  *      of the input text which are identical to earlier input (within a
00023  *      sliding window trailing behind the input currently being processed).
00024  *
00025  *      The most straightforward technique turns out to be the fastest for
00026  *      most input files: try all possible matches and select the longest.
00027  *      The key feature of this algorithm is that insertions into the string
00028  *      dictionary are very simple and thus fast, and deletions are avoided
00029  *      completely. Insertions are performed at each input character, whereas
00030  *      string matches are performed only when the previous match ends. So it
00031  *      is preferable to spend more time in matches to allow very fast string
00032  *      insertions and avoid deletions. The matching algorithm for small
00033  *      strings is inspired from that of Rabin & Karp. A brute force approach
00034  *      is used to find longer strings when a small match has been found.
00035  *      A similar algorithm is used in comic (by Jan-Mark Wams) and freeze
00036  *      (by Leonid Broukhis).
00037  *         A previous version of this file used a more sophisticated algorithm
00038  *      (by Fiala and Greene) which is guaranteed to run in linear amortized
00039  *      time, but has a larger average cost, uses more memory and is patented.
00040  *      However the F&G algorithm may be faster for some highly redundant
00041  *      files if the parameter max_chain_length (described below) is too large.
00042  *
00043  *  ACKNOWLEDGEMENTS
00044  *
00045  *      The idea of lazy evaluation of matches is due to Jan-Mark Wams, and
00046  *      I found it in 'freeze' written by Leonid Broukhis.
00047  *      Thanks to many info-zippers for bug reports and testing.
00048  *
00049  *  REFERENCES
00050  *
00051  *      APPNOTE.TXT documentation file in PKZIP 1.93a distribution.
00052  *
00053  *      A description of the Rabin and Karp algorithm is given in the book
00054  *         "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
00055  *
00056  *      Fiala,E.R., and Greene,D.H.
00057  *         Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595
00058  *
00059  *  INTERFACE
00060  *
00061  *      void lm_init (int pack_level, ush *flags)
00062  *          Initialize the "longest match" routines for a new file
00063  *
00064  *      ulg deflate (void)
00065  *          Processes a new input file and return its compressed length. Sets
00066  *          the compressed length, crc, deflate flags and internal file
00067  *          attributes.
00068  */
00069 
00070 /* #include "zip.h" */
00071 /* #include "ZIP.h" */
00072 
00073 /* ===========================================================================
00074  * Configuration parameters
00075  */
00076 
00077 /* Compile with MEDIUM_MEM to reduce the memory requirements or
00078  * with SMALL_MEM to use as little memory as possible. Use BIG_MEM if the
00079  * entire input file can be held in memory (not possible on 16 bit systems).
00080  * Warning: defining these symbols affects HASH_BITS (see below) and thus
00081  * affects the compression ratio. The compressed output
00082  * is still correct, and might even be smaller in some cases.
00083  */
00084 
00085 #ifdef SMALL_MEM
00086 #   define HASH_BITS  13  /* Number of bits used to hash strings */
00087 #endif
00088 #ifdef MEDIUM_MEM
00089 #   define HASH_BITS  14
00090 #endif
00091 #ifndef HASH_BITS
00092 #   define HASH_BITS  15
00093    /* For portability to 16 bit machines, do not use values above 15. */
00094 #endif
00095 
00096 #define HASH_SIZE (unsigned)(1<<HASH_BITS)
00097 #define HASH_MASK (HASH_SIZE-1)
00098 #define WMASK     (WSIZE-1)
00099 /* HASH_SIZE and WSIZE must be powers of two */
00100 
00101 #define NIL 0
00102 /* Tail of hash chains */
00103 
00104 #define FAST 4
00105 #define SLOW 2
00106 /* speed options for the general purpose bit flag */
00107 
00108 #ifndef TOO_FAR
00109 #  define TOO_FAR 4096
00110 #endif
00111 /* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
00112 
00113 #ifdef ATARI_ST
00114 #  undef MSDOS /* avoid the processor specific parts */
00115    /* (but the Atari should never define MSDOS anyway ...) */
00116 #endif
00117 #if defined(MSDOS) && !defined(NO_ASM) && !defined(ASMV) && !defined(WIN32)
00118 #  define ASMV
00119 #endif
00120 #if defined(ASMV) && !defined(MSDOS) && defined(DYN_ALLOC)
00121   error: DYN_ALLOC not yet supported in match.s
00122 #endif
00123 #if defined(MSDOS) && !defined(__32BIT__)
00124 #  define MAXSEG_64K
00125 #endif
00126 
00127 /* ===========================================================================
00128  * Local data used by the "longest match" routines.
00129  */
00130 
00131 #if defined(BIG_MEM) || defined(MMAP)
00132   typedef unsigned Pos; /* must be at least 32 bits */
00133 #else
00134   typedef ush Pos;
00135 #endif
00136 typedef unsigned IPos;
00137 /* A Pos is an index in the character window. We use short instead of int to
00138  * save space in the various tables. IPos is used only for parameter passing.
00139  */
00140 
00141 #ifndef DYN_ALLOC
00142   uch    R__window[2L*WSIZE];
00143   /* Sliding window. Input bytes are read into the second half of the window,
00144    * and move to the first half later to keep a dictionary of at least WSIZE
00145    * bytes. With this organization, matches are limited to a distance of
00146    * WSIZE-MAX_MATCH bytes, but this ensures that IO is always
00147    * performed with a length multiple of the block size. Also, it limits
00148    * the window size to 64K, which is quite useful on MSDOS.
00149    * To do: limit the window size to WSIZE+BSZ if SMALL_MEM (the code would
00150    * be less efficient since the data would have to be copied WSIZE/BSZ times)
00151    */
00152   Pos    R__prev[WSIZE];
00153   /* Link to older string with same hash index. To limit the size of this
00154    * array to 64K, this link is maintained only for the last 32K strings.
00155    * An index in this array is thus a window index modulo 32K.
00156    */
00157   Pos    R__head[HASH_SIZE];
00158   /* Heads of the hash chains or NIL. If your compiler thinks that
00159    * HASH_SIZE is a dynamic value, recompile with -DDYN_ALLOC.
00160    */
00161 #else
00162   uch    * near R__window = NULL;
00163   Pos    * near R__prev   = NULL;
00164   Pos    * near R__head;
00165 #endif
00166 ulg R__window_size;
00167 /* window size, 2*WSIZE except for MMAP or BIG_MEM, where it is the
00168  * input file length plus MIN_LOOKAHEAD.
00169  */
00170 
00171 long R__block_start;
00172 /* window position at the beginning of the current output block. Gets
00173  * negative when the window is moved backwards.
00174  */
00175 
00176 local int sliding;
00177 /* Set to false when the input file is already in memory */
00178 
00179 local unsigned ins_h;  /* hash index of string to be inserted */
00180 
00181 #define H_SHIFT  ((HASH_BITS+MIN_MATCH-1)/MIN_MATCH)
00182 /* Number of bits by which ins_h and del_h must be shifted at each
00183  * input step. It must be such that after MIN_MATCH steps, the oldest
00184  * byte no longer takes part in the hash key, that is:
00185  *   H_SHIFT * MIN_MATCH >= HASH_BITS
00186  */
00187 
00188 unsigned int near R__prev_length;
00189 /* Length of the best match at previous step. Matches not greater than this
00190  * are discarded. This is used in the lazy match evaluation.
00191  */
00192 
00193       unsigned near R__strstart;      /* start of string to insert */
00194       unsigned near R__match_start;   /* start of matching string */
00195 local int           eofile;           /* flag set at end of input file */
00196 local unsigned      lookahead;        /* number of valid bytes ahead in window */
00197 
00198 unsigned near R__max_chain_length;
00199 /* To speed up deflation, hash chains are never searched beyond this length.
00200  * A higher limit improves compression ratio but degrades the speed.
00201  */
00202 
00203 local unsigned int max_lazy_match;
00204 /* Attempt to find a better match only when the current match is strictly
00205  * smaller than this value. This mechanism is used only for compression
00206  * levels >= 4.
00207  */
00208 #define max_insert_length  max_lazy_match
00209 /* Insert new strings in the hash table only if the match length
00210  * is not greater than this length. This saves time but degrades compression.
00211  * max_insert_length is used only for compression levels <= 3.
00212  */
00213 
00214 unsigned near R__good_match;
00215 /* Use a faster search when the previous match is longer than this */
00216 
00217 
00218 /* Values for max_lazy_match, good_match and max_chain_length, depending on
00219  * the desired pack level (0..9). The values given below have been tuned to
00220  * exclude worst case performance for pathological files. Better values may be
00221  * found for specific files.
00222  */
00223 
00224 typedef struct config {
00225    ush good_length; /* reduce lazy search above this match length */
00226    ush max_lazy;    /* do not perform lazy search above this match length */
00227    ush nice_length; /* quit search above this match length */
00228    ush max_chain;
00229 } config;
00230 
00231 #ifdef  FULL_SEARCH
00232 # define R__nice_match MAX_MATCH
00233 #else
00234   int near R__nice_match; /* Stop searching when current match exceeds this */
00235 #endif
00236 
00237 local config configuration_table[10] = {
00238 /*      good lazy nice chain */
00239 /* 0 */ {0,    0,  0,    0},  /* store only */
00240 /* 1 */ {4,    4,  8,    4},  /* maximum speed, no lazy matches */
00241 /* 2 */ {4,    5, 16,    8},
00242 /* 3 */ {4,    6, 32,   32},
00243 
00244 /* 4 */ {4,    4, 16,   16},  /* lazy matches */
00245 /* 5 */ {8,   16, 32,   32},
00246 /* 6 */ {8,   16, 128, 128},
00247 /* 7 */ {8,   32, 128, 256},
00248 /* 8 */ {32, 128, 258, 1024},
00249 /* 9 */ {32, 258, 258, 4096}}; /* maximum compression */
00250 
00251 /* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4
00252  * For deflate_fast() (levels <= 3) good is ignored and lazy has a different
00253  * meaning.
00254  */
00255 
00256 #define EQUAL 0
00257 /* result of memcmp for equal strings */
00258 
00259 /* ===========================================================================
00260  *  Prototypes for local functions.
00261  */
00262 
00263 local void R__fill_window    OF((void));
00264 local ulg  R__Deflate_fast   OF((void));
00265 
00266       int  R__longest_match  OF((IPos cur_match));
00267 #ifdef ASMV
00268       void match_init OF((void)); /* asm code initialization */
00269 #endif
00270 
00271 #ifdef DEBUG
00272 local  void check_match OF((IPos start, IPos match, int length));
00273 #endif
00274 
00275 /* ===========================================================================
00276  * Update a hash value with the given input byte
00277  * IN  assertion: all calls to to UPDATE_HASH are made with consecutive
00278  *    input characters, so that a running hash key can be computed from the
00279  *    previous key instead of complete recalculation each time.
00280  */
00281 #define UPDATE_HASH(h,c) (h = (((h)<<H_SHIFT) ^ (c)) & HASH_MASK)
00282 
00283 /* ===========================================================================
00284  * Insert string s in the dictionary and set match_head to the previous head
00285  * of the hash chain (the most recent string with same hash key). Return
00286  * the previous length of the hash chain.
00287  * IN  assertion: all calls to to INSERT_STRING are made with consecutive
00288  *    input characters and the first MIN_MATCH bytes of s are valid
00289  *    (except for the last MIN_MATCH-1 bytes of the input file).
00290  */
00291 #define INSERT_STRING(s, match_head) \
00292    (UPDATE_HASH(ins_h, R__window[(s) + MIN_MATCH-1]), \
00293     R__prev[(s) & WMASK] = match_head = R__head[ins_h], \
00294     R__head[ins_h] = (s))
00295 
00296 /* ===========================================================================
00297  * Initialize the "longest match" routines for a new file
00298  *
00299  * IN assertion: window_size is > 0 if the input file is already read or
00300  *    mmap'ed in the window[] array, 0 otherwise. In the first case,
00301  *    window_size is sufficient to contain the whole input file plus
00302  *    MIN_LOOKAHEAD bytes (to avoid referencing memory beyond the end
00303  *    of window[] when looking for matches towards the end).
00304  */
00305 void R__lm_init (int pack_level, ush *flags)
00306     /* int pack_level;  0: store, 1: best speed, 9: best compression */
00307     /* ush *flags;      general purpose bit flag */
00308 {
00309     register unsigned j;
00310 
00311     if (pack_level < 1 || pack_level > 9) R__error("bad pack level");
00312 
00313     /* Do not slide the window if the whole input is already in memory
00314      * (window_size > 0)
00315      */
00316     sliding = 0;
00317     if (R__window_size == 0L) {
00318         sliding = 1;
00319         R__window_size = (ulg)2L*WSIZE;
00320     }
00321 
00322     /* Use dynamic allocation if compiler does not like big static arrays: */
00323 #ifdef DYN_ALLOC
00324     if (R__window == NULL) {
00325         R__window = (uch*) fcalloc(WSIZE,   2*sizeof(uch));
00326         if (R__window == NULL) R__error("window allocation");
00327     }
00328     if (R__prev == NULL) {
00329         R__prev   = (Pos*) fcalloc(WSIZE,     sizeof(Pos));
00330         R__head   = (Pos*) fcalloc(HASH_SIZE, sizeof(Pos));
00331         if (R__prev == NULL || R__head == NULL) {
00332             R__error("hash table allocation");
00333         }
00334     }
00335 #endif /* DYN_ALLOC */
00336 
00337     /* Initialize the hash table (avoiding 64K overflow for 16 bit systems).
00338      * prev[] will be initialized on the fly.
00339      */
00340     R__head[HASH_SIZE-1] = NIL;
00341     memset((char*)R__head, NIL, (unsigned)(HASH_SIZE-1)*sizeof(*R__head));
00342 
00343     /* Set the default configuration parameters:
00344      */
00345     max_lazy_match   = configuration_table[pack_level].max_lazy;
00346     R__good_match    = configuration_table[pack_level].good_length;
00347 #ifndef FULL_SEARCH
00348     R__nice_match    = configuration_table[pack_level].nice_length;
00349 #endif
00350     R__max_chain_length = configuration_table[pack_level].max_chain;
00351     if (pack_level == 1) {
00352        *flags |= FAST;
00353     } else if (pack_level == 9) {
00354        *flags |= SLOW;
00355     }
00356     /* ??? reduce max_chain_length for binary files */
00357 
00358     R__strstart = 0;
00359     R__block_start = 0L;
00360 #ifdef ASMV
00361     match_init(); /* initialize the asm code */
00362 #endif
00363 
00364     j = WSIZE;
00365 #ifndef MAXSEG_64K
00366     if (sizeof(int) > 2) j <<= 1; /* Can read 64K in one step */
00367 #endif
00368     lookahead = (*R__read_buf)((char*)R__window, j);
00369 
00370     if (lookahead == 0 || lookahead == (unsigned)EOF) {
00371        eofile = 1, lookahead = 0;
00372        return;
00373     }
00374     eofile = 0;
00375     /* Make sure that we always have enough lookahead. This is important
00376      * if input comes from a device such as a tty.
00377      */
00378     while (lookahead < MIN_LOOKAHEAD && !eofile) R__fill_window();
00379 
00380     ins_h = 0;
00381     for (j=0; j<MIN_MATCH-1; j++) UPDATE_HASH(ins_h, R__window[j]);
00382     /* If lookahead < MIN_MATCH, ins_h is garbage, but this is
00383      * not important since only literal bytes will be emitted.
00384      */
00385 }
00386 
00387 /* ===========================================================================
00388  * Free the window and hash table
00389  */
00390 void R__lm_free()
00391 {
00392 #ifdef DYN_ALLOC
00393     if (R__window != NULL) {
00394         fcfree(R__window);
00395         R__window = NULL;
00396     }
00397     if (R__prev != NULL) {
00398         fcfree(R__prev);
00399         fcfree(R__head);
00400         R__prev = R__head = NULL;
00401     }
00402 #endif /* DYN_ALLOC */
00403 }
00404 
00405 /* ===========================================================================
00406  * Set match_start to the longest match starting at the given string and
00407  * return its length. Matches shorter or equal to prev_length are discarded,
00408  * in which case the result is equal to prev_length and match_start is
00409  * garbage.
00410  * IN assertions: cur_match is the head of the hash chain for the current
00411  *   string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
00412  */
00413 #ifndef ASMV
00414 /* For MSDOS, OS/2 and 386 Unix, an optimized version is in match.asm or
00415  * match.s. The code is functionally equivalent, so you can use the C version
00416  * if desired.  A 68000 version is in amiga/match_68.a -- this could be used
00417  * with other 68000 based systems such as Macintosh with a little effort.
00418  */
00419 int R__longest_match(IPos cur_match)
00420     /* IPos cur_match; */                       /* current match */
00421 {
00422     unsigned chain_length = R__max_chain_length;   /* max hash chain length */
00423     register uch *scan = R__window + R__strstart;     /* current string */
00424     register uch *match;                        /* matched string */
00425     register int len;                           /* length of current match */
00426     int best_len = R__prev_length;              /* best match length so far */
00427     IPos limit = R__strstart > (IPos)MAX_DIST ? R__strstart - (IPos)MAX_DIST : NIL;
00428     /* Stop when cur_match becomes <= limit. To simplify the code,
00429      * we prevent matches with the string of window index 0.
00430      */
00431 
00432 /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
00433  * It is easy to get rid of this optimization if necessary.
00434  */
00435 #if HASH_BITS < 8 || MAX_MATCH != 258
00436    error: Code too clever
00437 #endif
00438 
00439 #ifdef UNALIGNED_OK
00440     /* Compare two bytes at a time. Note: this is not always beneficial.
00441      * Try with and without -DUNALIGNED_OK to check.
00442      */
00443     register uch *strend = R__window + R__strstart + MAX_MATCH - 1;
00444     register ush scan_start = *(ush*)scan;
00445     register ush scan_end   = *(ush*)(scan+best_len-1);
00446 #else
00447     register uch *strend = R__window + R__strstart + MAX_MATCH;
00448     register uch scan_end1  = scan[best_len-1];
00449     register uch scan_end   = scan[best_len];
00450 #endif
00451 
00452     /* Do not waste too much time if we already have a good match: */
00453     if (R__prev_length >= R__good_match) {
00454         chain_length >>= 2;
00455     }
00456     Assert(R__strstart <= R__window_size-MIN_LOOKAHEAD, "insufficient lookahead");
00457 
00458     do {
00459         Assert(cur_match < R__strstart, "no future");
00460         match = R__window + cur_match;
00461 
00462         /* Skip to next match if the match length cannot increase
00463          * or if the match length is less than 2:
00464          */
00465 #if (defined(UNALIGNED_OK) && MAX_MATCH == 258)
00466         /* This code assumes sizeof(unsigned short) == 2. Do not use
00467          * UNALIGNED_OK if your compiler uses a different size.
00468          */
00469         if (*(ush*)(match+best_len-1) != scan_end ||
00470             *(ush*)match != scan_start) continue;
00471 
00472         /* It is not necessary to compare scan[2] and match[2] since they are
00473          * always equal when the other bytes match, given that the hash keys
00474          * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at
00475          * strstart+3, +5, ... up to strstart+257. We check for insufficient
00476          * lookahead only every 4th comparison; the 128th check will be made
00477          * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is
00478          * necessary to put more guard bytes at the end of the window, or
00479          * to check more often for insufficient lookahead.
00480          */
00481         scan++, match++;
00482         do {
00483         } while (*(ush*)(scan+=2) == *(ush*)(match+=2) &&
00484                  *(ush*)(scan+=2) == *(ush*)(match+=2) &&
00485                  *(ush*)(scan+=2) == *(ush*)(match+=2) &&
00486                  *(ush*)(scan+=2) == *(ush*)(match+=2) &&
00487                  scan < strend);
00488         /* The funny "do {}" generates better code on most compilers */
00489 
00490         /* Here, scan <= window+R__strstart+257 */
00491         Assert(scan <= R__window+(unsigned)(R__window_size-1), "wild scan");
00492         if (*scan == *match) scan++;
00493 
00494         len = (MAX_MATCH - 1) - (int)(strend-scan);
00495         scan = strend - (MAX_MATCH-1);
00496 
00497 #else /* UNALIGNED_OK */
00498 
00499         if (match[best_len]   != scan_end  ||
00500             match[best_len-1] != scan_end1 ||
00501             *match            != *scan     ||
00502             *++match          != scan[1])      continue;
00503 
00504         /* The check at best_len-1 can be removed because it will be made
00505          * again later. (This heuristic is not always a win.)
00506          * It is not necessary to compare scan[2] and match[2] since they
00507          * are always equal when the other bytes match, given that
00508          * the hash keys are equal and that HASH_BITS >= 8.
00509          */
00510         scan += 2, match++;
00511 
00512         /* We check for insufficient lookahead only every 8th comparison;
00513          * the 256th check will be made at strstart+258.
00514          */
00515         do {
00516         } while (*++scan == *++match && *++scan == *++match &&
00517                  *++scan == *++match && *++scan == *++match &&
00518                  *++scan == *++match && *++scan == *++match &&
00519                  *++scan == *++match && *++scan == *++match &&
00520                  scan < strend);
00521 
00522         len = MAX_MATCH - (int)(strend - scan);
00523         scan = strend - MAX_MATCH;
00524 
00525 #endif /* UNALIGNED_OK */
00526 
00527         if (len > best_len) {
00528             R__match_start = cur_match;
00529             best_len = len;
00530             if (len >= R__nice_match) break;
00531 #ifdef UNALIGNED_OK
00532             scan_end = *(ush*)(scan+best_len-1);
00533 #else
00534             scan_end1  = scan[best_len-1];
00535             scan_end   = scan[best_len];
00536 #endif
00537         }
00538     } while ((cur_match = R__prev[cur_match & WMASK]) > limit
00539              && --chain_length != 0);
00540 
00541     return best_len;
00542 }
00543 #endif /* ASMV */
00544 
00545 #ifdef DEBUG
00546 /* ===========================================================================
00547  * Check that the match at match_start is indeed a match.
00548  */
00549 local void check_match(IPos start, IPos match, int length)
00550 {
00551     /* check that the match is indeed a match */
00552     if (memcmp((char*)R__window + match,
00553                 (char*)R__window + start, length) != EQUAL) {
00554         fprintf(stderr,
00555             " start %d, match %d, length %d\n",
00556             start, match, length);
00557         R__error("invalid match");
00558     }
00559     if (verbose > 1) {
00560         fprintf(stderr,"\\[%d,%d]", start-match, length);
00561         do { putc(R__window[start++], stderr); } while (--length != 0);
00562     }
00563 }
00564 #else
00565 #  define check_match(start, match, length)
00566 #endif
00567 
00568 /* ===========================================================================
00569  * Fill the window when the lookahead becomes insufficient.
00570  * Updates strstart and lookahead, and sets eofile if end of input file.
00571  *
00572  * IN assertion: lookahead < MIN_LOOKAHEAD && strstart + lookahead > 0
00573  * OUT assertions: at least one byte has been read, or eofile is set;
00574  *    file reads are performed for at least two bytes (required for the
00575  *    translate_eol option).
00576  */
00577 local void R__fill_window()
00578 {
00579     register unsigned n, m;
00580     unsigned more = (unsigned)(R__window_size - (ulg)lookahead - (ulg)R__strstart);
00581     /* Amount of free space at the end of the window. */
00582 
00583     /* If the window is almost full and there is insufficient lookahead,
00584      * move the upper half to the lower one to make room in the upper half.
00585      */
00586     if (more == (unsigned)EOF) {
00587         /* Very unlikely, but possible on 16 bit machine if strstart == 0
00588          * and lookahead == 1 (input done one byte at time)
00589          */
00590         more--;
00591 
00592     /* For MMAP or BIG_MEM, the whole input file is already in memory
00593      * so we must not perform sliding. We must however call file_read
00594      * in order to compute the crc, update lookahead and possibly set eofile.
00595      */
00596     } else if (R__strstart >= WSIZE+MAX_DIST && sliding) {
00597 
00598         /* By the IN assertion, the window is not empty so we can't confuse
00599          * more == 0 with more == 64K on a 16 bit machine.
00600          */
00601         memcpy((char*)R__window, (char*)R__window+WSIZE, (unsigned)WSIZE);
00602         R__match_start -= WSIZE;
00603         R__strstart    -= WSIZE; /* we now have strstart >= MAX_DIST: */
00604 
00605         R__block_start -= (long) WSIZE;
00606 
00607         for (n = 0; n < HASH_SIZE; n++) {
00608             m = R__head[n];
00609             R__head[n] = (Pos)(m >= WSIZE ? m-WSIZE : NIL);
00610         }
00611         for (n = 0; n < WSIZE; n++) {
00612             m = R__prev[n];
00613             R__prev[n] = (Pos)(m >= WSIZE ? m-WSIZE : NIL);
00614             /* If n is not on any hash chain, prev[n] is garbage but
00615              * its value will never be used.
00616              */
00617         }
00618         more += WSIZE;
00619         if (verbose) putc('.', stderr);
00620     }
00621     /* At this point, more >= 2 */
00622     if (!eofile) {
00623         n = (*R__read_buf)((char*)R__window+R__strstart+lookahead, more);
00624         if (n == 0 || n == (unsigned)EOF) {
00625             eofile = 1;
00626         } else {
00627             lookahead += n;
00628         }
00629     }
00630 }
00631 
00632 /* ===========================================================================
00633  * Flush the current block, with given end-of-file flag.
00634  * IN assertion: strstart is set to the end of the current match.
00635  */
00636 #define FLUSH_BLOCK(eof) \
00637    R__flush_block(R__block_start >= 0L ? (char*)&R__window[(unsigned)R__block_start] : \
00638                 (char*)NULL, (long)R__strstart - R__block_start, (eof))
00639 
00640 /* ===========================================================================
00641  * Processes a new input file and return its compressed length. This
00642  * function does not perform lazy evaluationof matches and inserts
00643  * new strings in the dictionary only for unmatched strings or for short
00644  * matches. It is used only for the fast compression options.
00645  */
00646 local ulg R__Deflate_fast()
00647 {
00648     IPos hash_head; /* head of the hash chain */
00649     int flush;      /* set if current block must be flushed */
00650     unsigned match_length = 0;  /* length of best match */
00651 
00652     R__prev_length = MIN_MATCH-1;
00653     while (lookahead != 0) {
00654         /* Insert the string window[strstart .. strstart+2] in the
00655          * dictionary, and set hash_head to the head of the hash chain:
00656          */
00657         INSERT_STRING(R__strstart, hash_head);
00658 
00659         /* Find the longest match, discarding those <= prev_length.
00660          * At this point we have always match_length < MIN_MATCH
00661          */
00662         if (hash_head != NIL && R__strstart - hash_head <= MAX_DIST) {
00663             /* To simplify the code, we prevent matches with the string
00664              * of window index 0 (in particular we have to avoid a match
00665              * of the string with itself at the start of the input file).
00666              */
00667             match_length = R__longest_match (hash_head);
00668             /* R__longest_match() sets match_start */
00669             if (match_length > lookahead) match_length = lookahead;
00670         }
00671         if (match_length >= MIN_MATCH) {
00672             check_match(R__strstart, R__match_start, match_length);
00673 
00674             flush = R__ct_tally(R__strstart-R__match_start, match_length - MIN_MATCH);
00675 
00676             lookahead -= match_length;
00677 
00678             /* Insert new strings in the hash table only if the match length
00679              * is not too large. This saves time but degrades compression.
00680              */
00681             if (match_length <= max_insert_length) {
00682                 match_length--; /* string at strstart already in hash table */
00683                 do {
00684                     R__strstart++;
00685                     INSERT_STRING(R__strstart, hash_head);
00686                     /* strstart never exceeds WSIZE-MAX_MATCH, so there are
00687                      * always MIN_MATCH bytes ahead. If lookahead < MIN_MATCH
00688                      * these bytes are garbage, but it does not matter since
00689                      * the next lookahead bytes will be emitted as literals.
00690                      */
00691                 } while (--match_length != 0);
00692                 R__strstart++;
00693             } else {
00694                 R__strstart += match_length;
00695                 match_length = 0;
00696                 ins_h = R__window[R__strstart];
00697                 UPDATE_HASH(ins_h, R__window[R__strstart+1]);
00698 #if MIN_MATCH != 3
00699                 Call UPDATE_HASH() MIN_MATCH-3 more times
00700 #endif
00701             }
00702         } else {
00703             /* No match, output a literal byte */
00704             Tracevv((stderr,"%c",R__window[R__strstart]));
00705             flush = R__ct_tally (0, R__window[R__strstart]);
00706             lookahead--;
00707             R__strstart++;
00708         }
00709         if (flush) FLUSH_BLOCK(0), R__block_start = R__strstart;
00710 
00711         /* Make sure that we always have enough lookahead, except
00712          * at the end of the input file. We need MAX_MATCH bytes
00713          * for the next match, plus MIN_MATCH bytes to insert the
00714          * string following the next match.
00715          */
00716         while (lookahead < MIN_LOOKAHEAD && !eofile) R__fill_window();
00717 
00718     }
00719     return FLUSH_BLOCK(1); /* eof */
00720 }
00721 
00722 /* ===========================================================================
00723  * Same as above, but achieves better compression. We use a lazy
00724  * evaluation for matches: a match is finally adopted only if there is
00725  * no better match at the next window position.
00726  */
00727 ulg R__Deflate()
00728 {
00729     IPos hash_head;          /* head of hash chain */
00730     IPos R__prev_match;      /* previous match */
00731     int flush;               /* set if current block must be flushed */
00732     int match_available = 0; /* set if previous match exists */
00733     register unsigned match_length = MIN_MATCH-1; /* length of best match */
00734 #ifdef DEBUG
00735     /* extern ulg R__isize; */ /* byte length of input file, for debug only */
00736 #endif
00737 
00738     if (level <= 3) return R__Deflate_fast(); /* optimized for speed */
00739 
00740     /* Process the input block. */
00741     while (lookahead != 0) {
00742 
00743         /* Insert the string window[strstart .. strstart+2] in the
00744          * dictionary, and set hash_head to the head of the hash chain:
00745          */
00746         INSERT_STRING(R__strstart, hash_head);
00747 
00748         /* Find the longest match, discarding those <= prev_length.
00749          */
00750         R__prev_length = match_length, R__prev_match = R__match_start;
00751         match_length = MIN_MATCH-1;
00752 
00753         if (hash_head != NIL && R__prev_length < max_lazy_match &&
00754             R__strstart - hash_head <= MAX_DIST) {
00755             /* To simplify the code, we prevent matches with the string
00756              * of window index 0 (in particular we have to avoid a match
00757              * of the string with itself at the start of the input file).
00758              */
00759             match_length = R__longest_match (hash_head);
00760             /* R__longest_match() sets match_start */
00761             if (match_length > lookahead) match_length = lookahead;
00762 
00763             /* Ignore a length 3 match if it is too distant: */
00764             if (match_length == MIN_MATCH && R__strstart-R__match_start > TOO_FAR){
00765                 /* If prev_match is also MIN_MATCH, match_start is garbage
00766                  * but we will ignore the current match anyway.
00767                  */
00768                 match_length--;
00769             }
00770         }
00771         /* If there was a match at the previous step and the current
00772          * match is not better, output the previous match:
00773          */
00774         if (R__prev_length >= MIN_MATCH && match_length <= R__prev_length) {
00775 
00776             check_match(R__strstart-1, R__prev_match, R__prev_length);
00777 
00778             flush = R__ct_tally(R__strstart-1-R__prev_match, R__prev_length - MIN_MATCH);
00779 
00780             /* Insert in hash table all strings up to the end of the match.
00781              * strstart-1 and strstart are already inserted.
00782              */
00783             lookahead -= R__prev_length-1;
00784             R__prev_length -= 2;
00785             do {
00786                 R__strstart++;
00787                 INSERT_STRING(R__strstart, hash_head);
00788                 /* strstart never exceeds WSIZE-MAX_MATCH, so there are
00789                  * always MIN_MATCH bytes ahead. If lookahead < MIN_MATCH
00790                  * these bytes are garbage, but it does not matter since the
00791                  * next lookahead bytes will always be emitted as literals.
00792                  */
00793             } while (--R__prev_length != 0);
00794             match_available = 0;
00795             match_length = MIN_MATCH-1;
00796             R__strstart++;
00797             if (flush) FLUSH_BLOCK(0), R__block_start = R__strstart;
00798 
00799         } else if (match_available) {
00800             /* If there was no match at the previous position, output a
00801              * single literal. If there was a match but the current match
00802              * is longer, truncate the previous match to a single literal.
00803              */
00804             Tracevv((stderr,"%c",R__window[R__strstart-1]));
00805             if (R__ct_tally (0, R__window[R__strstart-1])) {
00806                 FLUSH_BLOCK(0), R__block_start = R__strstart;
00807             }
00808             R__strstart++;
00809             lookahead--;
00810         } else {
00811             /* There is no previous match to compare with, wait for
00812              * the next step to decide.
00813              */
00814             match_available = 1;
00815             R__strstart++;
00816             lookahead--;
00817         }
00818 #ifdef DEBUG
00819         Assert (R__strstart <= R__isize && lookahead <= R__isize, "a bit too far");
00820 #endif
00821 
00822         /* Make sure that we always have enough lookahead, except
00823          * at the end of the input file. We need MAX_MATCH bytes
00824          * for the next match, plus MIN_MATCH bytes to insert the
00825          * string following the next match.
00826          */
00827         while (lookahead < MIN_LOOKAHEAD && !eofile) R__fill_window();
00828     }
00829     if (match_available) R__ct_tally (0, R__window[R__strstart-1]);
00830 
00831     return FLUSH_BLOCK(1); /* eof */
00832 }
00833 

Generated on Tue Jul 5 14:12:05 2011 for ROOT_528-00b_version by  doxygen 1.5.1