pcregrep.c

Go to the documentation of this file.
00001 /*************************************************
00002 *               pcregrep program                 *
00003 *************************************************/
00004 
00005 /* This is a grep program that uses the PCRE regular expression library to do
00006 its pattern matching. On a Unix or Win32 system it can recurse into
00007 directories.
00008 
00009            Copyright (c) 1997-2008 University of Cambridge
00010 
00011 -----------------------------------------------------------------------------
00012 Redistribution and use in source and binary forms, with or without
00013 modification, are permitted provided that the following conditions are met:
00014 
00015     * Redistributions of source code must retain the above copyright notice,
00016       this list of conditions and the following disclaimer.
00017 
00018     * Redistributions in binary form must reproduce the above copyright
00019       notice, this list of conditions and the following disclaimer in the
00020       documentation and/or other materials provided with the distribution.
00021 
00022     * Neither the name of the University of Cambridge nor the names of its
00023       contributors may be used to endorse or promote products derived from
00024       this software without specific prior written permission.
00025 
00026 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00027 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00029 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
00030 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00031 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00032 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00033 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00034 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00035 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00036 POSSIBILITY OF SUCH DAMAGE.
00037 -----------------------------------------------------------------------------
00038 */
00039 
00040 #ifdef HAVE_CONFIG_H
00041 #include "config.h"
00042 #endif
00043 
00044 #include <ctype.h>
00045 #include <locale.h>
00046 #include <stdio.h>
00047 #include <string.h>
00048 #include <stdlib.h>
00049 #include <errno.h>
00050 
00051 #include <sys/types.h>
00052 #include <sys/stat.h>
00053 
00054 #ifdef HAVE_UNISTD_H
00055 #include <unistd.h>
00056 #endif
00057 
00058 #ifdef SUPPORT_LIBZ
00059 #include <zlib.h>
00060 #endif
00061 
00062 #ifdef SUPPORT_LIBBZ2
00063 #include <bzlib.h>
00064 #endif
00065 
00066 #include "pcre.h"
00067 
/* Boolean constants and the integer type used for flags in this file. */
00068 #define FALSE 0
00069 #define TRUE 1
00070 
00071 typedef int BOOL;
00072 
/* Pattern limit. Its use is not visible in this part of the file; presumably
it bounds the pattern_list/hints_list arrays -- TODO confirm. */
00073 #define MAX_PATTERN_COUNT 100
00074 
/* One third of the file buffer: pcregrep() declares its buffer as
3*MBUFTHIRD bytes. The value is BUFSIZ when that exceeds 8192, else 8192. */
00075 #if BUFSIZ > 8192
00076 #define MBUFTHIRD BUFSIZ
00077 #else
00078 #define MBUFTHIRD 8192
00079 #endif
00080 
00081 /* Values for the "filenames" variable, which specifies options for file name
00082 output. The order is important; it is assumed that a file name is wanted for
00083 all values greater than FN_DEFAULT. */
00084 
00085 enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
00086 
00087 /* File reading styles */
00088 
/* These are the values passed as the frtype argument of pcregrep(). */
00089 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
00090 
00091 /* Actions for the -d and -D options */
00092 
/* The lower-case dee_* values initialise dee_action and the upper-case DEE_*
values initialise DEE_action; by their option-table entries these correspond
to -d (directories) and -D (devices) respectively. */
00093 enum { dee_READ, dee_SKIP, dee_RECURSE };
00094 enum { DEE_READ, DEE_SKIP };
00095 
00096 /* Actions for special processing options (flag bits) */
00097 
/* The three bits have the values 1, 2 and 4, which is what lets a combination
of them index the eight-entry prefix[] and suffix[] tables defined later. */
00098 #define PO_WORD_MATCH     0x0001
00099 #define PO_LINE_MATCH     0x0002
00100 #define PO_FIXED_STRINGS  0x0004
00101 
00102 /* Line ending types */
00103 
/* The endlinetype variable holds one of these; end_of_line() and
previous_line() switch on it. */
00104 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
00105 
00106 
00107 
00108 /*************************************************
00109 *               Global variables                 *
00110 *************************************************/
00111 
00112 /* Jeffrey Friedl has some debugging requirements that are not part of the
00113 regular code. */
00114 
00115 #ifdef JFRIEDL_DEBUG
00116 static int S_arg = -1;
00117 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
00118 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
00119 static const char *jfriedl_prefix = "";
00120 static const char *jfriedl_postfix = "";
00121 #endif
00122 
/* Current line-ending convention (one of the EL_* values); it selects the
branch taken in end_of_line() and previous_line(). */
00123 static int  endlinetype;
00124 
/* String-valued settings. Those that appear as dataptr in optionlist[] are
filled in from the command line: colour_option (--color/--colour), dee_option
(-d), DEE_option (-D), newline (-N), pattern_filename (-f), stdin_name
(--label) and locale (--locale). colour_string is not bound to an option in
the table; presumably it is the escape-sequence parameter used for coloured
output -- TODO confirm. */
00125 static char *colour_string = (char *)"1;31";
00126 static char *colour_option = NULL;
00127 static char *dee_option = NULL;
00128 static char *DEE_option = NULL;
00129 static char *newline = NULL;
00130 static char *pattern_filename = NULL;
00131 static char *stdin_name = (char *)"(standard input)";
00132 static char *locale = NULL;
00133 
00134 static const unsigned char *pcretables = NULL;
00135 
/* Compiled patterns and their study data; pcregrep() passes pattern_list[i]
and hints_list[i] to pcre_exec() for i below pattern_count. */
00136 static int  pattern_count = 0;
00137 static pcre **pattern_list = NULL;
00138 static pcre_extra **hints_list = NULL;
00139 
/* Pattern text from --include, --exclude, --include_dir and --exclude_dir. */
00140 static char *include_pattern = NULL;
00141 static char *exclude_pattern = NULL;
00142 static char *include_dir_pattern = NULL;
00143 static char *exclude_dir_pattern = NULL;
00144 
/* Presumably the compiled forms of the four patterns above -- the code that
sets them is not in this part of the file. */
00145 static pcre *include_compiled = NULL;
00146 static pcre *exclude_compiled = NULL;
00147 static pcre *include_dir_compiled = NULL;
00148 static pcre *exclude_dir_compiled = NULL;
00149 
/* Numeric settings: after_context, before_context and both_context are bound
to -A, -B and -C in optionlist[]. */
00150 static int after_context = 0;
00151 static int before_context = 0;
00152 static int both_context = 0;
00153 static int dee_action = dee_READ;
00154 static int DEE_action = DEE_READ;
00155 static int error_count = 0;
00156 static int filenames = FN_DEFAULT;
00157 static int process_options = 0;
00158 
/* Boolean flags. Of these, the functions visible here read "number" (line
numbers are printed by do_after_lines()) and "utf8" (multibyte decoding in
end_of_line() and previous_line()); do_after_lines() sets hyphenpending. */
00159 static BOOL count_only = FALSE;
00160 static BOOL do_colour = FALSE;
00161 static BOOL file_offsets = FALSE;
00162 static BOOL hyphenpending = FALSE;
00163 static BOOL invert = FALSE;
00164 static BOOL line_offsets = FALSE;
00165 static BOOL multiline = FALSE;
00166 static BOOL number = FALSE;
00167 static BOOL only_matching = FALSE;
00168 static BOOL quiet = FALSE;
00169 static BOOL silent = FALSE;
00170 static BOOL utf8 = FALSE;
00171 
00172 /* Structure for options and list of them */
00173 
/* Kinds of option, stored in option_item.type. Judging by the table below,
OP_NODATA options take no value, OP_STRING/OP_NUMBER take a string/number,
and the OP_OP_* forms are presumably "optional" values -- TODO confirm against
the option-parsing code, which is not in this part of the file. */
00174 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
00175        OP_PATLIST };
00176 
/* One entry of the option table:
  type        one of the OP_* values above
  one_char    the single-letter form, or a negative N_* identifier
  dataptr     address of the variable bound to the option, or NULL
  long_name   the long option name, with any "=value" description
  help_text   one-line description of the option
*/
00177 typedef struct option_item {
00178   int type;
00179   int one_char;
00180   void *dataptr;
00181   const char *long_name;
00182   const char *help_text;
00183 } option_item;
00184 
00185 /* Options without a single-letter equivalent get a negative value. This can be
00186 used to identify them. */
00187 
00188 #define N_COLOUR       (-1)
00189 #define N_EXCLUDE      (-2)
00190 #define N_EXCLUDE_DIR  (-3)
00191 #define N_HELP         (-4)
00192 #define N_INCLUDE      (-5)
00193 #define N_INCLUDE_DIR  (-6)
00194 #define N_LABEL        (-7)
00195 #define N_LOCALE       (-8)
00196 #define N_NULL         (-9)
00197 #define N_LOFFSETS     (-10)
00198 #define N_FOFFSETS     (-11)
00199 
/* The option table. Both spellings "color" and "colour" share N_COLOUR and
the same variable. The final entry has one_char == 0 and NULL strings;
presumably it is the end-of-table sentinel -- TODO confirm against the code
that scans the table. */
00200 static option_item optionlist[] = {
00201   { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
00202   { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
00203   { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
00204   { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
00205   { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
00206   { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
00207   { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
00208   { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
00209   { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
00210   { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
00211   { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
00212   { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
00213   { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
00214   { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
00215   { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
00216   { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
00217   { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
00218   { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
00219   { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
00220   { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
00221   { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
00222   { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
00223   { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
00224   { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
00225   { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
00226   { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
00227   { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
00228   { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
00229   { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
00230   { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
00231   { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
00232   { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
00233 #ifdef JFRIEDL_DEBUG
00234   { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
00235 #endif
00236   { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
00237   { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
00238   { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
00239   { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
00240   { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
00241   { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
00242   { OP_NODATA,    0,        NULL,               NULL,            NULL }
00243 };
00244 
00245 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
00246 options. These set the 1, 2, and 4 bits in process_options, respectively. Note
00247 that the combination of -w and -x has the same effect as -x on its own, so we
00248 can treat them as the same. */
00249 
/* Each table has eight entries, one per combination of the three PO_* bits;
entries 2 and 3 (and 6 and 7) are identical because -x overrides -w. */
00250 static const char *prefix[] = {
00251   "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
00252 
00253 static const char *suffix[] = {
00254   "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
00255 
00256 /* UTF-8 tables - used only when the newline setting is "any". */
00257 
/* utf8_table3 is indexed by the number of additional bytes in a character and
gives the mask that is applied to the first byte to extract its data bits. */
00258 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
00259 
/* utf8_table4 is indexed by the low six bits of a first byte that is >= 0xc0
and gives the number of additional bytes that follow it. */
00260 const char utf8_table4[] = {
00261   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
00262   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
00263   2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
00264   3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
00265 
00266 
00267 
00268 /*************************************************
00269 *            OS-specific functions               *
00270 *************************************************/
00271 
00272 /* These functions are defined so that they can be made system specific,
00273 although at present the only ones are for Unix, Win32, and for "no support". */
00274 
00275 
00276 /************* Directory scanning in Unix ***********/
00277 
00278 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
00279 #include <sys/types.h>
00280 #include <sys/stat.h>
00281 #include <dirent.h>
00282 
/* In the Unix build a directory handle is simply the DIR stream type from
<dirent.h>; opendirectory() returns the result of opendir() unchanged. */
00283 typedef DIR directory_type;
00284 
/* Test whether a path names a directory (Unix version).

Argument:   filename   the path to examine
Returns:    '/' (a non-zero value) if stat() reports a directory
            0 otherwise, including when stat() fails
*/

static int
isdirectory(char *filename)
{
struct stat info;

/* A path that cannot be examined is reported as "not a directory", in the
expectation that the later attempt to open it as a file will fail. */

if (stat(filename, &info) < 0) return 0;

if (S_ISDIR(info.st_mode)) return '/';
return 0;
}
00293 
00294 static directory_type *
00295 opendirectory(char *filename)
00296 {
00297 return opendir(filename);
00298 }
00299 
00300 static char *
00301 readdirectory(directory_type *dir)
00302 {
00303 for (;;)
00304   {
00305   struct dirent *dent = readdir(dir);
00306   if (dent == NULL) return NULL;
00307   if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
00308     return dent->d_name;
00309   }
00310 /* Control never reaches here */
00311 }
00312 
00313 static void
00314 closedirectory(directory_type *dir)
00315 {
00316 closedir(dir);
00317 }
00318 
00319 
00320 /************* Test for regular file in Unix **********/
00321 
/* Test whether a path names a regular file (Unix version).

Argument:   filename   the path to examine
Returns:    1 if stat() reports a regular file, or if stat() fails
            0 if stat() succeeds and reports anything else
*/

static int
isregfile(char *filename)
{
struct stat info;

/* A path that cannot be examined is reported as regular, in the expectation
that the later attempt to open it as a file will fail. */

if (stat(filename, &info) < 0) return 1;

return S_ISREG(info.st_mode)? 1 : 0;
}
00330 
00331 
00332 /************* Test stdout for being a terminal in Unix **********/
00333 
00334 static BOOL
00335 is_stdout_tty(void)
00336 {
00337 return isatty(fileno(stdout));
00338 }
00339 
00340 
00341 /************* Directory scanning in Win32 ***********/
00342 
00343 /* I (Philip Hazel) have no means of testing this code. It was contributed by
00344 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
00345 when it did not exist. David Byron added a patch that moved the #include of
00346 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
00347 */
00348 
00349 #elif HAVE_WINDOWS_H
00350 
00351 #ifndef STRICT
00352 # define STRICT
00353 #endif
00354 #ifndef WIN32_LEAN_AND_MEAN
00355 # define WIN32_LEAN_AND_MEAN
00356 #endif
00357 
00358 #include <windows.h>
00359 
00360 #ifndef INVALID_FILE_ATTRIBUTES
00361 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
00362 #endif
00363 
/* Directory handle for the Win32 build:
  handle   the search handle returned by FindFirstFile()
  first    TRUE until readdirectory() has handed out the entry that
           FindFirstFile() already placed in "data"
  data     the most recently found directory entry
*/
00364 typedef struct directory_type
00365 {
00366 HANDLE handle;
00367 BOOL first;
00368 WIN32_FIND_DATA data;
00369 } directory_type;
00370 
00371 int
00372 isdirectory(char *filename)
00373 {
00374 DWORD attr = GetFileAttributes(filename);
00375 if (attr == INVALID_FILE_ATTRIBUTES)
00376   return 0;
00377 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
00378 }
00379 
00380 directory_type *
00381 opendirectory(char *filename)
00382 {
00383 size_t len;
00384 char *pattern;
00385 directory_type *dir;
00386 DWORD err;
00387 len = strlen(filename);
00388 pattern = (char *) malloc(len + 3);
00389 dir = (directory_type *) malloc(sizeof(*dir));
00390 if ((pattern == NULL) || (dir == NULL))
00391   {
00392   fprintf(stderr, "pcregrep: malloc failed\n");
00393   exit(2);
00394   }
00395 memcpy(pattern, filename, len);
00396 memcpy(&(pattern[len]), "\\*", 3);
00397 dir->handle = FindFirstFile(pattern, &(dir->data));
00398 if (dir->handle != INVALID_HANDLE_VALUE)
00399   {
00400   free(pattern);
00401   dir->first = TRUE;
00402   return dir;
00403   }
00404 err = GetLastError();
00405 free(pattern);
00406 free(dir);
00407 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
00408 return NULL;
00409 }
00410 
00411 char *
00412 readdirectory(directory_type *dir)
00413 {
00414 for (;;)
00415   {
00416   if (!dir->first)
00417     {
00418     if (!FindNextFile(dir->handle, &(dir->data)))
00419       return NULL;
00420     }
00421   else
00422     {
00423     dir->first = FALSE;
00424     }
00425   if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
00426     return dir->data.cFileName;
00427   }
00428 #ifndef _MSC_VER
00429 return NULL;   /* Keep compiler happy; never executed */
00430 #endif
00431 }
00432 
00433 void
00434 closedirectory(directory_type *dir)
00435 {
00436 FindClose(dir->handle);
00437 free(dir);
00438 }
00439 
00440 
00441 /************* Test for regular file in Win32 **********/
00442 
00443 /* I don't know how to do this, or if it can be done; assume all paths are
00444 regular if they are not directories. */
00445 
/* Win32 version: anything that isdirectory() does not recognize as a
directory is treated as a regular file.

Argument:   filename   the path to examine
Returns:    1 if isdirectory() returns 0, otherwise 0
*/

int isregfile(char *filename)
{
return (isdirectory(filename) == 0)? 1 : 0;
}
00450 
00451 
00452 /************* Test stdout for being a terminal in Win32 **********/
00453 
00454 /* I don't know how to do this; assume never */
00455 
00456 static BOOL
00457 is_stdout_tty(void)
00458 {
00459 return FALSE;
00460 }
00461 
00462 
00463 /************* Directory scanning when we can't do it ***********/
00464 
00465 /* The type is void, and apart from isdirectory(), the functions do nothing. */
00466 
00467 #else
00468 
/* Stand-ins for systems with no directory support. Nothing is ever reported
as a directory, so the scanning functions exist only to satisfy the
references to them. */

typedef void directory_type;

/* Never a directory. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}

/* Always fails. */

directory_type *
opendirectory(char *filename)
{
(void)filename;
return (directory_type *)0;
}

/* Never yields an entry. */

char *
readdirectory(directory_type *dir)
{
(void)dir;
return (char *)0;
}

/* Nothing to release. */

void
closedirectory(directory_type *dir)
{
(void)dir;
}
00475 
00476 
00477 /************* Test for regular when we can't do it **********/
00478 
00479 /* Assume all files are regular. */
00480 
/* Without a way of testing, every path is taken to be a regular file. */

int
isregfile(char *filename)
{
(void)filename;
return 1;
}
00482 
00483 
00484 /************* Test stdout for being a terminal when we can't do it **********/
00485 
00486 static BOOL
00487 is_stdout_tty(void)
00488 {
00489 return FALSE;
00490 }
00491 
00492 
00493 #endif
00494 
00495 
00496 
00497 #ifndef HAVE_STRERROR
00498 /*************************************************
00499 *     Provide strerror() for non-ANSI libraries  *
00500 *************************************************/
00501 
00502 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
00503 in their libraries, but can provide the same facility by this simple
00504 alternative function. */
00505 
/* The system's table of error messages and the number of entries in it. */

extern int   sys_nerr;
extern char *sys_errlist[];

/* Replacement for strerror(), compiled only when HAVE_STRERROR is not
defined.

Argument:   n   an error number
Returns:    sys_errlist[n] when n is within the table, otherwise the fixed
            text "unknown error number"
*/

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr) return sys_errlist[n];
return "unknown error number";
}
00515 #endif /* HAVE_STRERROR */
00516 
00517 
00518 
00519 /*************************************************
00520 *             Find end of line                   *
00521 *************************************************/
00522 
00523 /* The length of the endline sequence that is found is set via lenptr. This may
00524 be zero at the very end of the file if there is no line-ending sequence there.
00525 
00526 Arguments:
00527   p         current position in line
00528   endptr    end of available data
00529   lenptr    where to put the length of the eol sequence
00530 
00531 Returns:    pointer to the last byte of the line
00532 */
00533 
00534 static char *
00535 end_of_line(char *p, char *endptr, int *lenptr)
00536 {
00537 switch(endlinetype)
00538   {
00539   default:      /* Just in case */
00540   case EL_LF:
00541   while (p < endptr && *p != '\n') p++;
00542   if (p < endptr)
00543     {
00544     *lenptr = 1;
00545     return p + 1;
00546     }
00547   *lenptr = 0;
00548   return endptr;
00549 
00550   case EL_CR:
00551   while (p < endptr && *p != '\r') p++;
00552   if (p < endptr)
00553     {
00554     *lenptr = 1;
00555     return p + 1;
00556     }
00557   *lenptr = 0;
00558   return endptr;
00559 
00560   case EL_CRLF:
00561   for (;;)
00562     {
00563     while (p < endptr && *p != '\r') p++;
00564     if (++p >= endptr)
00565       {
00566       *lenptr = 0;
00567       return endptr;
00568       }
00569     if (*p == '\n')
00570       {
00571       *lenptr = 2;
00572       return p + 1;
00573       }
00574     }
00575   break;
00576 
00577   case EL_ANYCRLF:
00578   while (p < endptr)
00579     {
00580     int extra = 0;
00581     register int c = *((unsigned char *)p);
00582 
00583     if (utf8 && c >= 0xc0)
00584       {
00585       int gcii, gcss;
00586       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
00587       gcss = 6*extra;
00588       c = (c & utf8_table3[extra]) << gcss;
00589       for (gcii = 1; gcii <= extra; gcii++)
00590         {
00591         gcss -= 6;
00592         c |= (p[gcii] & 0x3f) << gcss;
00593         }
00594       }
00595 
00596     p += 1 + extra;
00597 
00598     switch (c)
00599       {
00600       case 0x0a:    /* LF */
00601       *lenptr = 1;
00602       return p;
00603 
00604       case 0x0d:    /* CR */
00605       if (p < endptr && *p == 0x0a)
00606         {
00607         *lenptr = 2;
00608         p++;
00609         }
00610       else *lenptr = 1;
00611       return p;
00612 
00613       default:
00614       break;
00615       }
00616     }   /* End of loop for ANYCRLF case */
00617 
00618   *lenptr = 0;  /* Must have hit the end */
00619   return endptr;
00620 
00621   case EL_ANY:
00622   while (p < endptr)
00623     {
00624     int extra = 0;
00625     register int c = *((unsigned char *)p);
00626 
00627     if (utf8 && c >= 0xc0)
00628       {
00629       int gcii, gcss;
00630       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
00631       gcss = 6*extra;
00632       c = (c & utf8_table3[extra]) << gcss;
00633       for (gcii = 1; gcii <= extra; gcii++)
00634         {
00635         gcss -= 6;
00636         c |= (p[gcii] & 0x3f) << gcss;
00637         }
00638       }
00639 
00640     p += 1 + extra;
00641 
00642     switch (c)
00643       {
00644       case 0x0a:    /* LF */
00645       case 0x0b:    /* VT */
00646       case 0x0c:    /* FF */
00647       *lenptr = 1;
00648       return p;
00649 
00650       case 0x0d:    /* CR */
00651       if (p < endptr && *p == 0x0a)
00652         {
00653         *lenptr = 2;
00654         p++;
00655         }
00656       else *lenptr = 1;
00657       return p;
00658 
00659       case 0x85:    /* NEL */
00660       *lenptr = utf8? 2 : 1;
00661       return p;
00662 
00663       case 0x2028:  /* LS */
00664       case 0x2029:  /* PS */
00665       *lenptr = 3;
00666       return p;
00667 
00668       default:
00669       break;
00670       }
00671     }   /* End of loop for ANY case */
00672 
00673   *lenptr = 0;  /* Must have hit the end */
00674   return endptr;
00675   }     /* End of overall switch */
00676 }
00677 
00678 
00679 
00680 /*************************************************
00681 *         Find start of previous line            *
00682 *************************************************/
00683 
00684 /* This is called when looking back for before lines to print.
00685 
00686 Arguments:
00687   p         start of the subsequent line
00688   startptr  start of available data
00689 
00690 Returns:    pointer to the start of the previous line
00691 */
00692 
00693 static char *
00694 previous_line(char *p, char *startptr)
00695 {
00696 switch(endlinetype)
00697   {
00698   default:      /* Just in case */
00699   case EL_LF:
00700   p--;
00701   while (p > startptr && p[-1] != '\n') p--;
00702   return p;
00703 
00704   case EL_CR:
00705   p--;
00706   while (p > startptr && p[-1] != '\n') p--;
00707   return p;
00708 
00709   case EL_CRLF:
00710   for (;;)
00711     {
00712     p -= 2;
00713     while (p > startptr && p[-1] != '\n') p--;
00714     if (p <= startptr + 1 || p[-2] == '\r') return p;
00715     }
00716   return p;   /* But control should never get here */
00717 
00718   case EL_ANY:
00719   case EL_ANYCRLF:
00720   if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
00721   if (utf8) while ((*p & 0xc0) == 0x80) p--;
00722 
00723   while (p > startptr)
00724     {
00725     register int c;
00726     char *pp = p - 1;
00727 
00728     if (utf8)
00729       {
00730       int extra = 0;
00731       while ((*pp & 0xc0) == 0x80) pp--;
00732       c = *((unsigned char *)pp);
00733       if (c >= 0xc0)
00734         {
00735         int gcii, gcss;
00736         extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
00737         gcss = 6*extra;
00738         c = (c & utf8_table3[extra]) << gcss;
00739         for (gcii = 1; gcii <= extra; gcii++)
00740           {
00741           gcss -= 6;
00742           c |= (pp[gcii] & 0x3f) << gcss;
00743           }
00744         }
00745       }
00746     else c = *((unsigned char *)pp);
00747 
00748     if (endlinetype == EL_ANYCRLF) switch (c)
00749       {
00750       case 0x0a:    /* LF */
00751       case 0x0d:    /* CR */
00752       return p;
00753 
00754       default:
00755       break;
00756       }
00757 
00758     else switch (c)
00759       {
00760       case 0x0a:    /* LF */
00761       case 0x0b:    /* VT */
00762       case 0x0c:    /* FF */
00763       case 0x0d:    /* CR */
00764       case 0x85:    /* NEL */
00765       case 0x2028:  /* LS */
00766       case 0x2029:  /* PS */
00767       return p;
00768 
00769       default:
00770       break;
00771       }
00772 
00773     p = pp;  /* Back one character */
00774     }        /* End of loop for ANY case */
00775 
00776   return startptr;  /* Hit start of data */
00777   }     /* End of overall switch */
00778 }
00779 
00780 
00781 
00782 
00783 
00784 /*************************************************
00785 *       Print the previous "after" lines         *
00786 *************************************************/
00787 
00788 /* This is called if we are about to lose said lines because of buffer filling,
00789 and at the end of the file. The data in the line is written using fwrite() so
00790 that a binary zero does not terminate it.
00791 
00792 Arguments:
00793   lastmatchnumber   the number of the last matching line, plus one
00794   lastmatchrestart  where we restarted after the last match
00795   endptr            end of available data
00796   printname         filename for printing
00797 
00798 Returns:            nothing
00799 */
00800 
00801 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
00802   char *endptr, char *printname)
00803 {
00804 if (after_context > 0 && lastmatchnumber > 0)
00805   {
00806   int count = 0;
00807   while (lastmatchrestart < endptr && count++ < after_context)
00808     {
00809     int ellength;
00810     char *pp = lastmatchrestart;
00811     if (printname != NULL) fprintf(stdout, "%s-", printname);
00812     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
00813     pp = end_of_line(pp, endptr, &ellength);
00814     fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
00815     lastmatchrestart = pp;
00816     }
00817   hyphenpending = TRUE;
00818   }
00819 }
00820 
00821 
00822 
00823 /*************************************************
00824 *            Grep an individual file             *
00825 *************************************************/
00826 
00827 /* This is called from grep_or_recurse() below. It uses a buffer that is three
00828 times the value of MBUFTHIRD. The matching point is never allowed to stray into
00829 the top third of the buffer, thus keeping more of the file available for
00830 context printing or for multiline scanning. For large files, the pointer will
00831 be in the middle third most of the time, so the bottom third is available for
00832 "before" context printing.
00833 
00834 Arguments:
00835   handle       the fopened FILE stream for a normal file
00836                the gzFile pointer when reading is via libz
00837                the BZFILE pointer when reading is via libbz2
00838   frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
00839   printname    the file name if it is to be printed for each match
00840                or NULL if the file name is not to be printed
00841                it cannot be NULL if filenames[_nomatch]_only is set
00842 
00843 Returns:       0 if there was at least one match
00844                1 otherwise (no matches)
00845                2 if there is a read error on a .bz2 file
00846 */
00847 
00848 static int
00849 pcregrep(void *handle, int frtype, char *printname)
00850 {
00851 int rc = 1;
00852 int linenumber = 1;
00853 int lastmatchnumber = 0;
00854 int count = 0;
00855 int filepos = 0;
00856 int offsets[99];
00857 char *lastmatchrestart = NULL;
00858 char buffer[3*MBUFTHIRD];
00859 char *ptr = buffer;
00860 char *endptr;
00861 size_t bufflength;
00862 BOOL endhyphenpending = FALSE;
00863 FILE *in = NULL;                    /* Ensure initialized */
00864 
00865 #ifdef SUPPORT_LIBZ
00866 gzFile ingz = NULL;
00867 #endif
00868 
00869 #ifdef SUPPORT_LIBBZ2
00870 BZFILE *inbz2 = NULL;
00871 #endif
00872 
00873 
00874 /* Do the first read into the start of the buffer and set up the pointer to end
00875 of what we have. In the case of libz, a non-zipped .gz file will be read as a
00876 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
00877 fail. */
00878 
00879 #ifdef SUPPORT_LIBZ
00880 if (frtype == FR_LIBZ)
00881   {
00882   ingz = (gzFile)handle;
00883   bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
00884   }
00885 else
00886 #endif
00887 
00888 #ifdef SUPPORT_LIBBZ2
00889 if (frtype == FR_LIBBZ2)
00890   {
00891   inbz2 = (BZFILE *)handle;
00892   bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
00893   if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
00894   }                                    /* without the cast it is unsigned. */
00895 else
00896 #endif
00897 
00898   {
00899   in = (FILE *)handle;
00900   bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
00901   }
00902 
00903 endptr = buffer + bufflength;
00904 
00905 /* Loop while the current pointer is not at the end of the file. For large
00906 files, endptr will be at the end of the buffer when we are in the middle of the
00907 file, but ptr will never get there, because as soon as it gets over 2/3 of the
00908 way, the buffer is shifted left and re-filled. */
00909 
00910 while (ptr < endptr)
00911   {
00912   int i, endlinelength;
00913   int mrc = 0;
00914   BOOL match = FALSE;
00915   char *matchptr = ptr;
00916   char *t = ptr;
00917   size_t length, linelength;
00918 
00919   /* At this point, ptr is at the start of a line. We need to find the length
00920   of the subject string to pass to pcre_exec(). In multiline mode, it is the
00921   length remainder of the data in the buffer. Otherwise, it is the length of
00922   the next line. After matching, we always advance by the length of the next
00923   line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
00924   that any match is constrained to be in the first line. */
00925 
00926   t = end_of_line(t, endptr, &endlinelength);
00927   linelength = t - ptr - endlinelength;
00928   length = multiline? (size_t)(endptr - ptr) : linelength;
00929 
00930   /* Extra processing for Jeffrey Friedl's debugging. */
00931 
00932 #ifdef JFRIEDL_DEBUG
00933   if (jfriedl_XT || jfriedl_XR)
00934   {
00935       #include <sys/time.h>
00936       #include <time.h>
00937       struct timeval start_time, end_time;
00938       struct timezone dummy;
00939 
00940       if (jfriedl_XT)
00941       {
00942           unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
00943           const char *orig = ptr;
00944           ptr = malloc(newlen + 1);
00945           if (!ptr) {
00946                   printf("out of memory");
00947                   exit(2);
00948           }
00949           endptr = ptr;
00950           strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
00951           for (i = 0; i < jfriedl_XT; i++) {
00952                   strncpy(endptr, orig,  length);
00953                   endptr += length;
00954           }
00955           strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
00956           length = newlen;
00957       }
00958 
00959       if (gettimeofday(&start_time, &dummy) != 0)
00960               perror("bad gettimeofday");
00961 
00962 
00963       for (i = 0; i < jfriedl_XR; i++)
00964           match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
00965 
00966       if (gettimeofday(&end_time, &dummy) != 0)
00967               perror("bad gettimeofday");
00968 
00969       double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
00970                       -
00971                       (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
00972 
00973       printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
00974       return 0;
00975   }
00976 #endif
00977 
00978   /* We come back here after a match when the -o option (only_matching) is set,
00979   in order to find any further matches in the same line. */
00980 
00981   ONLY_MATCHING_RESTART:
00982 
00983   /* Run through all the patterns until one matches. Note that we don't include
00984   the final newline in the subject string. */
00985 
00986   for (i = 0; i < pattern_count; i++)
00987     {
00988     mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
00989       offsets, 99);
00990     if (mrc >= 0) { match = TRUE; break; }
00991     if (mrc != PCRE_ERROR_NOMATCH)
00992       {
00993       fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
00994       if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
00995       fprintf(stderr, "this line:\n");
00996       fwrite(matchptr, 1, linelength, stderr);  /* In case binary zero included */
00997       fprintf(stderr, "\n");
00998       if (error_count == 0 &&
00999           (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
01000         {
01001         fprintf(stderr, "pcregrep: error %d means that a resource limit "
01002           "was exceeded\n", mrc);
01003         fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
01004         }
01005       if (error_count++ > 20)
01006         {
01007         fprintf(stderr, "pcregrep: too many errors - abandoned\n");
01008         exit(2);
01009         }
01010       match = invert;    /* No more matching; don't show the line again */
01011       break;
01012       }
01013     }
01014 
01015   /* If it's a match or a not-match (as required), do what's wanted. */
01016 
01017   if (match != invert)
01018     {
01019     BOOL hyphenprinted = FALSE;
01020 
01021     /* We've failed if we want a file that doesn't have any matches. */
01022 
01023     if (filenames == FN_NOMATCH_ONLY) return 1;
01024 
01025     /* Just count if just counting is wanted. */
01026 
01027     if (count_only) count++;
01028 
01029     /* If all we want is a file name, there is no need to scan any more lines
01030     in the file. */
01031 
01032     else if (filenames == FN_ONLY)
01033       {
01034       fprintf(stdout, "%s\n", printname);
01035       return 0;
01036       }
01037 
01038     /* Likewise, if all we want is a yes/no answer. */
01039 
01040     else if (quiet) return 0;
01041 
01042     /* The --only-matching option prints just the substring that matched, and
01043     the --file-offsets and --line-offsets options output offsets for the
01044     matching substring (they both force --only-matching). None of these options
01045     prints any context. Afterwards, adjust the start and length, and then jump
01046     back to look for further matches in the same line. If we are in invert
01047     mode, however, nothing is printed - this could be still useful because the
01048     return code is set. */
01049 
01050     else if (only_matching)
01051       {
01052       if (!invert)
01053         {
01054         if (printname != NULL) fprintf(stdout, "%s:", printname);
01055         if (number) fprintf(stdout, "%d:", linenumber);
01056         if (line_offsets)
01057           fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
01058             offsets[1] - offsets[0]);
01059         else if (file_offsets)
01060           fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
01061             offsets[1] - offsets[0]);
01062         else
01063           fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
01064         fprintf(stdout, "\n");
01065         matchptr += offsets[1];
01066         length -= offsets[1];
01067         match = FALSE;
01068         goto ONLY_MATCHING_RESTART;
01069         }
01070       }
01071 
01072     /* This is the default case when none of the above options is set. We print
01073     the matching lines(s), possibly preceded and/or followed by other lines of
01074     context. */
01075 
01076     else
01077       {
01078       /* See if there is a requirement to print some "after" lines from a
01079       previous match. We never print any overlaps. */
01080 
01081       if (after_context > 0 && lastmatchnumber > 0)
01082         {
01083         int ellength;
01084         int linecount = 0;
01085         char *p = lastmatchrestart;
01086 
01087         while (p < ptr && linecount < after_context)
01088           {
01089           p = end_of_line(p, ptr, &ellength);
01090           linecount++;
01091           }
01092 
01093         /* It is important to advance lastmatchrestart during this printing so
01094         that it interacts correctly with any "before" printing below. Print
01095         each line's data using fwrite() in case there are binary zeroes. */
01096 
01097         while (lastmatchrestart < p)
01098           {
01099           char *pp = lastmatchrestart;
01100           if (printname != NULL) fprintf(stdout, "%s-", printname);
01101           if (number) fprintf(stdout, "%d-", lastmatchnumber++);
01102           pp = end_of_line(pp, endptr, &ellength);
01103           fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
01104           lastmatchrestart = pp;
01105           }
01106         if (lastmatchrestart != ptr) hyphenpending = TRUE;
01107         }
01108 
01109       /* If there were non-contiguous lines printed above, insert hyphens. */
01110 
01111       if (hyphenpending)
01112         {
01113         fprintf(stdout, "--\n");
01114         hyphenpending = FALSE;
01115         hyphenprinted = TRUE;
01116         }
01117 
01118       /* See if there is a requirement to print some "before" lines for this
01119       match. Again, don't print overlaps. */
01120 
01121       if (before_context > 0)
01122         {
01123         int linecount = 0;
01124         char *p = ptr;
01125 
01126         while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
01127                linecount < before_context)
01128           {
01129           linecount++;
01130           p = previous_line(p, buffer);
01131           }
01132 
01133         if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
01134           fprintf(stdout, "--\n");
01135 
01136         while (p < ptr)
01137           {
01138           int ellength;
01139           char *pp = p;
01140           if (printname != NULL) fprintf(stdout, "%s-", printname);
01141           if (number) fprintf(stdout, "%d-", linenumber - linecount--);
01142           pp = end_of_line(pp, endptr, &ellength);
01143           fwrite(p, 1, pp - p, stdout);
01144           p = pp;
01145           }
01146         }
01147 
01148       /* Now print the matching line(s); ensure we set hyphenpending at the end
01149       of the file if any context lines are being output. */
01150 
01151       if (after_context > 0 || before_context > 0)
01152         endhyphenpending = TRUE;
01153 
01154       if (printname != NULL) fprintf(stdout, "%s:", printname);
01155       if (number) fprintf(stdout, "%d:", linenumber);
01156 
01157       /* In multiline mode, we want to print to the end of the line in which
01158       the end of the matched string is found, so we adjust linelength and the
01159       line number appropriately, but only when there actually was a match
01160       (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
01161       the match will always be before the first newline sequence. */
01162 
01163       if (multiline)
01164         {
01165         int ellength;
01166         char *endmatch = ptr;
01167         if (!invert)
01168           {
01169           endmatch += offsets[1];
01170           t = ptr;
01171           while (t < endmatch)
01172             {
01173             t = end_of_line(t, endptr, &ellength);
01174             if (t <= endmatch) linenumber++; else break;
01175             }
01176           }
01177         endmatch = end_of_line(endmatch, endptr, &ellength);
01178         linelength = endmatch - ptr - ellength;
01179         }
01180 
01181       /*** NOTE: Use only fwrite() to output the data line, so that binary
01182       zeroes are treated as just another data character. */
01183 
01184       /* This extra option, for Jeffrey Friedl's debugging requirements,
01185       replaces the matched string, or a specific captured string if it exists,
01186       with X. When this happens, colouring is ignored. */
01187 
01188 #ifdef JFRIEDL_DEBUG
01189       if (S_arg >= 0 && S_arg < mrc)
01190         {
01191         int first = S_arg * 2;
01192         int last  = first + 1;
01193         fwrite(ptr, 1, offsets[first], stdout);
01194         fprintf(stdout, "X");
01195         fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
01196         }
01197       else
01198 #endif
01199 
01200       /* We have to split the line(s) up if colouring. */
01201 
01202       if (do_colour)
01203         {
01204         fwrite(ptr, 1, offsets[0], stdout);
01205         fprintf(stdout, "%c[%sm", 0x1b, colour_string);
01206         fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
01207         fprintf(stdout, "%c[00m", 0x1b);
01208         fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
01209           stdout);
01210         }
01211       else fwrite(ptr, 1, linelength + endlinelength, stdout);
01212       }
01213 
01214     /* End of doing what has to be done for a match */
01215 
01216     rc = 0;    /* Had some success */
01217 
01218     /* Remember where the last match happened for after_context. We remember
01219     where we are about to restart, and that line's number. */
01220 
01221     lastmatchrestart = ptr + linelength + endlinelength;
01222     lastmatchnumber = linenumber + 1;
01223     }
01224 
01225   /* For a match in multiline inverted mode (which of course did not cause
01226   anything to be printed), we have to move on to the end of the match before
01227   proceeding. */
01228 
01229   if (multiline && invert && match)
01230     {
01231     int ellength;
01232     char *endmatch = ptr + offsets[1];
01233     t = ptr;
01234     while (t < endmatch)
01235       {
01236       t = end_of_line(t, endptr, &ellength);
01237       if (t <= endmatch) linenumber++; else break;
01238       }
01239     endmatch = end_of_line(endmatch, endptr, &ellength);
01240     linelength = endmatch - ptr - ellength;
01241     }
01242 
01243   /* Advance to after the newline and increment the line number. The file
01244   offset to the current line is maintained in filepos. */
01245 
01246   ptr += linelength + endlinelength;
01247   filepos += linelength + endlinelength;
01248   linenumber++;
01249 
01250   /* If we haven't yet reached the end of the file (the buffer is full), and
01251   the current point is in the top 1/3 of the buffer, slide the buffer down by
01252   1/3 and refill it. Before we do this, if some unprinted "after" lines are
01253   about to be lost, print them. */
01254 
01255   if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
01256     {
01257     if (after_context > 0 &&
01258         lastmatchnumber > 0 &&
01259         lastmatchrestart < buffer + MBUFTHIRD)
01260       {
01261       do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
01262       lastmatchnumber = 0;
01263       }
01264 
01265     /* Now do the shuffle */
01266 
01267     memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
01268     ptr -= MBUFTHIRD;
01269 
01270 #ifdef SUPPORT_LIBZ
01271     if (frtype == FR_LIBZ)
01272       bufflength = 2*MBUFTHIRD +
01273         gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
01274     else
01275 #endif
01276 
01277 #ifdef SUPPORT_LIBBZ2
01278     if (frtype == FR_LIBBZ2)
01279       bufflength = 2*MBUFTHIRD +
01280         BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
01281     else
01282 #endif
01283 
01284     bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
01285 
01286     endptr = buffer + bufflength;
01287 
01288     /* Adjust any last match point */
01289 
01290     if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
01291     }
01292   }     /* Loop through the whole file */
01293 
01294 /* End of file; print final "after" lines if wanted; do_after_lines sets
01295 hyphenpending if it prints something. */
01296 
01297 if (!only_matching && !count_only)
01298   {
01299   do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
01300   hyphenpending |= endhyphenpending;
01301   }
01302 
01303 /* Print the file name if we are looking for those without matches and there
01304 were none. If we found a match, we won't have got this far. */
01305 
01306 if (filenames == FN_NOMATCH_ONLY)
01307   {
01308   fprintf(stdout, "%s\n", printname);
01309   return 0;
01310   }
01311 
01312 /* Print the match count if wanted */
01313 
01314 if (count_only)
01315   {
01316   if (printname != NULL) fprintf(stdout, "%s:", printname);
01317   fprintf(stdout, "%d\n", count);
01318   }
01319 
01320 return rc;
01321 }
01322 
01323 
01324 
01325 /*************************************************
01326 *     Grep a file or recurse into a directory    *
01327 *************************************************/
01328 
01329 /* Given a path name, if it's a directory, scan all the files if we are
01330 recursing; if it's a file, grep it.
01331 
01332 Arguments:
01333   pathname          the path to investigate
01334   dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
01335   only_one_at_top   TRUE if the path is the only one at toplevel
01336 
01337 Returns:   0 if there was at least one match
01338            1 if there were no matches
01339            2 there was some kind of error
01340 
01341 However, file opening failures are suppressed if "silent" is set.
01342 */
01343 
01344 static int
01345 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
01346 {
01347 int rc = 1;
01348 int sep;
01349 int frtype;
01350 int pathlen;
01351 void *handle;
01352 FILE *in = NULL;           /* Ensure initialized */
01353 
01354 #ifdef SUPPORT_LIBZ
01355 gzFile ingz = NULL;
01356 #endif
01357 
01358 #ifdef SUPPORT_LIBBZ2
01359 BZFILE *inbz2 = NULL;
01360 #endif
01361 
01362 /* If the file name is "-" we scan stdin */
01363 
01364 if (strcmp(pathname, "-") == 0)
01365   {
01366   return pcregrep(stdin, FR_PLAIN,
01367     (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
01368       stdin_name : NULL);
01369   }
01370 
01371 /* If the file is a directory, skip if skipping or if we are recursing, scan
01372 each file and directory within it, subject to any include or exclude patterns
01373 that were set. The scanning code is localized so it can be made
01374 system-specific. */
01375 
01376 if ((sep = isdirectory(pathname)) != 0)
01377   {
01378   if (dee_action == dee_SKIP) return 1;
01379   if (dee_action == dee_RECURSE)
01380     {
01381     char buffer[1024];
01382     char *nextfile;
01383     directory_type *dir = opendirectory(pathname);
01384 
01385     if (dir == NULL)
01386       {
01387       if (!silent)
01388         fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
01389           strerror(errno));
01390       return 2;
01391       }
01392 
01393     while ((nextfile = readdirectory(dir)) != NULL)
01394       {
01395       int frc, nflen;
01396       sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
01397       nflen = strlen(nextfile);
01398 
01399       if (isdirectory(buffer))
01400         {
01401         if (exclude_dir_compiled != NULL &&
01402             pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
01403           continue;
01404 
01405         if (include_dir_compiled != NULL &&
01406             pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
01407           continue;
01408         }
01409       else
01410         {
01411         if (exclude_compiled != NULL &&
01412             pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
01413           continue;
01414 
01415         if (include_compiled != NULL &&
01416             pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
01417           continue;
01418         }
01419 
01420       frc = grep_or_recurse(buffer, dir_recurse, FALSE);
01421       if (frc > 1) rc = frc;
01422        else if (frc == 0 && rc == 1) rc = 0;
01423       }
01424 
01425     closedirectory(dir);
01426     return rc;
01427     }
01428   }
01429 
01430 /* If the file is not a directory and not a regular file, skip it if that's
01431 been requested. */
01432 
01433 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
01434 
01435 /* Control reaches here if we have a regular file, or if we have a directory
01436 and recursion or skipping was not requested, or if we have anything else and
01437 skipping was not requested. The scan proceeds. If this is the first and only
01438 argument at top level, we don't show the file name, unless we are only showing
01439 the file name, or the filename was forced (-H). */
01440 
01441 pathlen = strlen(pathname);
01442 
01443 /* Open using zlib if it is supported and the file name ends with .gz. */
01444 
01445 #ifdef SUPPORT_LIBZ
01446 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
01447   {
01448   ingz = gzopen(pathname, "rb");
01449   if (ingz == NULL)
01450     {
01451     if (!silent)
01452       fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
01453         strerror(errno));
01454     return 2;
01455     }
01456   handle = (void *)ingz;
01457   frtype = FR_LIBZ;
01458   }
01459 else
01460 #endif
01461 
01462 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
01463 
01464 #ifdef SUPPORT_LIBBZ2
01465 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
01466   {
01467   inbz2 = BZ2_bzopen(pathname, "rb");
01468   handle = (void *)inbz2;
01469   frtype = FR_LIBBZ2;
01470   }
01471 else
01472 #endif
01473 
01474 /* Otherwise use plain fopen(). The label is so that we can come back here if
01475 an attempt to read a .bz2 file indicates that it really is a plain file. */
01476 
01477 #ifdef SUPPORT_LIBBZ2
01478 PLAIN_FILE:
01479 #endif
01480   {
01481   in = fopen(pathname, "r");
01482   handle = (void *)in;
01483   frtype = FR_PLAIN;
01484   }
01485 
01486 /* All the opening methods return errno when they fail. */
01487 
01488 if (handle == NULL)
01489   {
01490   if (!silent)
01491     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
01492       strerror(errno));
01493   return 2;
01494   }
01495 
01496 /* Now grep the file */
01497 
01498 rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
01499   (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
01500 
01501 /* Close in an appropriate manner. */
01502 
01503 #ifdef SUPPORT_LIBZ
01504 if (frtype == FR_LIBZ)
01505   gzclose(ingz);
01506 else
01507 #endif
01508 
01509 /* If it is a .bz2 file and the result is 2, it means that the first attempt to
01510 read failed. If the error indicates that the file isn't in fact bzipped, try
01511 again as a normal file. */
01512 
01513 #ifdef SUPPORT_LIBBZ2
01514 if (frtype == FR_LIBBZ2)
01515   {
01516   if (rc == 2)
01517     {
01518     int errnum;
01519     const char *err = BZ2_bzerror(inbz2, &errnum);
01520     if (errnum == BZ_DATA_ERROR_MAGIC)
01521       {
01522       BZ2_bzclose(inbz2);
01523       goto PLAIN_FILE;
01524       }
01525     else if (!silent)
01526       fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
01527         pathname, err);
01528     }
01529   BZ2_bzclose(inbz2);
01530   }
01531 else
01532 #endif
01533 
01534 /* Normal file close */
01535 
01536 fclose(in);
01537 
01538 /* Pass back the yield from pcregrep(). */
01539 
01540 return rc;
01541 }
01542 
01543 
01544 
01545 
01546 /*************************************************
01547 *                Usage function                  *
01548 *************************************************/
01549 
01550 static int
01551 usage(int rc)
01552 {
01553 option_item *op;
01554 fprintf(stderr, "Usage: pcregrep [-");
01555 for (op = optionlist; op->one_char != 0; op++)
01556   {
01557   if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
01558   }
01559 fprintf(stderr, "] [long options] [pattern] [files]\n");
01560 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
01561   "options.\n");
01562 return rc;
01563 }
01564 
01565 
01566 
01567 
01568 /*************************************************
01569 *                Help function                   *
01570 *************************************************/
01571 
01572 static void
01573 help(void)
01574 {
01575 option_item *op;
01576 
01577 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
01578 printf("Search for PATTERN in each FILE or standard input.\n");
01579 printf("PATTERN must be present if neither -e nor -f is used.\n");
01580 printf("\"-\" can be used as a file name to mean STDIN.\n");
01581 
01582 #ifdef SUPPORT_LIBZ
01583 printf("Files whose names end in .gz are read using zlib.\n");
01584 #endif
01585 
01586 #ifdef SUPPORT_LIBBZ2
01587 printf("Files whose names end in .bz2 are read using bzlib2.\n");
01588 #endif
01589 
01590 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
01591 printf("Other files and the standard input are read as plain files.\n\n");
01592 #else
01593 printf("All files are read as plain files, without any interpretation.\n\n");
01594 #endif
01595 
01596 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
01597 printf("Options:\n");
01598 
01599 for (op = optionlist; op->one_char != 0; op++)
01600   {
01601   int n;
01602   char s[4];
01603   if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
01604   n = 30 - printf("  %s --%s", s, op->long_name);
01605   if (n < 1) n = 1;
01606   printf("%.*s%s\n", n, "                    ", op->help_text);
01607   }
01608 
01609 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
01610 printf("trailing white space is removed and blank lines are ignored.\n");
01611 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
01612 
01613 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
01614 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
01615 }
01616 
01617 
01618 
01619 
01620 /*************************************************
01621 *    Handle a single-letter, no data option      *
01622 *************************************************/
01623 
01624 static int
01625 handle_option(int letter, int options)
01626 {
01627 switch(letter)
01628   {
01629   case N_FOFFSETS: file_offsets = TRUE; break;
01630   case N_HELP: help(); exit(0);
01631   case N_LOFFSETS: line_offsets = number = TRUE; break;
01632   case 'c': count_only = TRUE; break;
01633   case 'F': process_options |= PO_FIXED_STRINGS; break;
01634   case 'H': filenames = FN_FORCE; break;
01635   case 'h': filenames = FN_NONE; break;
01636   case 'i': options |= PCRE_CASELESS; break;
01637   case 'l': filenames = FN_ONLY; break;
01638   case 'L': filenames = FN_NOMATCH_ONLY; break;
01639   case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
01640   case 'n': number = TRUE; break;
01641   case 'o': only_matching = TRUE; break;
01642   case 'q': quiet = TRUE; break;
01643   case 'r': dee_action = dee_RECURSE; break;
01644   case 's': silent = TRUE; break;
01645   case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
01646   case 'v': invert = TRUE; break;
01647   case 'w': process_options |= PO_WORD_MATCH; break;
01648   case 'x': process_options |= PO_LINE_MATCH; break;
01649 
01650   case 'V':
01651   fprintf(stderr, "pcregrep version %s\n", pcre_version());
01652   exit(0);
01653   break;
01654 
01655   default:
01656   fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
01657   exit(usage(2));
01658   }
01659 
01660 return options;
01661 }
01662 
01663 
01664 
01665 
01666 /*************************************************
01667 *          Construct printed ordinal             *
01668 *************************************************/
01669 
/* This turns a number into "1st", "2nd", "3rd", "4th", etc. Numbers whose last
two digits are 11, 12, or 13 take "th" ("11th", "112th"), not the ending that
their final digit would otherwise select.

Argument:   n    the number
Returns:    pointer to a static buffer that holds the text; the buffer is
              overwritten by the next call
*/

static char *
ordin(int n)
{
/* Three characters per byte is more than enough for the decimal digits; the
extra four bytes are for a minus sign, the two-letter ending, and the
terminating zero. */

static char buffer[sizeof(int) * 3 + 4];
const char *ending = "th";
int lasttwo = n % 100;
char *p = buffer;

p += sprintf(p, "%d", n);

/* A negative remainder (from a negative n) matches no case and so keeps the
"th" ending. */

if (lasttwo < 11 || lasttwo > 13)
  {
  switch (lasttwo % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

strcpy(p, ending);
return buffer;
}
01688 
01689 
01690 
01691 /*************************************************
01692 *          Compile a single pattern              *
01693 *************************************************/
01694 
01695 /* When the -F option has been used, this is called for each substring.
01696 Otherwise it's called for each supplied pattern.
01697 
01698 Arguments:
01699   pattern        the pattern string
01700   options        the PCRE options
01701   filename       the file name, or NULL for a command-line pattern
01702   count          0 if this is the only command line pattern, or
01703                  number of the command line pattern, or
01704                  linenumber for a pattern from a file
01705 
01706 Returns:         TRUE on success, FALSE after an error
01707 */
01708 
01709 static BOOL
01710 compile_single_pattern(char *pattern, int options, char *filename, int count)
01711 {
01712 char buffer[MBUFTHIRD + 16];
01713 const char *error;
01714 int errptr;
01715 
01716 if (pattern_count >= MAX_PATTERN_COUNT)
01717   {
01718   fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
01719     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
01720   return FALSE;
01721   }
01722 
01723 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
01724   suffix[process_options]);
01725 pattern_list[pattern_count] =
01726   pcre_compile(buffer, options, &error, &errptr, pcretables);
01727 if (pattern_list[pattern_count] != NULL)
01728   {
01729   pattern_count++;
01730   return TRUE;
01731   }
01732 
01733 /* Handle compile errors */
01734 
01735 errptr -= (int)strlen(prefix[process_options]);
01736 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
01737 
01738 if (filename == NULL)
01739   {
01740   if (count == 0)
01741     fprintf(stderr, "pcregrep: Error in command-line regex "
01742       "at offset %d: %s\n", errptr, error);
01743   else
01744     fprintf(stderr, "pcregrep: Error in %s command-line regex "
01745       "at offset %d: %s\n", ordin(count), errptr, error);
01746   }
01747 else
01748   {
01749   fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
01750     "at offset %d: %s\n", count, filename, errptr, error);
01751   }
01752 
01753 return FALSE;
01754 }
01755 
01756 
01757 
01758 /*************************************************
01759 *           Compile one supplied pattern         *
01760 *************************************************/
01761 
01762 /* When the -F option has been used, each string may be a list of strings,
01763 separated by line breaks. They will be matched literally.
01764 
01765 Arguments:
01766   pattern        the pattern string
01767   options        the PCRE options
01768   filename       the file name, or NULL for a command-line pattern
01769   count          0 if this is the only command line pattern, or
01770                  number of the command line pattern, or
01771                  linenumber for a pattern from a file
01772 
01773 Returns:         TRUE on success, FALSE after an error
01774 */
01775 
01776 static BOOL
01777 compile_pattern(char *pattern, int options, char *filename, int count)
01778 {
01779 if ((process_options & PO_FIXED_STRINGS) != 0)
01780   {
01781   char *eop = pattern + strlen(pattern);
01782   char buffer[MBUFTHIRD];
01783   for(;;)
01784     {
01785     int ellength;
01786     char *p = end_of_line(pattern, eop, &ellength);
01787     if (ellength == 0)
01788       return compile_single_pattern(pattern, options, filename, count);
01789     sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
01790     pattern = p;
01791     if (!compile_single_pattern(buffer, options, filename, count))
01792       return FALSE;
01793     }
01794   }
01795 else return compile_single_pattern(pattern, options, filename, count);
01796 }
01797 
01798 
01799 
01800 /*************************************************
01801 *                Main program                    *
01802 *************************************************/
01803 
01804 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
01805 
01806 int
01807 main(int argc, char **argv)
01808 {
01809 int i, j;
01810 int rc = 1;
01811 int pcre_options = 0;
01812 int cmd_pattern_count = 0;
01813 int hint_count = 0;
01814 int errptr;
01815 BOOL only_one_at_top;
01816 char *patterns[MAX_PATTERN_COUNT];
01817 const char *locale_from = "--locale";
01818 const char *error;
01819 
01820 /* Set the default line ending value from the default in the PCRE library;
01821 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
01822 */
01823 
01824 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
01825 switch(i)
01826   {
01827   default:                 newline = (char *)"lf"; break;
01828   case '\r':               newline = (char *)"cr"; break;
01829   case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
01830   case -1:                 newline = (char *)"any"; break;
01831   case -2:                 newline = (char *)"anycrlf"; break;
01832   }
01833 
01834 /* Process the options */
01835 
01836 for (i = 1; i < argc; i++)
01837   {
01838   option_item *op = NULL;
01839   char *option_data = (char *)"";    /* default to keep compiler happy */
01840   BOOL longop;
01841   BOOL longopwasequals = FALSE;
01842 
01843   if (argv[i][0] != '-') break;
01844 
01845   /* If we hit an argument that is just "-", it may be a reference to STDIN,
01846   but only if we have previously had -e or -f to define the patterns. */
01847 
01848   if (argv[i][1] == 0)
01849     {
01850     if (pattern_filename != NULL || pattern_count > 0) break;
01851       else exit(usage(2));
01852     }
01853 
01854   /* Handle a long name option, or -- to terminate the options */
01855 
01856   if (argv[i][1] == '-')
01857     {
01858     char *arg = argv[i] + 2;
01859     char *argequals = strchr(arg, '=');
01860 
01861     if (*arg == 0)    /* -- terminates options */
01862       {
01863       i++;
01864       break;                /* out of the options-handling loop */
01865       }
01866 
01867     longop = TRUE;
01868 
01869     /* Some long options have data that follows after =, for example file=name.
01870     Some options have variations in the long name spelling: specifically, we
01871     allow "regexp" because GNU grep allows it, though I personally go along
01872     with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
01873     These options are entered in the table as "regex(p)". No option is in both
01874     these categories, fortunately. */
01875 
01876     for (op = optionlist; op->one_char != 0; op++)
01877       {
01878       char *opbra = strchr(op->long_name, '(');
01879       char *equals = strchr(op->long_name, '=');
01880       if (opbra == NULL)     /* Not a (p) case */
01881         {
01882         if (equals == NULL)  /* Not thing=data case */
01883           {
01884           if (strcmp(arg, op->long_name) == 0) break;
01885           }
01886         else                 /* Special case xxx=data */
01887           {
01888           int oplen = equals - op->long_name;
01889           int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
01890           if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
01891             {
01892             option_data = arg + arglen;
01893             if (*option_data == '=')
01894               {
01895               option_data++;
01896               longopwasequals = TRUE;
01897               }
01898             break;
01899             }
01900           }
01901         }
01902       else                   /* Special case xxxx(p) */
01903         {
01904         char buff1[24];
01905         char buff2[24];
01906         int baselen = opbra - op->long_name;
01907         sprintf(buff1, "%.*s", baselen, op->long_name);
01908         sprintf(buff2, "%s%.*s", buff1,
01909           (int)strlen(op->long_name) - baselen - 2, opbra + 1);
01910         if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
01911           break;
01912         }
01913       }
01914 
01915     if (op->one_char == 0)
01916       {
01917       fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
01918       exit(usage(2));
01919       }
01920     }
01921 
01922 
01923   /* Jeffrey Friedl's debugging harness uses these additional options which
01924   are not in the right form for putting in the option table because they use
01925   only one hyphen, yet are more than one character long. By putting them
01926   separately here, they will not get displayed as part of the help() output,
01927   but I don't think Jeffrey will care about that. */
01928 
01929 #ifdef JFRIEDL_DEBUG
01930   else if (strcmp(argv[i], "-pre") == 0) {
01931           jfriedl_prefix = argv[++i];
01932           continue;
01933   } else if (strcmp(argv[i], "-post") == 0) {
01934           jfriedl_postfix = argv[++i];
01935           continue;
01936   } else if (strcmp(argv[i], "-XT") == 0) {
01937           sscanf(argv[++i], "%d", &jfriedl_XT);
01938           continue;
01939   } else if (strcmp(argv[i], "-XR") == 0) {
01940           sscanf(argv[++i], "%d", &jfriedl_XR);
01941           continue;
01942   }
01943 #endif
01944 
01945 
01946   /* One-char options; many that have no data may be in a single argument; we
01947   continue till we hit the last one or one that needs data. */
01948 
01949   else
01950     {
01951     char *s = argv[i] + 1;
01952     longop = FALSE;
01953     while (*s != 0)
01954       {
01955       for (op = optionlist; op->one_char != 0; op++)
01956         { if (*s == op->one_char) break; }
01957       if (op->one_char == 0)
01958         {
01959         fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
01960           *s, argv[i]);
01961         exit(usage(2));
01962         }
01963       if (op->type != OP_NODATA || s[1] == 0)
01964         {
01965         option_data = s+1;
01966         break;
01967         }
01968       pcre_options = handle_option(*s++, pcre_options);
01969       }
01970     }
01971 
01972   /* At this point we should have op pointing to a matched option. If the type
01973   is NO_DATA, it means that there is no data, and the option might set
01974   something in the PCRE options. */
01975 
01976   if (op->type == OP_NODATA)
01977     {
01978     pcre_options = handle_option(op->one_char, pcre_options);
01979     continue;
01980     }
01981 
01982   /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
01983   either has a value or defaults to something. It cannot have data in a
01984   separate item. At the moment, the only such options are "colo(u)r" and
01985   Jeffrey Friedl's special -S debugging option. */
01986 
01987   if (*option_data == 0 &&
01988       (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
01989     {
01990     switch (op->one_char)
01991       {
01992       case N_COLOUR:
01993       colour_option = (char *)"auto";
01994       break;
01995 #ifdef JFRIEDL_DEBUG
01996       case 'S':
01997       S_arg = 0;
01998       break;
01999 #endif
02000       }
02001     continue;
02002     }
02003 
02004   /* Otherwise, find the data string for the option. */
02005 
02006   if (*option_data == 0)
02007     {
02008     if (i >= argc - 1 || longopwasequals)
02009       {
02010       fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
02011       exit(usage(2));
02012       }
02013     option_data = argv[++i];
02014     }
02015 
02016   /* If the option type is OP_PATLIST, it's the -e option, which can be called
02017   multiple times to create a list of patterns. */
02018 
02019   if (op->type == OP_PATLIST)
02020     {
02021     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
02022       {
02023       fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
02024         MAX_PATTERN_COUNT);
02025       return 2;
02026       }
02027     patterns[cmd_pattern_count++] = option_data;
02028     }
02029 
02030   /* Otherwise, deal with single string or numeric data values. */
02031 
02032   else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
02033     {
02034     *((char **)op->dataptr) = option_data;
02035     }
02036   else
02037     {
02038     char *endptr;
02039     int n = strtoul(option_data, &endptr, 10);
02040     if (*endptr != 0)
02041       {
02042       if (longop)
02043         {
02044         char *equals = strchr(op->long_name, '=');
02045         int nlen = (equals == NULL)? (int)strlen(op->long_name) :
02046           equals - op->long_name;
02047         fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
02048           option_data, nlen, op->long_name);
02049         }
02050       else
02051         fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
02052           option_data, op->one_char);
02053       exit(usage(2));
02054       }
02055     *((int *)op->dataptr) = n;
02056     }
02057   }
02058 
02059 /* Options have been decoded. If -C was used, its value is used as a default
02060 for -A and -B. */
02061 
02062 if (both_context > 0)
02063   {
02064   if (after_context == 0) after_context = both_context;
02065   if (before_context == 0) before_context = both_context;
02066   }
02067 
02068 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
02069 However, the latter two set the only_matching flag. */
02070 
02071 if ((only_matching && (file_offsets || line_offsets)) ||
02072     (file_offsets && line_offsets))
02073   {
02074   fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
02075     "and/or --line-offsets\n");
02076   exit(usage(2));
02077   }
02078 
02079 if (file_offsets || line_offsets) only_matching = TRUE;
02080 
02081 /* If a locale has not been provided as an option, see if the LC_CTYPE or
02082 LC_ALL environment variable is set, and if so, use it. */
02083 
02084 if (locale == NULL)
02085   {
02086   locale = getenv("LC_ALL");
02087   locale_from = "LCC_ALL";
02088   }
02089 
02090 if (locale == NULL)
02091   {
02092   locale = getenv("LC_CTYPE");
02093   locale_from = "LC_CTYPE";
02094   }
02095 
02096 /* If a locale has been provided, set it, and generate the tables the PCRE
02097 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
02098 
02099 if (locale != NULL)
02100   {
02101   if (setlocale(LC_CTYPE, locale) == NULL)
02102     {
02103     fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
02104       locale, locale_from);
02105     return 2;
02106     }
02107   pcretables = pcre_maketables();
02108   }
02109 
02110 /* Sort out colouring */
02111 
02112 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
02113   {
02114   if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
02115   else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
02116   else
02117     {
02118     fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
02119       colour_option);
02120     return 2;
02121     }
02122   if (do_colour)
02123     {
02124     char *cs = getenv("PCREGREP_COLOUR");
02125     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
02126     if (cs != NULL) colour_string = cs;
02127     }
02128   }
02129 
02130 /* Interpret the newline type; the default settings are Unix-like. */
02131 
02132 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
02133   {
02134   pcre_options |= PCRE_NEWLINE_CR;
02135   endlinetype = EL_CR;
02136   }
02137 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
02138   {
02139   pcre_options |= PCRE_NEWLINE_LF;
02140   endlinetype = EL_LF;
02141   }
02142 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
02143   {
02144   pcre_options |= PCRE_NEWLINE_CRLF;
02145   endlinetype = EL_CRLF;
02146   }
02147 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
02148   {
02149   pcre_options |= PCRE_NEWLINE_ANY;
02150   endlinetype = EL_ANY;
02151   }
02152 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
02153   {
02154   pcre_options |= PCRE_NEWLINE_ANYCRLF;
02155   endlinetype = EL_ANYCRLF;
02156   }
02157 else
02158   {
02159   fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
02160   return 2;
02161   }
02162 
02163 /* Interpret the text values for -d and -D */
02164 
02165 if (dee_option != NULL)
02166   {
02167   if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
02168   else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
02169   else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
02170   else
02171     {
02172     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
02173     return 2;
02174     }
02175   }
02176 
02177 if (DEE_option != NULL)
02178   {
02179   if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
02180   else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
02181   else
02182     {
02183     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
02184     return 2;
02185     }
02186   }
02187 
02188 /* Check the values for Jeffrey Friedl's debugging options. */
02189 
02190 #ifdef JFRIEDL_DEBUG
02191 if (S_arg > 9)
02192   {
02193   fprintf(stderr, "pcregrep: bad value for -S option\n");
02194   return 2;
02195   }
02196 if (jfriedl_XT != 0 || jfriedl_XR != 0)
02197   {
02198   if (jfriedl_XT == 0) jfriedl_XT = 1;
02199   if (jfriedl_XR == 0) jfriedl_XR = 1;
02200   }
02201 #endif
02202 
02203 /* Get memory to store the pattern and hints lists. */
02204 
02205 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
02206 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
02207 
02208 if (pattern_list == NULL || hints_list == NULL)
02209   {
02210   fprintf(stderr, "pcregrep: malloc failed\n");
02211   goto EXIT2;
02212   }
02213 
02214 /* If no patterns were provided by -e, and there is no file provided by -f,
02215 the first argument is the one and only pattern, and it must exist. */
02216 
02217 if (cmd_pattern_count == 0 && pattern_filename == NULL)
02218   {
02219   if (i >= argc) return usage(2);
02220   patterns[cmd_pattern_count++] = argv[i++];
02221   }
02222 
02223 /* Compile the patterns that were provided on the command line, either by
02224 multiple uses of -e or as a single unkeyed pattern. */
02225 
02226 for (j = 0; j < cmd_pattern_count; j++)
02227   {
02228   if (!compile_pattern(patterns[j], pcre_options, NULL,
02229        (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
02230     goto EXIT2;
02231   }
02232 
02233 /* Compile the regular expressions that are provided in a file. */
02234 
02235 if (pattern_filename != NULL)
02236   {
02237   int linenumber = 0;
02238   FILE *f;
02239   char *filename;
02240   char buffer[MBUFTHIRD];
02241 
02242   if (strcmp(pattern_filename, "-") == 0)
02243     {
02244     f = stdin;
02245     filename = stdin_name;
02246     }
02247   else
02248     {
02249     f = fopen(pattern_filename, "r");
02250     if (f == NULL)
02251       {
02252       fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
02253         strerror(errno));
02254       goto EXIT2;
02255       }
02256     filename = pattern_filename;
02257     }
02258 
02259   while (fgets(buffer, MBUFTHIRD, f) != NULL)
02260     {
02261     char *s = buffer + (int)strlen(buffer);
02262     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
02263     *s = 0;
02264     linenumber++;
02265     if (buffer[0] == 0) continue;   /* Skip blank lines */
02266     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
02267       goto EXIT2;
02268     }
02269 
02270   if (f != stdin) fclose(f);
02271   }
02272 
02273 /* Study the regular expressions, as we will be running them many times */
02274 
02275 for (j = 0; j < pattern_count; j++)
02276   {
02277   hints_list[j] = pcre_study(pattern_list[j], 0, &error);
02278   if (error != NULL)
02279     {
02280     char s[16];
02281     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
02282     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
02283     goto EXIT2;
02284     }
02285   hint_count++;
02286   }
02287 
02288 /* If there are include or exclude patterns, compile them. */
02289 
02290 if (exclude_pattern != NULL)
02291   {
02292   exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
02293     pcretables);
02294   if (exclude_compiled == NULL)
02295     {
02296     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
02297       errptr, error);
02298     goto EXIT2;
02299     }
02300   }
02301 
02302 if (include_pattern != NULL)
02303   {
02304   include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
02305     pcretables);
02306   if (include_compiled == NULL)
02307     {
02308     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
02309       errptr, error);
02310     goto EXIT2;
02311     }
02312   }
02313 
02314 if (exclude_dir_pattern != NULL)
02315   {
02316   exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
02317     pcretables);
02318   if (exclude_dir_compiled == NULL)
02319     {
02320     fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
02321       errptr, error);
02322     goto EXIT2;
02323     }
02324   }
02325 
02326 if (include_dir_pattern != NULL)
02327   {
02328   include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
02329     pcretables);
02330   if (include_dir_compiled == NULL)
02331     {
02332     fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
02333       errptr, error);
02334     goto EXIT2;
02335     }
02336   }
02337 
02338 /* If there are no further arguments, do the business on stdin and exit. */
02339 
02340 if (i >= argc)
02341   {
02342   rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
02343   goto EXIT;
02344   }
02345 
02346 /* Otherwise, work through the remaining arguments as files or directories.
02347 Pass in the fact that there is only one argument at top level - this suppresses
02348 the file name if the argument is not a directory and filenames are not
02349 otherwise forced. */
02350 
02351 only_one_at_top = i == argc - 1;   /* Catch initial value of i */
02352 
02353 for (; i < argc; i++)
02354   {
02355   int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
02356     only_one_at_top);
02357   if (frc > 1) rc = frc;
02358     else if (frc == 0 && rc == 1) rc = 0;
02359   }
02360 
02361 EXIT:
02362 if (pattern_list != NULL)
02363   {
02364   for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
02365   free(pattern_list);
02366   }
02367 if (hints_list != NULL)
02368   {
02369   for (i = 0; i < hint_count; i++) free(hints_list[i]);
02370   free(hints_list);
02371   }
02372 return rc;
02373 
02374 EXIT2:
02375 rc = 2;
02376 goto EXIT;
02377 }
02378 
02379 /* End of pcregrep */

Generated on Tue Jul 5 14:11:58 2011 for ROOT_528-00b_version by  doxygen 1.5.1