00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040 #ifdef HAVE_CONFIG_H
00041 #include "config.h"
00042 #endif
00043
00044 #include <ctype.h>
00045 #include <locale.h>
00046 #include <stdio.h>
00047 #include <string.h>
00048 #include <stdlib.h>
00049 #include <errno.h>
00050
00051 #include <sys/types.h>
00052 #include <sys/stat.h>
00053
00054 #ifdef HAVE_UNISTD_H
00055 #include <unistd.h>
00056 #endif
00057
00058 #ifdef SUPPORT_LIBZ
00059 #include <zlib.h>
00060 #endif
00061
00062 #ifdef SUPPORT_LIBBZ2
00063 #include <bzlib.h>
00064 #endif
00065
00066 #include "pcre.h"
00067
/* Home-grown boolean: a plain int holding FALSE (0) or TRUE (1). */
typedef int BOOL;

#define FALSE 0
#define TRUE 1

/* Pattern-count limit; its use is outside this section of the file. */
#define MAX_PATTERN_COUNT 100

/* One third of the main input buffer (pcregrep() declares a buffer of
3*MBUFTHIRD bytes): the larger of BUFSIZ and 8192. */
#if BUFSIZ > 8192
#define MBUFTHIRD BUFSIZ
#else
#define MBUFTHIRD 8192
#endif
00080
00081
00082
00083
00084
/* Values for the "filenames" variable (how file names are shown). */
enum {
  FN_NONE = 0,
  FN_DEFAULT = 1,
  FN_ONLY = 2,
  FN_NOMATCH_ONLY = 3,
  FN_FORCE = 4
};

/* How an input handle must be read: stdio, zlib or bzlib. */
enum {
  FR_PLAIN = 0,
  FR_LIBZ = 1,
  FR_LIBBZ2 = 2
};

/* Actions for directories (dee_*) and for other non-regular files (DEE_*). */
enum {
  dee_READ = 0,
  dee_SKIP = 1,
  dee_RECURSE = 2
};

enum {
  DEE_READ = 0,
  DEE_SKIP = 1
};

/* Bit flags accumulated in "process_options". */
#define PO_WORD_MATCH 0x0001
#define PO_LINE_MATCH 0x0002
#define PO_FIXED_STRINGS 0x0004

/* Line-terminator conventions selected through "endlinetype". */
enum {
  EL_LF = 0,
  EL_CR = 1,
  EL_CRLF = 2,
  EL_ANY = 3,
  EL_ANYCRLF = 4
};
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115 #ifdef JFRIEDL_DEBUG
00116 static int S_arg = -1;
00117 static unsigned int jfriedl_XR = 0;
00118 static unsigned int jfriedl_XT = 0;
00119 static const char *jfriedl_prefix = "";
00120 static const char *jfriedl_postfix = "";
00121 #endif
00122
00123 static int endlinetype;
00124
00125 static char *colour_string = (char *)"1;31";
00126 static char *colour_option = NULL;
00127 static char *dee_option = NULL;
00128 static char *DEE_option = NULL;
00129 static char *newline = NULL;
00130 static char *pattern_filename = NULL;
00131 static char *stdin_name = (char *)"(standard input)";
00132 static char *locale = NULL;
00133
00134 static const unsigned char *pcretables = NULL;
00135
00136 static int pattern_count = 0;
00137 static pcre **pattern_list = NULL;
00138 static pcre_extra **hints_list = NULL;
00139
00140 static char *include_pattern = NULL;
00141 static char *exclude_pattern = NULL;
00142 static char *include_dir_pattern = NULL;
00143 static char *exclude_dir_pattern = NULL;
00144
00145 static pcre *include_compiled = NULL;
00146 static pcre *exclude_compiled = NULL;
00147 static pcre *include_dir_compiled = NULL;
00148 static pcre *exclude_dir_compiled = NULL;
00149
00150 static int after_context = 0;
00151 static int before_context = 0;
00152 static int both_context = 0;
00153 static int dee_action = dee_READ;
00154 static int DEE_action = DEE_READ;
00155 static int error_count = 0;
00156 static int filenames = FN_DEFAULT;
00157 static int process_options = 0;
00158
00159 static BOOL count_only = FALSE;
00160 static BOOL do_colour = FALSE;
00161 static BOOL file_offsets = FALSE;
00162 static BOOL hyphenpending = FALSE;
00163 static BOOL invert = FALSE;
00164 static BOOL line_offsets = FALSE;
00165 static BOOL multiline = FALSE;
00166 static BOOL number = FALSE;
00167 static BOOL only_matching = FALSE;
00168 static BOOL quiet = FALSE;
00169 static BOOL silent = FALSE;
00170 static BOOL utf8 = FALSE;
00171
00172
00173
00174 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
00175 OP_PATLIST };
00176
00177 typedef struct option_item {
00178 int type;
00179 int one_char;
00180 void *dataptr;
00181 const char *long_name;
00182 const char *help_text;
00183 } option_item;
00184
00185
00186
00187
00188 #define N_COLOUR (-1)
00189 #define N_EXCLUDE (-2)
00190 #define N_EXCLUDE_DIR (-3)
00191 #define N_HELP (-4)
00192 #define N_INCLUDE (-5)
00193 #define N_INCLUDE_DIR (-6)
00194 #define N_LABEL (-7)
00195 #define N_LOCALE (-8)
00196 #define N_NULL (-9)
00197 #define N_LOFFSETS (-10)
00198 #define N_FOFFSETS (-11)
00199
00200 static option_item optionlist[] = {
00201 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
00202 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
00203 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
00204 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
00205 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
00206 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
00207 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
00208 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
00209 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
00210 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
00211 { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
00212 { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
00213 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
00214 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
00215 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
00216 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
00217 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
00218 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
00219 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
00220 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
00221 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
00222 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
00223 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
00224 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
00225 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
00226 { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
00227 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
00228 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
00229 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
00230 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
00231 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
00232 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
00233 #ifdef JFRIEDL_DEBUG
00234 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
00235 #endif
00236 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
00237 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
00238 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
00239 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
00240 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
00241 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
00242 { OP_NODATA, 0, NULL, NULL, NULL }
00243 };
00244
00245
00246
00247
00248
00249
/* Text placed before and after a pattern. There are eight entries in each
table, which matches the eight combinations of the three PO_* bits; the
indexing code is outside this section (NOTE(review): confirm). */
static const char *prefix[] = {
  "",
  "\\b",
  "^(?:",
  "^(?:",
  "\\Q",
  "\\b\\Q",
  "^(?:\\Q",
  "^(?:\\Q"
};

static const char *suffix[] = {
  "",
  "\\b",
  ")$",
  ")$",
  "\\E",
  "\\E\\b",
  "\\E)$",
  "\\E)$"
};

/* UTF-8 decoding tables. utf8_table3 is indexed by the number of extra
bytes and masks the data bits out of the first byte. utf8_table4 is indexed
by the low six bits of a first byte >= 0xc0 and gives the number of extra
bytes that follow it. */
const int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01
};

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};
00265
00266
00267
00268
00269
00270
00271
00272
00273
00274
00275
00276
00277
00278 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
00279 #include <sys/types.h>
00280 #include <sys/stat.h>
00281 #include <dirent.h>
00282
/* On POSIX systems a directory handle is simply a DIR stream. */
typedef DIR directory_type;

/* Test whether a path names a directory.

Arguments:
  filename   the path to examine

Returns:     '/' (which the caller uses as the path separator) when stat()
             succeeds and reports a directory; 0 otherwise, including when
             stat() fails
*/
static int
isdirectory(char *filename)
{
  struct stat info;

  if (stat(filename, &info) >= 0 && S_ISDIR(info.st_mode))
    return '/';
  return 0;
}
00293
00294 static directory_type *
00295 opendirectory(char *filename)
00296 {
00297 return opendir(filename);
00298 }
00299
00300 static char *
00301 readdirectory(directory_type *dir)
00302 {
00303 for (;;)
00304 {
00305 struct dirent *dent = readdir(dir);
00306 if (dent == NULL) return NULL;
00307 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
00308 return dent->d_name;
00309 }
00310
00311 }
00312
00313 static void
00314 closedirectory(directory_type *dir)
00315 {
00316 closedir(dir);
00317 }
00318
00319
00320
00321
/* Test whether a path names a regular file.

Arguments:
  filename   the path to examine

Returns:     1 if stat() fails (so the caller goes on to try opening the
             file), otherwise 1 for a regular file and 0 for anything else
*/
static int
isregfile(char *filename)
{
  struct stat info;

  if (stat(filename, &info) < 0)
    return 1;
  return S_ISREG(info.st_mode) ? 1 : 0;
}
00330
00331
00332
00333
00334 static BOOL
00335 is_stdout_tty(void)
00336 {
00337 return isatty(fileno(stdout));
00338 }
00339
00340
00341
00342
00343
00344
00345
00346
00347
00348
00349 #elif HAVE_WINDOWS_H
00350
00351 #ifndef STRICT
00352 # define STRICT
00353 #endif
00354 #ifndef WIN32_LEAN_AND_MEAN
00355 # define WIN32_LEAN_AND_MEAN
00356 #endif
00357
00358 #include <windows.h>
00359
00360 #ifndef INVALID_FILE_ATTRIBUTES
00361 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
00362 #endif
00363
00364 typedef struct directory_type
00365 {
00366 HANDLE handle;
00367 BOOL first;
00368 WIN32_FIND_DATA data;
00369 } directory_type;
00370
00371 int
00372 isdirectory(char *filename)
00373 {
00374 DWORD attr = GetFileAttributes(filename);
00375 if (attr == INVALID_FILE_ATTRIBUTES)
00376 return 0;
00377 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
00378 }
00379
00380 directory_type *
00381 opendirectory(char *filename)
00382 {
00383 size_t len;
00384 char *pattern;
00385 directory_type *dir;
00386 DWORD err;
00387 len = strlen(filename);
00388 pattern = (char *) malloc(len + 3);
00389 dir = (directory_type *) malloc(sizeof(*dir));
00390 if ((pattern == NULL) || (dir == NULL))
00391 {
00392 fprintf(stderr, "pcregrep: malloc failed\n");
00393 exit(2);
00394 }
00395 memcpy(pattern, filename, len);
00396 memcpy(&(pattern[len]), "\\*", 3);
00397 dir->handle = FindFirstFile(pattern, &(dir->data));
00398 if (dir->handle != INVALID_HANDLE_VALUE)
00399 {
00400 free(pattern);
00401 dir->first = TRUE;
00402 return dir;
00403 }
00404 err = GetLastError();
00405 free(pattern);
00406 free(dir);
00407 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
00408 return NULL;
00409 }
00410
00411 char *
00412 readdirectory(directory_type *dir)
00413 {
00414 for (;;)
00415 {
00416 if (!dir->first)
00417 {
00418 if (!FindNextFile(dir->handle, &(dir->data)))
00419 return NULL;
00420 }
00421 else
00422 {
00423 dir->first = FALSE;
00424 }
00425 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
00426 return dir->data.cFileName;
00427 }
00428 #ifndef _MSC_VER
00429 return NULL;
00430 #endif
00431 }
00432
00433 void
00434 closedirectory(directory_type *dir)
00435 {
00436 FindClose(dir->handle);
00437 free(dir);
00438 }
00439
00440
00441
00442
00443
00444
00445
/* Regular-file test (Windows version): anything that isdirectory() does not
recognize as a directory counts as a regular file. Returns 1 or 0. */
int isregfile(char *filename)
{
  return isdirectory(filename) == 0;
}
00450
00451
00452
00453
00454
00455
00456 static BOOL
00457 is_stdout_tty(void)
00458 {
00459 return FALSE;
00460 }
00461
00462
00463
00464
00465
00466
00467 #else
00468
/* Fallback for systems with neither the POSIX nor the Windows directory
interface: nothing is ever a directory and no directory can be opened. */
typedef void directory_type;

/* Always reports "not a directory". */
int isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

/* Always fails to open. */
directory_type * opendirectory(char *filename)
{
  (void)filename;
  return (directory_type*)0;
}

/* Never yields an entry. */
char *readdirectory(directory_type *dir)
{
  (void)dir;
  return (char*)0;
}

/* Nothing to release. */
void closedirectory(directory_type *dir)
{
  (void)dir;
}
00475
00476
00477
00478
00479
00480
/* Fallback regular-file test: every path is treated as a regular file. */
int isregfile(char *filename)
{
  (void)filename;
  return 1;
}
00482
00483
00484
00485
00486 static BOOL
00487 is_stdout_tty(void)
00488 {
00489 return FALSE;
00490 }
00491
00492
00493 #endif
00494
00495
00496
00497 #ifndef HAVE_STRERROR
00498
00499
00500
00501
00502
00503
00504
00505
/* Replacement for strerror() on systems that lack it, built on the
traditional sys_nerr/sys_errlist pair.

Arguments:
  n          an error number

Returns:     the matching sys_errlist entry, or a fixed text when n is
             outside 0 .. sys_nerr-1
*/
extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr)
    return sys_errlist[n];
  return "unknown error number";
}
00515 #endif
00516
00517
00518
00519
00520
00521
00522
00523
00524
00525
00526
00527
00528
00529
00530
00531
00532
00533
00534 static char *
00535 end_of_line(char *p, char *endptr, int *lenptr)
00536 {
00537 switch(endlinetype)
00538 {
00539 default:
00540 case EL_LF:
00541 while (p < endptr && *p != '\n') p++;
00542 if (p < endptr)
00543 {
00544 *lenptr = 1;
00545 return p + 1;
00546 }
00547 *lenptr = 0;
00548 return endptr;
00549
00550 case EL_CR:
00551 while (p < endptr && *p != '\r') p++;
00552 if (p < endptr)
00553 {
00554 *lenptr = 1;
00555 return p + 1;
00556 }
00557 *lenptr = 0;
00558 return endptr;
00559
00560 case EL_CRLF:
00561 for (;;)
00562 {
00563 while (p < endptr && *p != '\r') p++;
00564 if (++p >= endptr)
00565 {
00566 *lenptr = 0;
00567 return endptr;
00568 }
00569 if (*p == '\n')
00570 {
00571 *lenptr = 2;
00572 return p + 1;
00573 }
00574 }
00575 break;
00576
00577 case EL_ANYCRLF:
00578 while (p < endptr)
00579 {
00580 int extra = 0;
00581 register int c = *((unsigned char *)p);
00582
00583 if (utf8 && c >= 0xc0)
00584 {
00585 int gcii, gcss;
00586 extra = utf8_table4[c & 0x3f];
00587 gcss = 6*extra;
00588 c = (c & utf8_table3[extra]) << gcss;
00589 for (gcii = 1; gcii <= extra; gcii++)
00590 {
00591 gcss -= 6;
00592 c |= (p[gcii] & 0x3f) << gcss;
00593 }
00594 }
00595
00596 p += 1 + extra;
00597
00598 switch (c)
00599 {
00600 case 0x0a:
00601 *lenptr = 1;
00602 return p;
00603
00604 case 0x0d:
00605 if (p < endptr && *p == 0x0a)
00606 {
00607 *lenptr = 2;
00608 p++;
00609 }
00610 else *lenptr = 1;
00611 return p;
00612
00613 default:
00614 break;
00615 }
00616 }
00617
00618 *lenptr = 0;
00619 return endptr;
00620
00621 case EL_ANY:
00622 while (p < endptr)
00623 {
00624 int extra = 0;
00625 register int c = *((unsigned char *)p);
00626
00627 if (utf8 && c >= 0xc0)
00628 {
00629 int gcii, gcss;
00630 extra = utf8_table4[c & 0x3f];
00631 gcss = 6*extra;
00632 c = (c & utf8_table3[extra]) << gcss;
00633 for (gcii = 1; gcii <= extra; gcii++)
00634 {
00635 gcss -= 6;
00636 c |= (p[gcii] & 0x3f) << gcss;
00637 }
00638 }
00639
00640 p += 1 + extra;
00641
00642 switch (c)
00643 {
00644 case 0x0a:
00645 case 0x0b:
00646 case 0x0c:
00647 *lenptr = 1;
00648 return p;
00649
00650 case 0x0d:
00651 if (p < endptr && *p == 0x0a)
00652 {
00653 *lenptr = 2;
00654 p++;
00655 }
00656 else *lenptr = 1;
00657 return p;
00658
00659 case 0x85:
00660 *lenptr = utf8? 2 : 1;
00661 return p;
00662
00663 case 0x2028:
00664 case 0x2029:
00665 *lenptr = 3;
00666 return p;
00667
00668 default:
00669 break;
00670 }
00671 }
00672
00673 *lenptr = 0;
00674 return endptr;
00675 }
00676 }
00677
00678
00679
00680
00681
00682
00683
00684
00685
00686
00687
00688
00689
00690
00691
00692
00693 static char *
00694 previous_line(char *p, char *startptr)
00695 {
00696 switch(endlinetype)
00697 {
00698 default:
00699 case EL_LF:
00700 p--;
00701 while (p > startptr && p[-1] != '\n') p--;
00702 return p;
00703
00704 case EL_CR:
00705 p--;
00706 while (p > startptr && p[-1] != '\n') p--;
00707 return p;
00708
00709 case EL_CRLF:
00710 for (;;)
00711 {
00712 p -= 2;
00713 while (p > startptr && p[-1] != '\n') p--;
00714 if (p <= startptr + 1 || p[-2] == '\r') return p;
00715 }
00716 return p;
00717
00718 case EL_ANY:
00719 case EL_ANYCRLF:
00720 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
00721 if (utf8) while ((*p & 0xc0) == 0x80) p--;
00722
00723 while (p > startptr)
00724 {
00725 register int c;
00726 char *pp = p - 1;
00727
00728 if (utf8)
00729 {
00730 int extra = 0;
00731 while ((*pp & 0xc0) == 0x80) pp--;
00732 c = *((unsigned char *)pp);
00733 if (c >= 0xc0)
00734 {
00735 int gcii, gcss;
00736 extra = utf8_table4[c & 0x3f];
00737 gcss = 6*extra;
00738 c = (c & utf8_table3[extra]) << gcss;
00739 for (gcii = 1; gcii <= extra; gcii++)
00740 {
00741 gcss -= 6;
00742 c |= (pp[gcii] & 0x3f) << gcss;
00743 }
00744 }
00745 }
00746 else c = *((unsigned char *)pp);
00747
00748 if (endlinetype == EL_ANYCRLF) switch (c)
00749 {
00750 case 0x0a:
00751 case 0x0d:
00752 return p;
00753
00754 default:
00755 break;
00756 }
00757
00758 else switch (c)
00759 {
00760 case 0x0a:
00761 case 0x0b:
00762 case 0x0c:
00763 case 0x0d:
00764 case 0x85:
00765 case 0x2028:
00766 case 0x2029:
00767 return p;
00768
00769 default:
00770 break;
00771 }
00772
00773 p = pp;
00774 }
00775
00776 return startptr;
00777 }
00778 }
00779
00780
00781
00782
00783
00784
00785
00786
00787
00788
00789
00790
00791
00792
00793
00794
00795
00796
00797
00798
00799
00800
00801 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
00802 char *endptr, char *printname)
00803 {
00804 if (after_context > 0 && lastmatchnumber > 0)
00805 {
00806 int count = 0;
00807 while (lastmatchrestart < endptr && count++ < after_context)
00808 {
00809 int ellength;
00810 char *pp = lastmatchrestart;
00811 if (printname != NULL) fprintf(stdout, "%s-", printname);
00812 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
00813 pp = end_of_line(pp, endptr, &ellength);
00814 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
00815 lastmatchrestart = pp;
00816 }
00817 hyphenpending = TRUE;
00818 }
00819 }
00820
00821
00822
00823
00824
00825
00826
00827
00828
00829
00830
00831
00832
00833
00834
00835
00836
00837
00838
00839
00840
00841
00842
00843
00844
00845
00846
00847
00848 static int
00849 pcregrep(void *handle, int frtype, char *printname)
00850 {
00851 int rc = 1;
00852 int linenumber = 1;
00853 int lastmatchnumber = 0;
00854 int count = 0;
00855 int filepos = 0;
00856 int offsets[99];
00857 char *lastmatchrestart = NULL;
00858 char buffer[3*MBUFTHIRD];
00859 char *ptr = buffer;
00860 char *endptr;
00861 size_t bufflength;
00862 BOOL endhyphenpending = FALSE;
00863 FILE *in = NULL;
00864
00865 #ifdef SUPPORT_LIBZ
00866 gzFile ingz = NULL;
00867 #endif
00868
00869 #ifdef SUPPORT_LIBBZ2
00870 BZFILE *inbz2 = NULL;
00871 #endif
00872
00873
00874
00875
00876
00877
00878
00879 #ifdef SUPPORT_LIBZ
00880 if (frtype == FR_LIBZ)
00881 {
00882 ingz = (gzFile)handle;
00883 bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
00884 }
00885 else
00886 #endif
00887
00888 #ifdef SUPPORT_LIBBZ2
00889 if (frtype == FR_LIBBZ2)
00890 {
00891 inbz2 = (BZFILE *)handle;
00892 bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
00893 if ((int)bufflength < 0) return 2;
00894 }
00895 else
00896 #endif
00897
00898 {
00899 in = (FILE *)handle;
00900 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
00901 }
00902
00903 endptr = buffer + bufflength;
00904
00905
00906
00907
00908
00909
00910 while (ptr < endptr)
00911 {
00912 int i, endlinelength;
00913 int mrc = 0;
00914 BOOL match = FALSE;
00915 char *matchptr = ptr;
00916 char *t = ptr;
00917 size_t length, linelength;
00918
00919
00920
00921
00922
00923
00924
00925
00926 t = end_of_line(t, endptr, &endlinelength);
00927 linelength = t - ptr - endlinelength;
00928 length = multiline? (size_t)(endptr - ptr) : linelength;
00929
00930
00931
00932 #ifdef JFRIEDL_DEBUG
00933 if (jfriedl_XT || jfriedl_XR)
00934 {
00935 #include <sys/time.h>
00936 #include <time.h>
00937 struct timeval start_time, end_time;
00938 struct timezone dummy;
00939
00940 if (jfriedl_XT)
00941 {
00942 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
00943 const char *orig = ptr;
00944 ptr = malloc(newlen + 1);
00945 if (!ptr) {
00946 printf("out of memory");
00947 exit(2);
00948 }
00949 endptr = ptr;
00950 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
00951 for (i = 0; i < jfriedl_XT; i++) {
00952 strncpy(endptr, orig, length);
00953 endptr += length;
00954 }
00955 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
00956 length = newlen;
00957 }
00958
00959 if (gettimeofday(&start_time, &dummy) != 0)
00960 perror("bad gettimeofday");
00961
00962
00963 for (i = 0; i < jfriedl_XR; i++)
00964 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
00965
00966 if (gettimeofday(&end_time, &dummy) != 0)
00967 perror("bad gettimeofday");
00968
00969 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
00970 -
00971 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
00972
00973 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
00974 return 0;
00975 }
00976 #endif
00977
00978
00979
00980
00981 ONLY_MATCHING_RESTART:
00982
00983
00984
00985
00986 for (i = 0; i < pattern_count; i++)
00987 {
00988 mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
00989 offsets, 99);
00990 if (mrc >= 0) { match = TRUE; break; }
00991 if (mrc != PCRE_ERROR_NOMATCH)
00992 {
00993 fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
00994 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
00995 fprintf(stderr, "this line:\n");
00996 fwrite(matchptr, 1, linelength, stderr);
00997 fprintf(stderr, "\n");
00998 if (error_count == 0 &&
00999 (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
01000 {
01001 fprintf(stderr, "pcregrep: error %d means that a resource limit "
01002 "was exceeded\n", mrc);
01003 fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
01004 }
01005 if (error_count++ > 20)
01006 {
01007 fprintf(stderr, "pcregrep: too many errors - abandoned\n");
01008 exit(2);
01009 }
01010 match = invert;
01011 break;
01012 }
01013 }
01014
01015
01016
01017 if (match != invert)
01018 {
01019 BOOL hyphenprinted = FALSE;
01020
01021
01022
01023 if (filenames == FN_NOMATCH_ONLY) return 1;
01024
01025
01026
01027 if (count_only) count++;
01028
01029
01030
01031
01032 else if (filenames == FN_ONLY)
01033 {
01034 fprintf(stdout, "%s\n", printname);
01035 return 0;
01036 }
01037
01038
01039
01040 else if (quiet) return 0;
01041
01042
01043
01044
01045
01046
01047
01048
01049
01050 else if (only_matching)
01051 {
01052 if (!invert)
01053 {
01054 if (printname != NULL) fprintf(stdout, "%s:", printname);
01055 if (number) fprintf(stdout, "%d:", linenumber);
01056 if (line_offsets)
01057 fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
01058 offsets[1] - offsets[0]);
01059 else if (file_offsets)
01060 fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
01061 offsets[1] - offsets[0]);
01062 else
01063 fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
01064 fprintf(stdout, "\n");
01065 matchptr += offsets[1];
01066 length -= offsets[1];
01067 match = FALSE;
01068 goto ONLY_MATCHING_RESTART;
01069 }
01070 }
01071
01072
01073
01074
01075
01076 else
01077 {
01078
01079
01080
01081 if (after_context > 0 && lastmatchnumber > 0)
01082 {
01083 int ellength;
01084 int linecount = 0;
01085 char *p = lastmatchrestart;
01086
01087 while (p < ptr && linecount < after_context)
01088 {
01089 p = end_of_line(p, ptr, &ellength);
01090 linecount++;
01091 }
01092
01093
01094
01095
01096
01097 while (lastmatchrestart < p)
01098 {
01099 char *pp = lastmatchrestart;
01100 if (printname != NULL) fprintf(stdout, "%s-", printname);
01101 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
01102 pp = end_of_line(pp, endptr, &ellength);
01103 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
01104 lastmatchrestart = pp;
01105 }
01106 if (lastmatchrestart != ptr) hyphenpending = TRUE;
01107 }
01108
01109
01110
01111 if (hyphenpending)
01112 {
01113 fprintf(stdout, "--\n");
01114 hyphenpending = FALSE;
01115 hyphenprinted = TRUE;
01116 }
01117
01118
01119
01120
01121 if (before_context > 0)
01122 {
01123 int linecount = 0;
01124 char *p = ptr;
01125
01126 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
01127 linecount < before_context)
01128 {
01129 linecount++;
01130 p = previous_line(p, buffer);
01131 }
01132
01133 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
01134 fprintf(stdout, "--\n");
01135
01136 while (p < ptr)
01137 {
01138 int ellength;
01139 char *pp = p;
01140 if (printname != NULL) fprintf(stdout, "%s-", printname);
01141 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
01142 pp = end_of_line(pp, endptr, &ellength);
01143 fwrite(p, 1, pp - p, stdout);
01144 p = pp;
01145 }
01146 }
01147
01148
01149
01150
01151 if (after_context > 0 || before_context > 0)
01152 endhyphenpending = TRUE;
01153
01154 if (printname != NULL) fprintf(stdout, "%s:", printname);
01155 if (number) fprintf(stdout, "%d:", linenumber);
01156
01157
01158
01159
01160
01161
01162
01163 if (multiline)
01164 {
01165 int ellength;
01166 char *endmatch = ptr;
01167 if (!invert)
01168 {
01169 endmatch += offsets[1];
01170 t = ptr;
01171 while (t < endmatch)
01172 {
01173 t = end_of_line(t, endptr, &ellength);
01174 if (t <= endmatch) linenumber++; else break;
01175 }
01176 }
01177 endmatch = end_of_line(endmatch, endptr, &ellength);
01178 linelength = endmatch - ptr - ellength;
01179 }
01180
01181
01182
01183
01184
01185
01186
01187
01188 #ifdef JFRIEDL_DEBUG
01189 if (S_arg >= 0 && S_arg < mrc)
01190 {
01191 int first = S_arg * 2;
01192 int last = first + 1;
01193 fwrite(ptr, 1, offsets[first], stdout);
01194 fprintf(stdout, "X");
01195 fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
01196 }
01197 else
01198 #endif
01199
01200
01201
01202 if (do_colour)
01203 {
01204 fwrite(ptr, 1, offsets[0], stdout);
01205 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
01206 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
01207 fprintf(stdout, "%c[00m", 0x1b);
01208 fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
01209 stdout);
01210 }
01211 else fwrite(ptr, 1, linelength + endlinelength, stdout);
01212 }
01213
01214
01215
01216 rc = 0;
01217
01218
01219
01220
01221 lastmatchrestart = ptr + linelength + endlinelength;
01222 lastmatchnumber = linenumber + 1;
01223 }
01224
01225
01226
01227
01228
01229 if (multiline && invert && match)
01230 {
01231 int ellength;
01232 char *endmatch = ptr + offsets[1];
01233 t = ptr;
01234 while (t < endmatch)
01235 {
01236 t = end_of_line(t, endptr, &ellength);
01237 if (t <= endmatch) linenumber++; else break;
01238 }
01239 endmatch = end_of_line(endmatch, endptr, &ellength);
01240 linelength = endmatch - ptr - ellength;
01241 }
01242
01243
01244
01245
01246 ptr += linelength + endlinelength;
01247 filepos += linelength + endlinelength;
01248 linenumber++;
01249
01250
01251
01252
01253
01254
01255 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
01256 {
01257 if (after_context > 0 &&
01258 lastmatchnumber > 0 &&
01259 lastmatchrestart < buffer + MBUFTHIRD)
01260 {
01261 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
01262 lastmatchnumber = 0;
01263 }
01264
01265
01266
01267 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
01268 ptr -= MBUFTHIRD;
01269
01270 #ifdef SUPPORT_LIBZ
01271 if (frtype == FR_LIBZ)
01272 bufflength = 2*MBUFTHIRD +
01273 gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
01274 else
01275 #endif
01276
01277 #ifdef SUPPORT_LIBBZ2
01278 if (frtype == FR_LIBBZ2)
01279 bufflength = 2*MBUFTHIRD +
01280 BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
01281 else
01282 #endif
01283
01284 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
01285
01286 endptr = buffer + bufflength;
01287
01288
01289
01290 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
01291 }
01292 }
01293
01294
01295
01296
01297 if (!only_matching && !count_only)
01298 {
01299 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
01300 hyphenpending |= endhyphenpending;
01301 }
01302
01303
01304
01305
01306 if (filenames == FN_NOMATCH_ONLY)
01307 {
01308 fprintf(stdout, "%s\n", printname);
01309 return 0;
01310 }
01311
01312
01313
01314 if (count_only)
01315 {
01316 if (printname != NULL) fprintf(stdout, "%s:", printname);
01317 fprintf(stdout, "%d\n", count);
01318 }
01319
01320 return rc;
01321 }
01322
01323
01324
01325
01326
01327
01328
01329
01330
01331
01332
01333
01334
01335
01336
01337
01338
01339
01340
01341
01342
01343
01344 static int
01345 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
01346 {
01347 int rc = 1;
01348 int sep;
01349 int frtype;
01350 int pathlen;
01351 void *handle;
01352 FILE *in = NULL;
01353
01354 #ifdef SUPPORT_LIBZ
01355 gzFile ingz = NULL;
01356 #endif
01357
01358 #ifdef SUPPORT_LIBBZ2
01359 BZFILE *inbz2 = NULL;
01360 #endif
01361
01362
01363
01364 if (strcmp(pathname, "-") == 0)
01365 {
01366 return pcregrep(stdin, FR_PLAIN,
01367 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
01368 stdin_name : NULL);
01369 }
01370
01371
01372
01373
01374
01375
01376 if ((sep = isdirectory(pathname)) != 0)
01377 {
01378 if (dee_action == dee_SKIP) return 1;
01379 if (dee_action == dee_RECURSE)
01380 {
01381 char buffer[1024];
01382 char *nextfile;
01383 directory_type *dir = opendirectory(pathname);
01384
01385 if (dir == NULL)
01386 {
01387 if (!silent)
01388 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
01389 strerror(errno));
01390 return 2;
01391 }
01392
01393 while ((nextfile = readdirectory(dir)) != NULL)
01394 {
01395 int frc, nflen;
01396 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
01397 nflen = strlen(nextfile);
01398
01399 if (isdirectory(buffer))
01400 {
01401 if (exclude_dir_compiled != NULL &&
01402 pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
01403 continue;
01404
01405 if (include_dir_compiled != NULL &&
01406 pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
01407 continue;
01408 }
01409 else
01410 {
01411 if (exclude_compiled != NULL &&
01412 pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
01413 continue;
01414
01415 if (include_compiled != NULL &&
01416 pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
01417 continue;
01418 }
01419
01420 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
01421 if (frc > 1) rc = frc;
01422 else if (frc == 0 && rc == 1) rc = 0;
01423 }
01424
01425 closedirectory(dir);
01426 return rc;
01427 }
01428 }
01429
01430
01431
01432
01433 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
01434
01435
01436
01437
01438
01439
01440
01441 pathlen = strlen(pathname);
01442
01443
01444
01445 #ifdef SUPPORT_LIBZ
01446 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
01447 {
01448 ingz = gzopen(pathname, "rb");
01449 if (ingz == NULL)
01450 {
01451 if (!silent)
01452 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
01453 strerror(errno));
01454 return 2;
01455 }
01456 handle = (void *)ingz;
01457 frtype = FR_LIBZ;
01458 }
01459 else
01460 #endif
01461
01462
01463
01464 #ifdef SUPPORT_LIBBZ2
01465 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
01466 {
01467 inbz2 = BZ2_bzopen(pathname, "rb");
01468 handle = (void *)inbz2;
01469 frtype = FR_LIBBZ2;
01470 }
01471 else
01472 #endif
01473
01474
01475
01476
01477 #ifdef SUPPORT_LIBBZ2
01478 PLAIN_FILE:
01479 #endif
01480 {
01481 in = fopen(pathname, "r");
01482 handle = (void *)in;
01483 frtype = FR_PLAIN;
01484 }
01485
01486
01487
01488 if (handle == NULL)
01489 {
01490 if (!silent)
01491 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
01492 strerror(errno));
01493 return 2;
01494 }
01495
01496
01497
01498 rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
01499 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
01500
01501
01502
01503 #ifdef SUPPORT_LIBZ
01504 if (frtype == FR_LIBZ)
01505 gzclose(ingz);
01506 else
01507 #endif
01508
01509
01510
01511
01512
01513 #ifdef SUPPORT_LIBBZ2
01514 if (frtype == FR_LIBBZ2)
01515 {
01516 if (rc == 2)
01517 {
01518 int errnum;
01519 const char *err = BZ2_bzerror(inbz2, &errnum);
01520 if (errnum == BZ_DATA_ERROR_MAGIC)
01521 {
01522 BZ2_bzclose(inbz2);
01523 goto PLAIN_FILE;
01524 }
01525 else if (!silent)
01526 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
01527 pathname, err);
01528 }
01529 BZ2_bzclose(inbz2);
01530 }
01531 else
01532 #endif
01533
01534
01535
01536 fclose(in);
01537
01538
01539
01540 return rc;
01541 }
01542
01543
01544
01545
01546
01547
01548
01549
01550 static int
01551 usage(int rc)
01552 {
01553 option_item *op;
01554 fprintf(stderr, "Usage: pcregrep [-");
01555 for (op = optionlist; op->one_char != 0; op++)
01556 {
01557 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
01558 }
01559 fprintf(stderr, "] [long options] [pattern] [files]\n");
01560 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
01561 "options.\n");
01562 return rc;
01563 }
01564
01565
01566
01567
01568
01569
01570
01571
01572 static void
01573 help(void)
01574 {
01575 option_item *op;
01576
01577 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
01578 printf("Search for PATTERN in each FILE or standard input.\n");
01579 printf("PATTERN must be present if neither -e nor -f is used.\n");
01580 printf("\"-\" can be used as a file name to mean STDIN.\n");
01581
01582 #ifdef SUPPORT_LIBZ
01583 printf("Files whose names end in .gz are read using zlib.\n");
01584 #endif
01585
01586 #ifdef SUPPORT_LIBBZ2
01587 printf("Files whose names end in .bz2 are read using bzlib2.\n");
01588 #endif
01589
01590 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
01591 printf("Other files and the standard input are read as plain files.\n\n");
01592 #else
01593 printf("All files are read as plain files, without any interpretation.\n\n");
01594 #endif
01595
01596 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
01597 printf("Options:\n");
01598
01599 for (op = optionlist; op->one_char != 0; op++)
01600 {
01601 int n;
01602 char s[4];
01603 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
01604 n = 30 - printf(" %s --%s", s, op->long_name);
01605 if (n < 1) n = 1;
01606 printf("%.*s%s\n", n, " ", op->help_text);
01607 }
01608
01609 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
01610 printf("trailing white space is removed and blank lines are ignored.\n");
01611 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
01612
01613 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
01614 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
01615 }
01616
01617
01618
01619
01620
01621
01622
01623
01624 static int
01625 handle_option(int letter, int options)
01626 {
01627 switch(letter)
01628 {
01629 case N_FOFFSETS: file_offsets = TRUE; break;
01630 case N_HELP: help(); exit(0);
01631 case N_LOFFSETS: line_offsets = number = TRUE; break;
01632 case 'c': count_only = TRUE; break;
01633 case 'F': process_options |= PO_FIXED_STRINGS; break;
01634 case 'H': filenames = FN_FORCE; break;
01635 case 'h': filenames = FN_NONE; break;
01636 case 'i': options |= PCRE_CASELESS; break;
01637 case 'l': filenames = FN_ONLY; break;
01638 case 'L': filenames = FN_NOMATCH_ONLY; break;
01639 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
01640 case 'n': number = TRUE; break;
01641 case 'o': only_matching = TRUE; break;
01642 case 'q': quiet = TRUE; break;
01643 case 'r': dee_action = dee_RECURSE; break;
01644 case 's': silent = TRUE; break;
01645 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
01646 case 'v': invert = TRUE; break;
01647 case 'w': process_options |= PO_WORD_MATCH; break;
01648 case 'x': process_options |= PO_LINE_MATCH; break;
01649
01650 case 'V':
01651 fprintf(stderr, "pcregrep version %s\n", pcre_version());
01652 exit(0);
01653 break;
01654
01655 default:
01656 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
01657 exit(usage(2));
01658 }
01659
01660 return options;
01661 }
01662
01663
01664
01665
01666
01667
01668
01669
01670
01671
/*
 * Format a number as an English ordinal ("1st", "2nd", "3rd", "4th", ...).
 *
 * n        the number to format
 * Returns: pointer to a static buffer holding the text; the buffer is
 *          overwritten by the next call, so the result must be used (or
 *          copied) before calling ordin() again
 *
 * Numbers whose last two digits are 11, 12 or 13 take "th" ("11th",
 * "112th"), not the suffix suggested by their last digit.
 */
static char *
ordin(int n)
{
  /* Three characters per byte is enough for the decimal digits of an int;
     the extra four cover a sign, the two-letter suffix and the NUL. */
  static char buffer[sizeof(int) * 3 + 4];
  const char *tail = "th";
  int last_two = n % 100;

  /* The teens are irregular. For negative n both remainders are negative,
     so no case matches and "th" is kept. */
  if (last_two < 11 || last_two > 13) {
    switch (n % 10) {
    case 1: tail = "st"; break;
    case 2: tail = "nd"; break;
    case 3: tail = "rd"; break;
    default: break;
    }
  }

  sprintf(buffer, "%d%s", n, tail);
  return buffer;
}
01688
01689
01690
01691
01692
01693
01694
01695
01696
01697
01698
01699
01700
01701
01702
01703
01704
01705
01706
01707
01708
01709 static BOOL
01710 compile_single_pattern(char *pattern, int options, char *filename, int count)
01711 {
01712 char buffer[MBUFTHIRD + 16];
01713 const char *error;
01714 int errptr;
01715
01716 if (pattern_count >= MAX_PATTERN_COUNT)
01717 {
01718 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
01719 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
01720 return FALSE;
01721 }
01722
01723 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
01724 suffix[process_options]);
01725 pattern_list[pattern_count] =
01726 pcre_compile(buffer, options, &error, &errptr, pcretables);
01727 if (pattern_list[pattern_count] != NULL)
01728 {
01729 pattern_count++;
01730 return TRUE;
01731 }
01732
01733
01734
01735 errptr -= (int)strlen(prefix[process_options]);
01736 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
01737
01738 if (filename == NULL)
01739 {
01740 if (count == 0)
01741 fprintf(stderr, "pcregrep: Error in command-line regex "
01742 "at offset %d: %s\n", errptr, error);
01743 else
01744 fprintf(stderr, "pcregrep: Error in %s command-line regex "
01745 "at offset %d: %s\n", ordin(count), errptr, error);
01746 }
01747 else
01748 {
01749 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
01750 "at offset %d: %s\n", count, filename, errptr, error);
01751 }
01752
01753 return FALSE;
01754 }
01755
01756
01757
01758
01759
01760
01761
01762
01763
01764
01765
01766
01767
01768
01769
01770
01771
01772
01773
01774
01775
01776 static BOOL
01777 compile_pattern(char *pattern, int options, char *filename, int count)
01778 {
01779 if ((process_options & PO_FIXED_STRINGS) != 0)
01780 {
01781 char *eop = pattern + strlen(pattern);
01782 char buffer[MBUFTHIRD];
01783 for(;;)
01784 {
01785 int ellength;
01786 char *p = end_of_line(pattern, eop, &ellength);
01787 if (ellength == 0)
01788 return compile_single_pattern(pattern, options, filename, count);
01789 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
01790 pattern = p;
01791 if (!compile_single_pattern(buffer, options, filename, count))
01792 return FALSE;
01793 }
01794 }
01795 else return compile_single_pattern(pattern, options, filename, count);
01796 }
01797
01798
01799
01800
01801
01802
01803
01804
01805
01806 int
01807 main(int argc, char **argv)
01808 {
01809 int i, j;
01810 int rc = 1;
01811 int pcre_options = 0;
01812 int cmd_pattern_count = 0;
01813 int hint_count = 0;
01814 int errptr;
01815 BOOL only_one_at_top;
01816 char *patterns[MAX_PATTERN_COUNT];
01817 const char *locale_from = "--locale";
01818 const char *error;
01819
01820
01821
01822
01823
01824 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
01825 switch(i)
01826 {
01827 default: newline = (char *)"lf"; break;
01828 case '\r': newline = (char *)"cr"; break;
01829 case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
01830 case -1: newline = (char *)"any"; break;
01831 case -2: newline = (char *)"anycrlf"; break;
01832 }
01833
01834
01835
01836 for (i = 1; i < argc; i++)
01837 {
01838 option_item *op = NULL;
01839 char *option_data = (char *)"";
01840 BOOL longop;
01841 BOOL longopwasequals = FALSE;
01842
01843 if (argv[i][0] != '-') break;
01844
01845
01846
01847
01848 if (argv[i][1] == 0)
01849 {
01850 if (pattern_filename != NULL || pattern_count > 0) break;
01851 else exit(usage(2));
01852 }
01853
01854
01855
01856 if (argv[i][1] == '-')
01857 {
01858 char *arg = argv[i] + 2;
01859 char *argequals = strchr(arg, '=');
01860
01861 if (*arg == 0)
01862 {
01863 i++;
01864 break;
01865 }
01866
01867 longop = TRUE;
01868
01869
01870
01871
01872
01873
01874
01875
01876 for (op = optionlist; op->one_char != 0; op++)
01877 {
01878 char *opbra = strchr(op->long_name, '(');
01879 char *equals = strchr(op->long_name, '=');
01880 if (opbra == NULL)
01881 {
01882 if (equals == NULL)
01883 {
01884 if (strcmp(arg, op->long_name) == 0) break;
01885 }
01886 else
01887 {
01888 int oplen = equals - op->long_name;
01889 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
01890 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
01891 {
01892 option_data = arg + arglen;
01893 if (*option_data == '=')
01894 {
01895 option_data++;
01896 longopwasequals = TRUE;
01897 }
01898 break;
01899 }
01900 }
01901 }
01902 else
01903 {
01904 char buff1[24];
01905 char buff2[24];
01906 int baselen = opbra - op->long_name;
01907 sprintf(buff1, "%.*s", baselen, op->long_name);
01908 sprintf(buff2, "%s%.*s", buff1,
01909 (int)strlen(op->long_name) - baselen - 2, opbra + 1);
01910 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
01911 break;
01912 }
01913 }
01914
01915 if (op->one_char == 0)
01916 {
01917 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
01918 exit(usage(2));
01919 }
01920 }
01921
01922
01923
01924
01925
01926
01927
01928
01929 #ifdef JFRIEDL_DEBUG
01930 else if (strcmp(argv[i], "-pre") == 0) {
01931 jfriedl_prefix = argv[++i];
01932 continue;
01933 } else if (strcmp(argv[i], "-post") == 0) {
01934 jfriedl_postfix = argv[++i];
01935 continue;
01936 } else if (strcmp(argv[i], "-XT") == 0) {
01937 sscanf(argv[++i], "%d", &jfriedl_XT);
01938 continue;
01939 } else if (strcmp(argv[i], "-XR") == 0) {
01940 sscanf(argv[++i], "%d", &jfriedl_XR);
01941 continue;
01942 }
01943 #endif
01944
01945
01946
01947
01948
01949 else
01950 {
01951 char *s = argv[i] + 1;
01952 longop = FALSE;
01953 while (*s != 0)
01954 {
01955 for (op = optionlist; op->one_char != 0; op++)
01956 { if (*s == op->one_char) break; }
01957 if (op->one_char == 0)
01958 {
01959 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
01960 *s, argv[i]);
01961 exit(usage(2));
01962 }
01963 if (op->type != OP_NODATA || s[1] == 0)
01964 {
01965 option_data = s+1;
01966 break;
01967 }
01968 pcre_options = handle_option(*s++, pcre_options);
01969 }
01970 }
01971
01972
01973
01974
01975
01976 if (op->type == OP_NODATA)
01977 {
01978 pcre_options = handle_option(op->one_char, pcre_options);
01979 continue;
01980 }
01981
01982
01983
01984
01985
01986
01987 if (*option_data == 0 &&
01988 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
01989 {
01990 switch (op->one_char)
01991 {
01992 case N_COLOUR:
01993 colour_option = (char *)"auto";
01994 break;
01995 #ifdef JFRIEDL_DEBUG
01996 case 'S':
01997 S_arg = 0;
01998 break;
01999 #endif
02000 }
02001 continue;
02002 }
02003
02004
02005
02006 if (*option_data == 0)
02007 {
02008 if (i >= argc - 1 || longopwasequals)
02009 {
02010 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
02011 exit(usage(2));
02012 }
02013 option_data = argv[++i];
02014 }
02015
02016
02017
02018
02019 if (op->type == OP_PATLIST)
02020 {
02021 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
02022 {
02023 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
02024 MAX_PATTERN_COUNT);
02025 return 2;
02026 }
02027 patterns[cmd_pattern_count++] = option_data;
02028 }
02029
02030
02031
02032 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
02033 {
02034 *((char **)op->dataptr) = option_data;
02035 }
02036 else
02037 {
02038 char *endptr;
02039 int n = strtoul(option_data, &endptr, 10);
02040 if (*endptr != 0)
02041 {
02042 if (longop)
02043 {
02044 char *equals = strchr(op->long_name, '=');
02045 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
02046 equals - op->long_name;
02047 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
02048 option_data, nlen, op->long_name);
02049 }
02050 else
02051 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
02052 option_data, op->one_char);
02053 exit(usage(2));
02054 }
02055 *((int *)op->dataptr) = n;
02056 }
02057 }
02058
02059
02060
02061
02062 if (both_context > 0)
02063 {
02064 if (after_context == 0) after_context = both_context;
02065 if (before_context == 0) before_context = both_context;
02066 }
02067
02068
02069
02070
02071 if ((only_matching && (file_offsets || line_offsets)) ||
02072 (file_offsets && line_offsets))
02073 {
02074 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
02075 "and/or --line-offsets\n");
02076 exit(usage(2));
02077 }
02078
02079 if (file_offsets || line_offsets) only_matching = TRUE;
02080
02081
02082
02083
02084 if (locale == NULL)
02085 {
02086 locale = getenv("LC_ALL");
02087 locale_from = "LCC_ALL";
02088 }
02089
02090 if (locale == NULL)
02091 {
02092 locale = getenv("LC_CTYPE");
02093 locale_from = "LC_CTYPE";
02094 }
02095
02096
02097
02098
02099 if (locale != NULL)
02100 {
02101 if (setlocale(LC_CTYPE, locale) == NULL)
02102 {
02103 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
02104 locale, locale_from);
02105 return 2;
02106 }
02107 pcretables = pcre_maketables();
02108 }
02109
02110
02111
02112 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
02113 {
02114 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
02115 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
02116 else
02117 {
02118 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
02119 colour_option);
02120 return 2;
02121 }
02122 if (do_colour)
02123 {
02124 char *cs = getenv("PCREGREP_COLOUR");
02125 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
02126 if (cs != NULL) colour_string = cs;
02127 }
02128 }
02129
02130
02131
02132 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
02133 {
02134 pcre_options |= PCRE_NEWLINE_CR;
02135 endlinetype = EL_CR;
02136 }
02137 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
02138 {
02139 pcre_options |= PCRE_NEWLINE_LF;
02140 endlinetype = EL_LF;
02141 }
02142 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
02143 {
02144 pcre_options |= PCRE_NEWLINE_CRLF;
02145 endlinetype = EL_CRLF;
02146 }
02147 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
02148 {
02149 pcre_options |= PCRE_NEWLINE_ANY;
02150 endlinetype = EL_ANY;
02151 }
02152 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
02153 {
02154 pcre_options |= PCRE_NEWLINE_ANYCRLF;
02155 endlinetype = EL_ANYCRLF;
02156 }
02157 else
02158 {
02159 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
02160 return 2;
02161 }
02162
02163
02164
02165 if (dee_option != NULL)
02166 {
02167 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
02168 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
02169 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
02170 else
02171 {
02172 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
02173 return 2;
02174 }
02175 }
02176
02177 if (DEE_option != NULL)
02178 {
02179 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
02180 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
02181 else
02182 {
02183 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
02184 return 2;
02185 }
02186 }
02187
02188
02189
02190 #ifdef JFRIEDL_DEBUG
02191 if (S_arg > 9)
02192 {
02193 fprintf(stderr, "pcregrep: bad value for -S option\n");
02194 return 2;
02195 }
02196 if (jfriedl_XT != 0 || jfriedl_XR != 0)
02197 {
02198 if (jfriedl_XT == 0) jfriedl_XT = 1;
02199 if (jfriedl_XR == 0) jfriedl_XR = 1;
02200 }
02201 #endif
02202
02203
02204
02205 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
02206 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
02207
02208 if (pattern_list == NULL || hints_list == NULL)
02209 {
02210 fprintf(stderr, "pcregrep: malloc failed\n");
02211 goto EXIT2;
02212 }
02213
02214
02215
02216
02217 if (cmd_pattern_count == 0 && pattern_filename == NULL)
02218 {
02219 if (i >= argc) return usage(2);
02220 patterns[cmd_pattern_count++] = argv[i++];
02221 }
02222
02223
02224
02225
02226 for (j = 0; j < cmd_pattern_count; j++)
02227 {
02228 if (!compile_pattern(patterns[j], pcre_options, NULL,
02229 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
02230 goto EXIT2;
02231 }
02232
02233
02234
02235 if (pattern_filename != NULL)
02236 {
02237 int linenumber = 0;
02238 FILE *f;
02239 char *filename;
02240 char buffer[MBUFTHIRD];
02241
02242 if (strcmp(pattern_filename, "-") == 0)
02243 {
02244 f = stdin;
02245 filename = stdin_name;
02246 }
02247 else
02248 {
02249 f = fopen(pattern_filename, "r");
02250 if (f == NULL)
02251 {
02252 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
02253 strerror(errno));
02254 goto EXIT2;
02255 }
02256 filename = pattern_filename;
02257 }
02258
02259 while (fgets(buffer, MBUFTHIRD, f) != NULL)
02260 {
02261 char *s = buffer + (int)strlen(buffer);
02262 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
02263 *s = 0;
02264 linenumber++;
02265 if (buffer[0] == 0) continue;
02266 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
02267 goto EXIT2;
02268 }
02269
02270 if (f != stdin) fclose(f);
02271 }
02272
02273
02274
02275 for (j = 0; j < pattern_count; j++)
02276 {
02277 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
02278 if (error != NULL)
02279 {
02280 char s[16];
02281 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
02282 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
02283 goto EXIT2;
02284 }
02285 hint_count++;
02286 }
02287
02288
02289
02290 if (exclude_pattern != NULL)
02291 {
02292 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
02293 pcretables);
02294 if (exclude_compiled == NULL)
02295 {
02296 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
02297 errptr, error);
02298 goto EXIT2;
02299 }
02300 }
02301
02302 if (include_pattern != NULL)
02303 {
02304 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
02305 pcretables);
02306 if (include_compiled == NULL)
02307 {
02308 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
02309 errptr, error);
02310 goto EXIT2;
02311 }
02312 }
02313
02314 if (exclude_dir_pattern != NULL)
02315 {
02316 exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
02317 pcretables);
02318 if (exclude_dir_compiled == NULL)
02319 {
02320 fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
02321 errptr, error);
02322 goto EXIT2;
02323 }
02324 }
02325
02326 if (include_dir_pattern != NULL)
02327 {
02328 include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
02329 pcretables);
02330 if (include_dir_compiled == NULL)
02331 {
02332 fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
02333 errptr, error);
02334 goto EXIT2;
02335 }
02336 }
02337
02338
02339
02340 if (i >= argc)
02341 {
02342 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
02343 goto EXIT;
02344 }
02345
02346
02347
02348
02349
02350
02351 only_one_at_top = i == argc - 1;
02352
02353 for (; i < argc; i++)
02354 {
02355 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
02356 only_one_at_top);
02357 if (frc > 1) rc = frc;
02358 else if (frc == 0 && rc == 1) rc = 0;
02359 }
02360
02361 EXIT:
02362 if (pattern_list != NULL)
02363 {
02364 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
02365 free(pattern_list);
02366 }
02367 if (hints_list != NULL)
02368 {
02369 for (i = 0; i < hint_count; i++) free(hints_list[i]);
02370 free(hints_list);
02371 }
02372 return rc;
02373
02374 EXIT2:
02375 rc = 2;
02376 goto EXIT;
02377 }
02378
02379