pcretest.c

Go to the documentation of this file.
00001 /*************************************************
00002 *             PCRE testing program               *
00003 *************************************************/
00004 
00005 /* This program was hacked up as a tester for PCRE. I really should have
00006 written it more tidily in the first place. Will I ever learn? It has grown and
00007 been extended and consequently is now rather, er, *very* untidy in places.
00008 
00009 -----------------------------------------------------------------------------
00010 Redistribution and use in source and binary forms, with or without
00011 modification, are permitted provided that the following conditions are met:
00012 
00013     * Redistributions of source code must retain the above copyright notice,
00014       this list of conditions and the following disclaimer.
00015 
00016     * Redistributions in binary form must reproduce the above copyright
00017       notice, this list of conditions and the following disclaimer in the
00018       documentation and/or other materials provided with the distribution.
00019 
00020     * Neither the name of the University of Cambridge nor the names of its
00021       contributors may be used to endorse or promote products derived from
00022       this software without specific prior written permission.
00023 
00024 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00025 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00026 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00027 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
00028 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00029 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00030 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00031 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00032 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00033 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00034 POSSIBILITY OF SUCH DAMAGE.
00035 -----------------------------------------------------------------------------
00036 */
00037 
00038 
00039 #ifdef HAVE_CONFIG_H
00040 #include "config.h"
00041 #endif
00042 
00043 #include <ctype.h>
00044 #include <stdio.h>
00045 #include <string.h>
00046 #include <stdlib.h>
00047 #include <time.h>
00048 #include <locale.h>
00049 #include <errno.h>
00050 
00051 #ifdef SUPPORT_LIBREADLINE
00052 #ifdef HAVE_UNISTD_H
00053 #include <unistd.h>
00054 #endif
00055 #include <readline/readline.h>
00056 #include <readline/history.h>
00057 #endif
00058 
00059 
00060 /* A number of things vary for Windows builds. Originally, pcretest opened its
00061 input and output without "b"; then I was told that "b" was needed in some
00062 environments, so it was added for release 5.0 to both the input and output. (It
00063 makes no difference on Unix-like systems.) Later I was told that it is wrong
00064 for the input on Windows. I've now abstracted the modes into two macros that
00065 are set here, to make it easier to fiddle with them, and removed "b" from the
00066 input mode under Windows. */
00067 
00068 #if defined(_WIN32) || defined(WIN32)
00069 #include <io.h>                /* For _setmode() */
00070 #include <fcntl.h>             /* For _O_BINARY */
00071 #define INPUT_MODE   "r"
00072 #define OUTPUT_MODE  "wb"
00073 
00074 #define isatty _isatty         /* This is what Windows calls them, I'm told */
00075 #define fileno _fileno
00076 
00077 #else
00078 #include <sys/time.h>          /* These two includes are needed */
00079 #include <sys/resource.h>      /* for setrlimit(). */
00080 #define INPUT_MODE   "rb"
00081 #define OUTPUT_MODE  "wb"
00082 #endif
00083 
00084 
00085 /* We have to include pcre_internal.h because we need the internal info for
00086 displaying the results of pcre_study() and we also need to know about the
00087 internal macros, structures, and other internal data values; pcretest has
00088 "inside information" compared to a program that strictly follows the PCRE API.
00089 
00090 Although pcre_internal.h does itself include pcre.h, we explicitly include it
00091 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
00092 appropriately for an application, not for building PCRE. */
00093 
00094 #include "pcre.h"
00095 #include "pcre_internal.h"
00096 
00097 /* We need access to some of the data tables that PCRE uses. So as not to have
00098 to keep two copies, we include the source file here, changing the names of the
00099 external symbols to prevent clashes. */
00100 
00101 #define _pcre_ucp_gentype      ucp_gentype
00102 #define _pcre_utf8_table1      utf8_table1
00103 #define _pcre_utf8_table1_size utf8_table1_size
00104 #define _pcre_utf8_table2      utf8_table2
00105 #define _pcre_utf8_table3      utf8_table3
00106 #define _pcre_utf8_table4      utf8_table4
00107 #define _pcre_utt              utt
00108 #define _pcre_utt_size         utt_size
00109 #define _pcre_utt_names        utt_names
00110 #define _pcre_OP_lengths       OP_lengths
00111 
00112 #include "pcre_tables.c"
00113 
00114 /* We also need the pcre_printint() function for printing out compiled
00115 patterns. This function is in a separate file so that it can be included in
00116 pcre_compile.c when that module is compiled with debugging enabled.
00117 
00118 The definition of the macro PRINTABLE, which determines whether to print an
00119 output character as-is or as a hex value when showing compiled patterns, is
00120 contained in this file. We use it here also, in cases when the locale has not
00121 been explicitly changed, so as to get consistent output from systems that
00122 differ in their output from isprint() even in the "C" locale. */
00123 
00124 #include "pcre_printint.src"
00125 
00126 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
00127 
00128 
00129 /* It is possible to compile this test program without including support for
00130 testing the POSIX interface, though this is not available via the standard
00131 Makefile. */
00132 
00133 #if !defined NOPOSIX
00134 #include "pcreposix.h"
00135 #endif
00136 
00137 /* It is also possible, for the benefit of the version currently imported into
00138 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
00139 interface to the DFA matcher (NODFA), and without the doublecheck of the old
00140 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
00141 UTF8 support if PCRE is built without it. */
00142 
00143 #ifndef SUPPORT_UTF8
00144 #ifndef NOUTF8
00145 #define NOUTF8
00146 #endif
00147 #endif
00148 
00149 
00150 /* Other parameters */
00151 
00152 #ifndef CLOCKS_PER_SEC
00153 #ifdef CLK_TCK
00154 #define CLOCKS_PER_SEC CLK_TCK
00155 #else
00156 #define CLOCKS_PER_SEC 100
00157 #endif
00158 #endif
00159 
00160 /* This is the default loop count for timing. */
00161 
00162 #define LOOPREPEAT 500000
00163 
00164 /* Static variables */
00165 
00166 static FILE *outfile;            /* where results are written; static so helpers such as new_malloc can use it */
00167 static int log_store = 0;
00168 static int callout_count;        /* incremented by callout() for each callout whose number is callout_fail_id */
00169 static int callout_extra;        /* non-zero => callout() prints the captured substrings on every call */
00170 static int callout_fail_count;   /* callout() yields 1 once callout_count reaches this value */
00171 static int callout_fail_id;      /* the callout number that callout_count applies to */
00172 static int debug_lengths;
00173 static int first_callout;        /* non-zero => callout() echoes the subject; callout() resets it to 0 */
00174 static int locale_set = 0;       /* non-zero => PRINTHEX uses isprint() rather than PRINTABLE */
00175 static int show_malloc;          /* non-zero => the malloc/free wrappers log each call to outfile */
00176 static int use_utf8;             /* non-zero => pchars() decodes its input as UTF-8 */
00177 static size_t gotten_store;      /* size passed to the most recent new_malloc() call */
00178 
00179 /* The buffers grow automatically if very long input lines are encountered. */
00180 
00181 static int buffer_size = 50000;
00182 static uschar *buffer = NULL;
00183 static uschar *dbuffer = NULL;
00184 static uschar *pbuffer = NULL;
00185 
00186 
00187 
00188 /*************************************************
00189 *        Read or extend an input line            *
00190 *************************************************/
00191 
00192 /* Input lines are read into buffer, but both patterns and data lines can be
00193 continued over multiple input lines. In addition, if the buffer fills up, we
00194 want to automatically expand it so as to be able to handle extremely large
00195 lines that are needed for certain stress tests. When the input buffer is
00196 expanded, the other two buffers must also be expanded likewise, and the
00197 contents of pbuffer, which are a copy of the input for callouts, must be
00198 preserved (for when expansion happens for a data line). This is not the most
00199 optimal way of handling this, but hey, this is just a test program!
00200 
00201 Arguments:
00202   f            the file to read
00203   start        where in buffer to start (this *must* be within buffer)
00204   prompt       for stdin or readline()
00205 
00206 Returns:       pointer to the start of new data
00207                could be a copy of start, or could be moved
00208                NULL if no data read and EOF reached
00209 */
00210 
00211 static uschar *
00212 extend_inputline(FILE *f, uschar *start, const char *prompt)
00213 {
00214 uschar *here = start;
00215 
00216 for (;;)
00217   {
00218   int rlen = buffer_size - (here - buffer);
00219 
00220   if (rlen > 1000)
00221     {
00222     int dlen;
00223 
00224     /* If libreadline support is required, use readline() to read a line if the
00225     input is a terminal. Note that readline() removes the trailing newline, so
00226     we must put it back again, to be compatible with fgets(). */
00227 
00228 #ifdef SUPPORT_LIBREADLINE
00229     if (isatty(fileno(f)))
00230       {
00231       size_t len;
00232       char *s = readline(prompt);
00233       if (s == NULL) return (here == start)? NULL : start;
00234       len = strlen(s);
00235       if (len > 0) add_history(s);
00236       if (len > rlen - 1) len = rlen - 1;
00237       memcpy(here, s, len);
00238       here[len] = '\n';
00239       here[len+1] = 0;
00240       free(s);
00241       }
00242     else
00243 #endif
00244 
00245     /* Read the next line by normal means, prompting if the file is stdin. */
00246 
00247       {
00248       if (f == stdin) printf(prompt);
00249       if (fgets((char *)here, rlen,  f) == NULL)
00250         return (here == start)? NULL : start;
00251       }
00252 
00253     dlen = (int)strlen((char *)here);
00254     if (dlen > 0 && here[dlen - 1] == '\n') return start;
00255     here += dlen;
00256     }
00257 
00258   else
00259     {
00260     int new_buffer_size = 2*buffer_size;
00261     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
00262     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
00263     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
00264 
00265     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
00266       {
00267       fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
00268       exit(1);
00269       }
00270 
00271     memcpy(new_buffer, buffer, buffer_size);
00272     memcpy(new_pbuffer, pbuffer, buffer_size);
00273 
00274     buffer_size = new_buffer_size;
00275 
00276     start = new_buffer + (start - buffer);
00277     here = new_buffer + (here - buffer);
00278 
00279     free(buffer);
00280     free(dbuffer);
00281     free(pbuffer);
00282 
00283     buffer = new_buffer;
00284     dbuffer = new_dbuffer;
00285     pbuffer = new_pbuffer;
00286     }
00287   }
00288 
00289 return NULL;  /* Control never gets here */
00290 }
00291 
00292 
00293 
00294 
00295 
00296 
00297 
00298 /*************************************************
00299 *          Read number from string               *
00300 *************************************************/
00301 
00302 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
00303 around with conditional compilation, just do the job by hand. It is only used
00304 for unpicking arguments, so just keep it simple.
00305 
00306 Arguments:
00307   str           string to be converted
00308   endptr        where to put the end pointer
00309 
00310 Returns:        the value, as an int
00311 */
00312 
/* Convert an unsigned decimal number at the start of a string, after skipping
any leading white space.

Arguments:
  str           string to be converted
  endptr        receives a pointer to the first character not consumed

Returns:        the value (0 if there are no digits)
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
unsigned char *p = str;
int total = 0;

/* Step over leading white space */

for (; *p != 0 && isspace(*p); p++) ;

/* Accumulate the digits */

for (; isdigit(*p); p++)
  total = total * 10 + (int)(*p - '0');

*endptr = p;
return total;
}
00322 
00323 
00324 
00325 
00326 /*************************************************
00327 *            Convert UTF-8 string to value       *
00328 *************************************************/
00329 
00330 /* This function takes one or more bytes that represents a UTF-8 character,
00331 and returns the value of the character.
00332 
00333 Argument:
00334   utf8bytes   a pointer to the byte vector
00335   vptr        a pointer to an int to receive the value
00336 
00337 Returns:      >  0 => the number of bytes consumed
00338               -6 to 0 => malformed UTF-8 character at offset = (-return)
00339 */
00340 
00341 #if !defined NOUTF8
00342 
00343 static int
00344 utf82ord(unsigned char *utf8bytes, int *vptr)
00345 {
00346 int c = *utf8bytes++;
00347 int d = c;
00348 int i, j, s;
00349 
00350 for (i = -1; i < 6; i++)               /* i is number of additional bytes */
00351   {
00352   if ((d & 0x80) == 0) break;
00353   d <<= 1;
00354   }
00355 
00356 if (i == -1) { *vptr = c; return 1; }  /* ascii character */
00357 if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
00358 
00359 /* i now has a value in the range 1-5 */
00360 
00361 s = 6*i;
00362 d = (c & utf8_table3[i]) << s;
00363 
00364 for (j = 0; j < i; j++)
00365   {
00366   c = *utf8bytes++;
00367   if ((c & 0xc0) != 0x80) return -(j+1);
00368   s -= 6;
00369   d |= (c & 0x3f) << s;
00370   }
00371 
00372 /* Check that encoding was the correct unique one */
00373 
00374 for (j = 0; j < utf8_table1_size; j++)
00375   if (d <= utf8_table1[j]) break;
00376 if (j != i) return -(i+1);
00377 
00378 /* Valid value */
00379 
00380 *vptr = d;
00381 return i+1;
00382 }
00383 
00384 #endif
00385 
00386 
00387 
00388 /*************************************************
00389 *       Convert character value to UTF-8         *
00390 *************************************************/
00391 
00392 /* This function takes an integer value in the range 0 - 0x7fffffff
00393 and encodes it as a UTF-8 character in 1 to 6 bytes.
00394 
00395 Arguments:
00396   cvalue     the character value
00397   utf8bytes  pointer to buffer for result - at least 6 bytes long
00398 
00399 Returns:     number of characters placed in the buffer
00400 */
00401 
00402 #if !defined NOUTF8
00403 
00404 static int
00405 ord2utf8(int cvalue, uschar *utf8bytes)
00406 {
00407 register int i, j;
00408 for (i = 0; i < utf8_table1_size; i++)
00409   if (cvalue <= utf8_table1[i]) break;
00410 utf8bytes += i;
00411 for (j = i; j > 0; j--)
00412  {
00413  *utf8bytes-- = 0x80 | (cvalue & 0x3f);
00414  cvalue >>= 6;
00415  }
00416 *utf8bytes = utf8_table2[i] | cvalue;
00417 return i + 1;
00418 }
00419 
00420 #endif
00421 
00422 
00423 
00424 /*************************************************
00425 *             Print character string             *
00426 *************************************************/
00427 
00428 /* Character string printing function. Must handle UTF-8 strings in utf8
00429 mode. Yields number of characters printed. If handed a NULL file, just counts
00430 chars without printing. */
00431 
00432 static int pchars(unsigned char *p, int length, FILE *f)
00433 {
00434 int c = 0;
00435 int yield = 0;
00436 
00437 while (length-- > 0)
00438   {
00439 #if !defined NOUTF8
00440   if (use_utf8)
00441     {
00442     int rc = utf82ord(p, &c);
00443 
00444     if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
00445       {
00446       length -= rc - 1;
00447       p += rc;
00448       if (PRINTHEX(c))
00449         {
00450         if (f != NULL) fprintf(f, "%c", c);
00451         yield++;
00452         }
00453       else
00454         {
00455         int n = 4;
00456         if (f != NULL) fprintf(f, "\\x{%02x}", c);
00457         yield += (n <= 0x000000ff)? 2 :
00458                  (n <= 0x00000fff)? 3 :
00459                  (n <= 0x0000ffff)? 4 :
00460                  (n <= 0x000fffff)? 5 : 6;
00461         }
00462       continue;
00463       }
00464     }
00465 #endif
00466 
00467    /* Not UTF-8, or malformed UTF-8  */
00468 
00469   c = *p++;
00470   if (PRINTHEX(c))
00471     {
00472     if (f != NULL) fprintf(f, "%c", c);
00473     yield++;
00474     }
00475   else
00476     {
00477     if (f != NULL) fprintf(f, "\\x%02x", c);
00478     yield += 4;
00479     }
00480   }
00481 
00482 return yield;
00483 }
00484 
00485 
00486 
00487 /*************************************************
00488 *              Callout function                  *
00489 *************************************************/
00490 
00491 /* Called from PCRE as a result of the (?C) item. We print out where we are in
00492 the match. Yield zero unless more callouts than the fail count, or the callout
00493 data is not zero. */
00494 
00495 static int callout(pcre_callout_block *cb)
00496 {
00497 FILE *f = (first_callout | callout_extra)? outfile : NULL;
00498 int i, pre_start, post_start, subject_length;
00499 
00500 if (callout_extra)
00501   {
00502   fprintf(f, "Callout %d: last capture = %d\n",
00503     cb->callout_number, cb->capture_last);
00504 
00505   for (i = 0; i < cb->capture_top * 2; i += 2)
00506     {
00507     if (cb->offset_vector[i] < 0)
00508       fprintf(f, "%2d: <unset>\n", i/2);
00509     else
00510       {
00511       fprintf(f, "%2d: ", i/2);
00512       (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
00513         cb->offset_vector[i+1] - cb->offset_vector[i], f);
00514       fprintf(f, "\n");
00515       }
00516     }
00517   }
00518 
00519 /* Re-print the subject in canonical form, the first time or if giving full
00520 datails. On subsequent calls in the same match, we use pchars just to find the
00521 printed lengths of the substrings. */
00522 
00523 if (f != NULL) fprintf(f, "--->");
00524 
00525 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
00526 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
00527   cb->current_position - cb->start_match, f);
00528 
00529 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
00530 
00531 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
00532   cb->subject_length - cb->current_position, f);
00533 
00534 if (f != NULL) fprintf(f, "\n");
00535 
00536 /* Always print appropriate indicators, with callout number if not already
00537 shown. For automatic callouts, show the pattern offset. */
00538 
00539 if (cb->callout_number == 255)
00540   {
00541   fprintf(outfile, "%+3d ", cb->pattern_position);
00542   if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
00543   }
00544 else
00545   {
00546   if (callout_extra) fprintf(outfile, "    ");
00547     else fprintf(outfile, "%3d ", cb->callout_number);
00548   }
00549 
00550 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
00551 fprintf(outfile, "^");
00552 
00553 if (post_start > 0)
00554   {
00555   for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
00556   fprintf(outfile, "^");
00557   }
00558 
00559 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
00560   fprintf(outfile, " ");
00561 
00562 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
00563   pbuffer + cb->pattern_position);
00564 
00565 fprintf(outfile, "\n");
00566 first_callout = 0;
00567 
00568 if (cb->callout_data != NULL)
00569   {
00570   int callout_data = *((int *)(cb->callout_data));
00571   if (callout_data != 0)
00572     {
00573     fprintf(outfile, "Callout data = %d\n", callout_data);
00574     return callout_data;
00575     }
00576   }
00577 
00578 return (cb->callout_number != callout_fail_id)? 0 :
00579        (++callout_count >= callout_fail_count)? 1 : 0;
00580 }
00581 
00582 
00583 /*************************************************
00584 *            Local malloc functions              *
00585 *************************************************/
00586 
00587 /* Alternative malloc function, to test functionality and show the size of the
00588 compiled re. */
00589 
00590 static void *new_malloc(size_t size)
00591 {
00592 void *block = malloc(size);
00593 gotten_store = size;
00594 if (show_malloc)
00595   fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
00596 return block;
00597 }
00598 
00599 static void new_free(void *block)
00600 {
00601 if (show_malloc)
00602   fprintf(outfile, "free             %p\n", block);
00603 free(block);
00604 }
00605 
00606 
00607 /* For recursion malloc/free, to test stacking calls */
00608 
00609 static void *stack_malloc(size_t size)
00610 {
00611 void *block = malloc(size);
00612 if (show_malloc)
00613   fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
00614 return block;
00615 }
00616 
00617 static void stack_free(void *block)
00618 {
00619 if (show_malloc)
00620   fprintf(outfile, "stack_free       %p\n", block);
00621 free(block);
00622 }
00623 
00624 
00625 /*************************************************
00626 *          Call pcre_fullinfo()                  *
00627 *************************************************/
00628 
00629 /* Get one piece of information from the pcre_fullinfo() function */
00630 
00631 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
00632 {
00633 int rc;
00634 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
00635   fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
00636 }
00637 
00638 
00639 
00640 /*************************************************
00641 *         Byte flipping function                 *
00642 *************************************************/
00643 
/* Reverse the byte order of a 2-byte or 4-byte quantity.

Arguments:
  value     the value to flip
  n         2 to swap the low two bytes; anything else reverses the low four

Returns:    the flipped value; bytes above those handled are dropped
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;           /* least significant byte */
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2 = (value >> 16) & 0xff;
unsigned long int b3 = (value >> 24) & 0xff;

if (n == 2)
  {
  return (b0 << 8) | b1;
  }

return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
00653 
00654 
00655 
00656 
00657 /*************************************************
00658 *        Check match or recursion limit          *
00659 *************************************************/
00660 
00661 static int
00662 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
00663   int start_offset, int options, int *use_offsets, int use_size_offsets,
00664   int flag, unsigned long int *limit, int errnumber, const char *msg)
00665 {
00666 int count;
00667 int min = 0;
00668 int mid = 64;
00669 int max = -1;
00670 
00671 extra->flags |= flag;
00672 
00673 for (;;)
00674   {
00675   *limit = mid;
00676 
00677   count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
00678     use_offsets, use_size_offsets);
00679 
00680   if (count == errnumber)
00681     {
00682     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
00683     min = mid;
00684     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
00685     }
00686 
00687   else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
00688                          count == PCRE_ERROR_PARTIAL)
00689     {
00690     if (mid == min + 1)
00691       {
00692       fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
00693       break;
00694       }
00695     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
00696     max = mid;
00697     mid = (min + mid)/2;
00698     }
00699   else break;    /* Some other error */
00700   }
00701 
00702 extra->flags &= ~flag;
00703 return count;
00704 }
00705 
00706 
00707 
00708 /*************************************************
00709 *         Case-independent strncmp() function    *
00710 *************************************************/
00711 
00712 /*
00713 Arguments:
00714   s         first string
00715   t         second string
00716   n         number of characters to compare
00717 
00718 Returns:    < 0, = 0, or > 0, according to the comparison
00719 */
00720 
00721 static int
00722 strncmpic(uschar *s, uschar *t, int n)
00723 {
00724 while (n--)
00725   {
00726   int c = tolower(*s++) - tolower(*t++);
00727   if (c) return c;
00728   }
00729 return 0;
00730 }
00731 
00732 
00733 
00734 /*************************************************
00735 *         Check newline indicator                *
00736 *************************************************/
00737 
00738 /* This is used both at compile and run-time to check for <xxx> escapes, where
00739 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
00740 no match.
00741 
00742 Arguments:
00743   p           points after the leading '<'
00744   f           file for error message
00745 
00746 Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
00747 */
00748 
00749 static int
00750 check_newline(uschar *p, FILE *f)
00751 {
00752 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
00753 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
00754 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
00755 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
00756 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
00757 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
00758 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
00759 fprintf(f, "Unknown newline type at: <%s\n", p);
00760 return 0;
00761 }
00762 
00763 
00764 
00765 /*************************************************
00766 *             Usage function                     *
00767 *************************************************/
00768 
/* Write the command-line summary to stdout. The readline note and the -dfa
and -p entries depend on how pcretest was built. */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      "  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);

#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif

fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif

fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       output store (memory) used information\n"
      "  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
00801 
00802 
00803 
00804 /*************************************************
00805 *                Main Program                    *
00806 *************************************************/
00807 
00808 /* Read lines from named file or stdin and write to named file or stdout; lines
00809 consist of a regular expression, in delimiters and optionally followed by
00810 options, followed by a set of test data, terminated by an empty line. */
00811 
00812 int main(int argc, char **argv)
00813 {
00814 FILE *infile = stdin;
00815 int options = 0;
00816 int study_options = 0;
00817 int op = 1;
00818 int timeit = 0;
00819 int timeitm = 0;
00820 int showinfo = 0;
00821 int showstore = 0;
00822 int quiet = 0;
00823 int size_offsets = 45;
00824 int size_offsets_max;
00825 int *offsets = NULL;
00826 #if !defined NOPOSIX
00827 int posix = 0;
00828 #endif
00829 int debug = 0;
00830 int done = 0;
00831 int all_use_dfa = 0;
00832 int yield = 0;
00833 int stack_size;
00834 
00835 /* These vectors store, end-to-end, a list of captured substring names. Assume
00836 that 1024 is plenty long enough for the few names we'll be testing. */
00837 
00838 uschar copynames[1024];
00839 uschar getnames[1024];
00840 
00841 uschar *copynamesptr;
00842 uschar *getnamesptr;
00843 
00844 /* Get buffers from malloc() so that Electric Fence will check their misuse
00845 when I am debugging. They grow automatically when very long lines are read. */
00846 
00847 buffer = (unsigned char *)malloc(buffer_size);
00848 dbuffer = (unsigned char *)malloc(buffer_size);
00849 pbuffer = (unsigned char *)malloc(buffer_size);
00850 
00851 /* The outfile variable is static so that new_malloc can use it. */
00852 
00853 outfile = stdout;
00854 
00855 /* The following  _setmode() stuff is some Windows magic that tells its runtime
00856 library to translate CRLF into a single LF character. At least, that's what
00857 I've been told: never having used Windows I take this all on trust. Originally
00858 it set 0x8000, but then I was advised that _O_BINARY was better. */
00859 
00860 #if defined(_WIN32) || defined(WIN32)
00861 _setmode( _fileno( stdout ), _O_BINARY );
00862 #endif
00863 
00864 /* Scan options */
00865 
00866 while (argc > 1 && argv[op][0] == '-')
00867   {
00868   unsigned char *endptr;
00869 
00870   if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
00871     showstore = 1;
00872   else if (strcmp(argv[op], "-q") == 0) quiet = 1;
00873   else if (strcmp(argv[op], "-b") == 0) debug = 1;
00874   else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
00875   else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
00876 #if !defined NODFA
00877   else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
00878 #endif
00879   else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
00880       ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
00881         *endptr == 0))
00882     {
00883     op++;
00884     argc--;
00885     }
00886   else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
00887     {
00888     int both = argv[op][2] == 0;
00889     int temp;
00890     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
00891                      *endptr == 0))
00892       {
00893       timeitm = temp;
00894       op++;
00895       argc--;
00896       }
00897     else timeitm = LOOPREPEAT;
00898     if (both) timeit = timeitm;
00899     }
00900   else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
00901       ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
00902         *endptr == 0))
00903     {
00904 #if defined(_WIN32) || defined(WIN32)
00905     printf("PCRE: -S not supported on this OS\n");
00906     exit(1);
00907 #else
00908     int rc;
00909     struct rlimit rlim;
00910     getrlimit(RLIMIT_STACK, &rlim);
00911     rlim.rlim_cur = stack_size * 1024 * 1024;
00912     rc = setrlimit(RLIMIT_STACK, &rlim);
00913     if (rc != 0)
00914       {
00915     printf("PCRE: setrlimit() failed with error %d\n", rc);
00916     exit(1);
00917       }
00918     op++;
00919     argc--;
00920 #endif
00921     }
00922 #if !defined NOPOSIX
00923   else if (strcmp(argv[op], "-p") == 0) posix = 1;
00924 #endif
00925   else if (strcmp(argv[op], "-C") == 0)
00926     {
00927     int rc;
00928     printf("PCRE version %s\n", pcre_version());
00929     printf("Compiled with\n");
00930     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
00931     printf("  %sUTF-8 support\n", rc? "" : "No ");
00932     (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
00933     printf("  %sUnicode properties support\n", rc? "" : "No ");
00934     (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
00935     printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
00936       (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
00937       (rc == -2)? "ANYCRLF" :
00938       (rc == -1)? "ANY" : "???");
00939     (void)pcre_config(PCRE_CONFIG_BSR, &rc);
00940     printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
00941                                      "all Unicode newlines");
00942     (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
00943     printf("  Internal link size = %d\n", rc);
00944     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
00945     printf("  POSIX malloc threshold = %d\n", rc);
00946     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
00947     printf("  Default match limit = %d\n", rc);
00948     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
00949     printf("  Default recursion depth limit = %d\n", rc);
00950     (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
00951     printf("  Match recursion uses %s\n", rc? "stack" : "heap");
00952     goto EXIT;
00953     }
00954   else if (strcmp(argv[op], "-help") == 0 ||
00955            strcmp(argv[op], "--help") == 0)
00956     {
00957     usage();
00958     goto EXIT;
00959     }
00960   else
00961     {
00962     printf("** Unknown or malformed option %s\n", argv[op]);
00963     usage();
00964     yield = 1;
00965     goto EXIT;
00966     }
00967   op++;
00968   argc--;
00969   }
00970 
00971 /* Get the store for the offsets vector, and remember what it was */
00972 
00973 size_offsets_max = size_offsets;
00974 offsets = (int *)malloc(size_offsets_max * sizeof(int));
00975 if (offsets == NULL)
00976   {
00977   printf("** Failed to get %d bytes of memory for offsets vector\n",
00978     (int)(size_offsets_max * sizeof(int)));
00979   yield = 1;
00980   goto EXIT;
00981   }
00982 
00983 /* Sort out the input and output files */
00984 
00985 if (argc > 1)
00986   {
00987   infile = fopen(argv[op], INPUT_MODE);
00988   if (infile == NULL)
00989     {
00990     printf("** Failed to open %s\n", argv[op]);
00991     yield = 1;
00992     goto EXIT;
00993     }
00994   }
00995 
00996 if (argc > 2)
00997   {
00998   outfile = fopen(argv[op+1], OUTPUT_MODE);
00999   if (outfile == NULL)
01000     {
01001     printf("** Failed to open %s\n", argv[op+1]);
01002     yield = 1;
01003     goto EXIT;
01004     }
01005   }
01006 
01007 /* Set alternative malloc function */
01008 
01009 pcre_malloc = new_malloc;
01010 pcre_free = new_free;
01011 pcre_stack_malloc = stack_malloc;
01012 pcre_stack_free = stack_free;
01013 
01014 /* Heading line unless quiet, then prompt for first regex if stdin */
01015 
01016 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
01017 
01018 /* Main loop */
01019 
01020 while (!done)
01021   {
01022   pcre *re = NULL;
01023   pcre_extra *extra = NULL;
01024 
01025 #if !defined NOPOSIX  /* There are still compilers that require no indent */
01026   regex_t preg;
01027   int do_posix = 0;
01028 #endif
01029 
01030   const char *error;
01031   unsigned char *p, *pp, *ppp;
01032   unsigned char *to_file = NULL;
01033   const unsigned char *tables = NULL;
01034   unsigned long int true_size, true_study_size = 0;
01035   size_t size, regex_gotten_store;
01036   int do_study = 0;
01037   int do_debug = debug;
01038   int do_G = 0;
01039   int do_g = 0;
01040   int do_showinfo = showinfo;
01041   int do_showrest = 0;
01042   int do_flip = 0;
01043   int erroroffset, len, delimiter, poffset;
01044 
01045   use_utf8 = 0;
01046   debug_lengths = 1;
01047 
01048   if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
01049   if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
01050   fflush(outfile);
01051 
01052   p = buffer;
01053   while (isspace(*p)) p++;
01054   if (*p == 0) continue;
01055 
01056   /* See if the pattern is to be loaded pre-compiled from a file. */
01057 
01058   if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
01059     {
01060     unsigned long int magic, get_options;
01061     uschar sbuf[8];
01062     FILE *f;
01063 
01064     p++;
01065     pp = p + (int)strlen((char *)p);
01066     while (isspace(pp[-1])) pp--;
01067     *pp = 0;
01068 
01069     f = fopen((char *)p, "rb");
01070     if (f == NULL)
01071       {
01072       fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
01073       continue;
01074       }
01075 
01076     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
01077 
01078     true_size =
01079       (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
01080     true_study_size =
01081       (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
01082 
01083     re = (real_pcre *)new_malloc(true_size);
01084     regex_gotten_store = gotten_store;
01085 
01086     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
01087 
01088     magic = ((real_pcre *)re)->magic_number;
01089     if (magic != MAGIC_NUMBER)
01090       {
01091       if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
01092         {
01093         do_flip = 1;
01094         }
01095       else
01096         {
01097         fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
01098         fclose(f);
01099         continue;
01100         }
01101       }
01102 
01103     fprintf(outfile, "Compiled regex%s loaded from %s\n",
01104       do_flip? " (byte-inverted)" : "", p);
01105 
01106     /* Need to know if UTF-8 for printing data strings */
01107 
01108     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
01109     use_utf8 = (get_options & PCRE_UTF8) != 0;
01110 
01111     /* Now see if there is any following study data */
01112 
01113     if (true_study_size != 0)
01114       {
01115       pcre_study_data *psd;
01116 
01117       extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
01118       extra->flags = PCRE_EXTRA_STUDY_DATA;
01119 
01120       psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
01121       extra->study_data = psd;
01122 
01123       if (fread(psd, 1, true_study_size, f) != true_study_size)
01124         {
01125         FAIL_READ:
01126         fprintf(outfile, "Failed to read data from %s\n", p);
01127         if (extra != NULL) new_free(extra);
01128         if (re != NULL) new_free(re);
01129         fclose(f);
01130         continue;
01131         }
01132       fprintf(outfile, "Study data loaded from %s\n", p);
01133       do_study = 1;     /* To get the data output if requested */
01134       }
01135     else fprintf(outfile, "No study data\n");
01136 
01137     fclose(f);
01138     goto SHOW_INFO;
01139     }
01140 
01141   /* In-line pattern (the usual case). Get the delimiter and seek the end of
01142   the pattern; if it isn't complete, read more. */
01143 
01144   delimiter = *p++;
01145 
01146   if (isalnum(delimiter) || delimiter == '\\')
01147     {
01148     fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
01149     goto SKIP_DATA;
01150     }
01151 
01152   pp = p;
01153   poffset = p - buffer;
01154 
01155   for(;;)
01156     {
01157     while (*pp != 0)
01158       {
01159       if (*pp == '\\' && pp[1] != 0) pp++;
01160         else if (*pp == delimiter) break;
01161       pp++;
01162       }
01163     if (*pp != 0) break;
01164     if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
01165       {
01166       fprintf(outfile, "** Unexpected EOF\n");
01167       done = 1;
01168       goto CONTINUE;
01169       }
01170     if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
01171     }
01172 
01173   /* The buffer may have moved while being extended; reset the start of data
01174   pointer to the correct relative point in the buffer. */
01175 
01176   p = buffer + poffset;
01177 
01178   /* If the first character after the delimiter is backslash, make
01179   the pattern end with backslash. This is purely to provide a way
01180   of testing for the error message when a pattern ends with backslash. */
01181 
01182   if (pp[1] == '\\') *pp++ = '\\';
01183 
01184   /* Terminate the pattern at the delimiter, and save a copy of the pattern
01185   for callouts. */
01186 
01187   *pp++ = 0;
01188   strcpy((char *)pbuffer, (char *)p);
01189 
01190   /* Look for options after final delimiter */
01191 
01192   options = 0;
01193   study_options = 0;
01194   log_store = showstore;  /* default from command line */
01195 
01196   while (*pp != 0)
01197     {
01198     switch (*pp++)
01199       {
01200       case 'f': options |= PCRE_FIRSTLINE; break;
01201       case 'g': do_g = 1; break;
01202       case 'i': options |= PCRE_CASELESS; break;
01203       case 'm': options |= PCRE_MULTILINE; break;
01204       case 's': options |= PCRE_DOTALL; break;
01205       case 'x': options |= PCRE_EXTENDED; break;
01206 
01207       case '+': do_showrest = 1; break;
01208       case 'A': options |= PCRE_ANCHORED; break;
01209       case 'B': do_debug = 1; break;
01210       case 'C': options |= PCRE_AUTO_CALLOUT; break;
01211       case 'D': do_debug = do_showinfo = 1; break;
01212       case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
01213       case 'F': do_flip = 1; break;
01214       case 'G': do_G = 1; break;
01215       case 'I': do_showinfo = 1; break;
01216       case 'J': options |= PCRE_DUPNAMES; break;
01217       case 'M': log_store = 1; break;
01218       case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
01219 
01220 #if !defined NOPOSIX
01221       case 'P': do_posix = 1; break;
01222 #endif
01223 
01224       case 'S': do_study = 1; break;
01225       case 'U': options |= PCRE_UNGREEDY; break;
01226       case 'X': options |= PCRE_EXTRA; break;
01227       case 'Z': debug_lengths = 0; break;
01228       case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
01229       case '?': options |= PCRE_NO_UTF8_CHECK; break;
01230 
01231       case 'L':
01232       ppp = pp;
01233       /* The '\r' test here is so that it works on Windows. */
01234       /* The 0 (NUL) test is just in case this is an unterminated line. */
01235       while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
01236       *ppp = 0;
01237       if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
01238         {
01239         fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
01240         goto SKIP_DATA;
01241         }
01242       locale_set = 1;
01243       tables = pcre_maketables();
01244       pp = ppp;
01245       break;
01246 
01247       case '>':
01248       to_file = pp;
01249       while (*pp != 0) pp++;
01250       while (isspace(pp[-1])) pp--;
01251       *pp = 0;
01252       break;
01253 
01254       case '<':
01255         {
01256         if (strncmp((char *)pp, "JS>", 3) == 0)
01257           {
01258           options |= PCRE_JAVASCRIPT_COMPAT;
01259           pp += 3;
01260           }
01261         else
01262           {
01263           int x = check_newline(pp, outfile);
01264           if (x == 0) goto SKIP_DATA;
01265           options |= x;
01266           while (*pp++ != '>');
01267           }
01268         }
01269       break;
01270 
01271       case '\r':                      /* So that it works in Windows */
01272       case '\n':
01273       case ' ':
01274       break;
01275 
01276       default:
01277       fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
01278       goto SKIP_DATA;
01279       }
01280     }
01281 
01282   /* Handle compiling via the POSIX interface, which doesn't support the
01283   timing, showing, or debugging options, nor the ability to pass over
01284   local character tables. */
01285 
01286 #if !defined NOPOSIX
01287   if (posix || do_posix)
01288     {
01289     int rc;
01290     int cflags = 0;
01291 
01292     if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
01293     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
01294     if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
01295     if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
01296     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
01297 
01298     rc = regcomp(&preg, (char *)p, cflags);
01299 
01300     /* Compilation failed; go back for another re, skipping to blank line
01301     if non-interactive. */
01302 
01303     if (rc != 0)
01304       {
01305       (void)regerror(rc, &preg, (char *)buffer, buffer_size);
01306       fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
01307       goto SKIP_DATA;
01308       }
01309     }
01310 
01311   /* Handle compiling via the native interface */
01312 
01313   else
01314 #endif  /* !defined NOPOSIX */
01315 
01316     {
01317     if (timeit > 0)
01318       {
01319       register int i;
01320       clock_t time_taken;
01321       clock_t start_time = clock();
01322       for (i = 0; i < timeit; i++)
01323         {
01324         re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
01325         if (re != NULL) free(re);
01326         }
01327       time_taken = clock() - start_time;
01328       fprintf(outfile, "Compile time %.4f milliseconds\n",
01329         (((double)time_taken * 1000.0) / (double)timeit) /
01330           (double)CLOCKS_PER_SEC);
01331       }
01332 
01333     re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
01334 
01335     /* Compilation failed; go back for another re, skipping to blank line
01336     if non-interactive. */
01337 
01338     if (re == NULL)
01339       {
01340       fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
01341       SKIP_DATA:
01342       if (infile != stdin)
01343         {
01344         for (;;)
01345           {
01346           if (extend_inputline(infile, buffer, NULL) == NULL)
01347             {
01348             done = 1;
01349             goto CONTINUE;
01350             }
01351           len = (int)strlen((char *)buffer);
01352           while (len > 0 && isspace(buffer[len-1])) len--;
01353           if (len == 0) break;
01354           }
01355         fprintf(outfile, "\n");
01356         }
01357       goto CONTINUE;
01358       }
01359 
01360     /* Compilation succeeded; print data if required. There are now two
01361     info-returning functions. The old one has a limited interface and
01362     returns only limited data. Check that it agrees with the newer one. */
01363 
01364     if (log_store)
01365       fprintf(outfile, "Memory allocation (code space): %d\n",
01366         (int)(gotten_store -
01367               sizeof(real_pcre) -
01368               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
01369 
01370     /* Extract the size for possible writing before possibly flipping it,
01371     and remember the store that was got. */
01372 
01373     true_size = ((real_pcre *)re)->size;
01374     regex_gotten_store = gotten_store;
01375 
01376     /* If /S was present, study the regexp to generate additional info to
01377     help with the matching. */
01378 
01379     if (do_study)
01380       {
01381       if (timeit > 0)
01382         {
01383         register int i;
01384         clock_t time_taken;
01385         clock_t start_time = clock();
01386         for (i = 0; i < timeit; i++)
01387           extra = pcre_study(re, study_options, &error);
01388         time_taken = clock() - start_time;
01389         if (extra != NULL) free(extra);
01390         fprintf(outfile, "  Study time %.4f milliseconds\n",
01391           (((double)time_taken * 1000.0) / (double)timeit) /
01392             (double)CLOCKS_PER_SEC);
01393         }
01394       extra = pcre_study(re, study_options, &error);
01395       if (error != NULL)
01396         fprintf(outfile, "Failed to study: %s\n", error);
01397       else if (extra != NULL)
01398         true_study_size = ((pcre_study_data *)(extra->study_data))->size;
01399       }
01400 
01401     /* If the 'F' option was present, we flip the bytes of all the integer
01402     fields in the regex data block and the study block. This is to make it
01403     possible to test PCRE's handling of byte-flipped patterns, e.g. those
01404     compiled on a different architecture. */
01405 
01406     if (do_flip)
01407       {
01408       real_pcre *rre = (real_pcre *)re;
01409       rre->magic_number =
01410         byteflip(rre->magic_number, sizeof(rre->magic_number));
01411       rre->size = byteflip(rre->size, sizeof(rre->size));
01412       rre->options = byteflip(rre->options, sizeof(rre->options));
01413       rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
01414       rre->top_bracket =
01415         (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
01416       rre->top_backref =
01417         (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
01418       rre->first_byte =
01419         (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
01420       rre->req_byte =
01421         (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
01422       rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
01423         sizeof(rre->name_table_offset));
01424       rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
01425         sizeof(rre->name_entry_size));
01426       rre->name_count = (pcre_uint16)byteflip(rre->name_count,
01427         sizeof(rre->name_count));
01428 
01429       if (extra != NULL)
01430         {
01431         pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
01432         rsd->size = byteflip(rsd->size, sizeof(rsd->size));
01433         rsd->options = byteflip(rsd->options, sizeof(rsd->options));
01434         }
01435       }
01436 
01437     /* Extract information from the compiled data if required */
01438 
01439     SHOW_INFO:
01440 
01441     if (do_debug)
01442       {
01443       fprintf(outfile, "------------------------------------------------------------------\n");
01444       pcre_printint(re, outfile, debug_lengths);
01445       }
01446 
01447     if (do_showinfo)
01448       {
01449       unsigned long int get_options, all_options;
01450 #if !defined NOINFOCHECK
01451       int old_first_char, old_options, old_count;
01452 #endif
01453       int count, backrefmax, first_char, need_char, okpartial, jchanged,
01454         hascrorlf;
01455       int nameentrysize, namecount;
01456       const uschar *nametable;
01457 
01458       new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
01459       new_info(re, NULL, PCRE_INFO_SIZE, &size);
01460       new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
01461       new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
01462       new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
01463       new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
01464       new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
01465       new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
01466       new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
01467       new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
01468       new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
01469       new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
01470 
01471 #if !defined NOINFOCHECK
01472       old_count = pcre_info(re, &old_options, &old_first_char);
01473       if (count < 0) fprintf(outfile,
01474         "Error %d from pcre_info()\n", count);
01475       else
01476         {
01477         if (old_count != count) fprintf(outfile,
01478           "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
01479             old_count);
01480 
01481         if (old_first_char != first_char) fprintf(outfile,
01482           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
01483             first_char, old_first_char);
01484 
01485         if (old_options != (int)get_options) fprintf(outfile,
01486           "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
01487             get_options, old_options);
01488         }
01489 #endif
01490 
01491       if (size != regex_gotten_store) fprintf(outfile,
01492         "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
01493         (int)size, (int)regex_gotten_store);
01494 
01495       fprintf(outfile, "Capturing subpattern count = %d\n", count);
01496       if (backrefmax > 0)
01497         fprintf(outfile, "Max back reference = %d\n", backrefmax);
01498 
01499       if (namecount > 0)
01500         {
01501         fprintf(outfile, "Named capturing subpatterns:\n");
01502         while (namecount-- > 0)
01503           {
01504           fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
01505             nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
01506             GET2(nametable, 0));
01507           nametable += nameentrysize;
01508           }
01509         }
01510 
01511       if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
01512       if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
01513 
01514       all_options = ((real_pcre *)re)->options;
01515       if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
01516 
01517       if (get_options == 0) fprintf(outfile, "No options\n");
01518         else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
01519           ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
01520           ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
01521           ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
01522           ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
01523           ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
01524           ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
01525           ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
01526           ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
01527           ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
01528           ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
01529           ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
01530           ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
01531           ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
01532           ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
01533           ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
01534 
01535       if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
01536 
01537       switch (get_options & PCRE_NEWLINE_BITS)
01538         {
01539         case PCRE_NEWLINE_CR:
01540         fprintf(outfile, "Forced newline sequence: CR\n");
01541         break;
01542 
01543         case PCRE_NEWLINE_LF:
01544         fprintf(outfile, "Forced newline sequence: LF\n");
01545         break;
01546 
01547         case PCRE_NEWLINE_CRLF:
01548         fprintf(outfile, "Forced newline sequence: CRLF\n");
01549         break;
01550 
01551         case PCRE_NEWLINE_ANYCRLF:
01552         fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
01553         break;
01554 
01555         case PCRE_NEWLINE_ANY:
01556         fprintf(outfile, "Forced newline sequence: ANY\n");
01557         break;
01558 
01559         default:
01560         break;
01561         }
01562 
01563       if (first_char == -1)
01564         {
01565         fprintf(outfile, "First char at start or follows newline\n");
01566         }
01567       else if (first_char < 0)
01568         {
01569         fprintf(outfile, "No first char\n");
01570         }
01571       else
01572         {
01573         int ch = first_char & 255;
01574         const char *caseless = ((first_char & REQ_CASELESS) == 0)?
01575           "" : " (caseless)";
01576         if (PRINTHEX(ch))
01577           fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
01578         else
01579           fprintf(outfile, "First char = %d%s\n", ch, caseless);
01580         }
01581 
01582       if (need_char < 0)
01583         {
01584         fprintf(outfile, "No need char\n");
01585         }
01586       else
01587         {
01588         int ch = need_char & 255;
01589         const char *caseless = ((need_char & REQ_CASELESS) == 0)?
01590           "" : " (caseless)";
01591         if (PRINTHEX(ch))
01592           fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
01593         else
01594           fprintf(outfile, "Need char = %d%s\n", ch, caseless);
01595         }
01596 
01597       /* Don't output study size; at present it is in any case a fixed
01598       value, but it varies, depending on the computer architecture, and
01599       so messes up the test suite. (And with the /F option, it might be
01600       flipped.) */
01601 
01602       if (do_study)
01603         {
01604         if (extra == NULL)
01605           fprintf(outfile, "Study returned NULL\n");
01606         else
01607           {
01608           uschar *start_bits = NULL;
01609           new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
01610 
01611           if (start_bits == NULL)
01612             fprintf(outfile, "No starting byte set\n");
01613           else
01614             {
01615             int i;
01616             int c = 24;
01617             fprintf(outfile, "Starting byte set: ");
01618             for (i = 0; i < 256; i++)
01619               {
01620               if ((start_bits[i/8] & (1<<(i&7))) != 0)
01621                 {
01622                 if (c > 75)
01623                   {
01624                   fprintf(outfile, "\n  ");
01625                   c = 2;
01626                   }
01627                 if (PRINTHEX(i) && i != ' ')
01628                   {
01629                   fprintf(outfile, "%c ", i);
01630                   c += 2;
01631                   }
01632                 else
01633                   {
01634                   fprintf(outfile, "\\x%02x ", i);
01635                   c += 5;
01636                   }
01637                 }
01638               }
01639             fprintf(outfile, "\n");
01640             }
01641           }
01642         }
01643       }
01644 
01645     /* If the '>' option was present, we write out the regex to a file, and
01646     that is all. The first 8 bytes of the file are the regex length and then
01647     the study length, in big-endian order. */
01648 
01649     if (to_file != NULL)
01650       {
01651       FILE *f = fopen((char *)to_file, "wb");
01652       if (f == NULL)
01653         {
01654         fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
01655         }
01656       else
01657         {
01658         uschar sbuf[8];
01659         sbuf[0] = (uschar)((true_size >> 24) & 255);
01660         sbuf[1] = (uschar)((true_size >> 16) & 255);
01661         sbuf[2] = (uschar)((true_size >>  8) & 255);
01662         sbuf[3] = (uschar)((true_size) & 255);
01663 
01664         sbuf[4] = (uschar)((true_study_size >> 24) & 255);
01665         sbuf[5] = (uschar)((true_study_size >> 16) & 255);
01666         sbuf[6] = (uschar)((true_study_size >>  8) & 255);
01667         sbuf[7] = (uschar)((true_study_size) & 255);
01668 
01669         if (fwrite(sbuf, 1, 8, f) < 8 ||
01670             fwrite(re, 1, true_size, f) < true_size)
01671           {
01672           fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
01673           }
01674         else
01675           {
01676           fprintf(outfile, "Compiled regex written to %s\n", to_file);
01677           if (extra != NULL)
01678             {
01679             if (fwrite(extra->study_data, 1, true_study_size, f) <
01680                 true_study_size)
01681               {
01682               fprintf(outfile, "Write error on %s: %s\n", to_file,
01683                 strerror(errno));
01684               }
01685             else fprintf(outfile, "Study data written to %s\n", to_file);
01686 
01687             }
01688           }
01689         fclose(f);
01690         }
01691 
01692       new_free(re);
01693       if (extra != NULL) new_free(extra);
01694       if (tables != NULL) new_free((void *)tables);
01695       continue;  /* With next regex */
01696       }
01697     }        /* End of non-POSIX compile */
01698 
01699   /* Read data lines and test them */
01700 
01701   for (;;)
01702     {
01703     uschar *q;
01704     uschar *bptr;
01705     int *use_offsets = offsets;
01706     int use_size_offsets = size_offsets;
01707     int callout_data = 0;
01708     int callout_data_set = 0;
01709     int count, c;
01710     int copystrings = 0;
01711     int find_match_limit = 0;
01712     int getstrings = 0;
01713     int getlist = 0;
01714     int gmatched = 0;
01715     int start_offset = 0;
01716     int g_notempty = 0;
01717     int use_dfa = 0;
01718 
01719     options = 0;
01720 
01721     *copynames = 0;
01722     *getnames = 0;
01723 
01724     copynamesptr = copynames;
01725     getnamesptr = getnames;
01726 
01727     pcre_callout = callout;
01728     first_callout = 1;
01729     callout_extra = 0;
01730     callout_count = 0;
01731     callout_fail_count = 999999;
01732     callout_fail_id = -1;
01733     show_malloc = 0;
01734 
01735     if (extra != NULL) extra->flags &=
01736       ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
01737 
01738     len = 0;
01739     for (;;)
01740       {
01741       if (extend_inputline(infile, buffer + len, "data> ") == NULL)
01742         {
01743         if (len > 0) break;
01744         done = 1;
01745         goto CONTINUE;
01746         }
01747       if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
01748       len = (int)strlen((char *)buffer);
01749       if (buffer[len-1] == '\n') break;
01750       }
01751 
01752     while (len > 0 && isspace(buffer[len-1])) len--;
01753     buffer[len] = 0;
01754     if (len == 0) break;
01755 
01756     p = buffer;
01757     while (isspace(*p)) p++;
01758 
01759     bptr = q = dbuffer;
01760     while ((c = *p++) != 0)
01761       {
01762       int i = 0;
01763       int n = 0;
01764 
01765       if (c == '\\') switch ((c = *p++))
01766         {
01767         case 'a': c =    7; break;
01768         case 'b': c = '\b'; break;
01769         case 'e': c =   27; break;
01770         case 'f': c = '\f'; break;
01771         case 'n': c = '\n'; break;
01772         case 'r': c = '\r'; break;
01773         case 't': c = '\t'; break;
01774         case 'v': c = '\v'; break;
01775 
01776         case '0': case '1': case '2': case '3':
01777         case '4': case '5': case '6': case '7':
01778         c -= '0';
01779         while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
01780           c = c * 8 + *p++ - '0';
01781 
01782 #if !defined NOUTF8
01783         if (use_utf8 && c > 255)
01784           {
01785           unsigned char buff8[8];
01786           int ii, utn;
01787           utn = ord2utf8(c, buff8);
01788           for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
01789           c = buff8[ii];   /* Last byte */
01790           }
01791 #endif
01792         break;
01793 
01794         case 'x':
01795 
01796         /* Handle \x{..} specially - new Perl thing for utf8 */
01797 
01798 #if !defined NOUTF8
01799         if (*p == '{')
01800           {
01801           unsigned char *pt = p;
01802           c = 0;
01803           while (isxdigit(*(++pt)))
01804             c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
01805           if (*pt == '}')
01806             {
01807             unsigned char buff8[8];
01808             int ii, utn;
01809             if (use_utf8)
01810               {
01811               utn = ord2utf8(c, buff8);
01812               for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
01813               c = buff8[ii];   /* Last byte */
01814               }
01815             else
01816              {
01817              if (c > 255)
01818                fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
01819                  "UTF-8 mode is not enabled.\n"
01820                  "** Truncation will probably give the wrong result.\n", c);
01821              }
01822             p = pt + 1;
01823             break;
01824             }
01825           /* Not correct form; fall through */
01826           }
01827 #endif
01828 
01829         /* Ordinary \x */
01830 
01831         c = 0;
01832         while (i++ < 2 && isxdigit(*p))
01833           {
01834           c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
01835           p++;
01836           }
01837         break;
01838 
01839         case 0:   /* \ followed by EOF allows for an empty line */
01840         p--;
01841         continue;
01842 
01843         case '>':
01844         while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
01845         continue;
01846 
01847         case 'A':  /* Option setting */
01848         options |= PCRE_ANCHORED;
01849         continue;
01850 
01851         case 'B':
01852         options |= PCRE_NOTBOL;
01853         continue;
01854 
01855         case 'C':
01856         if (isdigit(*p))    /* Set copy string */
01857           {
01858           while(isdigit(*p)) n = n * 10 + *p++ - '0';
01859           copystrings |= 1 << n;
01860           }
01861         else if (isalnum(*p))
01862           {
01863           uschar *npp = copynamesptr;
01864           while (isalnum(*p)) *npp++ = *p++;
01865           *npp++ = 0;
01866           *npp = 0;
01867           n = pcre_get_stringnumber(re, (char *)copynamesptr);
01868           if (n < 0)
01869             fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
01870           copynamesptr = npp;
01871           }
01872         else if (*p == '+')
01873           {
01874           callout_extra = 1;
01875           p++;
01876           }
01877         else if (*p == '-')
01878           {
01879           pcre_callout = NULL;
01880           p++;
01881           }
01882         else if (*p == '!')
01883           {
01884           callout_fail_id = 0;
01885           p++;
01886           while(isdigit(*p))
01887             callout_fail_id = callout_fail_id * 10 + *p++ - '0';
01888           callout_fail_count = 0;
01889           if (*p == '!')
01890             {
01891             p++;
01892             while(isdigit(*p))
01893               callout_fail_count = callout_fail_count * 10 + *p++ - '0';
01894             }
01895           }
01896         else if (*p == '*')
01897           {
01898           int sign = 1;
01899           callout_data = 0;
01900           if (*(++p) == '-') { sign = -1; p++; }
01901           while(isdigit(*p))
01902             callout_data = callout_data * 10 + *p++ - '0';
01903           callout_data *= sign;
01904           callout_data_set = 1;
01905           }
01906         continue;
01907 
01908 #if !defined NODFA
01909         case 'D':
01910 #if !defined NOPOSIX
01911         if (posix || do_posix)
01912           printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
01913         else
01914 #endif
01915           use_dfa = 1;
01916         continue;
01917 
01918         case 'F':
01919         options |= PCRE_DFA_SHORTEST;
01920         continue;
01921 #endif
01922 
01923         case 'G':
01924         if (isdigit(*p))
01925           {
01926           while(isdigit(*p)) n = n * 10 + *p++ - '0';
01927           getstrings |= 1 << n;
01928           }
01929         else if (isalnum(*p))
01930           {
01931           uschar *npp = getnamesptr;
01932           while (isalnum(*p)) *npp++ = *p++;
01933           *npp++ = 0;
01934           *npp = 0;
01935           n = pcre_get_stringnumber(re, (char *)getnamesptr);
01936           if (n < 0)
01937             fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
01938           getnamesptr = npp;
01939           }
01940         continue;
01941 
01942         case 'L':
01943         getlist = 1;
01944         continue;
01945 
01946         case 'M':
01947         find_match_limit = 1;
01948         continue;
01949 
01950         case 'N':
01951         options |= PCRE_NOTEMPTY;
01952         continue;
01953 
01954         case 'O':
01955         while(isdigit(*p)) n = n * 10 + *p++ - '0';
01956         if (n > size_offsets_max)
01957           {
01958           size_offsets_max = n;
01959           free(offsets);
01960           use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
01961           if (offsets == NULL)
01962             {
01963             printf("** Failed to get %d bytes of memory for offsets vector\n",
01964               (int)(size_offsets_max * sizeof(int)));
01965             yield = 1;
01966             goto EXIT;
01967             }
01968           }
01969         use_size_offsets = n;
01970         if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
01971         continue;
01972 
01973         case 'P':
01974         options |= PCRE_PARTIAL;
01975         continue;
01976 
01977         case 'Q':
01978         while(isdigit(*p)) n = n * 10 + *p++ - '0';
01979         if (extra == NULL)
01980           {
01981           extra = (pcre_extra *)malloc(sizeof(pcre_extra));
01982           extra->flags = 0;
01983           }
01984         extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
01985         extra->match_limit_recursion = n;
01986         continue;
01987 
01988         case 'q':
01989         while(isdigit(*p)) n = n * 10 + *p++ - '0';
01990         if (extra == NULL)
01991           {
01992           extra = (pcre_extra *)malloc(sizeof(pcre_extra));
01993           extra->flags = 0;
01994           }
01995         extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
01996         extra->match_limit = n;
01997         continue;
01998 
01999 #if !defined NODFA
02000         case 'R':
02001         options |= PCRE_DFA_RESTART;
02002         continue;
02003 #endif
02004 
02005         case 'S':
02006         show_malloc = 1;
02007         continue;
02008 
02009         case 'Z':
02010         options |= PCRE_NOTEOL;
02011         continue;
02012 
02013         case '?':
02014         options |= PCRE_NO_UTF8_CHECK;
02015         continue;
02016 
02017         case '<':
02018           {
02019           int x = check_newline(p, outfile);
02020           if (x == 0) goto NEXT_DATA;
02021           options |= x;
02022           while (*p++ != '>');
02023           }
02024         continue;
02025         }
02026       *q++ = c;
02027       }
02028     *q = 0;
02029     len = q - dbuffer;
02030 
02031     /* Move the data to the end of the buffer so that a read over the end of
02032     the buffer will be seen by valgrind, even if it doesn't cause a crash. If
02033     we are using the POSIX interface, we must include the terminating zero. */
02034 
02035 #if !defined NOPOSIX
02036     if (posix || do_posix)
02037       {
02038       memmove(bptr + buffer_size - len - 1, bptr, len + 1);
02039       bptr += buffer_size - len - 1;
02040       }
02041     else
02042 #endif
02043       {
02044       memmove(bptr + buffer_size - len, bptr, len);
02045       bptr += buffer_size - len;
02046       }
02047 
02048     if ((all_use_dfa || use_dfa) && find_match_limit)
02049       {
02050       printf("**Match limit not relevant for DFA matching: ignored\n");
02051       find_match_limit = 0;
02052       }
02053 
02054     /* Handle matching via the POSIX interface, which does not
02055     support timing or playing with the match limit or callout data. */
02056 
02057 #if !defined NOPOSIX
02058     if (posix || do_posix)
02059       {
02060       int rc;
02061       int eflags = 0;
02062       regmatch_t *pmatch = NULL;
02063       if (use_size_offsets > 0)
02064         pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
02065       if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
02066       if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
02067 
02068       rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
02069 
02070       if (rc != 0)
02071         {
02072         (void)regerror(rc, &preg, (char *)buffer, buffer_size);
02073         fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
02074         }
02075       else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
02076               != 0)
02077         {
02078         fprintf(outfile, "Matched with REG_NOSUB\n");
02079         }
02080       else
02081         {
02082         size_t i;
02083         for (i = 0; i < (size_t)use_size_offsets; i++)
02084           {
02085           if (pmatch[i].rm_so >= 0)
02086             {
02087             fprintf(outfile, "%2d: ", (int)i);
02088             (void)pchars(dbuffer + pmatch[i].rm_so,
02089               pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
02090             fprintf(outfile, "\n");
02091             if (i == 0 && do_showrest)
02092               {
02093               fprintf(outfile, " 0+ ");
02094               (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
02095                 outfile);
02096               fprintf(outfile, "\n");
02097               }
02098             }
02099           }
02100         }
02101       free(pmatch);
02102       }
02103 
02104     /* Handle matching via the native interface - repeats for /g and /G */
02105 
02106     else
02107 #endif  /* !defined NOPOSIX */
02108 
02109     for (;; gmatched++)    /* Loop for /g or /G */
02110       {
02111       if (timeitm > 0)
02112         {
02113         register int i;
02114         clock_t time_taken;
02115         clock_t start_time = clock();
02116 
02117 #if !defined NODFA
02118         if (all_use_dfa || use_dfa)
02119           {
02120           int workspace[1000];
02121           for (i = 0; i < timeitm; i++)
02122             count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
02123               options | g_notempty, use_offsets, use_size_offsets, workspace,
02124               sizeof(workspace)/sizeof(int));
02125           }
02126         else
02127 #endif
02128 
02129         for (i = 0; i < timeitm; i++)
02130           count = pcre_exec(re, extra, (char *)bptr, len,
02131             start_offset, options | g_notempty, use_offsets, use_size_offsets);
02132 
02133         time_taken = clock() - start_time;
02134         fprintf(outfile, "Execute time %.4f milliseconds\n",
02135           (((double)time_taken * 1000.0) / (double)timeitm) /
02136             (double)CLOCKS_PER_SEC);
02137         }
02138 
02139       /* If find_match_limit is set, we want to do repeated matches with
02140       varying limits in order to find the minimum value for the match limit and
02141       for the recursion limit. */
02142 
02143       if (find_match_limit)
02144         {
02145         if (extra == NULL)
02146           {
02147           extra = (pcre_extra *)malloc(sizeof(pcre_extra));
02148           extra->flags = 0;
02149           }
02150 
02151         (void)check_match_limit(re, extra, bptr, len, start_offset,
02152           options|g_notempty, use_offsets, use_size_offsets,
02153           PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
02154           PCRE_ERROR_MATCHLIMIT, "match()");
02155 
02156         count = check_match_limit(re, extra, bptr, len, start_offset,
02157           options|g_notempty, use_offsets, use_size_offsets,
02158           PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
02159           PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
02160         }
02161 
02162       /* If callout_data is set, use the interface with additional data */
02163 
02164       else if (callout_data_set)
02165         {
02166         if (extra == NULL)
02167           {
02168           extra = (pcre_extra *)malloc(sizeof(pcre_extra));
02169           extra->flags = 0;
02170           }
02171         extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
02172         extra->callout_data = &callout_data;
02173         count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
02174           options | g_notempty, use_offsets, use_size_offsets);
02175         extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
02176         }
02177 
02178       /* The normal case is just to do the match once, with the default
02179       value of match_limit. */
02180 
02181 #if !defined NODFA
02182       else if (all_use_dfa || use_dfa)
02183         {
02184         int workspace[1000];
02185         count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
02186           options | g_notempty, use_offsets, use_size_offsets, workspace,
02187           sizeof(workspace)/sizeof(int));
02188         if (count == 0)
02189           {
02190           fprintf(outfile, "Matched, but too many subsidiary matches\n");
02191           count = use_size_offsets/2;
02192           }
02193         }
02194 #endif
02195 
02196       else
02197         {
02198         count = pcre_exec(re, extra, (char *)bptr, len,
02199           start_offset, options | g_notempty, use_offsets, use_size_offsets);
02200         if (count == 0)
02201           {
02202           fprintf(outfile, "Matched, but too many substrings\n");
02203           count = use_size_offsets/3;
02204           }
02205         }
02206 
02207       /* Matched */
02208 
02209       if (count >= 0)
02210         {
02211         int i, maxcount;
02212 
02213 #if !defined NODFA
02214         if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
02215 #endif
02216           maxcount = use_size_offsets/3;
02217 
02218         /* This is a check against a lunatic return value. */
02219 
02220         if (count > maxcount)
02221           {
02222           fprintf(outfile,
02223             "** PCRE error: returned count %d is too big for offset size %d\n",
02224             count, use_size_offsets);
02225           count = use_size_offsets/3;
02226           if (do_g || do_G)
02227             {
02228             fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
02229             do_g = do_G = FALSE;        /* Break g/G loop */
02230             }
02231           }
02232 
02233         for (i = 0; i < count * 2; i += 2)
02234           {
02235           if (use_offsets[i] < 0)
02236             fprintf(outfile, "%2d: <unset>\n", i/2);
02237           else
02238             {
02239             fprintf(outfile, "%2d: ", i/2);
02240             (void)pchars(bptr + use_offsets[i],
02241               use_offsets[i+1] - use_offsets[i], outfile);
02242             fprintf(outfile, "\n");
02243             if (i == 0)
02244               {
02245               if (do_showrest)
02246                 {
02247                 fprintf(outfile, " 0+ ");
02248                 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
02249                   outfile);
02250                 fprintf(outfile, "\n");
02251                 }
02252               }
02253             }
02254           }
02255 
02256         for (i = 0; i < 32; i++)
02257           {
02258           if ((copystrings & (1 << i)) != 0)
02259             {
02260             char copybuffer[256];
02261             int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
02262               i, copybuffer, sizeof(copybuffer));
02263             if (rc < 0)
02264               fprintf(outfile, "copy substring %d failed %d\n", i, rc);
02265             else
02266               fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
02267             }
02268           }
02269 
02270         for (copynamesptr = copynames;
02271              *copynamesptr != 0;
02272              copynamesptr += (int)strlen((char*)copynamesptr) + 1)
02273           {
02274           char copybuffer[256];
02275           int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
02276             count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
02277           if (rc < 0)
02278             fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
02279           else
02280             fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
02281           }
02282 
02283         for (i = 0; i < 32; i++)
02284           {
02285           if ((getstrings & (1 << i)) != 0)
02286             {
02287             const char *substring;
02288             int rc = pcre_get_substring((char *)bptr, use_offsets, count,
02289               i, &substring);
02290             if (rc < 0)
02291               fprintf(outfile, "get substring %d failed %d\n", i, rc);
02292             else
02293               {
02294               fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
02295               pcre_free_substring(substring);
02296               }
02297             }
02298           }
02299 
02300         for (getnamesptr = getnames;
02301              *getnamesptr != 0;
02302              getnamesptr += (int)strlen((char*)getnamesptr) + 1)
02303           {
02304           const char *substring;
02305           int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
02306             count, (char *)getnamesptr, &substring);
02307           if (rc < 0)
02308             fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
02309           else
02310             {
02311             fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
02312             pcre_free_substring(substring);
02313             }
02314           }
02315 
02316         if (getlist)
02317           {
02318           const char **stringlist;
02319           int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
02320             &stringlist);
02321           if (rc < 0)
02322             fprintf(outfile, "get substring list failed %d\n", rc);
02323           else
02324             {
02325             for (i = 0; i < count; i++)
02326               fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
02327             if (stringlist[i] != NULL)
02328               fprintf(outfile, "string list not terminated by NULL\n");
02329             /* free((void *)stringlist); */
02330             pcre_free_substring_list(stringlist);
02331             }
02332           }
02333         }
02334 
02335       /* There was a partial match */
02336 
02337       else if (count == PCRE_ERROR_PARTIAL)
02338         {
02339         fprintf(outfile, "Partial match");
02340 #if !defined NODFA
02341         if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
02342           fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
02343             bptr + use_offsets[0]);
02344 #endif
02345         fprintf(outfile, "\n");
02346         break;  /* Out of the /g loop */
02347         }
02348 
02349       /* Failed to match. If this is a /g or /G loop and we previously set
02350       g_notempty after a null match, this is not necessarily the end. We want
02351       to advance the start offset, and continue. We won't be at the end of the
02352       string - that was checked before setting g_notempty.
02353 
02354       Complication arises in the case when the newline option is "any" or
02355       "anycrlf". If the previous match was at the end of a line terminated by
02356       CRLF, an advance of one character just passes the \r, whereas we should
02357       prefer the longer newline sequence, as does the code in pcre_exec().
02358       Fudge the offset value to achieve this.
02359 
02360       Otherwise, in the case of UTF-8 matching, the advance must be one
02361       character, not one byte. */
02362 
02363       else
02364         {
02365         if (g_notempty != 0)
02366           {
02367           int onechar = 1;
02368           unsigned int obits = ((real_pcre *)re)->options;
02369           use_offsets[0] = start_offset;
02370           if ((obits & PCRE_NEWLINE_BITS) == 0)
02371             {
02372             int d;
02373             (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
02374             obits = (d == '\r')? PCRE_NEWLINE_CR :
02375                     (d == '\n')? PCRE_NEWLINE_LF :
02376                     (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
02377                     (d == -2)? PCRE_NEWLINE_ANYCRLF :
02378                     (d == -1)? PCRE_NEWLINE_ANY : 0;
02379             }
02380           if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
02381                (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
02382               &&
02383               start_offset < len - 1 &&
02384               bptr[start_offset] == '\r' &&
02385               bptr[start_offset+1] == '\n')
02386             onechar++;
02387           else if (use_utf8)
02388             {
02389             while (start_offset + onechar < len)
02390               {
02391               int tb = bptr[start_offset+onechar];
02392               if (tb <= 127) break;
02393               tb &= 0xc0;
02394               if (tb != 0 && tb != 0xc0) onechar++;
02395               }
02396             }
02397           use_offsets[1] = start_offset + onechar;
02398           }
02399         else
02400           {
02401           if (count == PCRE_ERROR_NOMATCH)
02402             {
02403             if (gmatched == 0) fprintf(outfile, "No match\n");
02404             }
02405           else fprintf(outfile, "Error %d\n", count);
02406           break;  /* Out of the /g loop */
02407           }
02408         }
02409 
02410       /* If not /g or /G we are done */
02411 
02412       if (!do_g && !do_G) break;
02413 
02414       /* If we have matched an empty string, first check to see if we are at
02415       the end of the subject. If so, the /g loop is over. Otherwise, mimic
02416       what Perl's /g option does. This turns out to be rather cunning. First
02417       we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
02418       same point. If this fails (picked up above) we advance to the next
02419       character. */
02420 
02421       g_notempty = 0;
02422 
02423       if (use_offsets[0] == use_offsets[1])
02424         {
02425         if (use_offsets[0] == len) break;
02426         g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
02427         }
02428 
02429       /* For /g, update the start offset, leaving the rest alone */
02430 
02431       if (do_g) start_offset = use_offsets[1];
02432 
02433       /* For /G, update the pointer and length */
02434 
02435       else
02436         {
02437         bptr += use_offsets[1];
02438         len -= use_offsets[1];
02439         }
02440       }  /* End of loop for /g and /G */
02441 
02442     NEXT_DATA: continue;
02443     }    /* End of loop for data lines */
02444 
02445   CONTINUE:
02446 
02447 #if !defined NOPOSIX
02448   if (posix || do_posix) regfree(&preg);
02449 #endif
02450 
02451   if (re != NULL) new_free(re);
02452   if (extra != NULL) new_free(extra);
02453   if (tables != NULL)
02454     {
02455     new_free((void *)tables);
02456     setlocale(LC_CTYPE, "C");
02457     locale_set = 0;
02458     }
02459   }
02460 
02461 if (infile == stdin) fprintf(outfile, "\n");
02462 
02463 EXIT:
02464 
02465 if (infile != NULL && infile != stdin) fclose(infile);
02466 if (outfile != NULL && outfile != stdout) fclose(outfile);
02467 
02468 free(buffer);
02469 free(dbuffer);
02470 free(pbuffer);
02471 free(offsets);
02472 
02473 return yield;
02474 }
02475 
02476 /* End of pcretest.c */

Generated on Tue Jul 5 14:11:58 2011 for ROOT_528-00b_version by  doxygen 1.5.1