pcre_exec.c

Go to the documentation of this file.
00001 /*************************************************
00002 *      Perl-Compatible Regular Expressions       *
00003 *************************************************/
00004 
00005 /* PCRE is a library of functions to support regular expressions whose syntax
00006 and semantics are as close as possible to those of the Perl 5 language.
00007 
00008                        Written by Philip Hazel
00009            Copyright (c) 1997-2008 University of Cambridge
00010 
00011 -----------------------------------------------------------------------------
00012 Redistribution and use in source and binary forms, with or without
00013 modification, are permitted provided that the following conditions are met:
00014 
00015     * Redistributions of source code must retain the above copyright notice,
00016       this list of conditions and the following disclaimer.
00017 
00018     * Redistributions in binary form must reproduce the above copyright
00019       notice, this list of conditions and the following disclaimer in the
00020       documentation and/or other materials provided with the distribution.
00021 
00022     * Neither the name of the University of Cambridge nor the names of its
00023       contributors may be used to endorse or promote products derived from
00024       this software without specific prior written permission.
00025 
00026 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00027 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00029 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
00030 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00031 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00032 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00033 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00034 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00035 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00036 POSSIBILITY OF SUCH DAMAGE.
00037 -----------------------------------------------------------------------------
00038 */
00039 
00040 
00041 /* This module contains pcre_exec(), the externally visible function that does
00042 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
00043 possible. There are also some static supporting functions. */
00044 
00045 #ifdef HAVE_CONFIG_H
00046 #include "config.h"
00047 #endif
00048 
00049 #define NLBLOCK md             /* Block containing newline information */
00050 #define PSSTART start_subject  /* Field containing processed string start */
00051 #define PSEND   end_subject    /* Field containing processed string end */
00052 
00053 #include "pcre_internal.h"
00054 
00055 /* Undefine some potentially clashing cpp symbols */
00056 
00057 #undef min
00058 #undef max
00059 
/* Bits that may be set in the "flags" argument of match(). */

#define match_condassert     0x01  /* Checking the assertion of a condition */
#define match_cbegroup       0x02  /* Unlimited repeat group that may be empty */

/* Ordinary (non-error) results of match(). Errors are reported with the
externally defined PCRE_ERROR_xxx codes, every one of which is negative. */

#define MATCH_NOMATCH      0
#define MATCH_MATCH        1

/* Internal-only results of match(). They are pushed far enough below zero
that they cannot collide with the external error codes. */

#define MATCH_THEN         (-996)
#define MATCH_SKIP         (-997)
#define MATCH_PRUNE        (-998)
#define MATCH_COMMIT       (-999)

/* Upper bound on the number of offset ints that a recursive call saves on the
stack; a bigger offset vector is saved with malloc instead. Keep this a
multiple of 3, because the offset vector always has a length that is a multiple
of 3. */

#define REC_STACK_SAVE_MAX 30
00084 
/* Minimum and maximum counts for the common repeats. In the table of maxima a
value of 0 stands for "no upper limit". */

static const char rep_min[] = {
  0, 0,
  1, 1,
  0, 0
};

static const char rep_max[] = {
  0, 0,
  0, 0,
  1, 1
};
00089 
00090 
00091 
00092 #ifdef DEBUG
00093 /*************************************************
00094 *        Debugging function to print chars       *
00095 *************************************************/
00096 
00097 /* Print a sequence of chars in printable format, stopping at the end of the
00098 subject if requested.
00099 
00100 Arguments:
00101   p           points to characters
00102   length      number to print
00103   is_subject  TRUE if printing from within md->start_subject
00104   md          pointer to matching data block, if is_subject is TRUE
00105 
00106 Returns:     nothing
00107 */
00108 
00109 static void
00110 pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
00111 {
00112 unsigned int c;
00113 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
00114 while (length-- > 0)
00115   if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
00116 }
00117 #endif
00118 
00119 
00120 
00121 /*************************************************
00122 *          Match a back-reference                *
00123 *************************************************/
00124 
00125 /* If a back reference hasn't been set, the length that is passed is greater
00126 than the number of characters left in the string, so the match fails.
00127 
00128 Arguments:
00129   offset      index into the offset vector
00130   eptr        points into the subject
00131   length      length to be matched
00132   md          points to match data block
00133   ims         the ims flags
00134 
00135 Returns:      TRUE if matched
00136 */
00137 
00138 static BOOL
00139 match_ref(int offset, register USPTR eptr, int length, match_data *md,
00140   unsigned long int ims)
00141 {
00142 USPTR p = md->start_subject + md->offset_vector[offset];
00143 
00144 #ifdef DEBUG
00145 if (eptr >= md->end_subject)
00146   printf("matching subject <null>");
00147 else
00148   {
00149   printf("matching subject ");
00150   pchars(eptr, length, TRUE, md);
00151   }
00152 printf(" against backref ");
00153 pchars(p, length, FALSE, md);
00154 printf("\n");
00155 #endif
00156 
00157 /* Always fail if not enough characters left */
00158 
00159 if (length > md->end_subject - eptr) return FALSE;
00160 
00161 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
00162 properly if Unicode properties are supported. Otherwise, we can check only
00163 ASCII characters. */
00164 
00165 if ((ims & PCRE_CASELESS) != 0)
00166   {
00167 #ifdef SUPPORT_UTF8
00168 #ifdef SUPPORT_UCP
00169   if (md->utf8)
00170     {
00171     USPTR endptr = eptr + length;
00172     while (eptr < endptr)
00173       {
00174       int c, d;
00175       GETCHARINC(c, eptr);
00176       GETCHARINC(d, p);
00177       if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
00178       }
00179     }
00180   else
00181 #endif
00182 #endif
00183 
00184   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
00185   is no UCP support. */
00186 
00187   while (length-- > 0)
00188     { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
00189   }
00190 
00191 /* In the caseful case, we can just compare the bytes, whether or not we
00192 are in UTF-8 mode. */
00193 
00194 else
00195   { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
00196 
00197 return TRUE;
00198 }
00199 
00200 
00201 
00202 /***************************************************************************
00203 ****************************************************************************
00204                    RECURSION IN THE match() FUNCTION
00205 
00206 The match() function is highly recursive, though not every recursive call
00207 increases the recursive depth. Nevertheless, some regular expressions can cause
00208 it to recurse to a great depth. I was writing for Unix, so I just let it call
00209 itself recursively. This uses the stack for saving everything that has to be
00210 saved for a recursive call. On Unix, the stack can be large, and this works
00211 fine.
00212 
00213 It turns out that on some non-Unix-like systems there are problems with
00214 programs that use a lot of stack. (This despite the fact that every last chip
00215 has oodles of memory these days, and techniques for extending the stack have
00216 been known for decades.) So....
00217 
00218 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
00219 calls by keeping local variables that need to be preserved in blocks of memory
00220 obtained from malloc() instead of on the stack. Macros are used to
00221 achieve this so that the actual code doesn't look very different to what it
00222 always used to.
00223 
00224 The original heap-recursive code used longjmp(). However, it seems that this
00225 can be very slow on some operating systems. Following a suggestion from Stan
00226 Switzer, the use of longjmp() has been abolished, at the cost of having to
00227 provide a unique number for each call to RMATCH. There is no way of generating
00228 a sequence of numbers at compile time in C. I have given them names, to make
00229 them stand out more clearly.
00230 
00231 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
00232 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
00233 tests. Furthermore, not using longjmp() means that local dynamic variables
00234 don't have indeterminate values; this has meant that the frame size can be
00235 reduced because the result can be "passed back" by straight setting of the
00236 variable instead of being passed in the frame.
00237 ****************************************************************************
00238 ***************************************************************************/
00239 
00240 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
00241 below must be updated in sync.  */
00242 
enum {
  RM1 = 1, RM2,  RM3,  RM4,  RM5,  RM6,
  RM7,     RM8,  RM9,  RM10, RM11, RM12,
  RM13,    RM14, RM15, RM16, RM17, RM18,
  RM19,    RM20, RM21, RM22, RM23, RM24,
  RM25,    RM26, RM27, RM28, RM29, RM30,
  RM31,    RM32, RM33, RM34, RM35, RM36,
  RM37,    RM38, RM39, RM40, RM41, RM42,
  RM43,    RM44, RM45, RM46, RM47, RM48,
  RM49,    RM50, RM51, RM52, RM53, RM54
};
00249 
00250 /* These versions of the macros use the stack, as normal. There are debugging
00251 versions and production versions. Note that the "rw" argument of RMATCH isn't
00252 actually used in this definition. */
00253 
00254 #ifndef NO_RECURSE
00255 #define REGISTER register
00256 
#ifndef DEBUG

/* Production versions: a genuine recursive call of match(), one level deeper,
whose result is left in rrc, and an ordinary return. */

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
  rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
#define RRETURN(ra) return ra

#else

/* Debugging versions: the same call and return, but each is traced on stdout
together with the source line it was made from. */

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
  { \
  printf("match() called in line %d\n", __LINE__); \
  rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
  printf("to line %d\n", __LINE__); \
  }
#define RRETURN(ra) \
  { \
  printf("match() returned %d from line %d ", ra, __LINE__); \
  return ra; \
  }

#endif
00274 
00275 #else
00276 
00277 
00278 /* These versions of the macros manage a private stack on the heap. Note that
00279 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
00280 argument of match(), which never changes. */
00281 
00282 #define REGISTER
00283 
/* Heap-frame replacement for a recursive call of match().

A new heapframe is obtained with pcre_stack_malloc, loaded with the "argument"
values (ra = eptr, rb = ecode, rc = offset_top, re = ims, rf = eptrb,
rg = flags; mstart is copied from the current frame and the depth is bumped by
one), and linked in front of the current frame. The resume point rw is recorded
in the calling frame's Xwhere, and the label L_<rw> generated here is the place
control comes back to when the simulated call returns.

If no memory can be obtained for the new frame, the current frame gives up with
PCRE_ERROR_NOMEMORY via RRETURN, so the error is handed outwards like any other
negative result instead of a NULL pointer being dereferenced. */

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
  {\
  heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
  if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
  frame->Xwhere = rw; \
  newframe->Xeptr = ra;\
  newframe->Xecode = rb;\
  newframe->Xmstart = mstart;\
  newframe->Xoffset_top = rc;\
  newframe->Xims = re;\
  newframe->Xeptrb = rf;\
  newframe->Xflags = rg;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  newframe->Xprevframe = frame;\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }
00303 
/* Heap-frame replacement for "return ra" inside match(). The current frame is
unlinked and released. If that was the top-level frame, the function really
returns; otherwise the value is placed in rrc and control moves to HEAP_RETURN
so that the calling frame can resume. Note that ra is evaluated only after the
frame has been popped. */

#define RRETURN(ra)\
  {\
  heapframe *oldframe = frame;\
  frame = oldframe->Xprevframe;\
  (pcre_stack_free)(oldframe);\
  if (frame == NULL) return ra;\
  rrc = ra;\
  goto HEAP_RETURN;\
  }
00316 
00317 
00318 /* Structure for remembering the local variables in a private frame */
00319 
00320 typedef struct heapframe {
00321   struct heapframe *Xprevframe;
00322 
00323   /* Function arguments that may change */
00324 
00325   const uschar *Xeptr;
00326   const uschar *Xecode;
00327   const uschar *Xmstart;
00328   int Xoffset_top;
00329   long int Xims;
00330   eptrblock *Xeptrb;
00331   int Xflags;
00332   unsigned int Xrdepth;
00333 
00334   /* Function local variables */
00335 
00336   const uschar *Xcallpat;
00337   const uschar *Xcharptr;
00338   const uschar *Xdata;
00339   const uschar *Xnext;
00340   const uschar *Xpp;
00341   const uschar *Xprev;
00342   const uschar *Xsaved_eptr;
00343 
00344   recursion_info Xnew_recursive;
00345 
00346   BOOL Xcur_is_word;
00347   BOOL Xcondition;
00348   BOOL Xprev_is_word;
00349 
00350   unsigned long int Xoriginal_ims;
00351 
00352 #ifdef SUPPORT_UCP
00353   int Xprop_type;
00354   int Xprop_value;
00355   int Xprop_fail_result;
00356   int Xprop_category;
00357   int Xprop_chartype;
00358   int Xprop_script;
00359   int Xoclength;
00360   uschar Xocchars[8];
00361 #endif
00362 
00363   int Xctype;
00364   unsigned int Xfc;
00365   int Xfi;
00366   int Xlength;
00367   int Xmax;
00368   int Xmin;
00369   int Xnumber;
00370   int Xoffset;
00371   int Xop;
00372   int Xsave_capture_last;
00373   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
00374   int Xstacksave[REC_STACK_SAVE_MAX];
00375 
00376   eptrblock Xnewptrb;
00377 
00378   /* Where to jump back to */
00379 
00380   int Xwhere;
00381 
00382 } heapframe;
00383 
00384 #endif
00385 
00386 
00387 /***************************************************************************
00388 ***************************************************************************/
00389 
00390 
00391 
00392 /*************************************************
00393 *         Match from current position            *
00394 *************************************************/
00395 
00396 /* This function is called recursively in many circumstances. Whenever it
00397 returns a negative (error) response, the outer incarnation must also return the
00398 same response.
00399 
00400 Performance note: It might be tempting to extract commonly used fields from the
00401 md structure (e.g. utf8, end_subject) into individual variables to improve
00402 performance. Tests using gcc on a SPARC disproved this; in the first case, it
00403 made performance worse.
00404 
00405 Arguments:
00406    eptr        pointer to current character in subject
00407    ecode       pointer to current position in compiled code
00408    mstart      pointer to the current match start position (can be modified
00409                  by encountering \K)
00410    offset_top  current top pointer
00411    md          pointer to "static" info for the match
00412    ims         current /i, /m, and /s options
00413    eptrb       pointer to chain of blocks containing eptr at start of
00414                  brackets - for testing for empty matches
00415    flags       can contain
00416                  match_condassert - this is an assertion condition
00417                  match_cbegroup - this is the start of an unlimited repeat
00418                    group that can match an empty string
00419    rdepth      the recursion depth
00420 
00421 Returns:       MATCH_MATCH if matched            )  these values are >= 0
00422                MATCH_NOMATCH if failed to match  )
00423                a negative PCRE_ERROR_xxx value if aborted by an error condition
00424                  (e.g. stopped by repeated call or recursion limit)
00425 */
00426 
00427 static int
00428 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,
00429   int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
00430   int flags, unsigned int rdepth)
00431 {
00432 /* These variables do not need to be preserved over recursion in this function,
00433 so they can be ordinary variables in all cases. Mark some of them with
00434 "register" because they are used a lot in loops. */
00435 
00436 register int  rrc;         /* Returns from recursive calls */
00437 register int  i;           /* Used for loops not involving calls to RMATCH() */
00438 register unsigned int c;   /* Character values not kept over RMATCH() calls */
00439 register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
00440 
00441 BOOL minimize, possessive; /* Quantifier options */
00442 
00443 /* When recursion is not being used, all "local" variables that have to be
00444 preserved over calls to RMATCH() are part of a "frame" which is obtained from
00445 heap storage. Set up the top-level frame here; others are obtained from the
00446 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
00447 
00448 #ifdef NO_RECURSE
00449 heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
00450 frame->Xprevframe = NULL;            /* Marks the top level */
00451 
00452 /* Copy in the original argument variables */
00453 
00454 frame->Xeptr = eptr;
00455 frame->Xecode = ecode;
00456 frame->Xmstart = mstart;
00457 frame->Xoffset_top = offset_top;
00458 frame->Xims = ims;
00459 frame->Xeptrb = eptrb;
00460 frame->Xflags = flags;
00461 frame->Xrdepth = rdepth;
00462 
00463 /* This is where control jumps back to to effect "recursion" */
00464 
00465 HEAP_RECURSE:
00466 
00467 /* Macros make the argument variables come from the current frame */
00468 
00469 #define eptr               frame->Xeptr
00470 #define ecode              frame->Xecode
00471 #define mstart             frame->Xmstart
00472 #define offset_top         frame->Xoffset_top
00473 #define ims                frame->Xims
00474 #define eptrb              frame->Xeptrb
00475 #define flags              frame->Xflags
00476 #define rdepth             frame->Xrdepth
00477 
00478 /* Ditto for the local variables */
00479 
00480 #ifdef SUPPORT_UTF8
00481 #define charptr            frame->Xcharptr
00482 #endif
00483 #define callpat            frame->Xcallpat
00484 #define data               frame->Xdata
00485 #define next               frame->Xnext
00486 #define pp                 frame->Xpp
00487 #define prev               frame->Xprev
00488 #define saved_eptr         frame->Xsaved_eptr
00489 
00490 #define new_recursive      frame->Xnew_recursive
00491 
00492 #define cur_is_word        frame->Xcur_is_word
00493 #define condition          frame->Xcondition
00494 #define prev_is_word       frame->Xprev_is_word
00495 
00496 #define original_ims       frame->Xoriginal_ims
00497 
00498 #ifdef SUPPORT_UCP
00499 #define prop_type          frame->Xprop_type
00500 #define prop_value         frame->Xprop_value
00501 #define prop_fail_result   frame->Xprop_fail_result
00502 #define prop_category      frame->Xprop_category
00503 #define prop_chartype      frame->Xprop_chartype
00504 #define prop_script        frame->Xprop_script
00505 #define oclength           frame->Xoclength
00506 #define occhars            frame->Xocchars
00507 #endif
00508 
00509 #define ctype              frame->Xctype
00510 #define fc                 frame->Xfc
00511 #define fi                 frame->Xfi
00512 #define length             frame->Xlength
00513 #define max                frame->Xmax
00514 #define min                frame->Xmin
00515 #define number             frame->Xnumber
00516 #define offset             frame->Xoffset
00517 #define op                 frame->Xop
00518 #define save_capture_last  frame->Xsave_capture_last
00519 #define save_offset1       frame->Xsave_offset1
00520 #define save_offset2       frame->Xsave_offset2
00521 #define save_offset3       frame->Xsave_offset3
00522 #define stacksave          frame->Xstacksave
00523 
00524 #define newptrb            frame->Xnewptrb
00525 
00526 /* When recursion is being used, local variables are allocated on the stack and
00527 get preserved during recursion in the normal way. In this environment, fi and
00528 i, and fc and c, can be the same variables. */
00529 
00530 #else         /* NO_RECURSE not defined */
00531 #define fi i
00532 #define fc c
00533 
00534 
00535 #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
00536 const uschar *charptr;             /* in small blocks of the code. My normal */
00537 #endif                             /* style of coding would have declared    */
00538 const uschar *callpat;             /* them within each of those blocks.      */
00539 const uschar *data;                /* However, in order to accommodate the   */
00540 const uschar *next;                /* version of this code that uses an      */
00541 USPTR         pp;                  /* external "stack" implemented on the    */
00542 const uschar *prev;                /* heap, it is easier to declare them all */
00543 USPTR         saved_eptr;          /* here, so the declarations can be cut   */
00544                                    /* out in a block. The only declarations  */
00545 recursion_info new_recursive;      /* within blocks below are for variables  */
00546                                    /* that do not have to be preserved over  */
00547 BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
00548 BOOL condition;
00549 BOOL prev_is_word;
00550 
00551 unsigned long int original_ims;
00552 
00553 #ifdef SUPPORT_UCP
00554 int prop_type;
00555 int prop_value;
00556 int prop_fail_result;
00557 int prop_category;
00558 int prop_chartype;
00559 int prop_script;
00560 int oclength;
00561 uschar occhars[8];
00562 #endif
00563 
00564 int ctype;
00565 int length;
00566 int max;
00567 int min;
00568 int number;
00569 int offset;
00570 int op;
00571 int save_capture_last;
00572 int save_offset1, save_offset2, save_offset3;
00573 int stacksave[REC_STACK_SAVE_MAX];
00574 
00575 eptrblock newptrb;
00576 #endif     /* NO_RECURSE */
00577 
00578 /* These statements are here to stop the compiler complaining about uninitialized
00579 variables. */
00580 
00581 #ifdef SUPPORT_UCP
00582 prop_value = 0;
00583 prop_fail_result = 0;
00584 #endif
00585 
00586 
00587 /* This label is used for tail recursion, which is used in a few cases even
00588 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
00589 used. Thanks to Ian Taylor for noticing this possibility and sending the
00590 original patch. */
00591 
00592 TAIL_RECURSE:
00593 
00594 /* OK, now we can get on with the real code of the function. Recursive calls
00595 are specified by the macro RMATCH and RRETURN is used to return. When
00596 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
00597 and a "return", respectively (possibly with some debugging if DEBUG is
00598 defined). However, RMATCH isn't like a function call because it's quite a
00599 complicated macro. It has to be used in one particular way. This shouldn't,
00600 however, impact performance when true recursion is being used. */
00601 
00602 #ifdef SUPPORT_UTF8
00603 utf8 = md->utf8;       /* Local copy of the flag */
00604 #else
00605 utf8 = FALSE;
00606 #endif
00607 
00608 /* First check that we haven't called match() too many times, or that we
00609 haven't exceeded the recursive call limit. */
00610 
00611 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
00612 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
00613 
00614 original_ims = ims;    /* Save for resetting on ')' */
00615 
00616 /* At the start of a group with an unlimited repeat that may match an empty
00617 string, the match_cbegroup flag is set. When this is the case, add the current
00618 subject pointer to the chain of such remembered pointers, to be checked when we
00619 hit the closing ket, in order to break infinite loops that match no characters.
00620 When match() is called in other circumstances, don't add to the chain. The
00621 match_cbegroup flag must NOT be used with tail recursion, because the memory
00622 block that is used is on the stack, so a new one may be required for each
00623 match(). */
00624 
00625 if ((flags & match_cbegroup) != 0)
00626   {
00627   newptrb.epb_saved_eptr = eptr;
00628   newptrb.epb_prev = eptrb;
00629   eptrb = &newptrb;
00630   }
00631 
00632 /* Now start processing the opcodes. */
00633 
00634 for (;;)
00635   {
00636   minimize = possessive = FALSE;
00637   op = *ecode;
00638 
00639   /* For partial matching, remember if we ever hit the end of the subject after
00640   matching at least one subject character. */
00641 
00642   if (md->partial &&
00643       eptr >= md->end_subject &&
00644       eptr > mstart)
00645     md->hitend = TRUE;
00646 
00647   switch(op)
00648     {
00649     case OP_FAIL:
00650     RRETURN(MATCH_NOMATCH);
00651 
00652     case OP_PRUNE:
00653     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
00654       ims, eptrb, flags, RM51);
00655     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
00656     RRETURN(MATCH_PRUNE);
00657 
00658     case OP_COMMIT:
00659     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
00660       ims, eptrb, flags, RM52);
00661     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
00662     RRETURN(MATCH_COMMIT);
00663 
00664     case OP_SKIP:
00665     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
00666       ims, eptrb, flags, RM53);
00667     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
00668     md->start_match_ptr = eptr;   /* Pass back current position */
00669     RRETURN(MATCH_SKIP);
00670 
00671     case OP_THEN:
00672     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
00673       ims, eptrb, flags, RM54);
00674     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
00675     RRETURN(MATCH_THEN);
00676 
00677     /* Handle a capturing bracket. If there is space in the offset vector, save
00678     the current subject position in the working slot at the top of the vector.
00679     We mustn't change the current values of the data slot, because they may be
00680     set from a previous iteration of this group, and be referred to by a
00681     reference inside the group.
00682 
00683     If the bracket fails to match, we need to restore this value and also the
00684     values of the final offsets, in case they were set by a previous iteration
00685     of the same bracket.
00686 
00687     If there isn't enough space in the offset vector, treat this as if it were
00688     a non-capturing bracket. Don't worry about setting the flag for the error
00689     case here; that is handled in the code for KET. */
00690 
00691     case OP_CBRA:
00692     case OP_SCBRA:
00693     number = GET2(ecode, 1+LINK_SIZE);
00694     offset = number << 1;
00695 
00696 #ifdef DEBUG
00697     printf("start bracket %d\n", number);
00698     printf("subject=");
00699     pchars(eptr, 16, TRUE, md);
00700     printf("\n");
00701 #endif
00702 
00703     if (offset < md->offset_max)
00704       {
00705       save_offset1 = md->offset_vector[offset];
00706       save_offset2 = md->offset_vector[offset+1];
00707       save_offset3 = md->offset_vector[md->offset_end - number];
00708       save_capture_last = md->capture_last;
00709 
00710       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
00711       md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
00712 
00713       flags = (op == OP_SCBRA)? match_cbegroup : 0;
00714       do
00715         {
00716         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
00717           ims, eptrb, flags, RM1);
00718         if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
00719         md->capture_last = save_capture_last;
00720         ecode += GET(ecode, 1);
00721         }
00722       while (*ecode == OP_ALT);
00723 
00724       DPRINTF(("bracket %d failed\n", number));
00725 
00726       md->offset_vector[offset] = save_offset1;
00727       md->offset_vector[offset+1] = save_offset2;
00728       md->offset_vector[md->offset_end - number] = save_offset3;
00729 
00730       RRETURN(MATCH_NOMATCH);
00731       }
00732 
00733     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
00734     as a non-capturing bracket. */
00735 
00736     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
00737     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
00738 
00739     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
00740 
00741     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
00742     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
00743 
00744     /* Non-capturing bracket. Loop for all the alternatives. When we get to the
00745     final alternative within the brackets, we would return the result of a
00746     recursive call to match() whatever happened. We can reduce stack usage by
00747     turning this into a tail recursion, except in the case when match_cbegroup
00748     is set.*/
00749 
00750     case OP_BRA:
00751     case OP_SBRA:
00752     DPRINTF(("start non-capturing bracket\n"));
00753     flags = (op >= OP_SBRA)? match_cbegroup : 0;
00754     for (;;)
00755       {
00756       if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
00757         {
00758         if (flags == 0)    /* Not a possibly empty group */
00759           {
00760           ecode += _pcre_OP_lengths[*ecode];
00761           DPRINTF(("bracket 0 tail recursion\n"));
00762           goto TAIL_RECURSE;
00763           }
00764 
00765         /* Possibly empty group; can't use tail recursion. */
00766 
00767         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
00768           eptrb, flags, RM48);
00769         RRETURN(rrc);
00770         }
00771 
00772       /* For non-final alternatives, continue the loop for a NOMATCH result;
00773       otherwise return. */
00774 
00775       RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
00776         eptrb, flags, RM2);
00777       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
00778       ecode += GET(ecode, 1);
00779       }
00780     /* Control never reaches here. */
00781 
00782     /* Conditional group: compilation checked that there are no more than
00783     two branches. If the condition is false, skipping the first branch takes us
00784     past the end if there is only one branch, but that's OK because that is
00785     exactly what going to the ket would do. As there is only one branch to be
00786     obeyed, we can use tail recursion to avoid using another stack frame. */
00787 
00788     case OP_COND:
00789     case OP_SCOND:
00790     if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
00791       {
00792       offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
00793       condition = md->recursive != NULL &&
00794         (offset == RREF_ANY || offset == md->recursive->group_num);
00795       ecode += condition? 3 : GET(ecode, 1);
00796       }
00797 
00798     else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */
00799       {
00800       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
00801       condition = offset < offset_top && md->offset_vector[offset] >= 0;
00802       ecode += condition? 3 : GET(ecode, 1);
00803       }
00804 
00805     else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */
00806       {
00807       condition = FALSE;
00808       ecode += GET(ecode, 1);
00809       }
00810 
00811     /* The condition is an assertion. Call match() to evaluate it - setting
00812     the flags argument to match_condassert causes it to stop at the end of an
00813     assertion. */
00814 
00815     else
00816       {
00817       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
00818           match_condassert, RM3);
00819       if (rrc == MATCH_MATCH)
00820         {
00821         condition = TRUE;
00822         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
00823         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
00824         }
00825       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
00826         {
00827         RRETURN(rrc);         /* Need braces because of following else */
00828         }
00829       else
00830         {
00831         condition = FALSE;
00832         ecode += GET(ecode, 1);
00833         }
00834       }
00835 
00836     /* We are now at the branch that is to be obeyed. As there is only one,
00837     we can use tail recursion to avoid using another stack frame, except when
00838     match_cbegroup is required for an unlimited repeat of a possibly empty
00839     group. If the second alternative doesn't exist, we can just plough on. */
00840 
00841     if (condition || *ecode == OP_ALT)
00842       {
00843       ecode += 1 + LINK_SIZE;
00844       if (op == OP_SCOND)        /* Possibly empty group */
00845         {
00846         RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
00847         RRETURN(rrc);
00848         }
00849       else                       /* Group must match something */
00850         {
00851         flags = 0;
00852         goto TAIL_RECURSE;
00853         }
00854       }
00855     else                         /* Condition false & no 2nd alternative */
00856       {
00857       ecode += 1 + LINK_SIZE;
00858       }
00859     break;
00860 
00861 
00862     /* End of the pattern, either real or forced. If we are in a top-level
00863     recursion, we should restore the offsets appropriately and continue from
00864     after the call. */
00865 
00866     case OP_ACCEPT:
00867     case OP_END:
00868     if (md->recursive != NULL && md->recursive->group_num == 0)
00869       {
00870       recursion_info *rec = md->recursive;
00871       DPRINTF(("End of pattern in a (?0) recursion\n"));
00872       md->recursive = rec->prevrec;
00873       memmove(md->offset_vector, rec->offset_save,
00874         rec->saved_max * sizeof(int));
00875       mstart = rec->save_start;
00876       ims = original_ims;
00877       ecode = rec->after_call;
00878       break;
00879       }
00880 
00881     /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
00882     string - backtracking will then try other alternatives, if any. */
00883 
00884     if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
00885     md->end_match_ptr = eptr;           /* Record where we ended */
00886     md->end_offset_top = offset_top;    /* and how many extracts were taken */
00887     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
00888     RRETURN(MATCH_MATCH);
00889 
00890     /* Change option settings */
00891 
00892     case OP_OPT:
00893     ims = ecode[1];
00894     ecode += 2;
00895     DPRINTF(("ims set to %02lx\n", ims));
00896     break;
00897 
00898     /* Assertion brackets. Check the alternative branches in turn - the
00899     matching won't pass the KET for an assertion. If any one branch matches,
00900     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
00901     start of each branch to move the current point backwards, so the code at
00902     this level is identical to the lookahead case. */
00903 
00904     case OP_ASSERT:
00905     case OP_ASSERTBACK:
00906     do
00907       {
00908       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
00909         RM4);
00910       if (rrc == MATCH_MATCH) break;
00911       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
00912       ecode += GET(ecode, 1);
00913       }
00914     while (*ecode == OP_ALT);
00915     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
00916 
00917     /* If checking an assertion for a condition, return MATCH_MATCH. */
00918 
00919     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
00920 
00921     /* Continue from after the assertion, updating the offsets high water
00922     mark, since extracts may have been taken during the assertion. */
00923 
00924     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
00925     ecode += 1 + LINK_SIZE;
00926     offset_top = md->end_offset_top;
00927     continue;
00928 
00929     /* Negative assertion: all branches must fail to match */
00930 
00931     case OP_ASSERT_NOT:
00932     case OP_ASSERTBACK_NOT:
00933     do
00934       {
00935       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
00936         RM5);
00937       if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
00938       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
00939       ecode += GET(ecode,1);
00940       }
00941     while (*ecode == OP_ALT);
00942 
00943     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
00944 
00945     ecode += 1 + LINK_SIZE;
00946     continue;
00947 
00948     /* Move the subject pointer back. This occurs only at the start of
00949     each branch of a lookbehind assertion. If we are too close to the start to
00950     move back, this match function fails. When working with UTF-8 we move
00951     back a number of characters, not bytes. */
00952 
00953     case OP_REVERSE:
00954 #ifdef SUPPORT_UTF8
00955     if (utf8)
00956       {
00957       i = GET(ecode, 1);
00958       while (i-- > 0)
00959         {
00960         eptr--;
00961         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
00962         BACKCHAR(eptr);
00963         }
00964       }
00965     else
00966 #endif
00967 
00968     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
00969 
00970       {
00971       eptr -= GET(ecode, 1);
00972       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
00973       }
00974 
00975     /* Skip to next op code */
00976 
00977     ecode += 1 + LINK_SIZE;
00978     break;
00979 
00980     /* The callout item calls an external function, if one is provided, passing
00981     details of the match so far. This is mainly for debugging, though the
00982     function is able to force a failure. */
00983 
00984     case OP_CALLOUT:
00985     if (pcre_callout != NULL)
00986       {
00987       pcre_callout_block cb;
00988       cb.version          = 1;   /* Version 1 of the callout block */
00989       cb.callout_number   = ecode[1];
00990       cb.offset_vector    = md->offset_vector;
00991       cb.subject          = (PCRE_SPTR)md->start_subject;
00992       cb.subject_length   = md->end_subject - md->start_subject;
00993       cb.start_match      = mstart - md->start_subject;
00994       cb.current_position = eptr - md->start_subject;
00995       cb.pattern_position = GET(ecode, 2);
00996       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
00997       cb.capture_top      = offset_top/2;
00998       cb.capture_last     = md->capture_last;
00999       cb.callout_data     = md->callout_data;
01000       if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
01001       if (rrc < 0) RRETURN(rrc);
01002       }
01003     ecode += 2 + 2*LINK_SIZE;
01004     break;
01005 
01006     /* Recursion either matches the current regex, or some subexpression. The
01007     offset data is the offset to the starting bracket from the start of the
01008     whole pattern. (This is so that it works from duplicated subpatterns.)
01009 
01010     If there are any capturing brackets started but not finished, we have to
01011     save their starting points and reinstate them after the recursion. However,
01012     we don't know how many such there are (offset_top records the completed
01013     total) so we just have to save all the potential data. There may be up to
01014     65535 such values, which is too large to put on the stack, but using malloc
01015     for small numbers seems expensive. As a compromise, the stack is used when
01016     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
01017     is used. If the malloc fails, nothing is saved: the match is abandoned at
01018     this point by returning PCRE_ERROR_NOMEMORY.
01019 
01020 
01021     There are also other values that have to be saved. We use a chained
01022     sequence of blocks that actually live on the stack. Thanks to Robin Houston
01023     for the original version of this logic. */
01024 
01025     case OP_RECURSE:
01026       {
01027       callpat = md->start_code + GET(ecode, 1);
01028       new_recursive.group_num = (callpat == md->start_code)? 0 :
01029         GET2(callpat, 1 + LINK_SIZE);
01030 
01031       /* Add to "recursing stack" */
01032 
01033       new_recursive.prevrec = md->recursive;
01034       md->recursive = &new_recursive;
01035 
01036       /* Find where to continue from afterwards */
01037 
01038       ecode += 1 + LINK_SIZE;
01039       new_recursive.after_call = ecode;
01040 
01041       /* Now save the offset data. */
01042 
01043       new_recursive.saved_max = md->offset_end;
01044       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
01045         new_recursive.offset_save = stacksave;
01046       else
01047         {
01048         new_recursive.offset_save =
01049           (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
01050         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
01051         }
01052 
01053       memcpy(new_recursive.offset_save, md->offset_vector,
01054             new_recursive.saved_max * sizeof(int));
01055       new_recursive.save_start = mstart;
01056       mstart = eptr;
01057 
01058       /* OK, now we can do the recursion. For each top-level alternative we
01059       restore the offset and recursion data. */
01060 
01061       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
01062       flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
01063       do
01064         {
01065         RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
01066           md, ims, eptrb, flags, RM6);
01067         if (rrc == MATCH_MATCH)
01068           {
01069           DPRINTF(("Recursion matched\n"));
01070           md->recursive = new_recursive.prevrec;
01071           if (new_recursive.offset_save != stacksave)
01072             (pcre_free)(new_recursive.offset_save);
01073           RRETURN(MATCH_MATCH);
01074           }
01075         else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
01076           {
01077           DPRINTF(("Recursion gave error %d\n", rrc));
01078           RRETURN(rrc);
01079           }
01080 
01081         md->recursive = &new_recursive;
01082         memcpy(md->offset_vector, new_recursive.offset_save,
01083             new_recursive.saved_max * sizeof(int));
01084         callpat += GET(callpat, 1);
01085         }
01086       while (*callpat == OP_ALT);
01087 
01088       DPRINTF(("Recursion didn't match\n"));
01089       md->recursive = new_recursive.prevrec;
01090       if (new_recursive.offset_save != stacksave)
01091         (pcre_free)(new_recursive.offset_save);
01092       RRETURN(MATCH_NOMATCH);
01093       }
01094     /* Control never reaches here */
01095 
01096     /* "Once" brackets are like assertion brackets except that after a match,
01097     the point in the subject string is not moved back. Thus there can never be
01098     a move back into the brackets. Friedl calls these "atomic" subpatterns.
01099     Check the alternative branches in turn - the matching won't pass the KET
01100     for this kind of subpattern. If any one branch matches, we carry on as at
01101     the end of a normal bracket, leaving the subject pointer. */
01102 
01103     case OP_ONCE:
01104     prev = ecode;
01105     saved_eptr = eptr;
01106 
01107     do
01108       {
01109       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
01110       if (rrc == MATCH_MATCH) break;
01111       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
01112       ecode += GET(ecode,1);
01113       }
01114     while (*ecode == OP_ALT);
01115 
01116     /* If hit the end of the group (which could be repeated), fail */
01117 
01118     if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
01119 
01120     /* Continue as from after the assertion, updating the offsets high water
01121     mark, since extracts may have been taken. */
01122 
01123     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
01124 
01125     offset_top = md->end_offset_top;
01126     eptr = md->end_match_ptr;
01127 
01128     /* For a non-repeating ket, just continue at this level. This also
01129     happens for a repeating ket if no characters were matched in the group.
01130     This is the forcible breaking of infinite loops as implemented in Perl
01131     5.005. If there is an options reset, it will get obeyed in the normal
01132     course of events. */
01133 
01134     if (*ecode == OP_KET || eptr == saved_eptr)
01135       {
01136       ecode += 1+LINK_SIZE;
01137       break;
01138       }
01139 
01140     /* The repeating kets try the rest of the pattern or restart from the
01141     preceding bracket, in the appropriate order. The second "call" of match()
01142     uses tail recursion, to avoid using another stack frame. We need to reset
01143     any options that changed within the bracket before re-running it, so
01144     check the next opcode. */
01145 
01146     if (ecode[1+LINK_SIZE] == OP_OPT)
01147       {
01148       ims = (ims & ~PCRE_IMS) | ecode[4];
01149       DPRINTF(("ims set to %02lx at group repeat\n", ims));
01150       }
01151 
01152     if (*ecode == OP_KETRMIN)
01153       {
01154       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
01155       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01156       ecode = prev;
01157       flags = 0;
01158       goto TAIL_RECURSE;
01159       }
01160     else  /* OP_KETRMAX */
01161       {
01162       RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
01163       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01164       ecode += 1 + LINK_SIZE;
01165       flags = 0;
01166       goto TAIL_RECURSE;
01167       }
01168     /* Control never gets here */
01169 
01170     /* An alternation is the end of a branch; scan along to find the end of the
01171     bracketed group and go to there. */
01172 
01173     case OP_ALT:
01174     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
01175     break;
01176 
01177     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
01178     indicating that it may occur zero times. It may repeat infinitely, or not
01179     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
01180     with fixed upper repeat limits are compiled as a number of copies, with the
01181     optional ones preceded by BRAZERO or BRAMINZERO. */
01182 
01183     case OP_BRAZERO:
01184       {
01185       next = ecode+1;
01186       RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
01187       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01188       do next += GET(next,1); while (*next == OP_ALT);
01189       ecode = next + 1 + LINK_SIZE;
01190       }
01191     break;
01192 
01193     case OP_BRAMINZERO:
01194       {
01195       next = ecode+1;
01196       do next += GET(next, 1); while (*next == OP_ALT);
01197       RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
01198       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01199       ecode++;
01200       }
01201     break;
01202 
01203     case OP_SKIPZERO:
01204       {
01205       next = ecode+1;
01206       do next += GET(next,1); while (*next == OP_ALT);
01207       ecode = next + 1 + LINK_SIZE;
01208       }
01209     break;
01210 
01211     /* End of a group, repeated or non-repeating. */
01212 
01213     case OP_KET:
01214     case OP_KETRMIN:
01215     case OP_KETRMAX:
01216     prev = ecode - GET(ecode, 1);
01217 
01218     /* If this was a group that remembered the subject start, in order to break
01219     infinite repeats of empty string matches, retrieve the subject start from
01220     the chain. Otherwise, set it NULL. */
01221 
01222     if (*prev >= OP_SBRA)
01223       {
01224       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
01225       eptrb = eptrb->epb_prev;              /* Backup to previous group */
01226       }
01227     else saved_eptr = NULL;
01228 
01229     /* If we are at the end of an assertion group, stop matching and return
01230     MATCH_MATCH, but record the current high water mark for use by positive
01231     assertions. Do this also for the "once" (atomic) groups. */
01232 
01233     if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
01234         *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
01235         *prev == OP_ONCE)
01236       {
01237       md->end_match_ptr = eptr;      /* For ONCE */
01238       md->end_offset_top = offset_top;
01239       RRETURN(MATCH_MATCH);
01240       }
01241 
01242     /* For capturing groups we have to check the group number back at the start
01243     and if necessary complete handling an extraction by setting the offsets and
01244     bumping the high water mark. Note that whole-pattern recursion is coded as
01245     a recurse into group 0, so it won't be picked up here. Instead, we catch it
01246     when the OP_END is reached. Other recursion is handled here. */
01247 
01248     if (*prev == OP_CBRA || *prev == OP_SCBRA)
01249       {
01250       number = GET2(prev, 1+LINK_SIZE);
01251       offset = number << 1;
01252 
01253 #ifdef DEBUG
01254       printf("end bracket %d", number);
01255       printf("\n");
01256 #endif
01257 
01258       md->capture_last = number;
01259       if (offset >= md->offset_max) md->offset_overflow = TRUE; else
01260         {
01261         md->offset_vector[offset] =
01262           md->offset_vector[md->offset_end - number];
01263         md->offset_vector[offset+1] = eptr - md->start_subject;
01264         if (offset_top <= offset) offset_top = offset + 2;
01265         }
01266 
01267       /* Handle a recursively called group. Restore the offsets
01268       appropriately and continue from after the call. */
01269 
01270       if (md->recursive != NULL && md->recursive->group_num == number)
01271         {
01272         recursion_info *rec = md->recursive;
01273         DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
01274         md->recursive = rec->prevrec;
01275         mstart = rec->save_start;
01276         memcpy(md->offset_vector, rec->offset_save,
01277           rec->saved_max * sizeof(int));
01278         ecode = rec->after_call;
01279         ims = original_ims;
01280         break;
01281         }
01282       }
01283 
01284     /* For both capturing and non-capturing groups, reset the value of the ims
01285     flags, in case they got changed during the group. */
01286 
01287     ims = original_ims;
01288     DPRINTF(("ims reset to %02lx\n", ims));
01289 
01290     /* For a non-repeating ket, just continue at this level. This also
01291     happens for a repeating ket if no characters were matched in the group.
01292     This is the forcible breaking of infinite loops as implemented in Perl
01293     5.005. If there is an options reset, it will get obeyed in the normal
01294     course of events. */
01295 
01296     if (*ecode == OP_KET || eptr == saved_eptr)
01297       {
01298       ecode += 1 + LINK_SIZE;
01299       break;
01300       }
01301 
01302     /* The repeating kets try the rest of the pattern or restart from the
01303     preceding bracket, in the appropriate order. In the second case, we can use
01304     tail recursion to avoid using another stack frame, unless we have an
01305     unlimited repeat of a group that can match an empty string. */
01306 
01307     flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
01308 
01309     if (*ecode == OP_KETRMIN)
01310       {
01311       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
01312       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01313       if (flags != 0)    /* Could match an empty string */
01314         {
01315         RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
01316         RRETURN(rrc);
01317         }
01318       ecode = prev;
01319       goto TAIL_RECURSE;
01320       }
01321     else  /* OP_KETRMAX */
01322       {
01323       RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
01324       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01325       ecode += 1 + LINK_SIZE;
01326       flags = 0;
01327       goto TAIL_RECURSE;
01328       }
01329     /* Control never gets here */
01330 
01331     /* Start of subject unless notbol, or after internal newline if multiline */
01332 
01333     case OP_CIRC:
01334     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
01335     if ((ims & PCRE_MULTILINE) != 0)
01336       {
01337       if (eptr != md->start_subject &&
01338           (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
01339         RRETURN(MATCH_NOMATCH);
01340       ecode++;
01341       break;
01342       }
01343     /* ... else fall through */
01344 
01345     /* Start of subject assertion */
01346 
01347     case OP_SOD:
01348     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
01349     ecode++;
01350     break;
01351 
01352     /* Start of match assertion */
01353 
01354     case OP_SOM:
01355     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
01356     ecode++;
01357     break;
01358 
01359     /* Reset the start of match point */
01360 
01361     case OP_SET_SOM:
01362     mstart = eptr;
01363     ecode++;
01364     break;
01365 
01366     /* Assert before internal newline if multiline, or before a terminating
01367     newline unless endonly is set, else end of subject unless noteol is set. */
01368 
01369     case OP_DOLL:
01370     if ((ims & PCRE_MULTILINE) != 0)
01371       {
01372       if (eptr < md->end_subject)
01373         { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
01374       else
01375         { if (md->noteol) RRETURN(MATCH_NOMATCH); }
01376       ecode++;
01377       break;
01378       }
01379     else
01380       {
01381       if (md->noteol) RRETURN(MATCH_NOMATCH);
01382       if (!md->endonly)
01383         {
01384         if (eptr != md->end_subject &&
01385             (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
01386           RRETURN(MATCH_NOMATCH);
01387         ecode++;
01388         break;
01389         }
01390       }
01391     /* ... else fall through for endonly */
01392 
01393     /* End of subject assertion (\z) */
01394 
01395     case OP_EOD:
01396     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
01397     ecode++;
01398     break;
01399 
01400     /* End of subject or ending \n assertion (\Z) */
01401 
01402     case OP_EODN:
01403     if (eptr != md->end_subject &&
01404         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
01405       RRETURN(MATCH_NOMATCH);
01406     ecode++;
01407     break;
01408 
01409     /* Word boundary assertions */
01410 
01411     case OP_NOT_WORD_BOUNDARY:
01412     case OP_WORD_BOUNDARY:
01413       {
01414 
01415       /* Find out if the previous and current characters are "word" characters.
01416       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
01417       be "non-word" characters. */
01418 
01419 #ifdef SUPPORT_UTF8
01420       if (utf8)
01421         {
01422         if (eptr == md->start_subject) prev_is_word = FALSE; else
01423           {
01424           const uschar *lastptr = eptr - 1;
01425           while((*lastptr & 0xc0) == 0x80) lastptr--;
01426           GETCHAR(c, lastptr);
01427           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
01428           }
01429         if (eptr >= md->end_subject) cur_is_word = FALSE; else
01430           {
01431           GETCHAR(c, eptr);
01432           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
01433           }
01434         }
01435       else
01436 #endif
01437 
01438       /* More streamlined when not in UTF-8 mode */
01439 
01440         {
01441         prev_is_word = (eptr != md->start_subject) &&
01442           ((md->ctypes[eptr[-1]] & ctype_word) != 0);
01443         cur_is_word = (eptr < md->end_subject) &&
01444           ((md->ctypes[*eptr] & ctype_word) != 0);
01445         }
01446 
01447       /* Now see if the situation is what we want */
01448 
01449       if ((*ecode++ == OP_WORD_BOUNDARY)?
01450            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
01451         RRETURN(MATCH_NOMATCH);
01452       }
01453     break;
01454 
01455     /* Match a single character type; inline for speed */
01456 
01457     case OP_ANY:
01458     if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
01459     /* Fall through */
01460 
01461     case OP_ALLANY:
01462     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
01463     if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
01464     ecode++;
01465     break;
01466 
01467     /* Match a single byte, even in UTF-8 mode. This opcode really does match
01468     any byte, even newline, independent of the setting of PCRE_DOTALL. */
01469 
01470     case OP_ANYBYTE:
01471     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
01472     ecode++;
01473     break;
01474 
01475     case OP_NOT_DIGIT:
01476     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01477     GETCHARINCTEST(c, eptr);
01478     if (
01479 #ifdef SUPPORT_UTF8
01480        c < 256 &&
01481 #endif
01482        (md->ctypes[c] & ctype_digit) != 0
01483        )
01484       RRETURN(MATCH_NOMATCH);
01485     ecode++;
01486     break;
01487 
01488     case OP_DIGIT:
01489     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01490     GETCHARINCTEST(c, eptr);
01491     if (
01492 #ifdef SUPPORT_UTF8
01493        c >= 256 ||
01494 #endif
01495        (md->ctypes[c] & ctype_digit) == 0
01496        )
01497       RRETURN(MATCH_NOMATCH);
01498     ecode++;
01499     break;
01500 
01501     case OP_NOT_WHITESPACE:
01502     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01503     GETCHARINCTEST(c, eptr);
01504     if (
01505 #ifdef SUPPORT_UTF8
01506        c < 256 &&
01507 #endif
01508        (md->ctypes[c] & ctype_space) != 0
01509        )
01510       RRETURN(MATCH_NOMATCH);
01511     ecode++;
01512     break;
01513 
01514     case OP_WHITESPACE:
01515     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01516     GETCHARINCTEST(c, eptr);
01517     if (
01518 #ifdef SUPPORT_UTF8
01519        c >= 256 ||
01520 #endif
01521        (md->ctypes[c] & ctype_space) == 0
01522        )
01523       RRETURN(MATCH_NOMATCH);
01524     ecode++;
01525     break;
01526 
01527     case OP_NOT_WORDCHAR:
01528     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01529     GETCHARINCTEST(c, eptr);
01530     if (
01531 #ifdef SUPPORT_UTF8
01532        c < 256 &&
01533 #endif
01534        (md->ctypes[c] & ctype_word) != 0
01535        )
01536       RRETURN(MATCH_NOMATCH);
01537     ecode++;
01538     break;
01539 
01540     case OP_WORDCHAR:
01541     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01542     GETCHARINCTEST(c, eptr);
01543     if (
01544 #ifdef SUPPORT_UTF8
01545        c >= 256 ||
01546 #endif
01547        (md->ctypes[c] & ctype_word) == 0
01548        )
01549       RRETURN(MATCH_NOMATCH);
01550     ecode++;
01551     break;
01552 
01553     case OP_ANYNL:
01554     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01555     GETCHARINCTEST(c, eptr);
01556     switch(c)
01557       {
01558       default: RRETURN(MATCH_NOMATCH);
01559       case 0x000d:
01560       if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
01561       break;
01562 
01563       case 0x000a:
01564       break;
01565 
01566       case 0x000b:
01567       case 0x000c:
01568       case 0x0085:
01569       case 0x2028:
01570       case 0x2029:
01571       if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
01572       break;
01573       }
01574     ecode++;
01575     break;
01576 
01577     case OP_NOT_HSPACE:
01578     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01579     GETCHARINCTEST(c, eptr);
01580     switch(c)
01581       {
01582       default: break;
01583       case 0x09:      /* HT */
01584       case 0x20:      /* SPACE */
01585       case 0xa0:      /* NBSP */
01586       case 0x1680:    /* OGHAM SPACE MARK */
01587       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
01588       case 0x2000:    /* EN QUAD */
01589       case 0x2001:    /* EM QUAD */
01590       case 0x2002:    /* EN SPACE */
01591       case 0x2003:    /* EM SPACE */
01592       case 0x2004:    /* THREE-PER-EM SPACE */
01593       case 0x2005:    /* FOUR-PER-EM SPACE */
01594       case 0x2006:    /* SIX-PER-EM SPACE */
01595       case 0x2007:    /* FIGURE SPACE */
01596       case 0x2008:    /* PUNCTUATION SPACE */
01597       case 0x2009:    /* THIN SPACE */
01598       case 0x200A:    /* HAIR SPACE */
01599       case 0x202f:    /* NARROW NO-BREAK SPACE */
01600       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
01601       case 0x3000:    /* IDEOGRAPHIC SPACE */
01602       RRETURN(MATCH_NOMATCH);
01603       }
01604     ecode++;
01605     break;
01606 
01607     case OP_HSPACE:
01608     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01609     GETCHARINCTEST(c, eptr);
01610     switch(c)
01611       {
01612       default: RRETURN(MATCH_NOMATCH);
01613       case 0x09:      /* HT */
01614       case 0x20:      /* SPACE */
01615       case 0xa0:      /* NBSP */
01616       case 0x1680:    /* OGHAM SPACE MARK */
01617       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
01618       case 0x2000:    /* EN QUAD */
01619       case 0x2001:    /* EM QUAD */
01620       case 0x2002:    /* EN SPACE */
01621       case 0x2003:    /* EM SPACE */
01622       case 0x2004:    /* THREE-PER-EM SPACE */
01623       case 0x2005:    /* FOUR-PER-EM SPACE */
01624       case 0x2006:    /* SIX-PER-EM SPACE */
01625       case 0x2007:    /* FIGURE SPACE */
01626       case 0x2008:    /* PUNCTUATION SPACE */
01627       case 0x2009:    /* THIN SPACE */
01628       case 0x200A:    /* HAIR SPACE */
01629       case 0x202f:    /* NARROW NO-BREAK SPACE */
01630       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
01631       case 0x3000:    /* IDEOGRAPHIC SPACE */
01632       break;
01633       }
01634     ecode++;
01635     break;
01636 
01637     case OP_NOT_VSPACE:
01638     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01639     GETCHARINCTEST(c, eptr);
01640     switch(c)
01641       {
01642       default: break;
01643       case 0x0a:      /* LF */
01644       case 0x0b:      /* VT */
01645       case 0x0c:      /* FF */
01646       case 0x0d:      /* CR */
01647       case 0x85:      /* NEL */
01648       case 0x2028:    /* LINE SEPARATOR */
01649       case 0x2029:    /* PARAGRAPH SEPARATOR */
01650       RRETURN(MATCH_NOMATCH);
01651       }
01652     ecode++;
01653     break;
01654 
01655     case OP_VSPACE:
01656     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01657     GETCHARINCTEST(c, eptr);
01658     switch(c)
01659       {
01660       default: RRETURN(MATCH_NOMATCH);
01661       case 0x0a:      /* LF */
01662       case 0x0b:      /* VT */
01663       case 0x0c:      /* FF */
01664       case 0x0d:      /* CR */
01665       case 0x85:      /* NEL */
01666       case 0x2028:    /* LINE SEPARATOR */
01667       case 0x2029:    /* PARAGRAPH SEPARATOR */
01668       break;
01669       }
01670     ecode++;
01671     break;
01672 
01673 #ifdef SUPPORT_UCP
01674     /* Check the next character by Unicode property. We will get here only
01675     if the support is in the binary; otherwise a compile-time error occurs. */
01676 
01677     case OP_PROP:
01678     case OP_NOTPROP:
01679     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01680     GETCHARINCTEST(c, eptr);
01681       {
01682       const ucd_record * prop = GET_UCD(c);
01683 
01684       switch(ecode[1])
01685         {
01686         case PT_ANY:
01687         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
01688         break;
01689 
01690         case PT_LAMP:
01691         if ((prop->chartype == ucp_Lu ||
01692              prop->chartype == ucp_Ll ||
01693              prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
01694           RRETURN(MATCH_NOMATCH);
01695          break;
01696 
01697         case PT_GC:
01698         if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
01699           RRETURN(MATCH_NOMATCH);
01700         break;
01701 
01702         case PT_PC:
01703         if ((ecode[2] != prop->chartype) == (op == OP_PROP))
01704           RRETURN(MATCH_NOMATCH);
01705         break;
01706 
01707         case PT_SC:
01708         if ((ecode[2] != prop->script) == (op == OP_PROP))
01709           RRETURN(MATCH_NOMATCH);
01710         break;
01711 
01712         default:
01713         RRETURN(PCRE_ERROR_INTERNAL);
01714         }
01715 
01716       ecode += 3;
01717       }
01718     break;
01719 
01720     /* Match an extended Unicode sequence. We will get here only if the support
01721     is in the binary; otherwise a compile-time error occurs. */
01722 
01723     case OP_EXTUNI:
01724     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01725     GETCHARINCTEST(c, eptr);
01726       {
01727       int category = UCD_CATEGORY(c);
01728       if (category == ucp_M) RRETURN(MATCH_NOMATCH);
01729       while (eptr < md->end_subject)
01730         {
01731         int len = 1;
01732         if (!utf8) c = *eptr; else
01733           {
01734           GETCHARLEN(c, eptr, len);
01735           }
01736         category = UCD_CATEGORY(c);
01737         if (category != ucp_M) break;
01738         eptr += len;
01739         }
01740       }
01741     ecode++;
01742     break;
01743 #endif
01744 
01745 
01746     /* Match a back reference, possibly repeatedly. Look past the end of the
01747     item to see if there is repeat information following. The code is similar
01748     to that for character classes, but repeated for efficiency. Then obey
01749     similar code to character type repeats - written out again for speed.
01750     However, if the referenced string is the empty string, always treat
01751     it as matched, any number of times (otherwise there could be infinite
01752     loops). */
01753 
01754     case OP_REF:
01755       {
01756       offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
01757       ecode += 3;
01758 
01759       /* If the reference is unset, there are two possibilities:
01760 
01761       (a) In the default, Perl-compatible state, set the length to be longer
01762       than the amount of subject left; this ensures that every attempt at a
01763       match fails. We can't just fail here, because of the possibility of
01764       quantifiers with zero minima.
01765 
01766       (b) If the JavaScript compatibility flag is set, set the length to zero
01767       so that the back reference matches an empty string.
01768 
01769       Otherwise, set the length to the length of what was matched by the
01770       referenced subpattern. */
01771 
01772       if (offset >= offset_top || md->offset_vector[offset] < 0)
01773         length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
01774       else
01775         length = md->offset_vector[offset+1] - md->offset_vector[offset];
01776 
01777       /* Set up for repetition, or handle the non-repeated case */
01778 
01779       switch (*ecode)
01780         {
01781         case OP_CRSTAR:
01782         case OP_CRMINSTAR:
01783         case OP_CRPLUS:
01784         case OP_CRMINPLUS:
01785         case OP_CRQUERY:
01786         case OP_CRMINQUERY:
01787         c = *ecode++ - OP_CRSTAR;
01788         minimize = (c & 1) != 0;
01789         min = rep_min[c];                 /* Pick up values from tables; */
01790         max = rep_max[c];                 /* zero for max => infinity */
01791         if (max == 0) max = INT_MAX;
01792         break;
01793 
01794         case OP_CRRANGE:
01795         case OP_CRMINRANGE:
01796         minimize = (*ecode == OP_CRMINRANGE);
01797         min = GET2(ecode, 1);
01798         max = GET2(ecode, 3);
01799         if (max == 0) max = INT_MAX;
01800         ecode += 5;
01801         break;
01802 
01803         default:               /* No repeat follows */
01804         if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
01805         eptr += length;
01806         continue;              /* With the main loop */
01807         }
01808 
01809       /* If the length of the reference is zero, just continue with the
01810       main loop. */
01811 
01812       if (length == 0) continue;
01813 
01814       /* First, ensure the minimum number of matches are present. We get back
01815       the length of the reference string explicitly rather than passing the
01816       address of eptr, so that eptr can be a register variable. */
01817 
01818       for (i = 1; i <= min; i++)
01819         {
01820         if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
01821         eptr += length;
01822         }
01823 
01824       /* If min = max, continue at the same level without recursion.
01825       They are not both allowed to be zero. */
01826 
01827       if (min == max) continue;
01828 
01829       /* If minimizing, keep trying and advancing the pointer */
01830 
01831       if (minimize)
01832         {
01833         for (fi = min;; fi++)
01834           {
01835           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
01836           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01837           if (fi >= max || !match_ref(offset, eptr, length, md, ims))
01838             RRETURN(MATCH_NOMATCH);
01839           eptr += length;
01840           }
01841         /* Control never gets here */
01842         }
01843 
01844       /* If maximizing, find the longest string and work backwards */
01845 
01846       else
01847         {
01848         pp = eptr;
01849         for (i = min; i < max; i++)
01850           {
01851           if (!match_ref(offset, eptr, length, md, ims)) break;
01852           eptr += length;
01853           }
01854         while (eptr >= pp)
01855           {
01856           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
01857           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01858           eptr -= length;
01859           }
01860         RRETURN(MATCH_NOMATCH);
01861         }
01862       }
01863     /* Control never gets here */
01864 
01865 
01866 
01867     /* Match a bit-mapped character class, possibly repeatedly. This op code is
01868     used when all the characters in the class have values in the range 0-255,
01869     and either the matching is caseful, or the characters are in the range
01870     0-127 when UTF-8 processing is enabled. The only difference between
01871     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
01872     encountered.
01873 
01874     First, look past the end of the item to see if there is repeat information
01875     following. Then obey similar code to character type repeats - written out
01876     again for speed. */
01877 
01878     case OP_NCLASS:
01879     case OP_CLASS:
01880       {
01881       data = ecode + 1;                /* Save for matching */
01882       ecode += 33;                     /* Advance past the item */
01883 
01884       switch (*ecode)
01885         {
01886         case OP_CRSTAR:
01887         case OP_CRMINSTAR:
01888         case OP_CRPLUS:
01889         case OP_CRMINPLUS:
01890         case OP_CRQUERY:
01891         case OP_CRMINQUERY:
01892         c = *ecode++ - OP_CRSTAR;
01893         minimize = (c & 1) != 0;
01894         min = rep_min[c];                 /* Pick up values from tables; */
01895         max = rep_max[c];                 /* zero for max => infinity */
01896         if (max == 0) max = INT_MAX;
01897         break;
01898 
01899         case OP_CRRANGE:
01900         case OP_CRMINRANGE:
01901         minimize = (*ecode == OP_CRMINRANGE);
01902         min = GET2(ecode, 1);
01903         max = GET2(ecode, 3);
01904         if (max == 0) max = INT_MAX;
01905         ecode += 5;
01906         break;
01907 
01908         default:               /* No repeat follows */
01909         min = max = 1;
01910         break;
01911         }
01912 
01913       /* First, ensure the minimum number of matches are present. */
01914 
01915 #ifdef SUPPORT_UTF8
01916       /* UTF-8 mode */
01917       if (utf8)
01918         {
01919         for (i = 1; i <= min; i++)
01920           {
01921           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01922           GETCHARINC(c, eptr);
01923           if (c > 255)
01924             {
01925             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
01926             }
01927           else
01928             {
01929             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
01930             }
01931           }
01932         }
01933       else
01934 #endif
01935       /* Not UTF-8 mode */
01936         {
01937         for (i = 1; i <= min; i++)
01938           {
01939           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01940           c = *eptr++;
01941           if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
01942           }
01943         }
01944 
01945       /* If max == min we can continue with the main loop without the
01946       need to recurse. */
01947 
01948       if (min == max) continue;
01949 
01950       /* If minimizing, keep testing the rest of the expression and advancing
01951       the pointer while it matches the class. */
01952 
01953       if (minimize)
01954         {
01955 #ifdef SUPPORT_UTF8
01956         /* UTF-8 mode */
01957         if (utf8)
01958           {
01959           for (fi = min;; fi++)
01960             {
01961             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
01962             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01963             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01964             GETCHARINC(c, eptr);
01965             if (c > 255)
01966               {
01967               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
01968               }
01969             else
01970               {
01971               if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
01972               }
01973             }
01974           }
01975         else
01976 #endif
01977         /* Not UTF-8 mode */
01978           {
01979           for (fi = min;; fi++)
01980             {
01981             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
01982             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01983             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01984             c = *eptr++;
01985             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
01986             }
01987           }
01988         /* Control never gets here */
01989         }
01990 
01991       /* If maximizing, find the longest possible run, then work backwards. */
01992 
01993       else
01994         {
01995         pp = eptr;
01996 
01997 #ifdef SUPPORT_UTF8
01998         /* UTF-8 mode */
01999         if (utf8)
02000           {
02001           for (i = min; i < max; i++)
02002             {
02003             int len = 1;
02004             if (eptr >= md->end_subject) break;
02005             GETCHARLEN(c, eptr, len);
02006             if (c > 255)
02007               {
02008               if (op == OP_CLASS) break;
02009               }
02010             else
02011               {
02012               if ((data[c/8] & (1 << (c&7))) == 0) break;
02013               }
02014             eptr += len;
02015             }
02016           for (;;)
02017             {
02018             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
02019             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02020             if (eptr-- == pp) break;        /* Stop if tried at original pos */
02021             BACKCHAR(eptr);
02022             }
02023           }
02024         else
02025 #endif
02026           /* Not UTF-8 mode */
02027           {
02028           for (i = min; i < max; i++)
02029             {
02030             if (eptr >= md->end_subject) break;
02031             c = *eptr;
02032             if ((data[c/8] & (1 << (c&7))) == 0) break;
02033             eptr++;
02034             }
02035           while (eptr >= pp)
02036             {
02037             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
02038             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02039             eptr--;
02040             }
02041           }
02042 
02043         RRETURN(MATCH_NOMATCH);
02044         }
02045       }
02046     /* Control never gets here */
02047 
02048 
02049     /* Match an extended character class. This opcode is encountered only
02050     in UTF-8 mode, because that's the only time it is compiled. */
02051 
02052 #ifdef SUPPORT_UTF8
02053     case OP_XCLASS:
02054       {
02055       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
02056       ecode += GET(ecode, 1);                      /* Advance past the item */
02057 
02058       switch (*ecode)
02059         {
02060         case OP_CRSTAR:
02061         case OP_CRMINSTAR:
02062         case OP_CRPLUS:
02063         case OP_CRMINPLUS:
02064         case OP_CRQUERY:
02065         case OP_CRMINQUERY:
02066         c = *ecode++ - OP_CRSTAR;
02067         minimize = (c & 1) != 0;
02068         min = rep_min[c];                 /* Pick up values from tables; */
02069         max = rep_max[c];                 /* zero for max => infinity */
02070         if (max == 0) max = INT_MAX;
02071         break;
02072 
02073         case OP_CRRANGE:
02074         case OP_CRMINRANGE:
02075         minimize = (*ecode == OP_CRMINRANGE);
02076         min = GET2(ecode, 1);
02077         max = GET2(ecode, 3);
02078         if (max == 0) max = INT_MAX;
02079         ecode += 5;
02080         break;
02081 
02082         default:               /* No repeat follows */
02083         min = max = 1;
02084         break;
02085         }
02086 
02087       /* First, ensure the minimum number of matches are present. */
02088 
02089       for (i = 1; i <= min; i++)
02090         {
02091         if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
02092         GETCHARINC(c, eptr);
02093         if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
02094         }
02095 
02096       /* If max == min we can continue with the main loop without the
02097       need to recurse. */
02098 
02099       if (min == max) continue;
02100 
02101       /* If minimizing, keep testing the rest of the expression and advancing
02102       the pointer while it matches the class. */
02103 
02104       if (minimize)
02105         {
02106         for (fi = min;; fi++)
02107           {
02108           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
02109           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02110           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
02111           GETCHARINC(c, eptr);
02112           if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
02113           }
02114         /* Control never gets here */
02115         }
02116 
02117       /* If maximizing, find the longest possible run, then work backwards. */
02118 
02119       else
02120         {
02121         pp = eptr;
02122         for (i = min; i < max; i++)
02123           {
02124           int len = 1;
02125           if (eptr >= md->end_subject) break;
02126           GETCHARLEN(c, eptr, len);
02127           if (!_pcre_xclass(c, data)) break;
02128           eptr += len;
02129           }
02130         for(;;)
02131           {
02132           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
02133           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02134           if (eptr-- == pp) break;        /* Stop if tried at original pos */
02135           if (utf8) BACKCHAR(eptr);
02136           }
02137         RRETURN(MATCH_NOMATCH);
02138         }
02139 
02140       /* Control never gets here */
02141       }
02142 #endif    /* End of XCLASS */
02143 
02144     /* Match a single character, casefully */
02145 
02146     case OP_CHAR:
02147 #ifdef SUPPORT_UTF8
02148     if (utf8)
02149       {
02150       length = 1;
02151       ecode++;
02152       GETCHARLEN(fc, ecode, length);
02153       if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
02154       while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
02155       }
02156     else
02157 #endif
02158 
02159     /* Non-UTF-8 mode */
02160       {
02161       if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
02162       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
02163       ecode += 2;
02164       }
02165     break;
02166 
02167     /* Match a single character, caselessly */
02168 
02169     case OP_CHARNC:
02170 #ifdef SUPPORT_UTF8
02171     if (utf8)
02172       {
02173       length = 1;
02174       ecode++;
02175       GETCHARLEN(fc, ecode, length);
02176 
02177       if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
02178 
02179       /* If the pattern character's value is < 128, we have only one byte, and
02180       can use the fast lookup table. */
02181 
02182       if (fc < 128)
02183         {
02184         if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
02185         }
02186 
02187       /* Otherwise we must pick up the subject character */
02188 
02189       else
02190         {
02191         unsigned int dc;
02192         GETCHARINC(dc, eptr);
02193         ecode += length;
02194 
02195         /* If we have Unicode property support, we can use it to test the other
02196         case of the character, if there is one. */
02197 
02198         if (fc != dc)
02199           {
02200 #ifdef SUPPORT_UCP
02201           if (dc != UCD_OTHERCASE(fc))
02202 #endif
02203             RRETURN(MATCH_NOMATCH);
02204           }
02205         }
02206       }
02207     else
02208 #endif   /* SUPPORT_UTF8 */
02209 
02210     /* Non-UTF-8 mode */
02211       {
02212       if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
02213       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
02214       ecode += 2;
02215       }
02216     break;
02217 
02218     /* Match a single character repeatedly. */
02219 
02220     case OP_EXACT:
02221     min = max = GET2(ecode, 1);
02222     ecode += 3;
02223     goto REPEATCHAR;
02224 
02225     case OP_POSUPTO:
02226     possessive = TRUE;
02227     /* Fall through */
02228 
02229     case OP_UPTO:
02230     case OP_MINUPTO:
02231     min = 0;
02232     max = GET2(ecode, 1);
02233     minimize = *ecode == OP_MINUPTO;
02234     ecode += 3;
02235     goto REPEATCHAR;
02236 
02237     case OP_POSSTAR:
02238     possessive = TRUE;
02239     min = 0;
02240     max = INT_MAX;
02241     ecode++;
02242     goto REPEATCHAR;
02243 
02244     case OP_POSPLUS:
02245     possessive = TRUE;
02246     min = 1;
02247     max = INT_MAX;
02248     ecode++;
02249     goto REPEATCHAR;
02250 
02251     case OP_POSQUERY:
02252     possessive = TRUE;
02253     min = 0;
02254     max = 1;
02255     ecode++;
02256     goto REPEATCHAR;
02257 
02258     case OP_STAR:
02259     case OP_MINSTAR:
02260     case OP_PLUS:
02261     case OP_MINPLUS:
02262     case OP_QUERY:
02263     case OP_MINQUERY:
02264     c = *ecode++ - OP_STAR;
02265     minimize = (c & 1) != 0;
02266     min = rep_min[c];                 /* Pick up values from tables; */
02267     max = rep_max[c];                 /* zero for max => infinity */
02268     if (max == 0) max = INT_MAX;
02269 
02270     /* Common code for all repeated single-character matches. We can give
02271     up quickly if there are fewer than the minimum number of characters left in
02272     the subject. */
02273 
02274     REPEATCHAR:
02275 #ifdef SUPPORT_UTF8
02276     if (utf8)
02277       {
02278       length = 1;
02279       charptr = ecode;
02280       GETCHARLEN(fc, ecode, length);
02281       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
02282       ecode += length;
02283 
02284       /* Handle multibyte character matching specially here. There is
02285       support for caseless matching if UCP support is present. */
02286 
02287       if (length > 1)
02288         {
02289 #ifdef SUPPORT_UCP
02290         unsigned int othercase;
02291         if ((ims & PCRE_CASELESS) != 0 &&
02292             (othercase = UCD_OTHERCASE(fc)) != fc)
02293           oclength = _pcre_ord2utf8(othercase, occhars);
02294         else oclength = 0;
02295 #endif  /* SUPPORT_UCP */
02296 
02297         for (i = 1; i <= min; i++)
02298           {
02299           if (memcmp(eptr, charptr, length) == 0) eptr += length;
02300 #ifdef SUPPORT_UCP
02301           /* Need braces because of following else */
02302           else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
02303           else
02304             {
02305             if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
02306             eptr += oclength;
02307             }
02308 #else   /* without SUPPORT_UCP */
02309           else { RRETURN(MATCH_NOMATCH); }
02310 #endif  /* SUPPORT_UCP */
02311           }
02312 
02313         if (min == max) continue;
02314 
02315         if (minimize)
02316           {
02317           for (fi = min;; fi++)
02318             {
02319             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
02320             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02321             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
02322             if (memcmp(eptr, charptr, length) == 0) eptr += length;
02323 #ifdef SUPPORT_UCP
02324             /* Need braces because of following else */
02325             else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
02326             else
02327               {
02328               if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
02329               eptr += oclength;
02330               }
02331 #else   /* without SUPPORT_UCP */
02332             else { RRETURN (MATCH_NOMATCH); }
02333 #endif  /* SUPPORT_UCP */
02334             }
02335           /* Control never gets here */
02336           }
02337 
02338         else  /* Maximize */
02339           {
02340           pp = eptr;
02341           for (i = min; i < max; i++)
02342             {
02343             if (eptr > md->end_subject - length) break;
02344             if (memcmp(eptr, charptr, length) == 0) eptr += length;
02345 #ifdef SUPPORT_UCP
02346             else if (oclength == 0) break;
02347             else
02348               {
02349               if (memcmp(eptr, occhars, oclength) != 0) break;
02350               eptr += oclength;
02351               }
02352 #else   /* without SUPPORT_UCP */
02353             else break;
02354 #endif  /* SUPPORT_UCP */
02355             }
02356 
02357           if (possessive) continue;
02358           for(;;)
02359            {
02360            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
02361            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02362            if (eptr == pp) RRETURN(MATCH_NOMATCH);
02363 #ifdef SUPPORT_UCP
02364            eptr--;
02365            BACKCHAR(eptr);
02366 #else   /* without SUPPORT_UCP */
02367            eptr -= length;
02368 #endif  /* SUPPORT_UCP */
02369            }
02370           }
02371         /* Control never gets here */
02372         }
02373 
02374       /* If the length of a UTF-8 character is 1, we fall through here, and
02375       obey the code as for non-UTF-8 characters below, though in this case the
02376       value of fc will always be < 128. */
02377       }
02378     else
02379 #endif  /* SUPPORT_UTF8 */
02380 
02381     /* When not in UTF-8 mode, load a single-byte character. */
02382       {
02383       if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
02384       fc = *ecode++;
02385       }
02386 
02387     /* The value of fc at this point is always less than 256, though we may or
02388     may not be in UTF-8 mode. The code is duplicated for the caseless and
02389     caseful cases, for speed, since matching characters is likely to be quite
02390     common. First, ensure the minimum number of matches are present. If min =
02391     max, continue at the same level without recursing. Otherwise, if
02392     minimizing, keep trying the rest of the expression and advancing one
02393     matching character if failing, up to the maximum. Alternatively, if
02394     maximizing, find the maximum number of characters and work backwards. */
02395 
02396     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
02397       max, eptr));
02398 
02399     if ((ims & PCRE_CASELESS) != 0)
02400       {
02401       fc = md->lcc[fc];
02402       for (i = 1; i <= min; i++)
02403         if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
02404       if (min == max) continue;
02405       if (minimize)
02406         {
02407         for (fi = min;; fi++)
02408           {
02409           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
02410           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02411           if (fi >= max || eptr >= md->end_subject ||
02412               fc != md->lcc[*eptr++])
02413             RRETURN(MATCH_NOMATCH);
02414           }
02415         /* Control never gets here */
02416         }
02417       else  /* Maximize */
02418         {
02419         pp = eptr;
02420         for (i = min; i < max; i++)
02421           {
02422           if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
02423           eptr++;
02424           }
02425         if (possessive) continue;
02426         while (eptr >= pp)
02427           {
02428           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
02429           eptr--;
02430           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02431           }
02432         RRETURN(MATCH_NOMATCH);
02433         }
02434       /* Control never gets here */
02435       }
02436 
02437     /* Caseful comparisons (includes all multi-byte characters) */
02438 
02439     else
02440       {
02441       for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
02442       if (min == max) continue;
02443       if (minimize)
02444         {
02445         for (fi = min;; fi++)
02446           {
02447           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
02448           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02449           if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
02450             RRETURN(MATCH_NOMATCH);
02451           }
02452         /* Control never gets here */
02453         }
02454       else  /* Maximize */
02455         {
02456         pp = eptr;
02457         for (i = min; i < max; i++)
02458           {
02459           if (eptr >= md->end_subject || fc != *eptr) break;
02460           eptr++;
02461           }
02462         if (possessive) continue;
02463         while (eptr >= pp)
02464           {
02465           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
02466           eptr--;
02467           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02468           }
02469         RRETURN(MATCH_NOMATCH);
02470         }
02471       }
02472     /* Control never gets here */
02473 
02474     /* Match a negated single one-byte character. The character we are
02475     checking can be multibyte. */
02476 
02477     case OP_NOT:
02478     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
02479     ecode++;
02480     GETCHARINCTEST(c, eptr);
02481     if ((ims & PCRE_CASELESS) != 0)
02482       {
02483 #ifdef SUPPORT_UTF8
02484       if (c < 256)
02485 #endif
02486       c = md->lcc[c];
02487       if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
02488       }
02489     else
02490       {
02491       if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
02492       }
02493     break;
02494 
02495     /* Match a negated single one-byte character repeatedly. This is almost a
02496     repeat of the code for a repeated single character, but I haven't found a
02497     nice way of commoning these up that doesn't require a test of the
02498     positive/negative option for each character match. Maybe that wouldn't add
02499     very much to the time taken, but character matching *is* what this is all
02500     about... */
02501 
02502     case OP_NOTEXACT:
02503     min = max = GET2(ecode, 1);
02504     ecode += 3;
02505     goto REPEATNOTCHAR;
02506 
02507     case OP_NOTUPTO:
02508     case OP_NOTMINUPTO:
02509     min = 0;
02510     max = GET2(ecode, 1);
02511     minimize = *ecode == OP_NOTMINUPTO;
02512     ecode += 3;
02513     goto REPEATNOTCHAR;
02514 
02515     case OP_NOTPOSSTAR:
02516     possessive = TRUE;
02517     min = 0;
02518     max = INT_MAX;
02519     ecode++;
02520     goto REPEATNOTCHAR;
02521 
02522     case OP_NOTPOSPLUS:
02523     possessive = TRUE;
02524     min = 1;
02525     max = INT_MAX;
02526     ecode++;
02527     goto REPEATNOTCHAR;
02528 
02529     case OP_NOTPOSQUERY:
02530     possessive = TRUE;
02531     min = 0;
02532     max = 1;
02533     ecode++;
02534     goto REPEATNOTCHAR;
02535 
02536     case OP_NOTPOSUPTO:
02537     possessive = TRUE;
02538     min = 0;
02539     max = GET2(ecode, 1);
02540     ecode += 3;
02541     goto REPEATNOTCHAR;
02542 
02543     case OP_NOTSTAR:
02544     case OP_NOTMINSTAR:
02545     case OP_NOTPLUS:
02546     case OP_NOTMINPLUS:
02547     case OP_NOTQUERY:
02548     case OP_NOTMINQUERY:
02549     c = *ecode++ - OP_NOTSTAR;
02550     minimize = (c & 1) != 0;
02551     min = rep_min[c];                 /* Pick up values from tables; */
02552     max = rep_max[c];                 /* zero for max => infinity */
02553     if (max == 0) max = INT_MAX;
02554 
02555     /* Common code for all repeated single-byte matches. We can give up quickly
02556     if there are fewer than the minimum number of bytes left in the
02557     subject. */
02558 
02559     REPEATNOTCHAR:
02560     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
02561     fc = *ecode++;
02562 
02563     /* The code is duplicated for the caseless and caseful cases, for speed,
02564     since matching characters is likely to be quite common. First, ensure the
02565     minimum number of matches are present. If min = max, continue at the same
02566     level without recursing. Otherwise, if minimizing, keep trying the rest of
02567     the expression and advancing one matching character if failing, up to the
02568     maximum. Alternatively, if maximizing, find the maximum number of
02569     characters and work backwards. */
02570 
02571     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
02572       max, eptr));
02573 
02574     if ((ims & PCRE_CASELESS) != 0)
02575       {
02576       fc = md->lcc[fc];
02577 
02578 #ifdef SUPPORT_UTF8
02579       /* UTF-8 mode */
02580       if (utf8)
02581         {
02582         register unsigned int d;
02583         for (i = 1; i <= min; i++)
02584           {
02585           GETCHARINC(d, eptr);
02586           if (d < 256) d = md->lcc[d];
02587           if (fc == d) RRETURN(MATCH_NOMATCH);
02588           }
02589         }
02590       else
02591 #endif
02592 
02593       /* Not UTF-8 mode */
02594         {
02595         for (i = 1; i <= min; i++)
02596           if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
02597         }
02598 
02599       if (min == max) continue;
02600 
02601       if (minimize)
02602         {
02603 #ifdef SUPPORT_UTF8
02604         /* UTF-8 mode */
02605         if (utf8)
02606           {
02607           register unsigned int d;
02608           for (fi = min;; fi++)
02609             {
02610             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
02611             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02612             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
02613             GETCHARINC(d, eptr);
02614             if (d < 256) d = md->lcc[d];
02615             if (fc == d) RRETURN(MATCH_NOMATCH);
02616 
02617             }
02618           }
02619         else
02620 #endif
02621         /* Not UTF-8 mode */
02622           {
02623           for (fi = min;; fi++)
02624             {
02625             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
02626             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02627             if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
02628               RRETURN(MATCH_NOMATCH);
02629             }
02630           }
02631         /* Control never gets here */
02632         }
02633 
02634       /* Maximize case */
02635 
02636       else
02637         {
02638         pp = eptr;
02639 
02640 #ifdef SUPPORT_UTF8
02641         /* UTF-8 mode */
02642         if (utf8)
02643           {
02644           register unsigned int d;
02645           for (i = min; i < max; i++)
02646             {
02647             int len = 1;
02648             if (eptr >= md->end_subject) break;
02649             GETCHARLEN(d, eptr, len);
02650             if (d < 256) d = md->lcc[d];
02651             if (fc == d) break;
02652             eptr += len;
02653             }
02654         if (possessive) continue;
02655         for(;;)
02656             {
02657             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
02658             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02659             if (eptr-- == pp) break;        /* Stop if tried at original pos */
02660             BACKCHAR(eptr);
02661             }
02662           }
02663         else
02664 #endif
02665         /* Not UTF-8 mode */
02666           {
02667           for (i = min; i < max; i++)
02668             {
02669             if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
02670             eptr++;
02671             }
02672           if (possessive) continue;
02673           while (eptr >= pp)
02674             {
02675             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
02676             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02677             eptr--;
02678             }
02679           }
02680 
02681         RRETURN(MATCH_NOMATCH);
02682         }
02683       /* Control never gets here */
02684       }
02685 
02686     /* Caseful comparisons */
02687 
02688     else
02689       {
02690 #ifdef SUPPORT_UTF8
02691       /* UTF-8 mode */
02692       if (utf8)
02693         {
02694         register unsigned int d;
02695         for (i = 1; i <= min; i++)
02696           {
02697           GETCHARINC(d, eptr);
02698           if (fc == d) RRETURN(MATCH_NOMATCH);
02699           }
02700         }
02701       else
02702 #endif
02703       /* Not UTF-8 mode */
02704         {
02705         for (i = 1; i <= min; i++)
02706           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
02707         }
02708 
02709       if (min == max) continue;
02710 
02711       if (minimize)
02712         {
02713 #ifdef SUPPORT_UTF8
02714         /* UTF-8 mode */
02715         if (utf8)
02716           {
02717           register unsigned int d;
02718           for (fi = min;; fi++)
02719             {
02720             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
02721             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02722             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
02723             GETCHARINC(d, eptr);
02724             if (fc == d) RRETURN(MATCH_NOMATCH);
02725             }
02726           }
02727         else
02728 #endif
02729         /* Not UTF-8 mode */
02730           {
02731           for (fi = min;; fi++)
02732             {
02733             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
02734             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02735             if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
02736               RRETURN(MATCH_NOMATCH);
02737             }
02738           }
02739         /* Control never gets here */
02740         }
02741 
02742       /* Maximize case */
02743 
02744       else
02745         {
02746         pp = eptr;
02747 
02748 #ifdef SUPPORT_UTF8
02749         /* UTF-8 mode */
02750         if (utf8)
02751           {
02752           register unsigned int d;
02753           for (i = min; i < max; i++)
02754             {
02755             int len = 1;
02756             if (eptr >= md->end_subject) break;
02757             GETCHARLEN(d, eptr, len);
02758             if (fc == d) break;
02759             eptr += len;
02760             }
02761           if (possessive) continue;
02762           for(;;)
02763             {
02764             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
02765             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02766             if (eptr-- == pp) break;        /* Stop if tried at original pos */
02767             BACKCHAR(eptr);
02768             }
02769           }
02770         else
02771 #endif
02772         /* Not UTF-8 mode */
02773           {
02774           for (i = min; i < max; i++)
02775             {
02776             if (eptr >= md->end_subject || fc == *eptr) break;
02777             eptr++;
02778             }
02779           if (possessive) continue;
02780           while (eptr >= pp)
02781             {
02782             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
02783             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02784             eptr--;
02785             }
02786           }
02787 
02788         RRETURN(MATCH_NOMATCH);
02789         }
02790       }
02791     /* Control never gets here */
02792 
02793     /* Match a single character type repeatedly; several different opcodes
02794     share code. This is very similar to the code for single characters, but we
02795     repeat it in the interests of efficiency. */
02796 
02797     case OP_TYPEEXACT:
02798     min = max = GET2(ecode, 1);
02799     minimize = TRUE;
02800     ecode += 3;
02801     goto REPEATTYPE;
02802 
02803     case OP_TYPEUPTO:
02804     case OP_TYPEMINUPTO:
02805     min = 0;
02806     max = GET2(ecode, 1);
02807     minimize = *ecode == OP_TYPEMINUPTO;
02808     ecode += 3;
02809     goto REPEATTYPE;
02810 
02811     case OP_TYPEPOSSTAR:
02812     possessive = TRUE;
02813     min = 0;
02814     max = INT_MAX;
02815     ecode++;
02816     goto REPEATTYPE;
02817 
02818     case OP_TYPEPOSPLUS:
02819     possessive = TRUE;
02820     min = 1;
02821     max = INT_MAX;
02822     ecode++;
02823     goto REPEATTYPE;
02824 
02825     case OP_TYPEPOSQUERY:
02826     possessive = TRUE;
02827     min = 0;
02828     max = 1;
02829     ecode++;
02830     goto REPEATTYPE;
02831 
02832     case OP_TYPEPOSUPTO:
02833     possessive = TRUE;
02834     min = 0;
02835     max = GET2(ecode, 1);
02836     ecode += 3;
02837     goto REPEATTYPE;
02838 
02839     case OP_TYPESTAR:
02840     case OP_TYPEMINSTAR:
02841     case OP_TYPEPLUS:
02842     case OP_TYPEMINPLUS:
02843     case OP_TYPEQUERY:
02844     case OP_TYPEMINQUERY:
02845     c = *ecode++ - OP_TYPESTAR;
02846     minimize = (c & 1) != 0;
02847     min = rep_min[c];                 /* Pick up values from tables; */
02848     max = rep_max[c];                 /* zero for max => infinity */
02849     if (max == 0) max = INT_MAX;
02850 
02851     /* Common code for all repeated single character type matches. Note that
02852     in UTF-8 mode, '.' matches a character of any length, but for the other
02853     character types, the valid characters are all one-byte long. */
02854 
02855     REPEATTYPE:
02856     ctype = *ecode++;      /* Code for the character type */
02857 
02858 #ifdef SUPPORT_UCP
02859     if (ctype == OP_PROP || ctype == OP_NOTPROP)
02860       {
02861       prop_fail_result = ctype == OP_NOTPROP;
02862       prop_type = *ecode++;
02863       prop_value = *ecode++;
02864       }
02865     else prop_type = -1;
02866 #endif
02867 
02868     /* First, ensure the minimum number of matches are present. Use inline
02869     code for maximizing the speed, and do the type test once at the start
02870     (i.e. keep it out of the loop). Also we can test that there are at least
02871     the minimum number of bytes before we start. This isn't as effective in
02872     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
02873     is tidier. Also separate the UCP code, which can be the same for both UTF-8
02874     and single-bytes. */
02875 
02876     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
02877     if (min > 0)
02878       {
02879 #ifdef SUPPORT_UCP
02880       if (prop_type >= 0)
02881         {
02882         switch(prop_type)
02883           {
02884           case PT_ANY:
02885           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
02886           for (i = 1; i <= min; i++)
02887             {
02888             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
02889             GETCHARINCTEST(c, eptr);
02890             }
02891           break;
02892 
02893           case PT_LAMP:
02894           for (i = 1; i <= min; i++)
02895             {
02896             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
02897             GETCHARINCTEST(c, eptr);
02898             prop_chartype = UCD_CHARTYPE(c);
02899             if ((prop_chartype == ucp_Lu ||
02900                  prop_chartype == ucp_Ll ||
02901                  prop_chartype == ucp_Lt) == prop_fail_result)
02902               RRETURN(MATCH_NOMATCH);
02903             }
02904           break;
02905 
02906           case PT_GC:
02907           for (i = 1; i <= min; i++)
02908             {
02909             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
02910             GETCHARINCTEST(c, eptr);
02911             prop_category = UCD_CATEGORY(c);
02912             if ((prop_category == prop_value) == prop_fail_result)
02913               RRETURN(MATCH_NOMATCH);
02914             }
02915           break;
02916 
02917           case PT_PC:
02918           for (i = 1; i <= min; i++)
02919             {
02920             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
02921             GETCHARINCTEST(c, eptr);
02922             prop_chartype = UCD_CHARTYPE(c);
02923             if ((prop_chartype == prop_value) == prop_fail_result)
02924               RRETURN(MATCH_NOMATCH);
02925             }
02926           break;
02927 
02928           case PT_SC:
02929           for (i = 1; i <= min; i++)
02930             {
02931             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
02932             GETCHARINCTEST(c, eptr);
02933             prop_script = UCD_SCRIPT(c);
02934             if ((prop_script == prop_value) == prop_fail_result)
02935               RRETURN(MATCH_NOMATCH);
02936             }
02937           break;
02938 
02939           default:
02940           RRETURN(PCRE_ERROR_INTERNAL);
02941           }
02942         }
02943 
02944       /* Match extended Unicode sequences. We will get here only if the
02945       support is in the binary; otherwise a compile-time error occurs. */
02946 
02947       else if (ctype == OP_EXTUNI)
02948         {
02949         for (i = 1; i <= min; i++)
02950           {
02951           GETCHARINCTEST(c, eptr);
02952           prop_category = UCD_CATEGORY(c);
02953           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
02954           while (eptr < md->end_subject)
02955             {
02956             int len = 1;
02957             if (!utf8) c = *eptr; else
02958               {
02959               GETCHARLEN(c, eptr, len);
02960               }
02961             prop_category = UCD_CATEGORY(c);
02962             if (prop_category != ucp_M) break;
02963             eptr += len;
02964             }
02965           }
02966         }
02967 
02968       else
02969 #endif     /* SUPPORT_UCP */
02970 
02971 /* Handle all other cases when the coding is UTF-8 */
02972 
02973 #ifdef SUPPORT_UTF8
02974       if (utf8) switch(ctype)
02975         {
02976         case OP_ANY:
02977         for (i = 1; i <= min; i++)
02978           {
02979           if (eptr >= md->end_subject || IS_NEWLINE(eptr))
02980             RRETURN(MATCH_NOMATCH);
02981           eptr++;
02982           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
02983           }
02984         break;
02985 
02986         case OP_ALLANY:
02987         for (i = 1; i <= min; i++)
02988           {
02989           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
02990           eptr++;
02991           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
02992           }
02993         break;
02994 
02995         case OP_ANYBYTE:
02996         eptr += min;
02997         break;
02998 
02999         case OP_ANYNL:
03000         for (i = 1; i <= min; i++)
03001           {
03002           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03003           GETCHARINC(c, eptr);
03004           switch(c)
03005             {
03006             default: RRETURN(MATCH_NOMATCH);
03007             case 0x000d:
03008             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
03009             break;
03010 
03011             case 0x000a:
03012             break;
03013 
03014             case 0x000b:
03015             case 0x000c:
03016             case 0x0085:
03017             case 0x2028:
03018             case 0x2029:
03019             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
03020             break;
03021             }
03022           }
03023         break;
03024 
03025         case OP_NOT_HSPACE:
03026         for (i = 1; i <= min; i++)
03027           {
03028           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03029           GETCHARINC(c, eptr);
03030           switch(c)
03031             {
03032             default: break;
03033             case 0x09:      /* HT */
03034             case 0x20:      /* SPACE */
03035             case 0xa0:      /* NBSP */
03036             case 0x1680:    /* OGHAM SPACE MARK */
03037             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
03038             case 0x2000:    /* EN QUAD */
03039             case 0x2001:    /* EM QUAD */
03040             case 0x2002:    /* EN SPACE */
03041             case 0x2003:    /* EM SPACE */
03042             case 0x2004:    /* THREE-PER-EM SPACE */
03043             case 0x2005:    /* FOUR-PER-EM SPACE */
03044             case 0x2006:    /* SIX-PER-EM SPACE */
03045             case 0x2007:    /* FIGURE SPACE */
03046             case 0x2008:    /* PUNCTUATION SPACE */
03047             case 0x2009:    /* THIN SPACE */
03048             case 0x200A:    /* HAIR SPACE */
03049             case 0x202f:    /* NARROW NO-BREAK SPACE */
03050             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
03051             case 0x3000:    /* IDEOGRAPHIC SPACE */
03052             RRETURN(MATCH_NOMATCH);
03053             }
03054           }
03055         break;
03056 
03057         case OP_HSPACE:
03058         for (i = 1; i <= min; i++)
03059           {
03060           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03061           GETCHARINC(c, eptr);
03062           switch(c)
03063             {
03064             default: RRETURN(MATCH_NOMATCH);
03065             case 0x09:      /* HT */
03066             case 0x20:      /* SPACE */
03067             case 0xa0:      /* NBSP */
03068             case 0x1680:    /* OGHAM SPACE MARK */
03069             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
03070             case 0x2000:    /* EN QUAD */
03071             case 0x2001:    /* EM QUAD */
03072             case 0x2002:    /* EN SPACE */
03073             case 0x2003:    /* EM SPACE */
03074             case 0x2004:    /* THREE-PER-EM SPACE */
03075             case 0x2005:    /* FOUR-PER-EM SPACE */
03076             case 0x2006:    /* SIX-PER-EM SPACE */
03077             case 0x2007:    /* FIGURE SPACE */
03078             case 0x2008:    /* PUNCTUATION SPACE */
03079             case 0x2009:    /* THIN SPACE */
03080             case 0x200A:    /* HAIR SPACE */
03081             case 0x202f:    /* NARROW NO-BREAK SPACE */
03082             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
03083             case 0x3000:    /* IDEOGRAPHIC SPACE */
03084             break;
03085             }
03086           }
03087         break;
03088 
03089         case OP_NOT_VSPACE:
03090         for (i = 1; i <= min; i++)
03091           {
03092           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03093           GETCHARINC(c, eptr);
03094           switch(c)
03095             {
03096             default: break;
03097             case 0x0a:      /* LF */
03098             case 0x0b:      /* VT */
03099             case 0x0c:      /* FF */
03100             case 0x0d:      /* CR */
03101             case 0x85:      /* NEL */
03102             case 0x2028:    /* LINE SEPARATOR */
03103             case 0x2029:    /* PARAGRAPH SEPARATOR */
03104             RRETURN(MATCH_NOMATCH);
03105             }
03106           }
03107         break;
03108 
03109         case OP_VSPACE:
03110         for (i = 1; i <= min; i++)
03111           {
03112           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03113           GETCHARINC(c, eptr);
03114           switch(c)
03115             {
03116             default: RRETURN(MATCH_NOMATCH);
03117             case 0x0a:      /* LF */
03118             case 0x0b:      /* VT */
03119             case 0x0c:      /* FF */
03120             case 0x0d:      /* CR */
03121             case 0x85:      /* NEL */
03122             case 0x2028:    /* LINE SEPARATOR */
03123             case 0x2029:    /* PARAGRAPH SEPARATOR */
03124             break;
03125             }
03126           }
03127         break;
03128 
03129         case OP_NOT_DIGIT:
03130         for (i = 1; i <= min; i++)
03131           {
03132           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03133           GETCHARINC(c, eptr);
03134           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
03135             RRETURN(MATCH_NOMATCH);
03136           }
03137         break;
03138 
03139         case OP_DIGIT:
03140         for (i = 1; i <= min; i++)
03141           {
03142           if (eptr >= md->end_subject ||
03143              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
03144             RRETURN(MATCH_NOMATCH);
03145           /* No need to skip more bytes - we know it's a 1-byte character */
03146           }
03147         break;
03148 
03149         case OP_NOT_WHITESPACE:
03150         for (i = 1; i <= min; i++)
03151           {
03152           if (eptr >= md->end_subject ||
03153              (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0))
03154             RRETURN(MATCH_NOMATCH);
03155           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
03156           }
03157         break;
03158 
03159         case OP_WHITESPACE:
03160         for (i = 1; i <= min; i++)
03161           {
03162           if (eptr >= md->end_subject ||
03163              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
03164             RRETURN(MATCH_NOMATCH);
03165           /* No need to skip more bytes - we know it's a 1-byte character */
03166           }
03167         break;
03168 
03169         case OP_NOT_WORDCHAR:
03170         for (i = 1; i <= min; i++)
03171           {
03172           if (eptr >= md->end_subject ||
03173              (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
03174             RRETURN(MATCH_NOMATCH);
03175           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
03176           }
03177         break;
03178 
03179         case OP_WORDCHAR:
03180         for (i = 1; i <= min; i++)
03181           {
03182           if (eptr >= md->end_subject ||
03183              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
03184             RRETURN(MATCH_NOMATCH);
03185           /* No need to skip more bytes - we know it's a 1-byte character */
03186           }
03187         break;
03188 
03189         default:
03190         RRETURN(PCRE_ERROR_INTERNAL);
03191         }  /* End switch(ctype) */
03192 
03193       else
03194 #endif     /* SUPPORT_UTF8 */
03195 
03196       /* Code for the non-UTF-8 case for minimum matching of operators other
03197       than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
03198       number of bytes present, as this was tested above. */
03199 
03200       switch(ctype)
03201         {
03202         case OP_ANY:
03203         for (i = 1; i <= min; i++)
03204           {
03205           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
03206           eptr++;
03207           }
03208         break;
03209 
03210         case OP_ALLANY:
03211         eptr += min;
03212         break;
03213 
03214         case OP_ANYBYTE:
03215         eptr += min;
03216         break;
03217 
03218         /* Because of the CRLF case, we can't assume the minimum number of
03219         bytes are present in this case. */
03220 
03221         case OP_ANYNL:
03222         for (i = 1; i <= min; i++)
03223           {
03224           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03225           switch(*eptr++)
03226             {
03227             default: RRETURN(MATCH_NOMATCH);
03228             case 0x000d:
03229             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
03230             break;
03231             case 0x000a:
03232             break;
03233 
03234             case 0x000b:
03235             case 0x000c:
03236             case 0x0085:
03237             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
03238             break;
03239             }
03240           }
03241         break;
03242 
03243         case OP_NOT_HSPACE:
03244         for (i = 1; i <= min; i++)
03245           {
03246           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03247           switch(*eptr++)
03248             {
03249             default: break;
03250             case 0x09:      /* HT */
03251             case 0x20:      /* SPACE */
03252             case 0xa0:      /* NBSP */
03253             RRETURN(MATCH_NOMATCH);
03254             }
03255           }
03256         break;
03257 
03258         case OP_HSPACE:
03259         for (i = 1; i <= min; i++)
03260           {
03261           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03262           switch(*eptr++)
03263             {
03264             default: RRETURN(MATCH_NOMATCH);
03265             case 0x09:      /* HT */
03266             case 0x20:      /* SPACE */
03267             case 0xa0:      /* NBSP */
03268             break;
03269             }
03270           }
03271         break;
03272 
03273         case OP_NOT_VSPACE:
03274         for (i = 1; i <= min; i++)
03275           {
03276           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03277           switch(*eptr++)
03278             {
03279             default: break;
03280             case 0x0a:      /* LF */
03281             case 0x0b:      /* VT */
03282             case 0x0c:      /* FF */
03283             case 0x0d:      /* CR */
03284             case 0x85:      /* NEL */
03285             RRETURN(MATCH_NOMATCH);
03286             }
03287           }
03288         break;
03289 
03290         case OP_VSPACE:
03291         for (i = 1; i <= min; i++)
03292           {
03293           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03294           switch(*eptr++)
03295             {
03296             default: RRETURN(MATCH_NOMATCH);
03297             case 0x0a:      /* LF */
03298             case 0x0b:      /* VT */
03299             case 0x0c:      /* FF */
03300             case 0x0d:      /* CR */
03301             case 0x85:      /* NEL */
03302             break;
03303             }
03304           }
03305         break;
03306 
03307         case OP_NOT_DIGIT:
03308         for (i = 1; i <= min; i++)
03309           if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
03310         break;
03311 
03312         case OP_DIGIT:
03313         for (i = 1; i <= min; i++)
03314           if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
03315         break;
03316 
03317         case OP_NOT_WHITESPACE:
03318         for (i = 1; i <= min; i++)
03319           if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
03320         break;
03321 
03322         case OP_WHITESPACE:
03323         for (i = 1; i <= min; i++)
03324           if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
03325         break;
03326 
03327         case OP_NOT_WORDCHAR:
03328         for (i = 1; i <= min; i++)
03329           if ((md->ctypes[*eptr++] & ctype_word) != 0)
03330             RRETURN(MATCH_NOMATCH);
03331         break;
03332 
03333         case OP_WORDCHAR:
03334         for (i = 1; i <= min; i++)
03335           if ((md->ctypes[*eptr++] & ctype_word) == 0)
03336             RRETURN(MATCH_NOMATCH);
03337         break;
03338 
03339         default:
03340         RRETURN(PCRE_ERROR_INTERNAL);
03341         }
03342       }
03343 
03344     /* If min = max, continue at the same level without recursing */
03345 
03346     if (min == max) continue;
03347 
03348     /* If minimizing, we have to test the rest of the pattern before each
03349     subsequent match. Again, separate the UTF-8 case for speed, and also
03350     separate the UCP cases. */
03351 
03352     if (minimize)
03353       {
03354 #ifdef SUPPORT_UCP
03355       if (prop_type >= 0)
03356         {
03357         switch(prop_type)
03358           {
03359           case PT_ANY:
03360           for (fi = min;; fi++)
03361             {
03362             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
03363             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03364             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03365             GETCHARINC(c, eptr);
03366             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
03367             }
03368           /* Control never gets here */
03369 
03370           case PT_LAMP:
03371           for (fi = min;; fi++)
03372             {
03373             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
03374             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03375             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03376             GETCHARINC(c, eptr);
03377             prop_chartype = UCD_CHARTYPE(c);
03378             if ((prop_chartype == ucp_Lu ||
03379                  prop_chartype == ucp_Ll ||
03380                  prop_chartype == ucp_Lt) == prop_fail_result)
03381               RRETURN(MATCH_NOMATCH);
03382             }
03383           /* Control never gets here */
03384 
03385           case PT_GC:
03386           for (fi = min;; fi++)
03387             {
03388             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
03389             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03390             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03391             GETCHARINC(c, eptr);
03392             prop_category = UCD_CATEGORY(c);
03393             if ((prop_category == prop_value) == prop_fail_result)
03394               RRETURN(MATCH_NOMATCH);
03395             }
03396           /* Control never gets here */
03397 
03398           case PT_PC:
03399           for (fi = min;; fi++)
03400             {
03401             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
03402             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03403             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03404             GETCHARINC(c, eptr);
03405             prop_chartype = UCD_CHARTYPE(c);
03406             if ((prop_chartype == prop_value) == prop_fail_result)
03407               RRETURN(MATCH_NOMATCH);
03408             }
03409           /* Control never gets here */
03410 
03411           case PT_SC:
03412           for (fi = min;; fi++)
03413             {
03414             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
03415             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03416             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03417             GETCHARINC(c, eptr);
03418             prop_script = UCD_SCRIPT(c);
03419             if ((prop_script == prop_value) == prop_fail_result)
03420               RRETURN(MATCH_NOMATCH);
03421             }
03422           /* Control never gets here */
03423 
03424           default:
03425           RRETURN(PCRE_ERROR_INTERNAL);
03426           }
03427         }
03428 
03429       /* Match extended Unicode sequences. We will get here only if the
03430       support is in the binary; otherwise a compile-time error occurs. */
03431 
03432       else if (ctype == OP_EXTUNI)
03433         {
03434         for (fi = min;; fi++)
03435           {
03436           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
03437           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03438           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03439           GETCHARINCTEST(c, eptr);
03440           prop_category = UCD_CATEGORY(c);
03441           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
03442           while (eptr < md->end_subject)
03443             {
03444             int len = 1;
03445             if (!utf8) c = *eptr; else
03446               {
03447               GETCHARLEN(c, eptr, len);
03448               }
03449             prop_category = UCD_CATEGORY(c);
03450             if (prop_category != ucp_M) break;
03451             eptr += len;
03452             }
03453           }
03454         }
03455 
03456       else
03457 #endif     /* SUPPORT_UCP */
03458 
03459 #ifdef SUPPORT_UTF8
03460       /* UTF-8 mode */
03461       if (utf8)
03462         {
03463         for (fi = min;; fi++)
03464           {
03465           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
03466           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03467           if (fi >= max || eptr >= md->end_subject ||
03468                (ctype == OP_ANY && IS_NEWLINE(eptr)))
03469             RRETURN(MATCH_NOMATCH);
03470 
03471           GETCHARINC(c, eptr);
03472           switch(ctype)
03473             {
03474             case OP_ANY:        /* This is the non-NL case */
03475             case OP_ALLANY:
03476             case OP_ANYBYTE:
03477             break;
03478 
03479             case OP_ANYNL:
03480             switch(c)
03481               {
03482               default: RRETURN(MATCH_NOMATCH);
03483               case 0x000d:
03484               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
03485               break;
03486               case 0x000a:
03487               break;
03488 
03489               case 0x000b:
03490               case 0x000c:
03491               case 0x0085:
03492               case 0x2028:
03493               case 0x2029:
03494               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
03495               break;
03496               }
03497             break;
03498 
03499             case OP_NOT_HSPACE:
03500             switch(c)
03501               {
03502               default: break;
03503               case 0x09:      /* HT */
03504               case 0x20:      /* SPACE */
03505               case 0xa0:      /* NBSP */
03506               case 0x1680:    /* OGHAM SPACE MARK */
03507               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
03508               case 0x2000:    /* EN QUAD */
03509               case 0x2001:    /* EM QUAD */
03510               case 0x2002:    /* EN SPACE */
03511               case 0x2003:    /* EM SPACE */
03512               case 0x2004:    /* THREE-PER-EM SPACE */
03513               case 0x2005:    /* FOUR-PER-EM SPACE */
03514               case 0x2006:    /* SIX-PER-EM SPACE */
03515               case 0x2007:    /* FIGURE SPACE */
03516               case 0x2008:    /* PUNCTUATION SPACE */
03517               case 0x2009:    /* THIN SPACE */
03518               case 0x200A:    /* HAIR SPACE */
03519               case 0x202f:    /* NARROW NO-BREAK SPACE */
03520               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
03521               case 0x3000:    /* IDEOGRAPHIC SPACE */
03522               RRETURN(MATCH_NOMATCH);
03523               }
03524             break;
03525 
03526             case OP_HSPACE:
03527             switch(c)
03528               {
03529               default: RRETURN(MATCH_NOMATCH);
03530               case 0x09:      /* HT */
03531               case 0x20:      /* SPACE */
03532               case 0xa0:      /* NBSP */
03533               case 0x1680:    /* OGHAM SPACE MARK */
03534               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
03535               case 0x2000:    /* EN QUAD */
03536               case 0x2001:    /* EM QUAD */
03537               case 0x2002:    /* EN SPACE */
03538               case 0x2003:    /* EM SPACE */
03539               case 0x2004:    /* THREE-PER-EM SPACE */
03540               case 0x2005:    /* FOUR-PER-EM SPACE */
03541               case 0x2006:    /* SIX-PER-EM SPACE */
03542               case 0x2007:    /* FIGURE SPACE */
03543               case 0x2008:    /* PUNCTUATION SPACE */
03544               case 0x2009:    /* THIN SPACE */
03545               case 0x200A:    /* HAIR SPACE */
03546               case 0x202f:    /* NARROW NO-BREAK SPACE */
03547               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
03548               case 0x3000:    /* IDEOGRAPHIC SPACE */
03549               break;
03550               }
03551             break;
03552 
03553             case OP_NOT_VSPACE:
03554             switch(c)
03555               {
03556               default: break;
03557               case 0x0a:      /* LF */
03558               case 0x0b:      /* VT */
03559               case 0x0c:      /* FF */
03560               case 0x0d:      /* CR */
03561               case 0x85:      /* NEL */
03562               case 0x2028:    /* LINE SEPARATOR */
03563               case 0x2029:    /* PARAGRAPH SEPARATOR */
03564               RRETURN(MATCH_NOMATCH);
03565               }
03566             break;
03567 
03568             case OP_VSPACE:
03569             switch(c)
03570               {
03571               default: RRETURN(MATCH_NOMATCH);
03572               case 0x0a:      /* LF */
03573               case 0x0b:      /* VT */
03574               case 0x0c:      /* FF */
03575               case 0x0d:      /* CR */
03576               case 0x85:      /* NEL */
03577               case 0x2028:    /* LINE SEPARATOR */
03578               case 0x2029:    /* PARAGRAPH SEPARATOR */
03579               break;
03580               }
03581             break;
03582 
03583             case OP_NOT_DIGIT:
03584             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
03585               RRETURN(MATCH_NOMATCH);
03586             break;
03587 
03588             case OP_DIGIT:
03589             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
03590               RRETURN(MATCH_NOMATCH);
03591             break;
03592 
03593             case OP_NOT_WHITESPACE:
03594             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
03595               RRETURN(MATCH_NOMATCH);
03596             break;
03597 
03598             case OP_WHITESPACE:
03599             if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
03600               RRETURN(MATCH_NOMATCH);
03601             break;
03602 
03603             case OP_NOT_WORDCHAR:
03604             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
03605               RRETURN(MATCH_NOMATCH);
03606             break;
03607 
03608             case OP_WORDCHAR:
03609             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
03610               RRETURN(MATCH_NOMATCH);
03611             break;
03612 
03613             default:
03614             RRETURN(PCRE_ERROR_INTERNAL);
03615             }
03616           }
03617         }
03618       else
03619 #endif
03620       /* Not UTF-8 mode */
03621         {
03622         for (fi = min;; fi++)
03623           {
03624           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
03625           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03626           if (fi >= max || eptr >= md->end_subject ||
03627                (ctype == OP_ANY && IS_NEWLINE(eptr)))
03628             RRETURN(MATCH_NOMATCH);
03629 
03630           c = *eptr++;
03631           switch(ctype)
03632             {
03633             case OP_ANY:     /* This is the non-NL case */
03634             case OP_ALLANY:
03635             case OP_ANYBYTE:
03636             break;
03637 
03638             case OP_ANYNL:
03639             switch(c)
03640               {
03641               default: RRETURN(MATCH_NOMATCH);
03642               case 0x000d:
03643               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
03644               break;
03645 
03646               case 0x000a:
03647               break;
03648 
03649               case 0x000b:
03650               case 0x000c:
03651               case 0x0085:
03652               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
03653               break;
03654               }
03655             break;
03656 
03657             case OP_NOT_HSPACE:
03658             switch(c)
03659               {
03660               default: break;
03661               case 0x09:      /* HT */
03662               case 0x20:      /* SPACE */
03663               case 0xa0:      /* NBSP */
03664               RRETURN(MATCH_NOMATCH);
03665               }
03666             break;
03667 
03668             case OP_HSPACE:
03669             switch(c)
03670               {
03671               default: RRETURN(MATCH_NOMATCH);
03672               case 0x09:      /* HT */
03673               case 0x20:      /* SPACE */
03674               case 0xa0:      /* NBSP */
03675               break;
03676               }
03677             break;
03678 
03679             case OP_NOT_VSPACE:
03680             switch(c)
03681               {
03682               default: break;
03683               case 0x0a:      /* LF */
03684               case 0x0b:      /* VT */
03685               case 0x0c:      /* FF */
03686               case 0x0d:      /* CR */
03687               case 0x85:      /* NEL */
03688               RRETURN(MATCH_NOMATCH);
03689               }
03690             break;
03691 
03692             case OP_VSPACE:
03693             switch(c)
03694               {
03695               default: RRETURN(MATCH_NOMATCH);
03696               case 0x0a:      /* LF */
03697               case 0x0b:      /* VT */
03698               case 0x0c:      /* FF */
03699               case 0x0d:      /* CR */
03700               case 0x85:      /* NEL */
03701               break;
03702               }
03703             break;
03704 
03705             case OP_NOT_DIGIT:
03706             if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
03707             break;
03708 
03709             case OP_DIGIT:
03710             if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
03711             break;
03712 
03713             case OP_NOT_WHITESPACE:
03714             if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
03715             break;
03716 
03717             case OP_WHITESPACE:
03718             if  ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
03719             break;
03720 
03721             case OP_NOT_WORDCHAR:
03722             if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
03723             break;
03724 
03725             case OP_WORDCHAR:
03726             if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
03727             break;
03728 
03729             default:
03730             RRETURN(PCRE_ERROR_INTERNAL);
03731             }
03732           }
03733         }
03734       /* Control never gets here */
03735       }
03736 
03737     /* If maximizing, it is worth using inline code for speed, doing the type
03738     test once at the start (i.e. keep it out of the loop). Again, keep the
03739     UTF-8 and UCP stuff separate. */
03740 
03741     else
03742       {
03743       pp = eptr;  /* Remember where we started */
03744 
03745 #ifdef SUPPORT_UCP
03746       if (prop_type >= 0)
03747         {
03748         switch(prop_type)
03749           {
03750           case PT_ANY:
03751           for (i = min; i < max; i++)
03752             {
03753             int len = 1;
03754             if (eptr >= md->end_subject) break;
03755             GETCHARLEN(c, eptr, len);
03756             if (prop_fail_result) break;
03757             eptr+= len;
03758             }
03759           break;
03760 
03761           case PT_LAMP:
03762           for (i = min; i < max; i++)
03763             {
03764             int len = 1;
03765             if (eptr >= md->end_subject) break;
03766             GETCHARLEN(c, eptr, len);
03767             prop_chartype = UCD_CHARTYPE(c);
03768             if ((prop_chartype == ucp_Lu ||
03769                  prop_chartype == ucp_Ll ||
03770                  prop_chartype == ucp_Lt) == prop_fail_result)
03771               break;
03772             eptr+= len;
03773             }
03774           break;
03775 
03776           case PT_GC:
03777           for (i = min; i < max; i++)
03778             {
03779             int len = 1;
03780             if (eptr >= md->end_subject) break;
03781             GETCHARLEN(c, eptr, len);
03782             prop_category = UCD_CATEGORY(c);
03783             if ((prop_category == prop_value) == prop_fail_result)
03784               break;
03785             eptr+= len;
03786             }
03787           break;
03788 
03789           case PT_PC:
03790           for (i = min; i < max; i++)
03791             {
03792             int len = 1;
03793             if (eptr >= md->end_subject) break;
03794             GETCHARLEN(c, eptr, len);
03795             prop_chartype = UCD_CHARTYPE(c);
03796             if ((prop_chartype == prop_value) == prop_fail_result)
03797               break;
03798             eptr+= len;
03799             }
03800           break;
03801 
03802           case PT_SC:
03803           for (i = min; i < max; i++)
03804             {
03805             int len = 1;
03806             if (eptr >= md->end_subject) break;
03807             GETCHARLEN(c, eptr, len);
03808             prop_script = UCD_SCRIPT(c);
03809             if ((prop_script == prop_value) == prop_fail_result)
03810               break;
03811             eptr+= len;
03812             }
03813           break;
03814           }
03815 
03816         /* eptr is now past the end of the maximum run */
03817 
03818         if (possessive) continue;
03819         for(;;)
03820           {
03821           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
03822           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03823           if (eptr-- == pp) break;        /* Stop if tried at original pos */
03824           if (utf8) BACKCHAR(eptr);
03825           }
03826         }
03827 
03828       /* Match extended Unicode sequences. We will get here only if the
03829       support is in the binary; otherwise a compile-time error occurs. */
03830 
03831       else if (ctype == OP_EXTUNI)
03832         {
03833         for (i = min; i < max; i++)
03834           {
03835           if (eptr >= md->end_subject) break;
03836           GETCHARINCTEST(c, eptr);
03837           prop_category = UCD_CATEGORY(c);
03838           if (prop_category == ucp_M) break;
03839           while (eptr < md->end_subject)
03840             {
03841             int len = 1;
03842             if (!utf8) c = *eptr; else
03843               {
03844               GETCHARLEN(c, eptr, len);
03845               }
03846             prop_category = UCD_CATEGORY(c);
03847             if (prop_category != ucp_M) break;
03848             eptr += len;
03849             }
03850           }
03851 
03852         /* eptr is now past the end of the maximum run */
03853 
03854         if (possessive) continue;
03855         for(;;)
03856           {
03857           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
03858           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03859           if (eptr-- == pp) break;        /* Stop if tried at original pos */
03860           for (;;)                        /* Move back over one extended */
03861             {
03862             int len = 1;
03863             if (!utf8) c = *eptr; else
03864               {
03865               BACKCHAR(eptr);
03866               GETCHARLEN(c, eptr, len);
03867               }
03868             prop_category = UCD_CATEGORY(c);
03869             if (prop_category != ucp_M) break;
03870             eptr--;
03871             }
03872           }
03873         }
03874 
03875       else
03876 #endif   /* SUPPORT_UCP */
03877 
03878 #ifdef SUPPORT_UTF8
03879       /* UTF-8 mode */
03880 
03881       if (utf8)
03882         {
03883         switch(ctype)
03884           {
03885           case OP_ANY:
03886           if (max < INT_MAX)
03887             {
03888             for (i = min; i < max; i++)
03889               {
03890               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
03891               eptr++;
03892               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
03893               }
03894             }
03895 
03896           /* Handle unlimited UTF-8 repeat */
03897 
03898           else
03899             {
03900             for (i = min; i < max; i++)
03901               {
03902               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
03903               eptr++;
03904               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
03905               }
03906             }
03907           break;
03908 
03909           case OP_ALLANY:
03910           if (max < INT_MAX)
03911             {
03912             for (i = min; i < max; i++)
03913               {
03914               if (eptr >= md->end_subject) break;
03915               eptr++;
03916               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
03917               }
03918             }
03919           else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
03920           break;
03921 
03922           /* The byte case is the same as non-UTF8 */
03923 
03924           case OP_ANYBYTE:
03925           c = max - min;
03926           if (c > (unsigned int)(md->end_subject - eptr))
03927             c = md->end_subject - eptr;
03928           eptr += c;
03929           break;
03930 
03931           case OP_ANYNL:
03932           for (i = min; i < max; i++)
03933             {
03934             int len = 1;
03935             if (eptr >= md->end_subject) break;
03936             GETCHARLEN(c, eptr, len);
03937             if (c == 0x000d)
03938               {
03939               if (++eptr >= md->end_subject) break;
03940               if (*eptr == 0x000a) eptr++;
03941               }
03942             else
03943               {
03944               if (c != 0x000a &&
03945                   (md->bsr_anycrlf ||
03946                    (c != 0x000b && c != 0x000c &&
03947                     c != 0x0085 && c != 0x2028 && c != 0x2029)))
03948                 break;
03949               eptr += len;
03950               }
03951             }
03952           break;
03953 
03954           case OP_NOT_HSPACE:
03955           case OP_HSPACE:
03956           for (i = min; i < max; i++)
03957             {
03958             BOOL gotspace;
03959             int len = 1;
03960             if (eptr >= md->end_subject) break;
03961             GETCHARLEN(c, eptr, len);
03962             switch(c)
03963               {
03964               default: gotspace = FALSE; break;
03965               case 0x09:      /* HT */
03966               case 0x20:      /* SPACE */
03967               case 0xa0:      /* NBSP */
03968               case 0x1680:    /* OGHAM SPACE MARK */
03969               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
03970               case 0x2000:    /* EN QUAD */
03971               case 0x2001:    /* EM QUAD */
03972               case 0x2002:    /* EN SPACE */
03973               case 0x2003:    /* EM SPACE */
03974               case 0x2004:    /* THREE-PER-EM SPACE */
03975               case 0x2005:    /* FOUR-PER-EM SPACE */
03976               case 0x2006:    /* SIX-PER-EM SPACE */
03977               case 0x2007:    /* FIGURE SPACE */
03978               case 0x2008:    /* PUNCTUATION SPACE */
03979               case 0x2009:    /* THIN SPACE */
03980               case 0x200A:    /* HAIR SPACE */
03981               case 0x202f:    /* NARROW NO-BREAK SPACE */
03982               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
03983               case 0x3000:    /* IDEOGRAPHIC SPACE */
03984               gotspace = TRUE;
03985               break;
03986               }
03987             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
03988             eptr += len;
03989             }
03990           break;
03991 
03992           case OP_NOT_VSPACE:
03993           case OP_VSPACE:
03994           for (i = min; i < max; i++)
03995             {
03996             BOOL gotspace;
03997             int len = 1;
03998             if (eptr >= md->end_subject) break;
03999             GETCHARLEN(c, eptr, len);
04000             switch(c)
04001               {
04002               default: gotspace = FALSE; break;
04003               case 0x0a:      /* LF */
04004               case 0x0b:      /* VT */
04005               case 0x0c:      /* FF */
04006               case 0x0d:      /* CR */
04007               case 0x85:      /* NEL */
04008               case 0x2028:    /* LINE SEPARATOR */
04009               case 0x2029:    /* PARAGRAPH SEPARATOR */
04010               gotspace = TRUE;
04011               break;
04012               }
04013             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
04014             eptr += len;
04015             }
04016           break;
04017 
04018           case OP_NOT_DIGIT:
04019           for (i = min; i < max; i++)
04020             {
04021             int len = 1;
04022             if (eptr >= md->end_subject) break;
04023             GETCHARLEN(c, eptr, len);
04024             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
04025             eptr+= len;
04026             }
04027           break;
04028 
04029           case OP_DIGIT:
04030           for (i = min; i < max; i++)
04031             {
04032             int len = 1;
04033             if (eptr >= md->end_subject) break;
04034             GETCHARLEN(c, eptr, len);
04035             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
04036             eptr+= len;
04037             }
04038           break;
04039 
04040           case OP_NOT_WHITESPACE:
04041           for (i = min; i < max; i++)
04042             {
04043             int len = 1;
04044             if (eptr >= md->end_subject) break;
04045             GETCHARLEN(c, eptr, len);
04046             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
04047             eptr+= len;
04048             }
04049           break;
04050 
04051           case OP_WHITESPACE:
04052           for (i = min; i < max; i++)
04053             {
04054             int len = 1;
04055             if (eptr >= md->end_subject) break;
04056             GETCHARLEN(c, eptr, len);
04057             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
04058             eptr+= len;
04059             }
04060           break;
04061 
04062           case OP_NOT_WORDCHAR:
04063           for (i = min; i < max; i++)
04064             {
04065             int len = 1;
04066             if (eptr >= md->end_subject) break;
04067             GETCHARLEN(c, eptr, len);
04068             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
04069             eptr+= len;
04070             }
04071           break;
04072 
04073           case OP_WORDCHAR:
04074           for (i = min; i < max; i++)
04075             {
04076             int len = 1;
04077             if (eptr >= md->end_subject) break;
04078             GETCHARLEN(c, eptr, len);
04079             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
04080             eptr+= len;
04081             }
04082           break;
04083 
04084           default:
04085           RRETURN(PCRE_ERROR_INTERNAL);
04086           }
04087 
04088         /* eptr is now past the end of the maximum run */
04089 
04090         if (possessive) continue;
04091         for(;;)
04092           {
04093           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
04094           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
04095           if (eptr-- == pp) break;        /* Stop if tried at original pos */
04096           BACKCHAR(eptr);
04097           }
04098         }
04099       else
04100 #endif  /* SUPPORT_UTF8 */
04101 
04102       /* Not UTF-8 mode */
04103         {
04104         switch(ctype)
04105           {
04106           case OP_ANY:
04107           for (i = min; i < max; i++)
04108             {
04109             if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
04110             eptr++;
04111             }
04112           break;
04113 
04114           case OP_ALLANY:
04115           case OP_ANYBYTE:
04116           c = max - min;
04117           if (c > (unsigned int)(md->end_subject - eptr))
04118             c = md->end_subject - eptr;
04119           eptr += c;
04120           break;
04121 
04122           case OP_ANYNL:
04123           for (i = min; i < max; i++)
04124             {
04125             if (eptr >= md->end_subject) break;
04126             c = *eptr;
04127             if (c == 0x000d)
04128               {
04129               if (++eptr >= md->end_subject) break;
04130               if (*eptr == 0x000a) eptr++;
04131               }
04132             else
04133               {
04134               if (c != 0x000a &&
04135                   (md->bsr_anycrlf ||
04136                     (c != 0x000b && c != 0x000c && c != 0x0085)))
04137                 break;
04138               eptr++;
04139               }
04140             }
04141           break;
04142 
04143           case OP_NOT_HSPACE:
04144           for (i = min; i < max; i++)
04145             {
04146             if (eptr >= md->end_subject) break;
04147             c = *eptr;
04148             if (c == 0x09 || c == 0x20 || c == 0xa0) break;
04149             eptr++;
04150             }
04151           break;
04152 
04153           case OP_HSPACE:
04154           for (i = min; i < max; i++)
04155             {
04156             if (eptr >= md->end_subject) break;
04157             c = *eptr;
04158             if (c != 0x09 && c != 0x20 && c != 0xa0) break;
04159             eptr++;
04160             }
04161           break;
04162 
04163           case OP_NOT_VSPACE:
04164           for (i = min; i < max; i++)
04165             {
04166             if (eptr >= md->end_subject) break;
04167             c = *eptr;
04168             if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
04169               break;
04170             eptr++;
04171             }
04172           break;
04173 
04174           case OP_VSPACE:
04175           for (i = min; i < max; i++)
04176             {
04177             if (eptr >= md->end_subject) break;
04178             c = *eptr;
04179             if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
04180               break;
04181             eptr++;
04182             }
04183           break;
04184 
04185           case OP_NOT_DIGIT:
04186           for (i = min; i < max; i++)
04187             {
04188             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
04189               break;
04190             eptr++;
04191             }
04192           break;
04193 
04194           case OP_DIGIT:
04195           for (i = min; i < max; i++)
04196             {
04197             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
04198               break;
04199             eptr++;
04200             }
04201           break;
04202 
04203           case OP_NOT_WHITESPACE:
04204           for (i = min; i < max; i++)
04205             {
04206             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
04207               break;
04208             eptr++;
04209             }
04210           break;
04211 
04212           case OP_WHITESPACE:
04213           for (i = min; i < max; i++)
04214             {
04215             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
04216               break;
04217             eptr++;
04218             }
04219           break;
04220 
04221           case OP_NOT_WORDCHAR:
04222           for (i = min; i < max; i++)
04223             {
04224             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
04225               break;
04226             eptr++;
04227             }
04228           break;
04229 
04230           case OP_WORDCHAR:
04231           for (i = min; i < max; i++)
04232             {
04233             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
04234               break;
04235             eptr++;
04236             }
04237           break;
04238 
04239           default:
04240           RRETURN(PCRE_ERROR_INTERNAL);
04241           }
04242 
04243         /* eptr is now past the end of the maximum run */
04244 
04245         if (possessive) continue;
04246         while (eptr >= pp)
04247           {
04248           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
04249           eptr--;
04250           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
04251           }
04252         }
04253 
04254       /* Get here if we can't make it match with any permitted repetitions */
04255 
04256       RRETURN(MATCH_NOMATCH);
04257       }
04258     /* Control never gets here */
04259 
04260     /* There's been some horrible disaster. Arrival here can only mean there is
04261     something seriously wrong in the code above or the OP_xxx definitions. */
04262 
04263     default:
04264     DPRINTF(("Unknown opcode %d\n", *ecode));
04265     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
04266     }
04267 
04268   /* Do not stick any code in here without much thought; it is assumed
04269   that "continue" in the code above comes out to here to repeat the main
04270   loop. */
04271 
04272   }             /* End of main loop */
04273 /* Control never reaches here */
04274 
04275 
04276 /* When compiling to use the heap rather than the stack for recursive calls to
04277 match(), the RRETURN() macro jumps here. The number that is saved in
04278 frame->Xwhere indicates which label we actually want to return to. */
04279 
04280 #ifdef NO_RECURSE
04281 #define LBL(val) case val: goto L_RM##val;
04282 HEAP_RETURN:
04283 switch (frame->Xwhere)
04284   {
04285   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
04286   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
04287   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
04288   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
04289   LBL(53) LBL(54)
04290 #ifdef SUPPORT_UTF8
04291   LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
04292   LBL(32) LBL(34) LBL(42) LBL(46)
04293 #ifdef SUPPORT_UCP
04294   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
04295 #endif  /* SUPPORT_UCP */
04296 #endif  /* SUPPORT_UTF8 */
04297   default:
04298   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
04299   return PCRE_ERROR_INTERNAL;
04300   }
04301 #undef LBL
04302 #endif  /* NO_RECURSE */
04303 }
04304 
04305 
04306 /***************************************************************************
04307 ****************************************************************************
04308                    RECURSION IN THE match() FUNCTION
04309 
04310 Undefine all the macros that were defined above to handle this. */
04311 
04312 #ifdef NO_RECURSE
04313 #undef eptr
04314 #undef ecode
04315 #undef mstart
04316 #undef offset_top
04317 #undef ims
04318 #undef eptrb
04319 #undef flags
04320 
04321 #undef callpat
04322 #undef charptr
04323 #undef data
04324 #undef next
04325 #undef pp
04326 #undef prev
04327 #undef saved_eptr
04328 
04329 #undef new_recursive
04330 
04331 #undef cur_is_word
04332 #undef condition
04333 #undef prev_is_word
04334 
04335 #undef original_ims
04336 
04337 #undef ctype
04338 #undef length
04339 #undef max
04340 #undef min
04341 #undef number
04342 #undef offset
04343 #undef op
04344 #undef save_capture_last
04345 #undef save_offset1
04346 #undef save_offset2
04347 #undef save_offset3
04348 #undef stacksave
04349 
04350 #undef newptrb
04351 
04352 #endif
04353 
04354 /* fc and fi are defined as macros whether or not NO_RECURSE is set */
04355 
04356 #undef fc
04357 #undef fi
04358 
04359 /***************************************************************************
04360 ***************************************************************************/
04361 
04362 
04363 
04364 /*************************************************
04365 *         Execute a Regular Expression           *
04366 *************************************************/
04367 
04368 /* This function applies a compiled re to a subject string and picks out
04369 portions of the string if it matches. Two elements in the vector are set for
04370 each substring: the offsets to the start and end of the substring.
04371 
04372 Arguments:
04373   argument_re     points to the compiled expression
04374   extra_data      points to extra data or is NULL
04375   subject         points to the subject string
04376   length          length of the subject string (the string may contain binary zeros)
04377   start_offset    where to start in the subject string
04378   options         option bits
04379   offsets         points to a vector of ints to be filled in with offsets
04380   offsetcount     the number of elements in the vector
04381 
04382 Returns:          > 0 => success; value is the number of elements filled in
04383                   = 0 => success, but offsets is not big enough
04384                    -1 => failed to match
04385                  < -1 => some kind of unexpected problem
04386 */
04387 
04388 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
04389 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
04390   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
04391   int offsetcount)
04392 {
04393 int rc, resetcount, ocount;
04394 int first_byte = -1;
04395 int req_byte = -1;
04396 int req_byte2 = -1;
04397 int newline;
04398 unsigned long int ims;
04399 BOOL using_temporary_offsets = FALSE;
04400 BOOL anchored;
04401 BOOL startline;
04402 BOOL firstline;
04403 BOOL first_byte_caseless = FALSE;
04404 BOOL req_byte_caseless = FALSE;
04405 BOOL utf8;
04406 match_data match_block;
04407 match_data *md = &match_block;
04408 const uschar *tables;
04409 const uschar *start_bits = NULL;
04410 USPTR start_match = (USPTR)subject + start_offset;
04411 USPTR end_subject;
04412 USPTR req_byte_ptr = start_match - 1;
04413 
04414 pcre_study_data internal_study;
04415 const pcre_study_data *study;
04416 
04417 real_pcre internal_re;
04418 const real_pcre *external_re = (const real_pcre *)argument_re;
04419 const real_pcre *re = external_re;
04420 
04421 /* Plausibility checks */
04422 
04423 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
04424 if (re == NULL || subject == NULL ||
04425    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
04426 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
04427 
04428 /* Fish out the optional data from the extra_data structure, first setting
04429 the default values. */
04430 
04431 study = NULL;
04432 md->match_limit = MATCH_LIMIT;
04433 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
04434 md->callout_data = NULL;
04435 
04436 /* The table pointer is always in native byte order. */
04437 
04438 tables = external_re->tables;
04439 
04440 if (extra_data != NULL)
04441   {
04442   register unsigned int flags = extra_data->flags;
04443   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
04444     study = (const pcre_study_data *)extra_data->study_data;
04445   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
04446     md->match_limit = extra_data->match_limit;
04447   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
04448     md->match_limit_recursion = extra_data->match_limit_recursion;
04449   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
04450     md->callout_data = extra_data->callout_data;
04451   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
04452   }
04453 
04454 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
04455 is a feature that makes it possible to save compiled regex and re-use them
04456 in other programs later. */
04457 
04458 if (tables == NULL) tables = _pcre_default_tables;
04459 
04460 /* Check that the first field in the block is the magic number. If it is not,
04461 test for a regex that was compiled on a host of opposite endianness. If this is
04462 the case, flipped values are put in internal_re and internal_study if there was
04463 study data too. */
04464 
04465 if (re->magic_number != MAGIC_NUMBER)
04466   {
04467   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
04468   if (re == NULL) return PCRE_ERROR_BADMAGIC;
04469   if (study != NULL) study = &internal_study;
04470   }
04471 
04472 /* Set up other data */
04473 
04474 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
04475 startline = (re->flags & PCRE_STARTLINE) != 0;
04476 firstline = (re->options & PCRE_FIRSTLINE) != 0;
04477 
04478 /* The code starts after the real_pcre block and the capture name table. */
04479 
04480 md->start_code = (const uschar *)external_re + re->name_table_offset +
04481   re->name_count * re->name_entry_size;
04482 
04483 md->start_subject = (USPTR)subject;
04484 md->start_offset = start_offset;
04485 md->end_subject = md->start_subject + length;
04486 end_subject = md->end_subject;
04487 
04488 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
04489 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
04490 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
04491 
04492 md->notbol = (options & PCRE_NOTBOL) != 0;
04493 md->noteol = (options & PCRE_NOTEOL) != 0;
04494 md->notempty = (options & PCRE_NOTEMPTY) != 0;
04495 md->partial = (options & PCRE_PARTIAL) != 0;
04496 md->hitend = FALSE;
04497 
04498 md->recursive = NULL;                   /* No recursion at top level */
04499 
04500 md->lcc = tables + lcc_offset;
04501 md->ctypes = tables + ctypes_offset;
04502 
04503 /* Handle different \R options. */
04504 
04505 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
04506   {
04507   case 0:
04508   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
04509     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
04510   else
04511 #ifdef BSR_ANYCRLF
04512   md->bsr_anycrlf = TRUE;
04513 #else
04514   md->bsr_anycrlf = FALSE;
04515 #endif
04516   break;
04517 
04518   case PCRE_BSR_ANYCRLF:
04519   md->bsr_anycrlf = TRUE;
04520   break;
04521 
04522   case PCRE_BSR_UNICODE:
04523   md->bsr_anycrlf = FALSE;
04524   break;
04525 
04526   default: return PCRE_ERROR_BADNEWLINE;
04527   }
04528 
04529 /* Handle different types of newline. The three bits give eight cases. If
04530 nothing is set at run time, whatever was used at compile time applies. */
04531 
04532 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
04533         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
04534   {
04535   case 0: newline = NEWLINE; break;   /* Compile-time default */
04536   case PCRE_NEWLINE_CR: newline = '\r'; break;
04537   case PCRE_NEWLINE_LF: newline = '\n'; break;
04538   case PCRE_NEWLINE_CR+
04539        PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
04540   case PCRE_NEWLINE_ANY: newline = -1; break;
04541   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
04542   default: return PCRE_ERROR_BADNEWLINE;
04543   }
04544 
04545 if (newline == -2)
04546   {
04547   md->nltype = NLTYPE_ANYCRLF;
04548   }
04549 else if (newline < 0)
04550   {
04551   md->nltype = NLTYPE_ANY;
04552   }
04553 else
04554   {
04555   md->nltype = NLTYPE_FIXED;
04556   if (newline > 255)
04557     {
04558     md->nllen = 2;
04559     md->nl[0] = (newline >> 8) & 255;
04560     md->nl[1] = newline & 255;
04561     }
04562   else
04563     {
04564     md->nllen = 1;
04565     md->nl[0] = newline;
04566     }
04567   }
04568 
04569 /* Partial matching is supported only for a restricted set of regexes at the
04570 moment. */
04571 
04572 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
04573   return PCRE_ERROR_BADPARTIAL;
04574 
04575 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
04576 back the character offset. */
04577 
04578 #ifdef SUPPORT_UTF8
04579 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
04580   {
04581   if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
04582     return PCRE_ERROR_BADUTF8;
04583   if (start_offset > 0 && start_offset < length)
04584     {
04585     int tb = ((uschar *)subject)[start_offset];
04586     if (tb > 127)
04587       {
04588       tb &= 0xc0;
04589       if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
04590       }
04591     }
04592   }
04593 #endif
04594 
04595 /* The ims options can vary during the matching as a result of the presence
04596 of (?ims) items in the pattern. They are kept in a local variable so that
04597 restoring at the exit of a group is easy. */
04598 
04599 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
04600 
04601 /* If the expression has got more back references than the offsets supplied can
04602 hold, we get a temporary chunk of working store to use during the matching.
04603 Otherwise, we can use the vector supplied, rounding down its size to a multiple
04604 of 3. */
04605 
04606 ocount = offsetcount - (offsetcount % 3);
04607 
04608 if (re->top_backref > 0 && re->top_backref >= ocount/3)
04609   {
04610   ocount = re->top_backref * 3 + 3;
04611   md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
04612   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
04613   using_temporary_offsets = TRUE;
04614   DPRINTF(("Got memory to hold back references\n"));
04615   }
04616 else md->offset_vector = offsets;
04617 
04618 md->offset_end = ocount;
04619 md->offset_max = (2*ocount)/3;
04620 md->offset_overflow = FALSE;
04621 md->capture_last = -1;
04622 
04623 /* Compute the minimum number of offsets that we need to reset each time. Doing
04624 this makes a huge difference to execution time when there aren't many brackets
04625 in the pattern. */
04626 
04627 resetcount = 2 + re->top_bracket * 2;
04628 if (resetcount > offsetcount) resetcount = ocount;
04629 
04630 /* Reset the working variable associated with each extraction. These should
04631 never be used unless previously set, but they get saved and restored, and so we
04632 initialize them to avoid reading uninitialized locations. */
04633 
04634 if (md->offset_vector != NULL)
04635   {
04636   register int *iptr = md->offset_vector + ocount;
04637   register int *iend = iptr - resetcount/2 + 1;
04638   while (--iptr >= iend) *iptr = -1;
04639   }
04640 
04641 /* Set up the first character to match, if available. The first_byte value is
04642 never set for an anchored regular expression, but the anchoring may be forced
04643 at run time, so we have to test for anchoring. The first char may be unset for
04644 an unanchored pattern, of course. If there's no first char and the pattern was
04645 studied, there may be a bitmap of possible first characters. */
04646 
04647 if (!anchored)
04648   {
04649   if ((re->flags & PCRE_FIRSTSET) != 0)
04650     {
04651     first_byte = re->first_byte & 255;
04652     if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
04653       first_byte = md->lcc[first_byte];
04654     }
04655   else
04656     if (!startline && study != NULL &&
04657       (study->options & PCRE_STUDY_MAPPED) != 0)
04658         start_bits = study->start_bits;
04659   }
04660 
04661 /* For anchored or unanchored matches, there may be a "last known required
04662 character" set. */
04663 
04664 if ((re->flags & PCRE_REQCHSET) != 0)
04665   {
04666   req_byte = re->req_byte & 255;
04667   req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
04668   req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
04669   }
04670 
04671 
04672 /* ==========================================================================*/
04673 
04674 /* Loop for handling unanchored repeated matching attempts; for anchored regexs
04675 the loop runs just once. */
04676 
04677 for(;;)
04678   {
04679   USPTR save_end_subject = end_subject;
04680   USPTR new_start_match;
04681 
04682   /* Reset the maximum number of extractions we might see. */
04683 
04684   if (md->offset_vector != NULL)
04685     {
04686     register int *iptr = md->offset_vector;
04687     register int *iend = iptr + resetcount;
04688     while (iptr < iend) *iptr++ = -1;
04689     }
04690 
04691   /* Advance to a unique first char if possible. If firstline is TRUE, the
04692   start of the match is constrained to the first line of a multiline string.
04693   That is, the match must be before or at the first newline. Implement this by
04694   temporarily adjusting end_subject so that we stop scanning at a newline. If
04695   the match fails at the newline, later code breaks this loop. */
04696 
04697   if (firstline)
04698     {
04699     USPTR t = start_match;
04700 #ifdef SUPPORT_UTF8
04701     if (utf8)
04702       {
04703       while (t < md->end_subject && !IS_NEWLINE(t))
04704         {
04705         t++;
04706         while (t < end_subject && (*t & 0xc0) == 0x80) t++;
04707         }
04708       }
04709     else
04710 #endif
04711     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
04712     end_subject = t;
04713     }
04714 
04715   /* Now advance to a unique first byte if there is one. */
04716 
04717   if (first_byte >= 0)
04718     {
04719     if (first_byte_caseless)
04720       while (start_match < end_subject && md->lcc[*start_match] != first_byte)
04721         start_match++;
04722     else
04723       while (start_match < end_subject && *start_match != first_byte)
04724         start_match++;
04725     }
04726 
04727   /* Or to just after a linebreak for a multiline match */
04728 
04729   else if (startline)
04730     {
04731     if (start_match > md->start_subject + start_offset)
04732       {
04733 #ifdef SUPPORT_UTF8
04734       if (utf8)
04735         {
04736         while (start_match < end_subject && !WAS_NEWLINE(start_match))
04737           {
04738           start_match++;
04739           while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
04740             start_match++;
04741           }
04742         }
04743       else
04744 #endif
04745       while (start_match < end_subject && !WAS_NEWLINE(start_match))
04746         start_match++;
04747 
04748       /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
04749       and we are now at a LF, advance the match position by one more character.
04750       */
04751 
04752       if (start_match[-1] == '\r' &&
04753            (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
04754            start_match < end_subject &&
04755            *start_match == '\n')
04756         start_match++;
04757       }
04758     }
04759 
04760   /* Or to a non-unique first byte after study */
04761 
04762   else if (start_bits != NULL)
04763     {
04764     while (start_match < end_subject)
04765       {
04766       register unsigned int c = *start_match;
04767       if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
04768         else break;
04769       }
04770     }
04771 
04772   /* Restore fudged end_subject */
04773 
04774   end_subject = save_end_subject;
04775 
04776 #ifdef DEBUG  /* Sigh. Some compilers never learn. */
04777   printf(">>>> Match against: ");
04778   pchars(start_match, end_subject - start_match, TRUE, md);
04779   printf("\n");
04780 #endif
04781 
04782   /* If req_byte is set, we know that that character must appear in the subject
04783   for the match to succeed. If the first character is set, req_byte must be
04784   later in the subject; otherwise the test starts at the match point. This
04785   optimization can save a huge amount of backtracking in patterns with nested
04786   unlimited repeats that aren't going to match. Writing separate code for
04787   cased/caseless versions makes it go faster, as does using an autoincrement
04788   and backing off on a match.
04789 
04790   HOWEVER: when the subject string is very, very long, searching to its end can
04791   take a long time, and give bad performance on quite ordinary patterns. This
04792   showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
04793   string... so we don't do this when the string is sufficiently long.
04794 
04795   ALSO: this processing is disabled when partial matching is requested.
04796   */
04797 
04798   if (req_byte >= 0 &&
04799       end_subject - start_match < REQ_BYTE_MAX &&
04800       !md->partial)
04801     {
04802     register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
04803 
04804     /* We don't need to repeat the search if we haven't yet reached the
04805     place we found it at last time. */
04806 
04807     if (p > req_byte_ptr)
04808       {
04809       if (req_byte_caseless)
04810         {
04811         while (p < end_subject)
04812           {
04813           register int pp = *p++;
04814           if (pp == req_byte || pp == req_byte2) { p--; break; }
04815           }
04816         }
04817       else
04818         {
04819         while (p < end_subject)
04820           {
04821           if (*p++ == req_byte) { p--; break; }
04822           }
04823         }
04824 
04825       /* If we can't find the required character, break the matching loop,
04826       forcing a match failure. */
04827 
04828       if (p >= end_subject)
04829         {
04830         rc = MATCH_NOMATCH;
04831         break;
04832         }
04833 
04834       /* If we have found the required character, save the point where we
04835       found it, so that we don't search again next time round the loop if
04836       the start hasn't passed this character yet. */
04837 
04838       req_byte_ptr = p;
04839       }
04840     }
04841 
04842   /* OK, we can now run the match. */
04843 
04844   md->start_match_ptr = start_match;
04845   md->match_call_count = 0;
04846   rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
04847 
04848   switch(rc)
04849     {
04850     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
04851     exactly like PRUNE. */
04852 
04853     case MATCH_NOMATCH:
04854     case MATCH_PRUNE:
04855     case MATCH_THEN:
04856     new_start_match = start_match + 1;
04857 #ifdef SUPPORT_UTF8
04858     if (utf8)
04859       while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
04860         new_start_match++;
04861 #endif
04862     break;
04863 
04864     /* SKIP passes back the next starting point explicitly. */
04865 
04866     case MATCH_SKIP:
04867     new_start_match = md->start_match_ptr;
04868     break;
04869 
04870     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
04871 
04872     case MATCH_COMMIT:
04873     rc = MATCH_NOMATCH;
04874     goto ENDLOOP;
04875 
04876     /* Any other return is some kind of error. */
04877 
04878     default:
04879     goto ENDLOOP;
04880     }
04881 
04882   /* Control reaches here for the various types of "no match at this point"
04883   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
04884 
04885   rc = MATCH_NOMATCH;
04886 
04887   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
04888   newline in the subject (though it may continue over the newline). Therefore,
04889   if we have just failed to match, starting at a newline, do not continue. */
04890 
04891   if (firstline && IS_NEWLINE(start_match)) break;
04892 
04893   /* Advance to new matching position */
04894 
04895   start_match = new_start_match;
04896 
04897   /* Break the loop if the pattern is anchored or if we have passed the end of
04898   the subject. */
04899 
04900   if (anchored || start_match > end_subject) break;
04901 
04902   /* If we have just passed a CR and we are now at a LF, and the pattern does
04903   not contain any explicit matches for \r or \n, and the newline option is CRLF
04904   or ANY or ANYCRLF, advance the match position by one more character. */
04905 
04906   if (start_match[-1] == '\r' &&
04907       start_match < end_subject &&
04908       *start_match == '\n' &&
04909       (re->flags & PCRE_HASCRORLF) == 0 &&
04910         (md->nltype == NLTYPE_ANY ||
04911          md->nltype == NLTYPE_ANYCRLF ||
04912          md->nllen == 2))
04913     start_match++;
04914 
04915   }   /* End of for(;;) "bumpalong" loop */
04916 
04917 /* ==========================================================================*/
04918 
04919 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
04920 conditions is true:
04921 
04922 (1) The pattern is anchored or the match was failed by (*COMMIT);
04923 
04924 (2) We are past the end of the subject;
04925 
04926 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
04927     this option requests that a match occur at or before the first newline in
04928     the subject.
04929 
04930 When we have a match and the offset vector is big enough to deal with any
04931 backreferences, captured substring offsets will already be set up. In the case
04932 where we had to get some local store to hold offsets for backreference
04933 processing, copy those that we can. In this case there need not be overflow if
04934 certain parts of the pattern were not used, even though there are more
04935 capturing parentheses than vector slots. */
04936 
04937 ENDLOOP:
04938 
04939 if (rc == MATCH_MATCH)
04940   {
04941   if (using_temporary_offsets)
04942     {
04943     if (offsetcount >= 4)
04944       {
04945       memcpy(offsets + 2, md->offset_vector + 2,
04946         (offsetcount - 2) * sizeof(int));
04947       DPRINTF(("Copied offsets from temporary memory\n"));
04948       }
04949     if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
04950     DPRINTF(("Freeing temporary memory\n"));
04951     (pcre_free)(md->offset_vector);
04952     }
04953 
04954   /* Set the return code to the number of captured strings, or 0 if there are
04955   too many to fit into the vector. */
04956 
04957   rc = md->offset_overflow? 0 : md->end_offset_top/2;
04958 
04959   /* If there is space, set up the whole thing as substring 0. The value of
04960   md->start_match_ptr might be modified if \K was encountered on the success
04961   matching path. */
04962 
04963   if (offsetcount < 2) rc = 0; else
04964     {
04965     offsets[0] = md->start_match_ptr - md->start_subject;
04966     offsets[1] = md->end_match_ptr - md->start_subject;
04967     }
04968 
04969   DPRINTF((">>>> returning %d\n", rc));
04970   return rc;
04971   }
04972 
04973 /* Control gets here if there has been an error, or if the overall match
04974 attempt has failed at all permitted starting positions. */
04975 
04976 if (using_temporary_offsets)
04977   {
04978   DPRINTF(("Freeing temporary memory\n"));
04979   (pcre_free)(md->offset_vector);
04980   }
04981 
04982 if (rc != MATCH_NOMATCH)
04983   {
04984   DPRINTF((">>>> error: returning %d\n", rc));
04985   return rc;
04986   }
04987 else if (md->partial && md->hitend)
04988   {
04989   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
04990   return PCRE_ERROR_PARTIAL;
04991   }
04992 else
04993   {
04994   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
04995   return PCRE_ERROR_NOMATCH;
04996   }
04997 }
04998 
04999 /* End of pcre_exec.c */

Generated on Tue Jul 5 14:11:57 2011 for ROOT_528-00b_version by  doxygen 1.5.1