00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047 #ifdef HAVE_CONFIG_H
00048 #include "config.h"
00049 #endif
00050
00051 #define NLBLOCK md
00052 #define PSSTART start_subject
00053 #define PSEND end_subject
00054
00055 #include "pcre_internal.h"
00056
00057
00058
00059
00060 #define SP " "
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
/* Offsets that internal_dfa_exec() adds to the opcode of a repeated
character-type item (OP_TYPESTAR and above) when the thing being repeated is a
property test, an extended Unicode sequence, an any-newline test, or a
horizontal/vertical space test. The resulting sum selects a dedicated
"OP_xxx_EXTRA + OP_TYPExxx" case in the main switch instead of the generic
character-type handling. NOTE(review): the values are presumably chosen to lie
above every real opcode - confirm against the opcode list. */

#define OP_PROP_EXTRA    300
#define OP_EXTUNI_EXTRA  320
#define OP_ANYNL_EXTRA   340
#define OP_HSPACE_EXTRA  360
#define OP_VSPACE_EXTRA  380
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087 static const uschar coptable[] = {
00088 0,
00089 0, 0, 0, 0, 0,
00090 0, 0, 0, 0, 0, 0,
00091 0, 0, 0,
00092 0, 0, 0,
00093 0, 0, 0, 0, 0,
00094 0, 0, 0, 0, 0,
00095 1,
00096 1,
00097 1,
00098
00099 1, 1, 1, 1, 1, 1,
00100 3, 3, 3,
00101 1, 1, 1, 3,
00102
00103 1, 1, 1, 1, 1, 1,
00104 3, 3, 3,
00105 1, 1, 1, 3,
00106
00107 1, 1, 1, 1, 1, 1,
00108 3, 3, 3,
00109 1, 1, 1, 3,
00110
00111 0, 0, 0, 0, 0, 0,
00112 0, 0,
00113 0,
00114 0,
00115 0,
00116 0,
00117 0,
00118 0,
00119 0,
00120 0,
00121 0,
00122 0,
00123 0,
00124 0,
00125 0,
00126 0,
00127 0,
00128 0, 0, 0, 0,
00129 0, 0, 0,
00130 0,
00131 0,
00132 0,
00133 0, 0,
00134 0, 0, 0, 0,
00135 0, 0, 0
00136 };
00137
00138
00139
00140
00141 static const uschar toptable1[] = {
00142 0, 0, 0, 0, 0, 0,
00143 ctype_digit, ctype_digit,
00144 ctype_space, ctype_space,
00145 ctype_word, ctype_word,
00146 0, 0
00147 };
00148
00149 static const uschar toptable2[] = {
00150 0, 0, 0, 0, 0, 0,
00151 ctype_digit, 0,
00152 ctype_space, 0,
00153 ctype_word, 0,
00154 1, 1
00155 };
00156
00157
00158
00159
00160
00161
00162
/* One entry in the active-state or new-state list that internal_dfa_exec()
keeps in the caller's workspace vector. */

typedef struct stateblock stateblock;

struct stateblock {
  int offset;   /* offset of the state's opcode from the start of the compiled
                   code; stored negated while characters remain to be skipped */
  int count;    /* repeat count reached so far by a counted item */
  int ims;      /* option bits that were in force when the state was added */
  int data;     /* for a negated offset: number of characters still to skip */
};

/* Number of ints occupied by one stateblock; used to convert the size of the
int workspace vector into a number of states. */

#define INTS_PER_STATEBLOCK  (sizeof(stateblock)/sizeof(int))
00171
00172
00173 #ifdef DEBUG
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
/* Debugging helper: write a byte string to a stream, printing each printable
byte as itself and every other byte as a \xhh escape.

Arguments:
  p           points to the bytes to print; they are only read, so the pointer
                is const-qualified and callers holding a const subject pointer
                need no cast
  length      number of bytes to print; nothing is written if it is <= 0
  f           the stream to write to

Returns:      nothing
*/

static void
pchars(const unsigned char *p, int length, FILE *f)
{
while (length-- > 0)
  {
  int c = *p++;    /* always 0..255, so it is a valid argument for isprint() */
  if (isprint(c))
    fprintf(f, "%c", c);
  else
    fprintf(f, "\\x%02x", c);
  }
}
00200 #endif
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229
00230
00231
00232
00233
/* Append a state to the active list of the enclosing function: x is the code
offset and y the repeat count; the state is tagged with the current ims value.
If the list already holds wscount entries, the enclosing function returns
PCRE_ERROR_DFA_WSSIZE instead. active_count is incremented either way. The
data field is not written. The macro relies on active_count, wscount,
next_active_state, ims and rlevel being in scope, and may evaluate x and y more
than once. */

#define ADD_ACTIVE(x,y) \
  do \
    { \
    if (active_count++ >= wscount) return PCRE_ERROR_DFA_WSSIZE; \
    next_active_state->offset = (x); \
    next_active_state->count  = (y); \
    next_active_state->ims    = ims; \
    next_active_state++; \
    DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
    } \
  while (0)
00244
/* As ADD_ACTIVE, but also stores z in the new state's data field. Append a
state (x = code offset, y = repeat count, z = data) to the active list of the
enclosing function, tagged with the current ims value. If the list already
holds wscount entries, the enclosing function returns PCRE_ERROR_DFA_WSSIZE
instead. active_count is incremented either way. The macro relies on
active_count, wscount, next_active_state, ims and rlevel being in scope, and
may evaluate its arguments more than once. */

#define ADD_ACTIVE_DATA(x,y,z) \
  do \
    { \
    if (active_count++ >= wscount) return PCRE_ERROR_DFA_WSSIZE; \
    next_active_state->offset = (x); \
    next_active_state->count  = (y); \
    next_active_state->ims    = ims; \
    next_active_state->data   = (z); \
    next_active_state++; \
    DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
    } \
  while (0)
00256
/* Append a state to the new-state list of the enclosing function (the states
for the next subject character): x is the code offset and y the repeat count;
the state is tagged with the current ims value. If the list already holds
wscount entries, the enclosing function returns PCRE_ERROR_DFA_WSSIZE instead.
new_count is incremented either way. The data field is not written. The macro
relies on new_count, wscount, next_new_state, ims and rlevel being in scope,
and may evaluate x and y more than once. */

#define ADD_NEW(x,y) \
  do \
    { \
    if (new_count++ >= wscount) return PCRE_ERROR_DFA_WSSIZE; \
    next_new_state->offset = (x); \
    next_new_state->count  = (y); \
    next_new_state->ims    = ims; \
    next_new_state++; \
    DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
    } \
  while (0)
00267
/* As ADD_NEW, but also stores z in the new state's data field. Append a state
(x = code offset, y = repeat count, z = data) to the new-state list of the
enclosing function, tagged with the current ims value. If the list already
holds wscount entries, the enclosing function returns PCRE_ERROR_DFA_WSSIZE
instead. new_count is incremented either way. The macro relies on new_count,
wscount, next_new_state, ims and rlevel being in scope, and may evaluate its
arguments more than once. */

#define ADD_NEW_DATA(x,y,z) \
  do \
    { \
    if (new_count++ >= wscount) return PCRE_ERROR_DFA_WSSIZE; \
    next_new_state->offset = (x); \
    next_new_state->count  = (y); \
    next_new_state->ims    = ims; \
    next_new_state->data   = (z); \
    next_new_state++; \
    DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
    } \
  while (0)
00279
00280
00281
00282 static int
00283 internal_dfa_exec(
00284 dfa_match_data *md,
00285 const uschar *this_start_code,
00286 const uschar *current_subject,
00287 int start_offset,
00288 int *offsets,
00289 int offsetcount,
00290 int *workspace,
00291 int wscount,
00292 int ims,
00293 int rlevel,
00294 int recursing)
00295 {
00296 stateblock *active_states, *new_states, *temp_states;
00297 stateblock *next_active_state, *next_new_state;
00298
00299 const uschar *ctypes, *lcc, *fcc;
00300 const uschar *ptr;
00301 const uschar *end_code, *first_op;
00302
00303 int active_count, new_count, match_count;
00304
00305
00306
00307
00308 const uschar *start_subject = md->start_subject;
00309 const uschar *end_subject = md->end_subject;
00310 const uschar *start_code = md->start_code;
00311
00312 #ifdef SUPPORT_UTF8
00313 BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;
00314 #else
00315 BOOL utf8 = FALSE;
00316 #endif
00317
00318 rlevel++;
00319 offsetcount &= (-2);
00320
00321 wscount -= 2;
00322 wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) /
00323 (2 * INTS_PER_STATEBLOCK);
00324
00325 DPRINTF(("\n%.*s---------------------\n"
00326 "%.*sCall to internal_dfa_exec f=%d r=%d\n",
00327 rlevel*2-2, SP, rlevel*2-2, SP, rlevel, recursing));
00328
00329 ctypes = md->tables + ctypes_offset;
00330 lcc = md->tables + lcc_offset;
00331 fcc = md->tables + fcc_offset;
00332
00333 match_count = PCRE_ERROR_NOMATCH;
00334
00335 active_states = (stateblock *)(workspace + 2);
00336 next_new_state = new_states = active_states + wscount;
00337 new_count = 0;
00338
00339 first_op = this_start_code + 1 + LINK_SIZE +
00340 ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
00341
00342
00343
00344
00345
00346
00347
00348
00349
00350
00351 if (*first_op == OP_REVERSE)
00352 {
00353 int max_back = 0;
00354 int gone_back;
00355
00356 end_code = this_start_code;
00357 do
00358 {
00359 int back = GET(end_code, 2+LINK_SIZE);
00360 if (back > max_back) max_back = back;
00361 end_code += GET(end_code, 1);
00362 }
00363 while (*end_code == OP_ALT);
00364
00365
00366
00367
00368 #ifdef SUPPORT_UTF8
00369
00370
00371 if (utf8)
00372 {
00373 for (gone_back = 0; gone_back < max_back; gone_back++)
00374 {
00375 if (current_subject <= start_subject) break;
00376 current_subject--;
00377 while (current_subject > start_subject &&
00378 (*current_subject & 0xc0) == 0x80)
00379 current_subject--;
00380 }
00381 }
00382 else
00383 #endif
00384
00385
00386
00387 {
00388 gone_back = (current_subject - max_back < start_subject)?
00389 current_subject - start_subject : max_back;
00390 current_subject -= gone_back;
00391 }
00392
00393
00394
00395 end_code = this_start_code;
00396 do
00397 {
00398 int back = GET(end_code, 2+LINK_SIZE);
00399 if (back <= gone_back)
00400 {
00401 int bstate = end_code - start_code + 2 + 2*LINK_SIZE;
00402 ADD_NEW_DATA(-bstate, 0, gone_back - back);
00403 }
00404 end_code += GET(end_code, 1);
00405 }
00406 while (*end_code == OP_ALT);
00407 }
00408
00409
00410
00411
00412
00413
00414
00415 else
00416 {
00417 end_code = this_start_code;
00418
00419
00420
00421 if (rlevel == 1 && (md->moptions & PCRE_DFA_RESTART) != 0)
00422 {
00423 do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT);
00424 new_count = workspace[1];
00425 if (!workspace[0])
00426 memcpy(new_states, active_states, new_count * sizeof(stateblock));
00427 }
00428
00429
00430
00431 else
00432 {
00433 int length = 1 + LINK_SIZE +
00434 ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
00435 do
00436 {
00437 ADD_NEW(end_code - start_code + length, 0);
00438 end_code += GET(end_code, 1);
00439 length = 1 + LINK_SIZE;
00440 }
00441 while (*end_code == OP_ALT);
00442 }
00443 }
00444
00445 workspace[0] = 0;
00446
00447 DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, end_code - start_code));
00448
00449
00450
00451 ptr = current_subject;
00452 for (;;)
00453 {
00454 int i, j;
00455 int clen, dlen;
00456 unsigned int c, d;
00457
00458
00459
00460
00461 temp_states = active_states;
00462 active_states = new_states;
00463 new_states = temp_states;
00464 active_count = new_count;
00465 new_count = 0;
00466
00467 workspace[0] ^= 1;
00468 workspace[1] = active_count;
00469
00470 #ifdef DEBUG
00471 printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
00472 pchars((uschar *)ptr, strlen((char *)ptr), stdout);
00473 printf("\"\n");
00474
00475 printf("%.*sActive states: ", rlevel*2-2, SP);
00476 for (i = 0; i < active_count; i++)
00477 printf("%d/%d ", active_states[i].offset, active_states[i].count);
00478 printf("\n");
00479 #endif
00480
00481
00482
00483 next_active_state = active_states + active_count;
00484 next_new_state = new_states;
00485
00486
00487
00488
00489
00490 if (ptr < end_subject)
00491 {
00492 clen = 1;
00493 #ifdef SUPPORT_UTF8
00494 if (utf8) { GETCHARLEN(c, ptr, clen); } else
00495 #endif
00496 c = *ptr;
00497 }
00498 else
00499 {
00500 clen = 0;
00501 c = NOTACHAR;
00502 }
00503
00504
00505
00506
00507
00508
00509 for (i = 0; i < active_count; i++)
00510 {
00511 stateblock *current_state = active_states + i;
00512 const uschar *code;
00513 int state_offset = current_state->offset;
00514 int count, codevalue;
00515
00516 #ifdef DEBUG
00517 printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
00518 if (clen == 0) printf("EOL\n");
00519 else if (c > 32 && c < 127) printf("'%c'\n", c);
00520 else printf("0x%02x\n", c);
00521 #endif
00522
00523
00524
00525 ims = current_state->ims;
00526
00527
00528
00529
00530
00531 if (state_offset < 0)
00532 {
00533 if (current_state->data > 0)
00534 {
00535 DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
00536 ADD_NEW_DATA(state_offset, current_state->count,
00537 current_state->data - 1);
00538 continue;
00539 }
00540 else
00541 {
00542 current_state->offset = state_offset = -state_offset;
00543 }
00544 }
00545
00546
00547
00548 for (j = 0; j < i; j++)
00549 {
00550 if (active_states[j].offset == state_offset &&
00551 active_states[j].count == current_state->count)
00552 {
00553 DPRINTF(("%.*sDuplicate state: skipped\n", rlevel*2-2, SP));
00554 goto NEXT_ACTIVE_STATE;
00555 }
00556 }
00557
00558
00559
00560 code = start_code + state_offset;
00561 codevalue = *code;
00562
00563
00564
00565
00566
00567
00568
00569
00570
00571
00572
00573
00574 if (coptable[codevalue] > 0)
00575 {
00576 dlen = 1;
00577 #ifdef SUPPORT_UTF8
00578 if (utf8) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
00579 #endif
00580 d = code[coptable[codevalue]];
00581 if (codevalue >= OP_TYPESTAR)
00582 {
00583 switch(d)
00584 {
00585 case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
00586 case OP_NOTPROP:
00587 case OP_PROP: codevalue += OP_PROP_EXTRA; break;
00588 case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
00589 case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
00590 case OP_NOT_HSPACE:
00591 case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
00592 case OP_NOT_VSPACE:
00593 case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
00594 default: break;
00595 }
00596 }
00597 }
00598 else
00599 {
00600 dlen = 0;
00601 d = NOTACHAR;
00602 }
00603
00604
00605
00606
00607 switch (codevalue)
00608 {
00609
00610
00611
00612
00613
00614
00615
00616 case OP_KET:
00617 case OP_KETRMIN:
00618 case OP_KETRMAX:
00619 if (code != end_code)
00620 {
00621 ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
00622 if (codevalue != OP_KET)
00623 {
00624 ADD_ACTIVE(state_offset - GET(code, 1), 0);
00625 }
00626 }
00627 else if (ptr > current_subject || (md->moptions & PCRE_NOTEMPTY) == 0)
00628 {
00629 if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
00630 else if (match_count > 0 && ++match_count * 2 >= offsetcount)
00631 match_count = 0;
00632 count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
00633 if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
00634 if (offsetcount >= 2)
00635 {
00636 offsets[0] = current_subject - start_subject;
00637 offsets[1] = ptr - start_subject;
00638 DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
00639 offsets[1] - offsets[0], current_subject));
00640 }
00641 if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
00642 {
00643 DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
00644 "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,
00645 match_count, rlevel*2-2, SP));
00646 return match_count;
00647 }
00648 }
00649 break;
00650
00651
00652
00653
00654
00655
00656 case OP_ALT:
00657 do { code += GET(code, 1); } while (*code == OP_ALT);
00658 ADD_ACTIVE(code - start_code, 0);
00659 break;
00660
00661
00662 case OP_BRA:
00663 case OP_SBRA:
00664 do
00665 {
00666 ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
00667 code += GET(code, 1);
00668 }
00669 while (*code == OP_ALT);
00670 break;
00671
00672
00673 case OP_CBRA:
00674 case OP_SCBRA:
00675 ADD_ACTIVE(code - start_code + 3 + LINK_SIZE, 0);
00676 code += GET(code, 1);
00677 while (*code == OP_ALT)
00678 {
00679 ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
00680 code += GET(code, 1);
00681 }
00682 break;
00683
00684
00685 case OP_BRAZERO:
00686 case OP_BRAMINZERO:
00687 ADD_ACTIVE(state_offset + 1, 0);
00688 code += 1 + GET(code, 2);
00689 while (*code == OP_ALT) code += GET(code, 1);
00690 ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
00691 break;
00692
00693
00694 case OP_SKIPZERO:
00695 code += 1 + GET(code, 2);
00696 while (*code == OP_ALT) code += GET(code, 1);
00697 ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
00698 break;
00699
00700
00701 case OP_CIRC:
00702 if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
00703 ((ims & PCRE_MULTILINE) != 0 &&
00704 ptr != end_subject &&
00705 WAS_NEWLINE(ptr)))
00706 { ADD_ACTIVE(state_offset + 1, 0); }
00707 break;
00708
00709
00710 case OP_EOD:
00711 if (ptr >= end_subject) { ADD_ACTIVE(state_offset + 1, 0); }
00712 break;
00713
00714
00715 case OP_OPT:
00716 ims = code[1];
00717 ADD_ACTIVE(state_offset + 2, 0);
00718 break;
00719
00720
00721 case OP_SOD:
00722 if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); }
00723 break;
00724
00725
00726 case OP_SOM:
00727 if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); }
00728 break;
00729
00730
00731
00732
00733
00734
00735
00736
00737
00738 case OP_ANY:
00739 if (clen > 0 && !IS_NEWLINE(ptr))
00740 { ADD_NEW(state_offset + 1, 0); }
00741 break;
00742
00743
00744 case OP_ALLANY:
00745 if (clen > 0)
00746 { ADD_NEW(state_offset + 1, 0); }
00747 break;
00748
00749
00750 case OP_EODN:
00751 if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
00752 { ADD_ACTIVE(state_offset + 1, 0); }
00753 break;
00754
00755
00756 case OP_DOLL:
00757 if ((md->moptions & PCRE_NOTEOL) == 0)
00758 {
00759 if (clen == 0 ||
00760 (IS_NEWLINE(ptr) &&
00761 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
00762 ))
00763 { ADD_ACTIVE(state_offset + 1, 0); }
00764 }
00765 else if ((ims & PCRE_MULTILINE) != 0 && IS_NEWLINE(ptr))
00766 { ADD_ACTIVE(state_offset + 1, 0); }
00767 break;
00768
00769
00770
00771 case OP_DIGIT:
00772 case OP_WHITESPACE:
00773 case OP_WORDCHAR:
00774 if (clen > 0 && c < 256 &&
00775 ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)
00776 { ADD_NEW(state_offset + 1, 0); }
00777 break;
00778
00779
00780 case OP_NOT_DIGIT:
00781 case OP_NOT_WHITESPACE:
00782 case OP_NOT_WORDCHAR:
00783 if (clen > 0 && (c >= 256 ||
00784 ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0))
00785 { ADD_NEW(state_offset + 1, 0); }
00786 break;
00787
00788
00789 case OP_WORD_BOUNDARY:
00790 case OP_NOT_WORD_BOUNDARY:
00791 {
00792 int left_word, right_word;
00793
00794 if (ptr > start_subject)
00795 {
00796 const uschar *temp = ptr - 1;
00797 #ifdef SUPPORT_UTF8
00798 if (utf8) BACKCHAR(temp);
00799 #endif
00800 GETCHARTEST(d, temp);
00801 left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
00802 }
00803 else left_word = 0;
00804
00805 if (clen > 0) right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
00806 else right_word = 0;
00807
00808 if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
00809 { ADD_ACTIVE(state_offset + 1, 0); }
00810 }
00811 break;
00812
00813
00814
00815
00816
00817
00818
00819 #ifdef SUPPORT_UCP
00820 case OP_PROP:
00821 case OP_NOTPROP:
00822 if (clen > 0)
00823 {
00824 BOOL OK;
00825 const ucd_record * prop = GET_UCD(c);
00826 switch(code[1])
00827 {
00828 case PT_ANY:
00829 OK = TRUE;
00830 break;
00831
00832 case PT_LAMP:
00833 OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
00834 break;
00835
00836 case PT_GC:
00837 OK = _pcre_ucp_gentype[prop->chartype] == code[2];
00838 break;
00839
00840 case PT_PC:
00841 OK = prop->chartype == code[2];
00842 break;
00843
00844 case PT_SC:
00845 OK = prop->script == code[2];
00846 break;
00847
00848
00849
00850 default:
00851 OK = codevalue != OP_PROP;
00852 break;
00853 }
00854
00855 if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
00856 }
00857 break;
00858 #endif
00859
00860
00861
00862
00863
00864
00865
00866
00867
00868 case OP_TYPEPLUS:
00869 case OP_TYPEMINPLUS:
00870 case OP_TYPEPOSPLUS:
00871 count = current_state->count;
00872 if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
00873 if (clen > 0)
00874 {
00875 if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
00876 (c < 256 &&
00877 (d != OP_ANY || !IS_NEWLINE(ptr)) &&
00878 ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
00879 {
00880 if (count > 0 && codevalue == OP_TYPEPOSPLUS)
00881 {
00882 active_count--;
00883 next_active_state--;
00884 }
00885 count++;
00886 ADD_NEW(state_offset, count);
00887 }
00888 }
00889 break;
00890
00891
00892 case OP_TYPEQUERY:
00893 case OP_TYPEMINQUERY:
00894 case OP_TYPEPOSQUERY:
00895 ADD_ACTIVE(state_offset + 2, 0);
00896 if (clen > 0)
00897 {
00898 if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
00899 (c < 256 &&
00900 (d != OP_ANY || !IS_NEWLINE(ptr)) &&
00901 ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
00902 {
00903 if (codevalue == OP_TYPEPOSQUERY)
00904 {
00905 active_count--;
00906 next_active_state--;
00907 }
00908 ADD_NEW(state_offset + 2, 0);
00909 }
00910 }
00911 break;
00912
00913
00914 case OP_TYPESTAR:
00915 case OP_TYPEMINSTAR:
00916 case OP_TYPEPOSSTAR:
00917 ADD_ACTIVE(state_offset + 2, 0);
00918 if (clen > 0)
00919 {
00920 if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
00921 (c < 256 &&
00922 (d != OP_ANY || !IS_NEWLINE(ptr)) &&
00923 ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
00924 {
00925 if (codevalue == OP_TYPEPOSSTAR)
00926 {
00927 active_count--;
00928 next_active_state--;
00929 }
00930 ADD_NEW(state_offset, 0);
00931 }
00932 }
00933 break;
00934
00935
00936 case OP_TYPEEXACT:
00937 count = current_state->count;
00938 if (clen > 0)
00939 {
00940 if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
00941 (c < 256 &&
00942 (d != OP_ANY || !IS_NEWLINE(ptr)) &&
00943 ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
00944 {
00945 if (++count >= GET2(code, 1))
00946 { ADD_NEW(state_offset + 4, 0); }
00947 else
00948 { ADD_NEW(state_offset, count); }
00949 }
00950 }
00951 break;
00952
00953
00954 case OP_TYPEUPTO:
00955 case OP_TYPEMINUPTO:
00956 case OP_TYPEPOSUPTO:
00957 ADD_ACTIVE(state_offset + 4, 0);
00958 count = current_state->count;
00959 if (clen > 0)
00960 {
00961 if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
00962 (c < 256 &&
00963 (d != OP_ANY || !IS_NEWLINE(ptr)) &&
00964 ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
00965 {
00966 if (codevalue == OP_TYPEPOSUPTO)
00967 {
00968 active_count--;
00969 next_active_state--;
00970 }
00971 if (++count >= GET2(code, 1))
00972 { ADD_NEW(state_offset + 4, 0); }
00973 else
00974 { ADD_NEW(state_offset, count); }
00975 }
00976 }
00977 break;
00978
00979
00980
00981
00982
00983
00984
00985 #ifdef SUPPORT_UCP
00986 case OP_PROP_EXTRA + OP_TYPEPLUS:
00987 case OP_PROP_EXTRA + OP_TYPEMINPLUS:
00988 case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
00989 count = current_state->count;
00990 if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
00991 if (clen > 0)
00992 {
00993 BOOL OK;
00994 const ucd_record * prop = GET_UCD(c);
00995 switch(code[2])
00996 {
00997 case PT_ANY:
00998 OK = TRUE;
00999 break;
01000
01001 case PT_LAMP:
01002 OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
01003 break;
01004
01005 case PT_GC:
01006 OK = _pcre_ucp_gentype[prop->chartype] == code[3];
01007 break;
01008
01009 case PT_PC:
01010 OK = prop->chartype == code[3];
01011 break;
01012
01013 case PT_SC:
01014 OK = prop->script == code[3];
01015 break;
01016
01017
01018
01019 default:
01020 OK = codevalue != OP_PROP;
01021 break;
01022 }
01023
01024 if (OK == (d == OP_PROP))
01025 {
01026 if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
01027 {
01028 active_count--;
01029 next_active_state--;
01030 }
01031 count++;
01032 ADD_NEW(state_offset, count);
01033 }
01034 }
01035 break;
01036
01037
01038 case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
01039 case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
01040 case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
01041 count = current_state->count;
01042 if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
01043 if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
01044 {
01045 const uschar *nptr = ptr + clen;
01046 int ncount = 0;
01047 if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
01048 {
01049 active_count--;
01050 next_active_state--;
01051 }
01052 while (nptr < end_subject)
01053 {
01054 int nd;
01055 int ndlen = 1;
01056 GETCHARLEN(nd, nptr, ndlen);
01057 if (UCD_CATEGORY(nd) != ucp_M) break;
01058 ncount++;
01059 nptr += ndlen;
01060 }
01061 count++;
01062 ADD_NEW_DATA(-state_offset, count, ncount);
01063 }
01064 break;
01065 #endif
01066
01067
01068 case OP_ANYNL_EXTRA + OP_TYPEPLUS:
01069 case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
01070 case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
01071 count = current_state->count;
01072 if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
01073 if (clen > 0)
01074 {
01075 int ncount = 0;
01076 switch (c)
01077 {
01078 case 0x000b:
01079 case 0x000c:
01080 case 0x0085:
01081 case 0x2028:
01082 case 0x2029:
01083 if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
01084 goto ANYNL01;
01085
01086 case 0x000d:
01087 if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
01088
01089
01090 ANYNL01:
01091 case 0x000a:
01092 if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
01093 {
01094 active_count--;
01095 next_active_state--;
01096 }
01097 count++;
01098 ADD_NEW_DATA(-state_offset, count, ncount);
01099 break;
01100
01101 default:
01102 break;
01103 }
01104 }
01105 break;
01106
01107
01108 case OP_VSPACE_EXTRA + OP_TYPEPLUS:
01109 case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
01110 case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
01111 count = current_state->count;
01112 if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
01113 if (clen > 0)
01114 {
01115 BOOL OK;
01116 switch (c)
01117 {
01118 case 0x000a:
01119 case 0x000b:
01120 case 0x000c:
01121 case 0x000d:
01122 case 0x0085:
01123 case 0x2028:
01124 case 0x2029:
01125 OK = TRUE;
01126 break;
01127
01128 default:
01129 OK = FALSE;
01130 break;
01131 }
01132
01133 if (OK == (d == OP_VSPACE))
01134 {
01135 if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
01136 {
01137 active_count--;
01138 next_active_state--;
01139 }
01140 count++;
01141 ADD_NEW_DATA(-state_offset, count, 0);
01142 }
01143 }
01144 break;
01145
01146
01147 case OP_HSPACE_EXTRA + OP_TYPEPLUS:
01148 case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
01149 case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
01150 count = current_state->count;
01151 if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
01152 if (clen > 0)
01153 {
01154 BOOL OK;
01155 switch (c)
01156 {
01157 case 0x09:
01158 case 0x20:
01159 case 0xa0:
01160 case 0x1680:
01161 case 0x180e:
01162 case 0x2000:
01163 case 0x2001:
01164 case 0x2002:
01165 case 0x2003:
01166 case 0x2004:
01167 case 0x2005:
01168 case 0x2006:
01169 case 0x2007:
01170 case 0x2008:
01171 case 0x2009:
01172 case 0x200A:
01173 case 0x202f:
01174 case 0x205f:
01175 case 0x3000:
01176 OK = TRUE;
01177 break;
01178
01179 default:
01180 OK = FALSE;
01181 break;
01182 }
01183
01184 if (OK == (d == OP_HSPACE))
01185 {
01186 if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
01187 {
01188 active_count--;
01189 next_active_state--;
01190 }
01191 count++;
01192 ADD_NEW_DATA(-state_offset, count, 0);
01193 }
01194 }
01195 break;
01196
01197
01198 #ifdef SUPPORT_UCP
01199 case OP_PROP_EXTRA + OP_TYPEQUERY:
01200 case OP_PROP_EXTRA + OP_TYPEMINQUERY:
01201 case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
01202 count = 4;
01203 goto QS1;
01204
01205 case OP_PROP_EXTRA + OP_TYPESTAR:
01206 case OP_PROP_EXTRA + OP_TYPEMINSTAR:
01207 case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
01208 count = 0;
01209
01210 QS1:
01211
01212 ADD_ACTIVE(state_offset + 4, 0);
01213 if (clen > 0)
01214 {
01215 BOOL OK;
01216 const ucd_record * prop = GET_UCD(c);
01217 switch(code[2])
01218 {
01219 case PT_ANY:
01220 OK = TRUE;
01221 break;
01222
01223 case PT_LAMP:
01224 OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
01225 break;
01226
01227 case PT_GC:
01228 OK = _pcre_ucp_gentype[prop->chartype] == code[3];
01229 break;
01230
01231 case PT_PC:
01232 OK = prop->chartype == code[3];
01233 break;
01234
01235 case PT_SC:
01236 OK = prop->script == code[3];
01237 break;
01238
01239
01240
01241 default:
01242 OK = codevalue != OP_PROP;
01243 break;
01244 }
01245
01246 if (OK == (d == OP_PROP))
01247 {
01248 if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
01249 codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
01250 {
01251 active_count--;
01252 next_active_state--;
01253 }
01254 ADD_NEW(state_offset + count, 0);
01255 }
01256 }
01257 break;
01258
01259
01260 case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
01261 case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
01262 case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
01263 count = 2;
01264 goto QS2;
01265
01266 case OP_EXTUNI_EXTRA + OP_TYPESTAR:
01267 case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
01268 case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
01269 count = 0;
01270
01271 QS2:
01272
01273 ADD_ACTIVE(state_offset + 2, 0);
01274 if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
01275 {
01276 const uschar *nptr = ptr + clen;
01277 int ncount = 0;
01278 if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
01279 codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
01280 {
01281 active_count--;
01282 next_active_state--;
01283 }
01284 while (nptr < end_subject)
01285 {
01286 int nd;
01287 int ndlen = 1;
01288 GETCHARLEN(nd, nptr, ndlen);
01289 if (UCD_CATEGORY(nd) != ucp_M) break;
01290 ncount++;
01291 nptr += ndlen;
01292 }
01293 ADD_NEW_DATA(-(state_offset + count), 0, ncount);
01294 }
01295 break;
01296 #endif
01297
01298
01299 case OP_ANYNL_EXTRA + OP_TYPEQUERY:
01300 case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
01301 case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
01302 count = 2;
01303 goto QS3;
01304
01305 case OP_ANYNL_EXTRA + OP_TYPESTAR:
01306 case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
01307 case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
01308 count = 0;
01309
01310 QS3:
01311 ADD_ACTIVE(state_offset + 2, 0);
01312 if (clen > 0)
01313 {
01314 int ncount = 0;
01315 switch (c)
01316 {
01317 case 0x000b:
01318 case 0x000c:
01319 case 0x0085:
01320 case 0x2028:
01321 case 0x2029:
01322 if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
01323 goto ANYNL02;
01324
01325 case 0x000d:
01326 if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
01327
01328
01329 ANYNL02:
01330 case 0x000a:
01331 if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
01332 codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
01333 {
01334 active_count--;
01335 next_active_state--;
01336 }
01337 ADD_NEW_DATA(-(state_offset + count), 0, ncount);
01338 break;
01339
01340 default:
01341 break;
01342 }
01343 }
01344 break;
01345
01346
01347 case OP_VSPACE_EXTRA + OP_TYPEQUERY:
01348 case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
01349 case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
01350 count = 2;
01351 goto QS4;
01352
01353 case OP_VSPACE_EXTRA + OP_TYPESTAR:
01354 case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
01355 case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
01356 count = 0;
01357
01358 QS4:
01359 ADD_ACTIVE(state_offset + 2, 0);
01360 if (clen > 0)
01361 {
01362 BOOL OK;
01363 switch (c)
01364 {
01365 case 0x000a:
01366 case 0x000b:
01367 case 0x000c:
01368 case 0x000d:
01369 case 0x0085:
01370 case 0x2028:
01371 case 0x2029:
01372 OK = TRUE;
01373 break;
01374
01375 default:
01376 OK = FALSE;
01377 break;
01378 }
01379 if (OK == (d == OP_VSPACE))
01380 {
01381 if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
01382 codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
01383 {
01384 active_count--;
01385 next_active_state--;
01386 }
01387 ADD_NEW_DATA(-(state_offset + count), 0, 0);
01388 }
01389 }
01390 break;
01391
01392
01393 case OP_HSPACE_EXTRA + OP_TYPEQUERY:
01394 case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
01395 case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
01396 count = 2;
01397 goto QS5;
01398
01399 case OP_HSPACE_EXTRA + OP_TYPESTAR:
01400 case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
01401 case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
01402 count = 0;
01403
01404 QS5:
01405 ADD_ACTIVE(state_offset + 2, 0);
01406 if (clen > 0)
01407 {
01408 BOOL OK;
01409 switch (c)
01410 {
01411 case 0x09:
01412 case 0x20:
01413 case 0xa0:
01414 case 0x1680:
01415 case 0x180e:
01416 case 0x2000:
01417 case 0x2001:
01418 case 0x2002:
01419 case 0x2003:
01420 case 0x2004:
01421 case 0x2005:
01422 case 0x2006:
01423 case 0x2007:
01424 case 0x2008:
01425 case 0x2009:
01426 case 0x200A:
01427 case 0x202f:
01428 case 0x205f:
01429 case 0x3000:
01430 OK = TRUE;
01431 break;
01432
01433 default:
01434 OK = FALSE;
01435 break;
01436 }
01437
01438 if (OK == (d == OP_HSPACE))
01439 {
01440 if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
01441 codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
01442 {
01443 active_count--;
01444 next_active_state--;
01445 }
01446 ADD_NEW_DATA(-(state_offset + count), 0, 0);
01447 }
01448 }
01449 break;
01450
01451
01452 #ifdef SUPPORT_UCP
01453 case OP_PROP_EXTRA + OP_TYPEEXACT:
01454 case OP_PROP_EXTRA + OP_TYPEUPTO:
01455 case OP_PROP_EXTRA + OP_TYPEMINUPTO:
01456 case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
01457 if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
01458 { ADD_ACTIVE(state_offset + 6, 0); }
01459 count = current_state->count;
01460 if (clen > 0)
01461 {
01462 BOOL OK;
01463 const ucd_record * prop = GET_UCD(c);
01464 switch(code[4])
01465 {
01466 case PT_ANY:
01467 OK = TRUE;
01468 break;
01469
01470 case PT_LAMP:
01471 OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
01472 break;
01473
01474 case PT_GC:
01475 OK = _pcre_ucp_gentype[prop->chartype] == code[5];
01476 break;
01477
01478 case PT_PC:
01479 OK = prop->chartype == code[5];
01480 break;
01481
01482 case PT_SC:
01483 OK = prop->script == code[5];
01484 break;
01485
01486
01487
01488 default:
01489 OK = codevalue != OP_PROP;
01490 break;
01491 }
01492
01493 if (OK == (d == OP_PROP))
01494 {
01495 if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
01496 {
01497 active_count--;
01498 next_active_state--;
01499 }
01500 if (++count >= GET2(code, 1))
01501 { ADD_NEW(state_offset + 6, 0); }
01502 else
01503 { ADD_NEW(state_offset, count); }
01504 }
01505 }
01506 break;
01507
01508
01509 case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
01510 case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
01511 case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
01512 case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
01513 if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
01514 { ADD_ACTIVE(state_offset + 4, 0); }
01515 count = current_state->count;
01516 if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
01517 {
01518 const uschar *nptr = ptr + clen;
01519 int ncount = 0;
01520 if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
01521 {
01522 active_count--;
01523 next_active_state--;
01524 }
01525 while (nptr < end_subject)
01526 {
01527 int nd;
01528 int ndlen = 1;
01529 GETCHARLEN(nd, nptr, ndlen);
01530 if (UCD_CATEGORY(nd) != ucp_M) break;
01531 ncount++;
01532 nptr += ndlen;
01533 }
01534 if (++count >= GET2(code, 1))
01535 { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
01536 else
01537 { ADD_NEW_DATA(-state_offset, count, ncount); }
01538 }
01539 break;
01540 #endif
01541
01542
01543 case OP_ANYNL_EXTRA + OP_TYPEEXACT:
01544 case OP_ANYNL_EXTRA + OP_TYPEUPTO:
01545 case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
01546 case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
01547 if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
01548 { ADD_ACTIVE(state_offset + 4, 0); }
01549 count = current_state->count;
01550 if (clen > 0)
01551 {
01552 int ncount = 0;
01553 switch (c)
01554 {
01555 case 0x000b:
01556 case 0x000c:
01557 case 0x0085:
01558 case 0x2028:
01559 case 0x2029:
01560 if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
01561 goto ANYNL03;
01562
01563 case 0x000d:
01564 if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
01565
01566
01567 ANYNL03:
01568 case 0x000a:
01569 if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
01570 {
01571 active_count--;
01572 next_active_state--;
01573 }
01574 if (++count >= GET2(code, 1))
01575 { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
01576 else
01577 { ADD_NEW_DATA(-state_offset, count, ncount); }
01578 break;
01579
01580 default:
01581 break;
01582 }
01583 }
01584 break;
01585
01586
01587 case OP_VSPACE_EXTRA + OP_TYPEEXACT:
01588 case OP_VSPACE_EXTRA + OP_TYPEUPTO:
01589 case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
01590 case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
01591 if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
01592 { ADD_ACTIVE(state_offset + 4, 0); }
01593 count = current_state->count;
01594 if (clen > 0)
01595 {
01596 BOOL OK;
01597 switch (c)
01598 {
01599 case 0x000a:
01600 case 0x000b:
01601 case 0x000c:
01602 case 0x000d:
01603 case 0x0085:
01604 case 0x2028:
01605 case 0x2029:
01606 OK = TRUE;
01607 break;
01608
01609 default:
01610 OK = FALSE;
01611 }
01612
01613 if (OK == (d == OP_VSPACE))
01614 {
01615 if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
01616 {
01617 active_count--;
01618 next_active_state--;
01619 }
01620 if (++count >= GET2(code, 1))
01621 { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
01622 else
01623 { ADD_NEW_DATA(-state_offset, count, 0); }
01624 }
01625 }
01626 break;
01627
01628
01629 case OP_HSPACE_EXTRA + OP_TYPEEXACT:
01630 case OP_HSPACE_EXTRA + OP_TYPEUPTO:
01631 case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
01632 case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
01633 if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
01634 { ADD_ACTIVE(state_offset + 4, 0); }
01635 count = current_state->count;
01636 if (clen > 0)
01637 {
01638 BOOL OK;
01639 switch (c)
01640 {
01641 case 0x09:
01642 case 0x20:
01643 case 0xa0:
01644 case 0x1680:
01645 case 0x180e:
01646 case 0x2000:
01647 case 0x2001:
01648 case 0x2002:
01649 case 0x2003:
01650 case 0x2004:
01651 case 0x2005:
01652 case 0x2006:
01653 case 0x2007:
01654 case 0x2008:
01655 case 0x2009:
01656 case 0x200A:
01657 case 0x202f:
01658 case 0x205f:
01659 case 0x3000:
01660 OK = TRUE;
01661 break;
01662
01663 default:
01664 OK = FALSE;
01665 break;
01666 }
01667
01668 if (OK == (d == OP_HSPACE))
01669 {
01670 if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
01671 {
01672 active_count--;
01673 next_active_state--;
01674 }
01675 if (++count >= GET2(code, 1))
01676 { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
01677 else
01678 { ADD_NEW_DATA(-state_offset, count, 0); }
01679 }
01680 }
01681 break;
01682
01683
01684
01685
01686
01687
01688
01689
01690 case OP_CHAR:
01691 if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); }
01692 break;
01693
01694
01695 case OP_CHARNC:
01696 if (clen == 0) break;
01697
01698 #ifdef SUPPORT_UTF8
01699 if (utf8)
01700 {
01701 if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
01702 {
01703 unsigned int othercase;
01704 if (c < 128) othercase = fcc[c]; else
01705
01706
01707
01708
01709 #ifdef SUPPORT_UCP
01710 othercase = UCD_OTHERCASE(c);
01711 #else
01712 othercase = NOTACHAR;
01713 #endif
01714
01715 if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
01716 }
01717 }
01718 else
01719 #endif
01720
01721
01722 {
01723 if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); }
01724 }
01725 break;
01726
01727
01728 #ifdef SUPPORT_UCP
01729
01730
01731
01732
01733
01734 case OP_EXTUNI:
01735 if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
01736 {
01737 const uschar *nptr = ptr + clen;
01738 int ncount = 0;
01739 while (nptr < end_subject)
01740 {
01741 int nclen = 1;
01742 GETCHARLEN(c, nptr, nclen);
01743 if (UCD_CATEGORY(c) != ucp_M) break;
01744 ncount++;
01745 nptr += nclen;
01746 }
01747 ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
01748 }
01749 break;
01750 #endif
01751
01752
01753
01754
01755
01756
01757 case OP_ANYNL:
01758 if (clen > 0) switch(c)
01759 {
01760 case 0x000b:
01761 case 0x000c:
01762 case 0x0085:
01763 case 0x2028:
01764 case 0x2029:
01765 if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
01766
01767 case 0x000a:
01768 ADD_NEW(state_offset + 1, 0);
01769 break;
01770
01771 case 0x000d:
01772 if (ptr + 1 < end_subject && ptr[1] == 0x0a)
01773 {
01774 ADD_NEW_DATA(-(state_offset + 1), 0, 1);
01775 }
01776 else
01777 {
01778 ADD_NEW(state_offset + 1, 0);
01779 }
01780 break;
01781 }
01782 break;
01783
01784
01785 case OP_NOT_VSPACE:
01786 if (clen > 0) switch(c)
01787 {
01788 case 0x000a:
01789 case 0x000b:
01790 case 0x000c:
01791 case 0x000d:
01792 case 0x0085:
01793 case 0x2028:
01794 case 0x2029:
01795 break;
01796
01797 default:
01798 ADD_NEW(state_offset + 1, 0);
01799 break;
01800 }
01801 break;
01802
01803
01804 case OP_VSPACE:
01805 if (clen > 0) switch(c)
01806 {
01807 case 0x000a:
01808 case 0x000b:
01809 case 0x000c:
01810 case 0x000d:
01811 case 0x0085:
01812 case 0x2028:
01813 case 0x2029:
01814 ADD_NEW(state_offset + 1, 0);
01815 break;
01816
01817 default: break;
01818 }
01819 break;
01820
01821
01822 case OP_NOT_HSPACE:
01823 if (clen > 0) switch(c)
01824 {
01825 case 0x09:
01826 case 0x20:
01827 case 0xa0:
01828 case 0x1680:
01829 case 0x180e:
01830 case 0x2000:
01831 case 0x2001:
01832 case 0x2002:
01833 case 0x2003:
01834 case 0x2004:
01835 case 0x2005:
01836 case 0x2006:
01837 case 0x2007:
01838 case 0x2008:
01839 case 0x2009:
01840 case 0x200A:
01841 case 0x202f:
01842 case 0x205f:
01843 case 0x3000:
01844 break;
01845
01846 default:
01847 ADD_NEW(state_offset + 1, 0);
01848 break;
01849 }
01850 break;
01851
01852
01853 case OP_HSPACE:
01854 if (clen > 0) switch(c)
01855 {
01856 case 0x09:
01857 case 0x20:
01858 case 0xa0:
01859 case 0x1680:
01860 case 0x180e:
01861 case 0x2000:
01862 case 0x2001:
01863 case 0x2002:
01864 case 0x2003:
01865 case 0x2004:
01866 case 0x2005:
01867 case 0x2006:
01868 case 0x2007:
01869 case 0x2008:
01870 case 0x2009:
01871 case 0x200A:
01872 case 0x202f:
01873 case 0x205f:
01874 case 0x3000:
01875 ADD_NEW(state_offset + 1, 0);
01876 break;
01877 }
01878 break;
01879
01880
01881
01882
01883
01884
01885 case OP_NOT:
01886 if (clen > 0)
01887 {
01888 unsigned int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;
01889 if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }
01890 }
01891 break;
01892
01893
01894 case OP_PLUS:
01895 case OP_MINPLUS:
01896 case OP_POSPLUS:
01897 case OP_NOTPLUS:
01898 case OP_NOTMINPLUS:
01899 case OP_NOTPOSPLUS:
01900 count = current_state->count;
01901 if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
01902 if (clen > 0)
01903 {
01904 unsigned int otherd = NOTACHAR;
01905 if ((ims & PCRE_CASELESS) != 0)
01906 {
01907 #ifdef SUPPORT_UTF8
01908 if (utf8 && d >= 128)
01909 {
01910 #ifdef SUPPORT_UCP
01911 otherd = UCD_OTHERCASE(d);
01912 #endif
01913 }
01914 else
01915 #endif
01916 otherd = fcc[d];
01917 }
01918 if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
01919 {
01920 if (count > 0 &&
01921 (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
01922 {
01923 active_count--;
01924 next_active_state--;
01925 }
01926 count++;
01927 ADD_NEW(state_offset, count);
01928 }
01929 }
01930 break;
01931
01932
01933 case OP_QUERY:
01934 case OP_MINQUERY:
01935 case OP_POSQUERY:
01936 case OP_NOTQUERY:
01937 case OP_NOTMINQUERY:
01938 case OP_NOTPOSQUERY:
01939 ADD_ACTIVE(state_offset + dlen + 1, 0);
01940 if (clen > 0)
01941 {
01942 unsigned int otherd = NOTACHAR;
01943 if ((ims & PCRE_CASELESS) != 0)
01944 {
01945 #ifdef SUPPORT_UTF8
01946 if (utf8 && d >= 128)
01947 {
01948 #ifdef SUPPORT_UCP
01949 otherd = UCD_OTHERCASE(d);
01950 #endif
01951 }
01952 else
01953 #endif
01954 otherd = fcc[d];
01955 }
01956 if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
01957 {
01958 if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
01959 {
01960 active_count--;
01961 next_active_state--;
01962 }
01963 ADD_NEW(state_offset + dlen + 1, 0);
01964 }
01965 }
01966 break;
01967
01968
01969 case OP_STAR:
01970 case OP_MINSTAR:
01971 case OP_POSSTAR:
01972 case OP_NOTSTAR:
01973 case OP_NOTMINSTAR:
01974 case OP_NOTPOSSTAR:
01975 ADD_ACTIVE(state_offset + dlen + 1, 0);
01976 if (clen > 0)
01977 {
01978 unsigned int otherd = NOTACHAR;
01979 if ((ims & PCRE_CASELESS) != 0)
01980 {
01981 #ifdef SUPPORT_UTF8
01982 if (utf8 && d >= 128)
01983 {
01984 #ifdef SUPPORT_UCP
01985 otherd = UCD_OTHERCASE(d);
01986 #endif
01987 }
01988 else
01989 #endif
01990 otherd = fcc[d];
01991 }
01992 if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
01993 {
01994 if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
01995 {
01996 active_count--;
01997 next_active_state--;
01998 }
01999 ADD_NEW(state_offset, 0);
02000 }
02001 }
02002 break;
02003
02004
02005 case OP_EXACT:
02006 case OP_NOTEXACT:
02007 count = current_state->count;
02008 if (clen > 0)
02009 {
02010 unsigned int otherd = NOTACHAR;
02011 if ((ims & PCRE_CASELESS) != 0)
02012 {
02013 #ifdef SUPPORT_UTF8
02014 if (utf8 && d >= 128)
02015 {
02016 #ifdef SUPPORT_UCP
02017 otherd = UCD_OTHERCASE(d);
02018 #endif
02019 }
02020 else
02021 #endif
02022 otherd = fcc[d];
02023 }
02024 if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
02025 {
02026 if (++count >= GET2(code, 1))
02027 { ADD_NEW(state_offset + dlen + 3, 0); }
02028 else
02029 { ADD_NEW(state_offset, count); }
02030 }
02031 }
02032 break;
02033
02034
02035 case OP_UPTO:
02036 case OP_MINUPTO:
02037 case OP_POSUPTO:
02038 case OP_NOTUPTO:
02039 case OP_NOTMINUPTO:
02040 case OP_NOTPOSUPTO:
02041 ADD_ACTIVE(state_offset + dlen + 3, 0);
02042 count = current_state->count;
02043 if (clen > 0)
02044 {
02045 unsigned int otherd = NOTACHAR;
02046 if ((ims & PCRE_CASELESS) != 0)
02047 {
02048 #ifdef SUPPORT_UTF8
02049 if (utf8 && d >= 128)
02050 {
02051 #ifdef SUPPORT_UCP
02052 otherd = UCD_OTHERCASE(d);
02053 #endif
02054 }
02055 else
02056 #endif
02057 otherd = fcc[d];
02058 }
02059 if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
02060 {
02061 if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
02062 {
02063 active_count--;
02064 next_active_state--;
02065 }
02066 if (++count >= GET2(code, 1))
02067 { ADD_NEW(state_offset + dlen + 3, 0); }
02068 else
02069 { ADD_NEW(state_offset, count); }
02070 }
02071 }
02072 break;
02073
02074
02075
02076
02077
02078 case OP_CLASS:
02079 case OP_NCLASS:
02080 case OP_XCLASS:
02081 {
02082 BOOL isinclass = FALSE;
02083 int next_state_offset;
02084 const uschar *ecode;
02085
02086
02087
02088
02089 if (codevalue != OP_XCLASS)
02090 {
02091 ecode = code + 33;
02092 if (clen > 0)
02093 {
02094 isinclass = (c > 255)? (codevalue == OP_NCLASS) :
02095 ((code[1 + c/8] & (1 << (c&7))) != 0);
02096 }
02097 }
02098
02099
02100
02101
02102
02103 else
02104 {
02105 ecode = code + GET(code, 1);
02106 if (clen > 0) isinclass = _pcre_xclass(c, code + 1 + LINK_SIZE);
02107 }
02108
02109
02110
02111
02112
02113 next_state_offset = ecode - start_code;
02114
02115 switch (*ecode)
02116 {
02117 case OP_CRSTAR:
02118 case OP_CRMINSTAR:
02119 ADD_ACTIVE(next_state_offset + 1, 0);
02120 if (isinclass) { ADD_NEW(state_offset, 0); }
02121 break;
02122
02123 case OP_CRPLUS:
02124 case OP_CRMINPLUS:
02125 count = current_state->count;
02126 if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
02127 if (isinclass) { count++; ADD_NEW(state_offset, count); }
02128 break;
02129
02130 case OP_CRQUERY:
02131 case OP_CRMINQUERY:
02132 ADD_ACTIVE(next_state_offset + 1, 0);
02133 if (isinclass) { ADD_NEW(next_state_offset + 1, 0); }
02134 break;
02135
02136 case OP_CRRANGE:
02137 case OP_CRMINRANGE:
02138 count = current_state->count;
02139 if (count >= GET2(ecode, 1))
02140 { ADD_ACTIVE(next_state_offset + 5, 0); }
02141 if (isinclass)
02142 {
02143 int max = GET2(ecode, 3);
02144 if (++count >= max && max != 0)
02145 { ADD_NEW(next_state_offset + 5, 0); }
02146 else
02147 { ADD_NEW(state_offset, count); }
02148 }
02149 break;
02150
02151 default:
02152 if (isinclass) { ADD_NEW(next_state_offset, 0); }
02153 break;
02154 }
02155 }
02156 break;
02157
02158
02159
02160
02161
02162
02163
02164 case OP_FAIL:
02165 break;
02166
02167 case OP_ASSERT:
02168 case OP_ASSERT_NOT:
02169 case OP_ASSERTBACK:
02170 case OP_ASSERTBACK_NOT:
02171 {
02172 int rc;
02173 int local_offsets[2];
02174 int local_workspace[1000];
02175 const uschar *endasscode = code + GET(code, 1);
02176
02177 while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
02178
02179 rc = internal_dfa_exec(
02180 md,
02181 code,
02182 ptr,
02183 ptr - start_subject,
02184 local_offsets,
02185 sizeof(local_offsets)/sizeof(int),
02186 local_workspace,
02187 sizeof(local_workspace)/sizeof(int),
02188 ims,
02189 rlevel,
02190 recursing);
02191
02192 if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
02193 { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }
02194 }
02195 break;
02196
02197
02198 case OP_COND:
02199 case OP_SCOND:
02200 {
02201 int local_offsets[1000];
02202 int local_workspace[1000];
02203 int condcode = code[LINK_SIZE+1];
02204
02205
02206
02207 if (condcode == OP_CREF) return PCRE_ERROR_DFA_UCOND;
02208
02209
02210
02211 if (condcode == OP_DEF)
02212 {
02213 ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0);
02214 }
02215
02216
02217
02218
02219
02220 else if (condcode == OP_RREF)
02221 {
02222 int value = GET2(code, LINK_SIZE+2);
02223 if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
02224 if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
02225 else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }
02226 }
02227
02228
02229
02230 else
02231 {
02232 int rc;
02233 const uschar *asscode = code + LINK_SIZE + 1;
02234 const uschar *endasscode = asscode + GET(asscode, 1);
02235
02236 while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
02237
02238 rc = internal_dfa_exec(
02239 md,
02240 asscode,
02241 ptr,
02242 ptr - start_subject,
02243 local_offsets,
02244 sizeof(local_offsets)/sizeof(int),
02245 local_workspace,
02246 sizeof(local_workspace)/sizeof(int),
02247 ims,
02248 rlevel,
02249 recursing);
02250
02251 if ((rc >= 0) ==
02252 (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
02253 { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }
02254 else
02255 { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }
02256 }
02257 }
02258 break;
02259
02260
02261 case OP_RECURSE:
02262 {
02263 int local_offsets[1000];
02264 int local_workspace[1000];
02265 int rc;
02266
02267 DPRINTF(("%.*sStarting regex recursion %d\n", rlevel*2-2, SP,
02268 recursing + 1));
02269
02270 rc = internal_dfa_exec(
02271 md,
02272 start_code + GET(code, 1),
02273 ptr,
02274 ptr - start_subject,
02275 local_offsets,
02276 sizeof(local_offsets)/sizeof(int),
02277 local_workspace,
02278 sizeof(local_workspace)/sizeof(int),
02279 ims,
02280 rlevel,
02281 recursing + 1);
02282
02283 DPRINTF(("%.*sReturn from regex recursion %d: rc=%d\n", rlevel*2-2, SP,
02284 recursing + 1, rc));
02285
02286
02287
02288 if (rc == 0) return PCRE_ERROR_DFA_RECURSE;
02289
02290
02291
02292
02293
02294 if (rc > 0)
02295 {
02296 for (rc = rc*2 - 2; rc >= 0; rc -= 2)
02297 {
02298 const uschar *p = start_subject + local_offsets[rc];
02299 const uschar *pp = start_subject + local_offsets[rc+1];
02300 int charcount = local_offsets[rc+1] - local_offsets[rc];
02301 while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
02302 if (charcount > 0)
02303 {
02304 ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
02305 }
02306 else
02307 {
02308 ADD_ACTIVE(state_offset + LINK_SIZE + 1, 0);
02309 }
02310 }
02311 }
02312 else if (rc != PCRE_ERROR_NOMATCH) return rc;
02313 }
02314 break;
02315
02316
02317 case OP_ONCE:
02318 {
02319 int local_offsets[2];
02320 int local_workspace[1000];
02321
02322 int rc = internal_dfa_exec(
02323 md,
02324 code,
02325 ptr,
02326 ptr - start_subject,
02327 local_offsets,
02328 sizeof(local_offsets)/sizeof(int),
02329 local_workspace,
02330 sizeof(local_workspace)/sizeof(int),
02331 ims,
02332 rlevel,
02333 recursing);
02334
02335 if (rc >= 0)
02336 {
02337 const uschar *end_subpattern = code;
02338 int charcount = local_offsets[1] - local_offsets[0];
02339 int next_state_offset, repeat_state_offset;
02340
02341 do { end_subpattern += GET(end_subpattern, 1); }
02342 while (*end_subpattern == OP_ALT);
02343 next_state_offset = end_subpattern - start_code + LINK_SIZE + 1;
02344
02345
02346
02347
02348
02349 repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
02350 *end_subpattern == OP_KETRMIN)?
02351 end_subpattern - start_code - GET(end_subpattern, 1) : -1;
02352
02353
02354
02355
02356
02357
02358 if (charcount == 0)
02359 {
02360 ADD_ACTIVE(next_state_offset, 0);
02361 }
02362
02363
02364
02365
02366
02367
02368 else if (i + 1 >= active_count && new_count == 0)
02369 {
02370 ptr += charcount;
02371 clen = 0;
02372 ADD_NEW(next_state_offset, 0);
02373
02374
02375
02376
02377
02378
02379 if (repeat_state_offset >= 0)
02380 {
02381 next_active_state = active_states;
02382 active_count = 0;
02383 i = -1;
02384 ADD_ACTIVE(repeat_state_offset, 0);
02385 }
02386 }
02387 else
02388 {
02389 const uschar *p = start_subject + local_offsets[0];
02390 const uschar *pp = start_subject + local_offsets[1];
02391 while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
02392 ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
02393 if (repeat_state_offset >= 0)
02394 { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
02395 }
02396
02397 }
02398 else if (rc != PCRE_ERROR_NOMATCH) return rc;
02399 }
02400 break;
02401
02402
02403
02404
02405
02406 case OP_CALLOUT:
02407 if (pcre_callout != NULL)
02408 {
02409 int rrc;
02410 pcre_callout_block cb;
02411 cb.version = 1;
02412 cb.callout_number = code[1];
02413 cb.offset_vector = offsets;
02414 cb.subject = (PCRE_SPTR)start_subject;
02415 cb.subject_length = end_subject - start_subject;
02416 cb.start_match = current_subject - start_subject;
02417 cb.current_position = ptr - start_subject;
02418 cb.pattern_position = GET(code, 2);
02419 cb.next_item_length = GET(code, 2 + LINK_SIZE);
02420 cb.capture_top = 1;
02421 cb.capture_last = -1;
02422 cb.callout_data = md->callout_data;
02423 if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;
02424 if (rrc == 0) { ADD_ACTIVE(state_offset + 2 + 2*LINK_SIZE, 0); }
02425 }
02426 break;
02427
02428
02429
02430 default:
02431 return PCRE_ERROR_DFA_UITEM;
02432 }
02433
02434 NEXT_ACTIVE_STATE: continue;
02435
02436 }
02437
02438
02439
02440
02441
02442
02443 if (new_count <= 0)
02444 {
02445 if (match_count < 0 &&
02446 rlevel == 1 &&
02447 (md->moptions & PCRE_PARTIAL) != 0 &&
02448 ptr >= end_subject &&
02449 ptr > current_subject)
02450 {
02451 if (offsetcount >= 2)
02452 {
02453 offsets[0] = current_subject - start_subject;
02454 offsets[1] = end_subject - start_subject;
02455 }
02456 match_count = PCRE_ERROR_PARTIAL;
02457 }
02458
02459 DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
02460 "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
02461 rlevel*2-2, SP));
02462 break;
02463 }
02464
02465
02466
02467 ptr += clen;
02468 }
02469
02470
02471
02472
02473
02474
02475
02476 return match_count;
02477 }
02478
02479
02480
02481
02482
02483
02484
02485
02486
02487
02488
02489
02490
02491
02492
02493
02494
02495
02496
02497
02498
02499
02500
02501
02502
02503
02504
02505
02506
02507
02508 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
02509 pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
02510 const char *subject, int length, int start_offset, int options, int *offsets,
02511 int offsetcount, int *workspace, int wscount)
02512 {
02513 real_pcre *re = (real_pcre *)argument_re;
02514 dfa_match_data match_block;
02515 dfa_match_data *md = &match_block;
02516 BOOL utf8, anchored, startline, firstline;
02517 const uschar *current_subject, *end_subject, *lcc;
02518
02519 pcre_study_data internal_study;
02520 const pcre_study_data *study = NULL;
02521 real_pcre internal_re;
02522
02523 const uschar *req_byte_ptr;
02524 const uschar *start_bits = NULL;
02525 BOOL first_byte_caseless = FALSE;
02526 BOOL req_byte_caseless = FALSE;
02527 int first_byte = -1;
02528 int req_byte = -1;
02529 int req_byte2 = -1;
02530 int newline;
02531
02532
02533
02534 if ((options & ~PUBLIC_DFA_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
02535 if (re == NULL || subject == NULL || workspace == NULL ||
02536 (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
02537 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
02538 if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
02539
02540
02541
02542
02543
02544
02545 md->tables = re->tables;
02546 md->callout_data = NULL;
02547
02548 if (extra_data != NULL)
02549 {
02550 unsigned int flags = extra_data->flags;
02551 if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
02552 study = (const pcre_study_data *)extra_data->study_data;
02553 if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
02554 if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
02555 return PCRE_ERROR_DFA_UMLIMIT;
02556 if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
02557 md->callout_data = extra_data->callout_data;
02558 if ((flags & PCRE_EXTRA_TABLES) != 0)
02559 md->tables = extra_data->tables;
02560 }
02561
02562
02563
02564
02565
02566
02567 if (re->magic_number != MAGIC_NUMBER)
02568 {
02569 re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
02570 if (re == NULL) return PCRE_ERROR_BADMAGIC;
02571 if (study != NULL) study = &internal_study;
02572 }
02573
02574
02575
02576 current_subject = (const unsigned char *)subject + start_offset;
02577 end_subject = (const unsigned char *)subject + length;
02578 req_byte_ptr = current_subject - 1;
02579
02580 #ifdef SUPPORT_UTF8
02581 utf8 = (re->options & PCRE_UTF8) != 0;
02582 #else
02583 utf8 = FALSE;
02584 #endif
02585
02586 anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
02587 (re->options & PCRE_ANCHORED) != 0;
02588
02589
02590
02591 md->start_code = (const uschar *)argument_re +
02592 re->name_table_offset + re->name_count * re->name_entry_size;
02593 md->start_subject = (const unsigned char *)subject;
02594 md->end_subject = end_subject;
02595 md->moptions = options;
02596 md->poptions = re->options;
02597
02598
02599
02600
02601 if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
02602 {
02603 if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
02604 md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
02605 #ifdef BSR_ANYCRLF
02606 else md->moptions |= PCRE_BSR_ANYCRLF;
02607 #endif
02608 }
02609
02610
02611
02612
02613 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
02614 PCRE_NEWLINE_BITS)
02615 {
02616 case 0: newline = NEWLINE; break;
02617 case PCRE_NEWLINE_CR: newline = '\r'; break;
02618 case PCRE_NEWLINE_LF: newline = '\n'; break;
02619 case PCRE_NEWLINE_CR+
02620 PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
02621 case PCRE_NEWLINE_ANY: newline = -1; break;
02622 case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
02623 default: return PCRE_ERROR_BADNEWLINE;
02624 }
02625
02626 if (newline == -2)
02627 {
02628 md->nltype = NLTYPE_ANYCRLF;
02629 }
02630 else if (newline < 0)
02631 {
02632 md->nltype = NLTYPE_ANY;
02633 }
02634 else
02635 {
02636 md->nltype = NLTYPE_FIXED;
02637 if (newline > 255)
02638 {
02639 md->nllen = 2;
02640 md->nl[0] = (newline >> 8) & 255;
02641 md->nl[1] = newline & 255;
02642 }
02643 else
02644 {
02645 md->nllen = 1;
02646 md->nl[0] = newline;
02647 }
02648 }
02649
02650
02651
02652
02653 #ifdef SUPPORT_UTF8
02654 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
02655 {
02656 if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
02657 return PCRE_ERROR_BADUTF8;
02658 if (start_offset > 0 && start_offset < length)
02659 {
02660 int tb = ((uschar *)subject)[start_offset];
02661 if (tb > 127)
02662 {
02663 tb &= 0xc0;
02664 if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
02665 }
02666 }
02667 }
02668 #endif
02669
02670
02671
02672
02673
02674 if (md->tables == NULL) md->tables = _pcre_default_tables;
02675
02676
02677
02678
02679 lcc = md->tables + lcc_offset;
02680 startline = (re->flags & PCRE_STARTLINE) != 0;
02681 firstline = (re->options & PCRE_FIRSTLINE) != 0;
02682
02683
02684
02685
02686
02687
02688
02689 if (!anchored)
02690 {
02691 if ((re->flags & PCRE_FIRSTSET) != 0)
02692 {
02693 first_byte = re->first_byte & 255;
02694 if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
02695 first_byte = lcc[first_byte];
02696 }
02697 else
02698 {
02699 if (startline && study != NULL &&
02700 (study->options & PCRE_STUDY_MAPPED) != 0)
02701 start_bits = study->start_bits;
02702 }
02703 }
02704
02705
02706
02707
02708 if ((re->flags & PCRE_REQCHSET) != 0)
02709 {
02710 req_byte = re->req_byte & 255;
02711 req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
02712 req_byte2 = (md->tables + fcc_offset)[req_byte];
02713 }
02714
02715
02716
02717
02718
02719
02720 for (;;)
02721 {
02722 int rc;
02723
02724 if ((options & PCRE_DFA_RESTART) == 0)
02725 {
02726 const uschar *save_end_subject = end_subject;
02727
02728
02729
02730
02731
02732
02733
02734 if (firstline)
02735 {
02736 USPTR t = current_subject;
02737 #ifdef SUPPORT_UTF8
02738 if (utf8)
02739 {
02740 while (t < md->end_subject && !IS_NEWLINE(t))
02741 {
02742 t++;
02743 while (t < end_subject && (*t & 0xc0) == 0x80) t++;
02744 }
02745 }
02746 else
02747 #endif
02748 while (t < md->end_subject && !IS_NEWLINE(t)) t++;
02749 end_subject = t;
02750 }
02751
02752 if (first_byte >= 0)
02753 {
02754 if (first_byte_caseless)
02755 while (current_subject < end_subject &&
02756 lcc[*current_subject] != first_byte)
02757 current_subject++;
02758 else
02759 while (current_subject < end_subject && *current_subject != first_byte)
02760 current_subject++;
02761 }
02762
02763
02764
02765 else if (startline)
02766 {
02767 if (current_subject > md->start_subject + start_offset)
02768 {
02769 #ifdef SUPPORT_UTF8
02770 if (utf8)
02771 {
02772 while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
02773 {
02774 current_subject++;
02775 while(current_subject < end_subject &&
02776 (*current_subject & 0xc0) == 0x80)
02777 current_subject++;
02778 }
02779 }
02780 else
02781 #endif
02782 while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
02783 current_subject++;
02784
02785
02786
02787
02788
02789 if (current_subject[-1] == '\r' &&
02790 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
02791 current_subject < end_subject &&
02792 *current_subject == '\n')
02793 current_subject++;
02794 }
02795 }
02796
02797
02798
02799 else if (start_bits != NULL)
02800 {
02801 while (current_subject < end_subject)
02802 {
02803 register unsigned int c = *current_subject;
02804 if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;
02805 else break;
02806 }
02807 }
02808
02809
02810
02811 end_subject = save_end_subject;
02812 }
02813
02814
02815
02816
02817
02818
02819
02820
02821
02822
02823
02824
02825
02826
02827
02828
02829
02830 if (req_byte >= 0 &&
02831 end_subject - current_subject < REQ_BYTE_MAX &&
02832 (options & PCRE_PARTIAL) == 0)
02833 {
02834 register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);
02835
02836
02837
02838
02839 if (p > req_byte_ptr)
02840 {
02841 if (req_byte_caseless)
02842 {
02843 while (p < end_subject)
02844 {
02845 register int pp = *p++;
02846 if (pp == req_byte || pp == req_byte2) { p--; break; }
02847 }
02848 }
02849 else
02850 {
02851 while (p < end_subject)
02852 {
02853 if (*p++ == req_byte) { p--; break; }
02854 }
02855 }
02856
02857
02858
02859
02860 if (p >= end_subject) break;
02861
02862
02863
02864
02865
02866 req_byte_ptr = p;
02867 }
02868 }
02869
02870
02871
02872 rc = internal_dfa_exec(
02873 md,
02874 md->start_code,
02875 current_subject,
02876 start_offset,
02877 offsets,
02878 offsetcount,
02879 workspace,
02880 wscount,
02881 re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL),
02882 0,
02883 0);
02884
02885
02886
02887
02888 if (rc != PCRE_ERROR_NOMATCH || anchored) return rc;
02889
02890
02891
02892
02893 if (firstline && IS_NEWLINE(current_subject)) break;
02894 current_subject++;
02895 if (utf8)
02896 {
02897 while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)
02898 current_subject++;
02899 }
02900 if (current_subject > end_subject) break;
02901
02902
02903
02904
02905
02906 if (current_subject[-1] == '\r' &&
02907 current_subject < end_subject &&
02908 *current_subject == '\n' &&
02909 (re->flags & PCRE_HASCRORLF) == 0 &&
02910 (md->nltype == NLTYPE_ANY ||
02911 md->nltype == NLTYPE_ANYCRLF ||
02912 md->nllen == 2))
02913 current_subject++;
02914
02915 }
02916
02917 return PCRE_ERROR_NOMATCH;
02918 }
02919
02920