00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045 #ifdef HAVE_CONFIG_H
00046 #include "config.h"
00047 #endif
00048
00049 #define NLBLOCK md
00050 #define PSSTART start_subject
00051 #define PSEND end_subject
00052
00053 #include "pcre_internal.h"
00054
00055
00056
00057 #undef min
00058 #undef max
00059
00060
00061
/* Bits passed in the "flags" argument of match(). match_condassert is set
when an assertion is being run as the condition of a conditional group, in
which case the assertion returns as soon as its outcome is known.
match_cbegroup makes match() push a new eptrblock on entry, recording the
subject position at the start of the group. */

#define match_condassert     0x01
#define match_cbegroup       0x02

/* The two ordinary (non-error) outcomes of match(). */

#define MATCH_MATCH          1
#define MATCH_NOMATCH        0

/* Internal results returned by the OP_COMMIT, OP_PRUNE, OP_SKIP and OP_THEN
handlers when the rest of the pattern fails to match after the verb. */

#define MATCH_COMMIT         (-999)
#define MATCH_PRUNE          (-998)
#define MATCH_SKIP           (-997)
#define MATCH_THEN           (-996)

/* Number of ints in the per-call "stacksave" array. A recursion saves the
offset vector there when it fits, and otherwise obtains memory via
pcre_malloc. */

#define REC_STACK_SAVE_MAX   30
00084
00085
00086
/* Parallel six-entry tables of minimum and maximum repeat counts.
NOTE(review): presumably indexed by (repeat opcode - first repeat opcode);
the use sites are outside this part of the file - confirm there, including
how a zero maximum is interpreted. */

static const char rep_min[] = {
  0, 0,
  1, 1,
  0, 0
};

static const char rep_max[] = {
  0, 0,
  0, 0,
  1, 1
};
00089
00090
00091
00092 #ifdef DEBUG
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109 static void
00110 pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
00111 {
00112 unsigned int c;
00113 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
00114 while (length-- > 0)
00115 if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
00116 }
00117 #endif
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138 static BOOL
00139 match_ref(int offset, register USPTR eptr, int length, match_data *md,
00140 unsigned long int ims)
00141 {
00142 USPTR p = md->start_subject + md->offset_vector[offset];
00143
00144 #ifdef DEBUG
00145 if (eptr >= md->end_subject)
00146 printf("matching subject <null>");
00147 else
00148 {
00149 printf("matching subject ");
00150 pchars(eptr, length, TRUE, md);
00151 }
00152 printf(" against backref ");
00153 pchars(p, length, FALSE, md);
00154 printf("\n");
00155 #endif
00156
00157
00158
00159 if (length > md->end_subject - eptr) return FALSE;
00160
00161
00162
00163
00164
00165 if ((ims & PCRE_CASELESS) != 0)
00166 {
00167 #ifdef SUPPORT_UTF8
00168 #ifdef SUPPORT_UCP
00169 if (md->utf8)
00170 {
00171 USPTR endptr = eptr + length;
00172 while (eptr < endptr)
00173 {
00174 int c, d;
00175 GETCHARINC(c, eptr);
00176 GETCHARINC(d, p);
00177 if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
00178 }
00179 }
00180 else
00181 #endif
00182 #endif
00183
00184
00185
00186
00187 while (length-- > 0)
00188 { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
00189 }
00190
00191
00192
00193
00194 else
00195 { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
00196
00197 return TRUE;
00198 }
00199
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238
00239
00240
00241
00242
/* Identifiers for the individual RMATCH() call sites, numbered 1 to 54. In
the heap-frame build each value is stored in the frame's Xwhere field and is
also pasted into the name of the label (L_RMnn) placed after the call. */

enum {
  RM1  = 1,  RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
  RM11 = 11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
  RM21 = 21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
  RM31 = 31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
  RM41 = 41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
  RM51 = 51, RM52, RM53, RM54
};
00249
00250
00251
00252
00253
00254 #ifndef NO_RECURSE
/* Build that uses ordinary C recursion: the pointer arguments of match()
may be declared "register". */

#define REGISTER register

/* RMATCH() performs a recursive call of match(), passing on the caller's
mstart and rdepth+1 and leaving the result in rrc; its last argument (the
call-site identifier) is not used in this build. RRETURN() returns from
match(). When DEBUG is defined both macros also trace their activity on
stdout; note that the tracing form of RRETURN() evaluates its argument
twice. */

#ifndef DEBUG

#define RRETURN(ra) return ra
#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
  rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)

#else

#define RRETURN(ra) \
  { \
  printf("match() returned %d from line %d ", ra, __LINE__); \
  return ra; \
  }
#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
  { \
  printf("match() called in line %d\n", __LINE__); \
  rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
  printf("to line %d\n", __LINE__); \
  }

#endif
00274
00275 #else
00276
00277
00278
00279
00280
00281
/* Build that keeps match() frames on the heap: arguments live in the frame,
so "register" is not applicable. */

#define REGISTER

/* RMATCH() simulates a recursive call of match(). It obtains a new frame
from pcre_stack_malloc, records the call-site identifier (rw) in the current
frame, copies the "arguments" into the new frame, links it to the current
one, and jumps to HEAP_RECURSE. The label L_<rw> placed after the jump marks
the point at which execution continues once the simulated call has returned.
The rd argument (the match data pointer) is not stored in the frame.

If the frame cannot be allocated, the current invocation gives up with
PCRE_ERROR_NOMEMORY via RRETURN() instead of dereferencing a NULL pointer;
RRETURN() is expanded at the point of use, so its definition following this
one is not a problem. */

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
  {\
  heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
  if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
  frame->Xwhere = rw; \
  newframe->Xeptr = ra;\
  newframe->Xecode = rb;\
  newframe->Xmstart = mstart;\
  newframe->Xoffset_top = rc;\
  newframe->Xims = re;\
  newframe->Xeptrb = rf;\
  newframe->Xflags = rg;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  newframe->Xprevframe = frame;\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }
00303
/* RRETURN() simulates a return from match() in the heap-frame build. The
current frame is unlinked and handed back to pcre_stack_free. If that was
the outermost frame, the value is returned to the real caller of match();
otherwise the value is placed in rrc and control moves to HEAP_RETURN so
that the previous frame can resume. The argument is evaluated only after
the frame has been released. */

#define RRETURN(ra)\
  {\
  heapframe *finished = frame;\
  frame = finished->Xprevframe;\
  (pcre_stack_free)(finished);\
  if (frame == NULL) return ra;\
  rrc = ra;\
  goto HEAP_RETURN;\
  }
00316
00317
00318
00319
00320 typedef struct heapframe {
00321 struct heapframe *Xprevframe;
00322
00323
00324
00325 const uschar *Xeptr;
00326 const uschar *Xecode;
00327 const uschar *Xmstart;
00328 int Xoffset_top;
00329 long int Xims;
00330 eptrblock *Xeptrb;
00331 int Xflags;
00332 unsigned int Xrdepth;
00333
00334
00335
00336 const uschar *Xcallpat;
00337 const uschar *Xcharptr;
00338 const uschar *Xdata;
00339 const uschar *Xnext;
00340 const uschar *Xpp;
00341 const uschar *Xprev;
00342 const uschar *Xsaved_eptr;
00343
00344 recursion_info Xnew_recursive;
00345
00346 BOOL Xcur_is_word;
00347 BOOL Xcondition;
00348 BOOL Xprev_is_word;
00349
00350 unsigned long int Xoriginal_ims;
00351
00352 #ifdef SUPPORT_UCP
00353 int Xprop_type;
00354 int Xprop_value;
00355 int Xprop_fail_result;
00356 int Xprop_category;
00357 int Xprop_chartype;
00358 int Xprop_script;
00359 int Xoclength;
00360 uschar Xocchars[8];
00361 #endif
00362
00363 int Xctype;
00364 unsigned int Xfc;
00365 int Xfi;
00366 int Xlength;
00367 int Xmax;
00368 int Xmin;
00369 int Xnumber;
00370 int Xoffset;
00371 int Xop;
00372 int Xsave_capture_last;
00373 int Xsave_offset1, Xsave_offset2, Xsave_offset3;
00374 int Xstacksave[REC_STACK_SAVE_MAX];
00375
00376 eptrblock Xnewptrb;
00377
00378
00379
00380 int Xwhere;
00381
00382 } heapframe;
00383
00384 #endif
00385
00386
00387
00388
00389
00390
00391
00392
00393
00394
00395
00396
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410
00411
00412
00413
00414
00415
00416
00417
00418
00419
00420
00421
00422
00423
00424
00425
00426
00427 static int
00428 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,
00429 int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
00430 int flags, unsigned int rdepth)
00431 {
00432
00433
00434
00435
00436 register int rrc;
00437 register int i;
00438 register unsigned int c;
00439 register BOOL utf8;
00440
00441 BOOL minimize, possessive;
00442
00443
00444
00445
00446
00447
00448 #ifdef NO_RECURSE
00449 heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
00450 frame->Xprevframe = NULL;
00451
00452
00453
00454 frame->Xeptr = eptr;
00455 frame->Xecode = ecode;
00456 frame->Xmstart = mstart;
00457 frame->Xoffset_top = offset_top;
00458 frame->Xims = ims;
00459 frame->Xeptrb = eptrb;
00460 frame->Xflags = flags;
00461 frame->Xrdepth = rdepth;
00462
00463
00464
00465 HEAP_RECURSE:
00466
00467
00468
00469 #define eptr frame->Xeptr
00470 #define ecode frame->Xecode
00471 #define mstart frame->Xmstart
00472 #define offset_top frame->Xoffset_top
00473 #define ims frame->Xims
00474 #define eptrb frame->Xeptrb
00475 #define flags frame->Xflags
00476 #define rdepth frame->Xrdepth
00477
00478
00479
00480 #ifdef SUPPORT_UTF8
00481 #define charptr frame->Xcharptr
00482 #endif
00483 #define callpat frame->Xcallpat
00484 #define data frame->Xdata
00485 #define next frame->Xnext
00486 #define pp frame->Xpp
00487 #define prev frame->Xprev
00488 #define saved_eptr frame->Xsaved_eptr
00489
00490 #define new_recursive frame->Xnew_recursive
00491
00492 #define cur_is_word frame->Xcur_is_word
00493 #define condition frame->Xcondition
00494 #define prev_is_word frame->Xprev_is_word
00495
00496 #define original_ims frame->Xoriginal_ims
00497
00498 #ifdef SUPPORT_UCP
00499 #define prop_type frame->Xprop_type
00500 #define prop_value frame->Xprop_value
00501 #define prop_fail_result frame->Xprop_fail_result
00502 #define prop_category frame->Xprop_category
00503 #define prop_chartype frame->Xprop_chartype
00504 #define prop_script frame->Xprop_script
00505 #define oclength frame->Xoclength
00506 #define occhars frame->Xocchars
00507 #endif
00508
00509 #define ctype frame->Xctype
00510 #define fc frame->Xfc
00511 #define fi frame->Xfi
00512 #define length frame->Xlength
00513 #define max frame->Xmax
00514 #define min frame->Xmin
00515 #define number frame->Xnumber
00516 #define offset frame->Xoffset
00517 #define op frame->Xop
00518 #define save_capture_last frame->Xsave_capture_last
00519 #define save_offset1 frame->Xsave_offset1
00520 #define save_offset2 frame->Xsave_offset2
00521 #define save_offset3 frame->Xsave_offset3
00522 #define stacksave frame->Xstacksave
00523
00524 #define newptrb frame->Xnewptrb
00525
00526
00527
00528
00529
00530 #else
00531 #define fi i
00532 #define fc c
00533
00534
00535 #ifdef SUPPORT_UTF8
00536 const uschar *charptr;
00537 #endif
00538 const uschar *callpat;
00539 const uschar *data;
00540 const uschar *next;
00541 USPTR pp;
00542 const uschar *prev;
00543 USPTR saved_eptr;
00544
00545 recursion_info new_recursive;
00546
00547 BOOL cur_is_word;
00548 BOOL condition;
00549 BOOL prev_is_word;
00550
00551 unsigned long int original_ims;
00552
00553 #ifdef SUPPORT_UCP
00554 int prop_type;
00555 int prop_value;
00556 int prop_fail_result;
00557 int prop_category;
00558 int prop_chartype;
00559 int prop_script;
00560 int oclength;
00561 uschar occhars[8];
00562 #endif
00563
00564 int ctype;
00565 int length;
00566 int max;
00567 int min;
00568 int number;
00569 int offset;
00570 int op;
00571 int save_capture_last;
00572 int save_offset1, save_offset2, save_offset3;
00573 int stacksave[REC_STACK_SAVE_MAX];
00574
00575 eptrblock newptrb;
00576 #endif
00577
00578
00579
00580
00581 #ifdef SUPPORT_UCP
00582 prop_value = 0;
00583 prop_fail_result = 0;
00584 #endif
00585
00586
00587
00588
00589
00590
00591
00592 TAIL_RECURSE:
00593
00594
00595
00596
00597
00598
00599
00600
00601
00602 #ifdef SUPPORT_UTF8
00603 utf8 = md->utf8;
00604 #else
00605 utf8 = FALSE;
00606 #endif
00607
00608
00609
00610
00611 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
00612 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
00613
00614 original_ims = ims;
00615
00616
00617
00618
00619
00620
00621
00622
00623
00624
00625 if ((flags & match_cbegroup) != 0)
00626 {
00627 newptrb.epb_saved_eptr = eptr;
00628 newptrb.epb_prev = eptrb;
00629 eptrb = &newptrb;
00630 }
00631
00632
00633
00634 for (;;)
00635 {
00636 minimize = possessive = FALSE;
00637 op = *ecode;
00638
00639
00640
00641
00642 if (md->partial &&
00643 eptr >= md->end_subject &&
00644 eptr > mstart)
00645 md->hitend = TRUE;
00646
00647 switch(op)
00648 {
00649 case OP_FAIL:
00650 RRETURN(MATCH_NOMATCH);
00651
00652 case OP_PRUNE:
00653 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
00654 ims, eptrb, flags, RM51);
00655 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
00656 RRETURN(MATCH_PRUNE);
00657
00658 case OP_COMMIT:
00659 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
00660 ims, eptrb, flags, RM52);
00661 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
00662 RRETURN(MATCH_COMMIT);
00663
00664 case OP_SKIP:
00665 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
00666 ims, eptrb, flags, RM53);
00667 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
00668 md->start_match_ptr = eptr;
00669 RRETURN(MATCH_SKIP);
00670
00671 case OP_THEN:
00672 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
00673 ims, eptrb, flags, RM54);
00674 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
00675 RRETURN(MATCH_THEN);
00676
00677
00678
00679
00680
00681
00682
00683
00684
00685
00686
00687
00688
00689
00690
00691 case OP_CBRA:
00692 case OP_SCBRA:
00693 number = GET2(ecode, 1+LINK_SIZE);
00694 offset = number << 1;
00695
00696 #ifdef DEBUG
00697 printf("start bracket %d\n", number);
00698 printf("subject=");
00699 pchars(eptr, 16, TRUE, md);
00700 printf("\n");
00701 #endif
00702
00703 if (offset < md->offset_max)
00704 {
00705 save_offset1 = md->offset_vector[offset];
00706 save_offset2 = md->offset_vector[offset+1];
00707 save_offset3 = md->offset_vector[md->offset_end - number];
00708 save_capture_last = md->capture_last;
00709
00710 DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
00711 md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
00712
00713 flags = (op == OP_SCBRA)? match_cbegroup : 0;
00714 do
00715 {
00716 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
00717 ims, eptrb, flags, RM1);
00718 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
00719 md->capture_last = save_capture_last;
00720 ecode += GET(ecode, 1);
00721 }
00722 while (*ecode == OP_ALT);
00723
00724 DPRINTF(("bracket %d failed\n", number));
00725
00726 md->offset_vector[offset] = save_offset1;
00727 md->offset_vector[offset+1] = save_offset2;
00728 md->offset_vector[md->offset_end - number] = save_offset3;
00729
00730 RRETURN(MATCH_NOMATCH);
00731 }
00732
00733
00734
00735
00736
00737
00738
00739 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
00740
00741
00742
00743
00744
00745
00746
00747
00748
00749
00750 case OP_BRA:
00751 case OP_SBRA:
00752 DPRINTF(("start non-capturing bracket\n"));
00753 flags = (op >= OP_SBRA)? match_cbegroup : 0;
00754 for (;;)
00755 {
00756 if (ecode[GET(ecode, 1)] != OP_ALT)
00757 {
00758 if (flags == 0)
00759 {
00760 ecode += _pcre_OP_lengths[*ecode];
00761 DPRINTF(("bracket 0 tail recursion\n"));
00762 goto TAIL_RECURSE;
00763 }
00764
00765
00766
00767 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
00768 eptrb, flags, RM48);
00769 RRETURN(rrc);
00770 }
00771
00772
00773
00774
00775 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
00776 eptrb, flags, RM2);
00777 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
00778 ecode += GET(ecode, 1);
00779 }
00780
00781
00782
00783
00784
00785
00786
00787
00788 case OP_COND:
00789 case OP_SCOND:
00790 if (ecode[LINK_SIZE+1] == OP_RREF)
00791 {
00792 offset = GET2(ecode, LINK_SIZE + 2);
00793 condition = md->recursive != NULL &&
00794 (offset == RREF_ANY || offset == md->recursive->group_num);
00795 ecode += condition? 3 : GET(ecode, 1);
00796 }
00797
00798 else if (ecode[LINK_SIZE+1] == OP_CREF)
00799 {
00800 offset = GET2(ecode, LINK_SIZE+2) << 1;
00801 condition = offset < offset_top && md->offset_vector[offset] >= 0;
00802 ecode += condition? 3 : GET(ecode, 1);
00803 }
00804
00805 else if (ecode[LINK_SIZE+1] == OP_DEF)
00806 {
00807 condition = FALSE;
00808 ecode += GET(ecode, 1);
00809 }
00810
00811
00812
00813
00814
00815 else
00816 {
00817 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
00818 match_condassert, RM3);
00819 if (rrc == MATCH_MATCH)
00820 {
00821 condition = TRUE;
00822 ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
00823 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
00824 }
00825 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
00826 {
00827 RRETURN(rrc);
00828 }
00829 else
00830 {
00831 condition = FALSE;
00832 ecode += GET(ecode, 1);
00833 }
00834 }
00835
00836
00837
00838
00839
00840
00841 if (condition || *ecode == OP_ALT)
00842 {
00843 ecode += 1 + LINK_SIZE;
00844 if (op == OP_SCOND)
00845 {
00846 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
00847 RRETURN(rrc);
00848 }
00849 else
00850 {
00851 flags = 0;
00852 goto TAIL_RECURSE;
00853 }
00854 }
00855 else
00856 {
00857 ecode += 1 + LINK_SIZE;
00858 }
00859 break;
00860
00861
00862
00863
00864
00865
00866 case OP_ACCEPT:
00867 case OP_END:
00868 if (md->recursive != NULL && md->recursive->group_num == 0)
00869 {
00870 recursion_info *rec = md->recursive;
00871 DPRINTF(("End of pattern in a (?0) recursion\n"));
00872 md->recursive = rec->prevrec;
00873 memmove(md->offset_vector, rec->offset_save,
00874 rec->saved_max * sizeof(int));
00875 mstart = rec->save_start;
00876 ims = original_ims;
00877 ecode = rec->after_call;
00878 break;
00879 }
00880
00881
00882
00883
00884 if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
00885 md->end_match_ptr = eptr;
00886 md->end_offset_top = offset_top;
00887 md->start_match_ptr = mstart;
00888 RRETURN(MATCH_MATCH);
00889
00890
00891
00892 case OP_OPT:
00893 ims = ecode[1];
00894 ecode += 2;
00895 DPRINTF(("ims set to %02lx\n", ims));
00896 break;
00897
00898
00899
00900
00901
00902
00903
00904 case OP_ASSERT:
00905 case OP_ASSERTBACK:
00906 do
00907 {
00908 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
00909 RM4);
00910 if (rrc == MATCH_MATCH) break;
00911 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
00912 ecode += GET(ecode, 1);
00913 }
00914 while (*ecode == OP_ALT);
00915 if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
00916
00917
00918
00919 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
00920
00921
00922
00923
00924 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
00925 ecode += 1 + LINK_SIZE;
00926 offset_top = md->end_offset_top;
00927 continue;
00928
00929
00930
00931 case OP_ASSERT_NOT:
00932 case OP_ASSERTBACK_NOT:
00933 do
00934 {
00935 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
00936 RM5);
00937 if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
00938 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
00939 ecode += GET(ecode,1);
00940 }
00941 while (*ecode == OP_ALT);
00942
00943 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
00944
00945 ecode += 1 + LINK_SIZE;
00946 continue;
00947
00948
00949
00950
00951
00952
00953 case OP_REVERSE:
00954 #ifdef SUPPORT_UTF8
00955 if (utf8)
00956 {
00957 i = GET(ecode, 1);
00958 while (i-- > 0)
00959 {
00960 eptr--;
00961 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
00962 BACKCHAR(eptr);
00963 }
00964 }
00965 else
00966 #endif
00967
00968
00969
00970 {
00971 eptr -= GET(ecode, 1);
00972 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
00973 }
00974
00975
00976
00977 ecode += 1 + LINK_SIZE;
00978 break;
00979
00980
00981
00982
00983
00984 case OP_CALLOUT:
00985 if (pcre_callout != NULL)
00986 {
00987 pcre_callout_block cb;
00988 cb.version = 1;
00989 cb.callout_number = ecode[1];
00990 cb.offset_vector = md->offset_vector;
00991 cb.subject = (PCRE_SPTR)md->start_subject;
00992 cb.subject_length = md->end_subject - md->start_subject;
00993 cb.start_match = mstart - md->start_subject;
00994 cb.current_position = eptr - md->start_subject;
00995 cb.pattern_position = GET(ecode, 2);
00996 cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
00997 cb.capture_top = offset_top/2;
00998 cb.capture_last = md->capture_last;
00999 cb.callout_data = md->callout_data;
01000 if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
01001 if (rrc < 0) RRETURN(rrc);
01002 }
01003 ecode += 2 + 2*LINK_SIZE;
01004 break;
01005
01006
01007
01008
01009
01010
01011
01012
01013
01014
01015
01016
01017
01018
01019
01020
01021
01022
01023
01024
01025 case OP_RECURSE:
01026 {
01027 callpat = md->start_code + GET(ecode, 1);
01028 new_recursive.group_num = (callpat == md->start_code)? 0 :
01029 GET2(callpat, 1 + LINK_SIZE);
01030
01031
01032
01033 new_recursive.prevrec = md->recursive;
01034 md->recursive = &new_recursive;
01035
01036
01037
01038 ecode += 1 + LINK_SIZE;
01039 new_recursive.after_call = ecode;
01040
01041
01042
01043 new_recursive.saved_max = md->offset_end;
01044 if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
01045 new_recursive.offset_save = stacksave;
01046 else
01047 {
01048 new_recursive.offset_save =
01049 (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
01050 if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
01051 }
01052
01053 memcpy(new_recursive.offset_save, md->offset_vector,
01054 new_recursive.saved_max * sizeof(int));
01055 new_recursive.save_start = mstart;
01056 mstart = eptr;
01057
01058
01059
01060
01061 DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
01062 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
01063 do
01064 {
01065 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
01066 md, ims, eptrb, flags, RM6);
01067 if (rrc == MATCH_MATCH)
01068 {
01069 DPRINTF(("Recursion matched\n"));
01070 md->recursive = new_recursive.prevrec;
01071 if (new_recursive.offset_save != stacksave)
01072 (pcre_free)(new_recursive.offset_save);
01073 RRETURN(MATCH_MATCH);
01074 }
01075 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
01076 {
01077 DPRINTF(("Recursion gave error %d\n", rrc));
01078 RRETURN(rrc);
01079 }
01080
01081 md->recursive = &new_recursive;
01082 memcpy(md->offset_vector, new_recursive.offset_save,
01083 new_recursive.saved_max * sizeof(int));
01084 callpat += GET(callpat, 1);
01085 }
01086 while (*callpat == OP_ALT);
01087
01088 DPRINTF(("Recursion didn't match\n"));
01089 md->recursive = new_recursive.prevrec;
01090 if (new_recursive.offset_save != stacksave)
01091 (pcre_free)(new_recursive.offset_save);
01092 RRETURN(MATCH_NOMATCH);
01093 }
01094
01095
01096
01097
01098
01099
01100
01101
01102
01103 case OP_ONCE:
01104 prev = ecode;
01105 saved_eptr = eptr;
01106
01107 do
01108 {
01109 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
01110 if (rrc == MATCH_MATCH) break;
01111 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
01112 ecode += GET(ecode,1);
01113 }
01114 while (*ecode == OP_ALT);
01115
01116
01117
01118 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
01119
01120
01121
01122
01123 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
01124
01125 offset_top = md->end_offset_top;
01126 eptr = md->end_match_ptr;
01127
01128
01129
01130
01131
01132
01133
01134 if (*ecode == OP_KET || eptr == saved_eptr)
01135 {
01136 ecode += 1+LINK_SIZE;
01137 break;
01138 }
01139
01140
01141
01142
01143
01144
01145
01146 if (ecode[1+LINK_SIZE] == OP_OPT)
01147 {
01148 ims = (ims & ~PCRE_IMS) | ecode[4];
01149 DPRINTF(("ims set to %02lx at group repeat\n", ims));
01150 }
01151
01152 if (*ecode == OP_KETRMIN)
01153 {
01154 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
01155 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01156 ecode = prev;
01157 flags = 0;
01158 goto TAIL_RECURSE;
01159 }
01160 else
01161 {
01162 RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
01163 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01164 ecode += 1 + LINK_SIZE;
01165 flags = 0;
01166 goto TAIL_RECURSE;
01167 }
01168
01169
01170
01171
01172
01173 case OP_ALT:
01174 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
01175 break;
01176
01177
01178
01179
01180
01181
01182
01183 case OP_BRAZERO:
01184 {
01185 next = ecode+1;
01186 RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
01187 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01188 do next += GET(next,1); while (*next == OP_ALT);
01189 ecode = next + 1 + LINK_SIZE;
01190 }
01191 break;
01192
01193 case OP_BRAMINZERO:
01194 {
01195 next = ecode+1;
01196 do next += GET(next, 1); while (*next == OP_ALT);
01197 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
01198 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01199 ecode++;
01200 }
01201 break;
01202
01203 case OP_SKIPZERO:
01204 {
01205 next = ecode+1;
01206 do next += GET(next,1); while (*next == OP_ALT);
01207 ecode = next + 1 + LINK_SIZE;
01208 }
01209 break;
01210
01211
01212
01213 case OP_KET:
01214 case OP_KETRMIN:
01215 case OP_KETRMAX:
01216 prev = ecode - GET(ecode, 1);
01217
01218
01219
01220
01221
01222 if (*prev >= OP_SBRA)
01223 {
01224 saved_eptr = eptrb->epb_saved_eptr;
01225 eptrb = eptrb->epb_prev;
01226 }
01227 else saved_eptr = NULL;
01228
01229
01230
01231
01232
01233 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
01234 *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
01235 *prev == OP_ONCE)
01236 {
01237 md->end_match_ptr = eptr;
01238 md->end_offset_top = offset_top;
01239 RRETURN(MATCH_MATCH);
01240 }
01241
01242
01243
01244
01245
01246
01247
01248 if (*prev == OP_CBRA || *prev == OP_SCBRA)
01249 {
01250 number = GET2(prev, 1+LINK_SIZE);
01251 offset = number << 1;
01252
01253 #ifdef DEBUG
01254 printf("end bracket %d", number);
01255 printf("\n");
01256 #endif
01257
01258 md->capture_last = number;
01259 if (offset >= md->offset_max) md->offset_overflow = TRUE; else
01260 {
01261 md->offset_vector[offset] =
01262 md->offset_vector[md->offset_end - number];
01263 md->offset_vector[offset+1] = eptr - md->start_subject;
01264 if (offset_top <= offset) offset_top = offset + 2;
01265 }
01266
01267
01268
01269
01270 if (md->recursive != NULL && md->recursive->group_num == number)
01271 {
01272 recursion_info *rec = md->recursive;
01273 DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
01274 md->recursive = rec->prevrec;
01275 mstart = rec->save_start;
01276 memcpy(md->offset_vector, rec->offset_save,
01277 rec->saved_max * sizeof(int));
01278 ecode = rec->after_call;
01279 ims = original_ims;
01280 break;
01281 }
01282 }
01283
01284
01285
01286
01287 ims = original_ims;
01288 DPRINTF(("ims reset to %02lx\n", ims));
01289
01290
01291
01292
01293
01294
01295
01296 if (*ecode == OP_KET || eptr == saved_eptr)
01297 {
01298 ecode += 1 + LINK_SIZE;
01299 break;
01300 }
01301
01302
01303
01304
01305
01306
01307 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
01308
01309 if (*ecode == OP_KETRMIN)
01310 {
01311 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
01312 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01313 if (flags != 0)
01314 {
01315 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
01316 RRETURN(rrc);
01317 }
01318 ecode = prev;
01319 goto TAIL_RECURSE;
01320 }
01321 else
01322 {
01323 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
01324 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01325 ecode += 1 + LINK_SIZE;
01326 flags = 0;
01327 goto TAIL_RECURSE;
01328 }
01329
01330
01331
01332
01333 case OP_CIRC:
01334 if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
01335 if ((ims & PCRE_MULTILINE) != 0)
01336 {
01337 if (eptr != md->start_subject &&
01338 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
01339 RRETURN(MATCH_NOMATCH);
01340 ecode++;
01341 break;
01342 }
01343
01344
01345
01346
01347 case OP_SOD:
01348 if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
01349 ecode++;
01350 break;
01351
01352
01353
01354 case OP_SOM:
01355 if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
01356 ecode++;
01357 break;
01358
01359
01360
01361 case OP_SET_SOM:
01362 mstart = eptr;
01363 ecode++;
01364 break;
01365
01366
01367
01368
01369 case OP_DOLL:
01370 if ((ims & PCRE_MULTILINE) != 0)
01371 {
01372 if (eptr < md->end_subject)
01373 { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
01374 else
01375 { if (md->noteol) RRETURN(MATCH_NOMATCH); }
01376 ecode++;
01377 break;
01378 }
01379 else
01380 {
01381 if (md->noteol) RRETURN(MATCH_NOMATCH);
01382 if (!md->endonly)
01383 {
01384 if (eptr != md->end_subject &&
01385 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
01386 RRETURN(MATCH_NOMATCH);
01387 ecode++;
01388 break;
01389 }
01390 }
01391
01392
01393
01394
01395 case OP_EOD:
01396 if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
01397 ecode++;
01398 break;
01399
01400
01401
01402 case OP_EODN:
01403 if (eptr != md->end_subject &&
01404 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
01405 RRETURN(MATCH_NOMATCH);
01406 ecode++;
01407 break;
01408
01409
01410
01411 case OP_NOT_WORD_BOUNDARY:
01412 case OP_WORD_BOUNDARY:
01413 {
01414
01415
01416
01417
01418
01419 #ifdef SUPPORT_UTF8
01420 if (utf8)
01421 {
01422 if (eptr == md->start_subject) prev_is_word = FALSE; else
01423 {
01424 const uschar *lastptr = eptr - 1;
01425 while((*lastptr & 0xc0) == 0x80) lastptr--;
01426 GETCHAR(c, lastptr);
01427 prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
01428 }
01429 if (eptr >= md->end_subject) cur_is_word = FALSE; else
01430 {
01431 GETCHAR(c, eptr);
01432 cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
01433 }
01434 }
01435 else
01436 #endif
01437
01438
01439
01440 {
01441 prev_is_word = (eptr != md->start_subject) &&
01442 ((md->ctypes[eptr[-1]] & ctype_word) != 0);
01443 cur_is_word = (eptr < md->end_subject) &&
01444 ((md->ctypes[*eptr] & ctype_word) != 0);
01445 }
01446
01447
01448
01449 if ((*ecode++ == OP_WORD_BOUNDARY)?
01450 cur_is_word == prev_is_word : cur_is_word != prev_is_word)
01451 RRETURN(MATCH_NOMATCH);
01452 }
01453 break;
01454
01455
01456
01457 case OP_ANY:
01458 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
01459
01460
01461 case OP_ALLANY:
01462 if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
01463 if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
01464 ecode++;
01465 break;
01466
01467
01468
01469
01470 case OP_ANYBYTE:
01471 if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
01472 ecode++;
01473 break;
01474
01475 case OP_NOT_DIGIT:
01476 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01477 GETCHARINCTEST(c, eptr);
01478 if (
01479 #ifdef SUPPORT_UTF8
01480 c < 256 &&
01481 #endif
01482 (md->ctypes[c] & ctype_digit) != 0
01483 )
01484 RRETURN(MATCH_NOMATCH);
01485 ecode++;
01486 break;
01487
01488 case OP_DIGIT:
01489 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01490 GETCHARINCTEST(c, eptr);
01491 if (
01492 #ifdef SUPPORT_UTF8
01493 c >= 256 ||
01494 #endif
01495 (md->ctypes[c] & ctype_digit) == 0
01496 )
01497 RRETURN(MATCH_NOMATCH);
01498 ecode++;
01499 break;
01500
01501 case OP_NOT_WHITESPACE:
01502 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01503 GETCHARINCTEST(c, eptr);
01504 if (
01505 #ifdef SUPPORT_UTF8
01506 c < 256 &&
01507 #endif
01508 (md->ctypes[c] & ctype_space) != 0
01509 )
01510 RRETURN(MATCH_NOMATCH);
01511 ecode++;
01512 break;
01513
01514 case OP_WHITESPACE:
01515 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01516 GETCHARINCTEST(c, eptr);
01517 if (
01518 #ifdef SUPPORT_UTF8
01519 c >= 256 ||
01520 #endif
01521 (md->ctypes[c] & ctype_space) == 0
01522 )
01523 RRETURN(MATCH_NOMATCH);
01524 ecode++;
01525 break;
01526
01527 case OP_NOT_WORDCHAR:
01528 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01529 GETCHARINCTEST(c, eptr);
01530 if (
01531 #ifdef SUPPORT_UTF8
01532 c < 256 &&
01533 #endif
01534 (md->ctypes[c] & ctype_word) != 0
01535 )
01536 RRETURN(MATCH_NOMATCH);
01537 ecode++;
01538 break;
01539
01540 case OP_WORDCHAR:
01541 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01542 GETCHARINCTEST(c, eptr);
01543 if (
01544 #ifdef SUPPORT_UTF8
01545 c >= 256 ||
01546 #endif
01547 (md->ctypes[c] & ctype_word) == 0
01548 )
01549 RRETURN(MATCH_NOMATCH);
01550 ecode++;
01551 break;
01552
01553 case OP_ANYNL:
01554 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01555 GETCHARINCTEST(c, eptr);
01556 switch(c)
01557 {
01558 default: RRETURN(MATCH_NOMATCH);
01559 case 0x000d:
01560 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
01561 break;
01562
01563 case 0x000a:
01564 break;
01565
01566 case 0x000b:
01567 case 0x000c:
01568 case 0x0085:
01569 case 0x2028:
01570 case 0x2029:
01571 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
01572 break;
01573 }
01574 ecode++;
01575 break;
01576
01577 case OP_NOT_HSPACE:
01578 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01579 GETCHARINCTEST(c, eptr);
01580 switch(c)
01581 {
01582 default: break;
01583 case 0x09:
01584 case 0x20:
01585 case 0xa0:
01586 case 0x1680:
01587 case 0x180e:
01588 case 0x2000:
01589 case 0x2001:
01590 case 0x2002:
01591 case 0x2003:
01592 case 0x2004:
01593 case 0x2005:
01594 case 0x2006:
01595 case 0x2007:
01596 case 0x2008:
01597 case 0x2009:
01598 case 0x200A:
01599 case 0x202f:
01600 case 0x205f:
01601 case 0x3000:
01602 RRETURN(MATCH_NOMATCH);
01603 }
01604 ecode++;
01605 break;
01606
01607 case OP_HSPACE:
01608 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01609 GETCHARINCTEST(c, eptr);
01610 switch(c)
01611 {
01612 default: RRETURN(MATCH_NOMATCH);
01613 case 0x09:
01614 case 0x20:
01615 case 0xa0:
01616 case 0x1680:
01617 case 0x180e:
01618 case 0x2000:
01619 case 0x2001:
01620 case 0x2002:
01621 case 0x2003:
01622 case 0x2004:
01623 case 0x2005:
01624 case 0x2006:
01625 case 0x2007:
01626 case 0x2008:
01627 case 0x2009:
01628 case 0x200A:
01629 case 0x202f:
01630 case 0x205f:
01631 case 0x3000:
01632 break;
01633 }
01634 ecode++;
01635 break;
01636
01637 case OP_NOT_VSPACE:
01638 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01639 GETCHARINCTEST(c, eptr);
01640 switch(c)
01641 {
01642 default: break;
01643 case 0x0a:
01644 case 0x0b:
01645 case 0x0c:
01646 case 0x0d:
01647 case 0x85:
01648 case 0x2028:
01649 case 0x2029:
01650 RRETURN(MATCH_NOMATCH);
01651 }
01652 ecode++;
01653 break;
01654
01655 case OP_VSPACE:
01656 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01657 GETCHARINCTEST(c, eptr);
01658 switch(c)
01659 {
01660 default: RRETURN(MATCH_NOMATCH);
01661 case 0x0a:
01662 case 0x0b:
01663 case 0x0c:
01664 case 0x0d:
01665 case 0x85:
01666 case 0x2028:
01667 case 0x2029:
01668 break;
01669 }
01670 ecode++;
01671 break;
01672
01673 #ifdef SUPPORT_UCP
01674
01675
01676
01677 case OP_PROP:
01678 case OP_NOTPROP:
01679 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01680 GETCHARINCTEST(c, eptr);
01681 {
01682 const ucd_record * prop = GET_UCD(c);
01683
01684 switch(ecode[1])
01685 {
01686 case PT_ANY:
01687 if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
01688 break;
01689
01690 case PT_LAMP:
01691 if ((prop->chartype == ucp_Lu ||
01692 prop->chartype == ucp_Ll ||
01693 prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
01694 RRETURN(MATCH_NOMATCH);
01695 break;
01696
01697 case PT_GC:
01698 if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
01699 RRETURN(MATCH_NOMATCH);
01700 break;
01701
01702 case PT_PC:
01703 if ((ecode[2] != prop->chartype) == (op == OP_PROP))
01704 RRETURN(MATCH_NOMATCH);
01705 break;
01706
01707 case PT_SC:
01708 if ((ecode[2] != prop->script) == (op == OP_PROP))
01709 RRETURN(MATCH_NOMATCH);
01710 break;
01711
01712 default:
01713 RRETURN(PCRE_ERROR_INTERNAL);
01714 }
01715
01716 ecode += 3;
01717 }
01718 break;
01719
01720
01721
01722
01723 case OP_EXTUNI:
01724 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01725 GETCHARINCTEST(c, eptr);
01726 {
01727 int category = UCD_CATEGORY(c);
01728 if (category == ucp_M) RRETURN(MATCH_NOMATCH);
01729 while (eptr < md->end_subject)
01730 {
01731 int len = 1;
01732 if (!utf8) c = *eptr; else
01733 {
01734 GETCHARLEN(c, eptr, len);
01735 }
01736 category = UCD_CATEGORY(c);
01737 if (category != ucp_M) break;
01738 eptr += len;
01739 }
01740 }
01741 ecode++;
01742 break;
01743 #endif
01744
01745
01746
01747
01748
01749
01750
01751
01752
01753
01754 case OP_REF:
01755 {
01756 offset = GET2(ecode, 1) << 1;
01757 ecode += 3;
01758
01759
01760
01761
01762
01763
01764
01765
01766
01767
01768
01769
01770
01771
01772 if (offset >= offset_top || md->offset_vector[offset] < 0)
01773 length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
01774 else
01775 length = md->offset_vector[offset+1] - md->offset_vector[offset];
01776
01777
01778
01779 switch (*ecode)
01780 {
01781 case OP_CRSTAR:
01782 case OP_CRMINSTAR:
01783 case OP_CRPLUS:
01784 case OP_CRMINPLUS:
01785 case OP_CRQUERY:
01786 case OP_CRMINQUERY:
01787 c = *ecode++ - OP_CRSTAR;
01788 minimize = (c & 1) != 0;
01789 min = rep_min[c];
01790 max = rep_max[c];
01791 if (max == 0) max = INT_MAX;
01792 break;
01793
01794 case OP_CRRANGE:
01795 case OP_CRMINRANGE:
01796 minimize = (*ecode == OP_CRMINRANGE);
01797 min = GET2(ecode, 1);
01798 max = GET2(ecode, 3);
01799 if (max == 0) max = INT_MAX;
01800 ecode += 5;
01801 break;
01802
01803 default:
01804 if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
01805 eptr += length;
01806 continue;
01807 }
01808
01809
01810
01811
01812 if (length == 0) continue;
01813
01814
01815
01816
01817
01818 for (i = 1; i <= min; i++)
01819 {
01820 if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
01821 eptr += length;
01822 }
01823
01824
01825
01826
01827 if (min == max) continue;
01828
01829
01830
01831 if (minimize)
01832 {
01833 for (fi = min;; fi++)
01834 {
01835 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
01836 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01837 if (fi >= max || !match_ref(offset, eptr, length, md, ims))
01838 RRETURN(MATCH_NOMATCH);
01839 eptr += length;
01840 }
01841
01842 }
01843
01844
01845
01846 else
01847 {
01848 pp = eptr;
01849 for (i = min; i < max; i++)
01850 {
01851 if (!match_ref(offset, eptr, length, md, ims)) break;
01852 eptr += length;
01853 }
01854 while (eptr >= pp)
01855 {
01856 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
01857 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01858 eptr -= length;
01859 }
01860 RRETURN(MATCH_NOMATCH);
01861 }
01862 }
01863
01864
01865
01866
01867
01868
01869
01870
01871
01872
01873
01874
01875
01876
01877
01878 case OP_NCLASS:
01879 case OP_CLASS:
01880 {
01881 data = ecode + 1;
01882 ecode += 33;
01883
01884 switch (*ecode)
01885 {
01886 case OP_CRSTAR:
01887 case OP_CRMINSTAR:
01888 case OP_CRPLUS:
01889 case OP_CRMINPLUS:
01890 case OP_CRQUERY:
01891 case OP_CRMINQUERY:
01892 c = *ecode++ - OP_CRSTAR;
01893 minimize = (c & 1) != 0;
01894 min = rep_min[c];
01895 max = rep_max[c];
01896 if (max == 0) max = INT_MAX;
01897 break;
01898
01899 case OP_CRRANGE:
01900 case OP_CRMINRANGE:
01901 minimize = (*ecode == OP_CRMINRANGE);
01902 min = GET2(ecode, 1);
01903 max = GET2(ecode, 3);
01904 if (max == 0) max = INT_MAX;
01905 ecode += 5;
01906 break;
01907
01908 default:
01909 min = max = 1;
01910 break;
01911 }
01912
01913
01914
01915 #ifdef SUPPORT_UTF8
01916
01917 if (utf8)
01918 {
01919 for (i = 1; i <= min; i++)
01920 {
01921 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01922 GETCHARINC(c, eptr);
01923 if (c > 255)
01924 {
01925 if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
01926 }
01927 else
01928 {
01929 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
01930 }
01931 }
01932 }
01933 else
01934 #endif
01935
01936 {
01937 for (i = 1; i <= min; i++)
01938 {
01939 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01940 c = *eptr++;
01941 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
01942 }
01943 }
01944
01945
01946
01947
01948 if (min == max) continue;
01949
01950
01951
01952
01953 if (minimize)
01954 {
01955 #ifdef SUPPORT_UTF8
01956
01957 if (utf8)
01958 {
01959 for (fi = min;; fi++)
01960 {
01961 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
01962 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01963 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01964 GETCHARINC(c, eptr);
01965 if (c > 255)
01966 {
01967 if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
01968 }
01969 else
01970 {
01971 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
01972 }
01973 }
01974 }
01975 else
01976 #endif
01977
01978 {
01979 for (fi = min;; fi++)
01980 {
01981 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
01982 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
01983 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
01984 c = *eptr++;
01985 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
01986 }
01987 }
01988
01989 }
01990
01991
01992
01993 else
01994 {
01995 pp = eptr;
01996
01997 #ifdef SUPPORT_UTF8
01998
01999 if (utf8)
02000 {
02001 for (i = min; i < max; i++)
02002 {
02003 int len = 1;
02004 if (eptr >= md->end_subject) break;
02005 GETCHARLEN(c, eptr, len);
02006 if (c > 255)
02007 {
02008 if (op == OP_CLASS) break;
02009 }
02010 else
02011 {
02012 if ((data[c/8] & (1 << (c&7))) == 0) break;
02013 }
02014 eptr += len;
02015 }
02016 for (;;)
02017 {
02018 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
02019 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02020 if (eptr-- == pp) break;
02021 BACKCHAR(eptr);
02022 }
02023 }
02024 else
02025 #endif
02026
02027 {
02028 for (i = min; i < max; i++)
02029 {
02030 if (eptr >= md->end_subject) break;
02031 c = *eptr;
02032 if ((data[c/8] & (1 << (c&7))) == 0) break;
02033 eptr++;
02034 }
02035 while (eptr >= pp)
02036 {
02037 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
02038 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02039 eptr--;
02040 }
02041 }
02042
02043 RRETURN(MATCH_NOMATCH);
02044 }
02045 }
02046
02047
02048
02049
02050
02051
02052 #ifdef SUPPORT_UTF8
02053 case OP_XCLASS:
02054 {
02055 data = ecode + 1 + LINK_SIZE;
02056 ecode += GET(ecode, 1);
02057
02058 switch (*ecode)
02059 {
02060 case OP_CRSTAR:
02061 case OP_CRMINSTAR:
02062 case OP_CRPLUS:
02063 case OP_CRMINPLUS:
02064 case OP_CRQUERY:
02065 case OP_CRMINQUERY:
02066 c = *ecode++ - OP_CRSTAR;
02067 minimize = (c & 1) != 0;
02068 min = rep_min[c];
02069 max = rep_max[c];
02070 if (max == 0) max = INT_MAX;
02071 break;
02072
02073 case OP_CRRANGE:
02074 case OP_CRMINRANGE:
02075 minimize = (*ecode == OP_CRMINRANGE);
02076 min = GET2(ecode, 1);
02077 max = GET2(ecode, 3);
02078 if (max == 0) max = INT_MAX;
02079 ecode += 5;
02080 break;
02081
02082 default:
02083 min = max = 1;
02084 break;
02085 }
02086
02087
02088
02089 for (i = 1; i <= min; i++)
02090 {
02091 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
02092 GETCHARINC(c, eptr);
02093 if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
02094 }
02095
02096
02097
02098
02099 if (min == max) continue;
02100
02101
02102
02103
02104 if (minimize)
02105 {
02106 for (fi = min;; fi++)
02107 {
02108 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
02109 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02110 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
02111 GETCHARINC(c, eptr);
02112 if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
02113 }
02114
02115 }
02116
02117
02118
02119 else
02120 {
02121 pp = eptr;
02122 for (i = min; i < max; i++)
02123 {
02124 int len = 1;
02125 if (eptr >= md->end_subject) break;
02126 GETCHARLEN(c, eptr, len);
02127 if (!_pcre_xclass(c, data)) break;
02128 eptr += len;
02129 }
02130 for(;;)
02131 {
02132 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
02133 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02134 if (eptr-- == pp) break;
02135 if (utf8) BACKCHAR(eptr);
02136 }
02137 RRETURN(MATCH_NOMATCH);
02138 }
02139
02140
02141 }
02142 #endif
02143
02144
02145
02146 case OP_CHAR:
02147 #ifdef SUPPORT_UTF8
02148 if (utf8)
02149 {
02150 length = 1;
02151 ecode++;
02152 GETCHARLEN(fc, ecode, length);
02153 if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
02154 while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
02155 }
02156 else
02157 #endif
02158
02159
02160 {
02161 if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
02162 if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
02163 ecode += 2;
02164 }
02165 break;
02166
02167
02168
02169 case OP_CHARNC:
02170 #ifdef SUPPORT_UTF8
02171 if (utf8)
02172 {
02173 length = 1;
02174 ecode++;
02175 GETCHARLEN(fc, ecode, length);
02176
02177 if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
02178
02179
02180
02181
02182 if (fc < 128)
02183 {
02184 if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
02185 }
02186
02187
02188
02189 else
02190 {
02191 unsigned int dc;
02192 GETCHARINC(dc, eptr);
02193 ecode += length;
02194
02195
02196
02197
02198 if (fc != dc)
02199 {
02200 #ifdef SUPPORT_UCP
02201 if (dc != UCD_OTHERCASE(fc))
02202 #endif
02203 RRETURN(MATCH_NOMATCH);
02204 }
02205 }
02206 }
02207 else
02208 #endif
02209
02210
02211 {
02212 if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
02213 if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
02214 ecode += 2;
02215 }
02216 break;
02217
02218
02219
02220 case OP_EXACT:
02221 min = max = GET2(ecode, 1);
02222 ecode += 3;
02223 goto REPEATCHAR;
02224
02225 case OP_POSUPTO:
02226 possessive = TRUE;
02227
02228
02229 case OP_UPTO:
02230 case OP_MINUPTO:
02231 min = 0;
02232 max = GET2(ecode, 1);
02233 minimize = *ecode == OP_MINUPTO;
02234 ecode += 3;
02235 goto REPEATCHAR;
02236
02237 case OP_POSSTAR:
02238 possessive = TRUE;
02239 min = 0;
02240 max = INT_MAX;
02241 ecode++;
02242 goto REPEATCHAR;
02243
02244 case OP_POSPLUS:
02245 possessive = TRUE;
02246 min = 1;
02247 max = INT_MAX;
02248 ecode++;
02249 goto REPEATCHAR;
02250
02251 case OP_POSQUERY:
02252 possessive = TRUE;
02253 min = 0;
02254 max = 1;
02255 ecode++;
02256 goto REPEATCHAR;
02257
02258 case OP_STAR:
02259 case OP_MINSTAR:
02260 case OP_PLUS:
02261 case OP_MINPLUS:
02262 case OP_QUERY:
02263 case OP_MINQUERY:
02264 c = *ecode++ - OP_STAR;
02265 minimize = (c & 1) != 0;
02266 min = rep_min[c];
02267 max = rep_max[c];
02268 if (max == 0) max = INT_MAX;
02269
02270
02271
02272
02273
02274 REPEATCHAR:
02275 #ifdef SUPPORT_UTF8
02276 if (utf8)
02277 {
02278 length = 1;
02279 charptr = ecode;
02280 GETCHARLEN(fc, ecode, length);
02281 if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
02282 ecode += length;
02283
02284
02285
02286
02287 if (length > 1)
02288 {
02289 #ifdef SUPPORT_UCP
02290 unsigned int othercase;
02291 if ((ims & PCRE_CASELESS) != 0 &&
02292 (othercase = UCD_OTHERCASE(fc)) != fc)
02293 oclength = _pcre_ord2utf8(othercase, occhars);
02294 else oclength = 0;
02295 #endif
02296
02297 for (i = 1; i <= min; i++)
02298 {
02299 if (memcmp(eptr, charptr, length) == 0) eptr += length;
02300 #ifdef SUPPORT_UCP
02301
02302 else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
02303 else
02304 {
02305 if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
02306 eptr += oclength;
02307 }
02308 #else
02309 else { RRETURN(MATCH_NOMATCH); }
02310 #endif
02311 }
02312
02313 if (min == max) continue;
02314
02315 if (minimize)
02316 {
02317 for (fi = min;; fi++)
02318 {
02319 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
02320 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02321 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
02322 if (memcmp(eptr, charptr, length) == 0) eptr += length;
02323 #ifdef SUPPORT_UCP
02324
02325 else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
02326 else
02327 {
02328 if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
02329 eptr += oclength;
02330 }
02331 #else
02332 else { RRETURN (MATCH_NOMATCH); }
02333 #endif
02334 }
02335
02336 }
02337
02338 else
02339 {
02340 pp = eptr;
02341 for (i = min; i < max; i++)
02342 {
02343 if (eptr > md->end_subject - length) break;
02344 if (memcmp(eptr, charptr, length) == 0) eptr += length;
02345 #ifdef SUPPORT_UCP
02346 else if (oclength == 0) break;
02347 else
02348 {
02349 if (memcmp(eptr, occhars, oclength) != 0) break;
02350 eptr += oclength;
02351 }
02352 #else
02353 else break;
02354 #endif
02355 }
02356
02357 if (possessive) continue;
02358 for(;;)
02359 {
02360 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
02361 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02362 if (eptr == pp) RRETURN(MATCH_NOMATCH);
02363 #ifdef SUPPORT_UCP
02364 eptr--;
02365 BACKCHAR(eptr);
02366 #else
02367 eptr -= length;
02368 #endif
02369 }
02370 }
02371
02372 }
02373
02374
02375
02376
02377 }
02378 else
02379 #endif
02380
02381
02382 {
02383 if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
02384 fc = *ecode++;
02385 }
02386
02387
02388
02389
02390
02391
02392
02393
02394
02395
02396 DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
02397 max, eptr));
02398
02399 if ((ims & PCRE_CASELESS) != 0)
02400 {
02401 fc = md->lcc[fc];
02402 for (i = 1; i <= min; i++)
02403 if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
02404 if (min == max) continue;
02405 if (minimize)
02406 {
02407 for (fi = min;; fi++)
02408 {
02409 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
02410 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02411 if (fi >= max || eptr >= md->end_subject ||
02412 fc != md->lcc[*eptr++])
02413 RRETURN(MATCH_NOMATCH);
02414 }
02415
02416 }
02417 else
02418 {
02419 pp = eptr;
02420 for (i = min; i < max; i++)
02421 {
02422 if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
02423 eptr++;
02424 }
02425 if (possessive) continue;
02426 while (eptr >= pp)
02427 {
02428 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
02429 eptr--;
02430 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02431 }
02432 RRETURN(MATCH_NOMATCH);
02433 }
02434
02435 }
02436
02437
02438
02439 else
02440 {
02441 for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
02442 if (min == max) continue;
02443 if (minimize)
02444 {
02445 for (fi = min;; fi++)
02446 {
02447 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
02448 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02449 if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
02450 RRETURN(MATCH_NOMATCH);
02451 }
02452
02453 }
02454 else
02455 {
02456 pp = eptr;
02457 for (i = min; i < max; i++)
02458 {
02459 if (eptr >= md->end_subject || fc != *eptr) break;
02460 eptr++;
02461 }
02462 if (possessive) continue;
02463 while (eptr >= pp)
02464 {
02465 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
02466 eptr--;
02467 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02468 }
02469 RRETURN(MATCH_NOMATCH);
02470 }
02471 }
02472
02473
02474
02475
02476
02477 case OP_NOT:
02478 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
02479 ecode++;
02480 GETCHARINCTEST(c, eptr);
02481 if ((ims & PCRE_CASELESS) != 0)
02482 {
02483 #ifdef SUPPORT_UTF8
02484 if (c < 256)
02485 #endif
02486 c = md->lcc[c];
02487 if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
02488 }
02489 else
02490 {
02491 if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
02492 }
02493 break;
02494
02495
02496
02497
02498
02499
02500
02501
02502 case OP_NOTEXACT:
02503 min = max = GET2(ecode, 1);
02504 ecode += 3;
02505 goto REPEATNOTCHAR;
02506
02507 case OP_NOTUPTO:
02508 case OP_NOTMINUPTO:
02509 min = 0;
02510 max = GET2(ecode, 1);
02511 minimize = *ecode == OP_NOTMINUPTO;
02512 ecode += 3;
02513 goto REPEATNOTCHAR;
02514
02515 case OP_NOTPOSSTAR:
02516 possessive = TRUE;
02517 min = 0;
02518 max = INT_MAX;
02519 ecode++;
02520 goto REPEATNOTCHAR;
02521
02522 case OP_NOTPOSPLUS:
02523 possessive = TRUE;
02524 min = 1;
02525 max = INT_MAX;
02526 ecode++;
02527 goto REPEATNOTCHAR;
02528
02529 case OP_NOTPOSQUERY:
02530 possessive = TRUE;
02531 min = 0;
02532 max = 1;
02533 ecode++;
02534 goto REPEATNOTCHAR;
02535
02536 case OP_NOTPOSUPTO:
02537 possessive = TRUE;
02538 min = 0;
02539 max = GET2(ecode, 1);
02540 ecode += 3;
02541 goto REPEATNOTCHAR;
02542
02543 case OP_NOTSTAR:
02544 case OP_NOTMINSTAR:
02545 case OP_NOTPLUS:
02546 case OP_NOTMINPLUS:
02547 case OP_NOTQUERY:
02548 case OP_NOTMINQUERY:
02549 c = *ecode++ - OP_NOTSTAR;
02550 minimize = (c & 1) != 0;
02551 min = rep_min[c];
02552 max = rep_max[c];
02553 if (max == 0) max = INT_MAX;
02554
02555
02556
02557
02558
02559 REPEATNOTCHAR:
02560 if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
02561 fc = *ecode++;
02562
02563
02564
02565
02566
02567
02568
02569
02570
02571 DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
02572 max, eptr));
02573
02574 if ((ims & PCRE_CASELESS) != 0)
02575 {
02576 fc = md->lcc[fc];
02577
02578 #ifdef SUPPORT_UTF8
02579
02580 if (utf8)
02581 {
02582 register unsigned int d;
02583 for (i = 1; i <= min; i++)
02584 {
02585 GETCHARINC(d, eptr);
02586 if (d < 256) d = md->lcc[d];
02587 if (fc == d) RRETURN(MATCH_NOMATCH);
02588 }
02589 }
02590 else
02591 #endif
02592
02593
02594 {
02595 for (i = 1; i <= min; i++)
02596 if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
02597 }
02598
02599 if (min == max) continue;
02600
02601 if (minimize)
02602 {
02603 #ifdef SUPPORT_UTF8
02604
02605 if (utf8)
02606 {
02607 register unsigned int d;
02608 for (fi = min;; fi++)
02609 {
02610 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
02611 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02612 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
02613 GETCHARINC(d, eptr);
02614 if (d < 256) d = md->lcc[d];
02615 if (fc == d) RRETURN(MATCH_NOMATCH);
02616
02617 }
02618 }
02619 else
02620 #endif
02621
02622 {
02623 for (fi = min;; fi++)
02624 {
02625 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
02626 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02627 if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
02628 RRETURN(MATCH_NOMATCH);
02629 }
02630 }
02631
02632 }
02633
02634
02635
02636 else
02637 {
02638 pp = eptr;
02639
02640 #ifdef SUPPORT_UTF8
02641
02642 if (utf8)
02643 {
02644 register unsigned int d;
02645 for (i = min; i < max; i++)
02646 {
02647 int len = 1;
02648 if (eptr >= md->end_subject) break;
02649 GETCHARLEN(d, eptr, len);
02650 if (d < 256) d = md->lcc[d];
02651 if (fc == d) break;
02652 eptr += len;
02653 }
02654 if (possessive) continue;
02655 for(;;)
02656 {
02657 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
02658 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02659 if (eptr-- == pp) break;
02660 BACKCHAR(eptr);
02661 }
02662 }
02663 else
02664 #endif
02665
02666 {
02667 for (i = min; i < max; i++)
02668 {
02669 if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
02670 eptr++;
02671 }
02672 if (possessive) continue;
02673 while (eptr >= pp)
02674 {
02675 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
02676 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02677 eptr--;
02678 }
02679 }
02680
02681 RRETURN(MATCH_NOMATCH);
02682 }
02683
02684 }
02685
02686
02687
02688 else
02689 {
02690 #ifdef SUPPORT_UTF8
02691
02692 if (utf8)
02693 {
02694 register unsigned int d;
02695 for (i = 1; i <= min; i++)
02696 {
02697 GETCHARINC(d, eptr);
02698 if (fc == d) RRETURN(MATCH_NOMATCH);
02699 }
02700 }
02701 else
02702 #endif
02703
02704 {
02705 for (i = 1; i <= min; i++)
02706 if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
02707 }
02708
02709 if (min == max) continue;
02710
02711 if (minimize)
02712 {
02713 #ifdef SUPPORT_UTF8
02714
02715 if (utf8)
02716 {
02717 register unsigned int d;
02718 for (fi = min;; fi++)
02719 {
02720 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
02721 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02722 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
02723 GETCHARINC(d, eptr);
02724 if (fc == d) RRETURN(MATCH_NOMATCH);
02725 }
02726 }
02727 else
02728 #endif
02729
02730 {
02731 for (fi = min;; fi++)
02732 {
02733 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
02734 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02735 if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
02736 RRETURN(MATCH_NOMATCH);
02737 }
02738 }
02739
02740 }
02741
02742
02743
02744 else
02745 {
02746 pp = eptr;
02747
02748 #ifdef SUPPORT_UTF8
02749
02750 if (utf8)
02751 {
02752 register unsigned int d;
02753 for (i = min; i < max; i++)
02754 {
02755 int len = 1;
02756 if (eptr >= md->end_subject) break;
02757 GETCHARLEN(d, eptr, len);
02758 if (fc == d) break;
02759 eptr += len;
02760 }
02761 if (possessive) continue;
02762 for(;;)
02763 {
02764 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
02765 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02766 if (eptr-- == pp) break;
02767 BACKCHAR(eptr);
02768 }
02769 }
02770 else
02771 #endif
02772
02773 {
02774 for (i = min; i < max; i++)
02775 {
02776 if (eptr >= md->end_subject || fc == *eptr) break;
02777 eptr++;
02778 }
02779 if (possessive) continue;
02780 while (eptr >= pp)
02781 {
02782 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
02783 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
02784 eptr--;
02785 }
02786 }
02787
02788 RRETURN(MATCH_NOMATCH);
02789 }
02790 }
02791
02792
02793
02794
02795
02796
02797 case OP_TYPEEXACT:
02798 min = max = GET2(ecode, 1);
02799 minimize = TRUE;
02800 ecode += 3;
02801 goto REPEATTYPE;
02802
02803 case OP_TYPEUPTO:
02804 case OP_TYPEMINUPTO:
02805 min = 0;
02806 max = GET2(ecode, 1);
02807 minimize = *ecode == OP_TYPEMINUPTO;
02808 ecode += 3;
02809 goto REPEATTYPE;
02810
02811 case OP_TYPEPOSSTAR:
02812 possessive = TRUE;
02813 min = 0;
02814 max = INT_MAX;
02815 ecode++;
02816 goto REPEATTYPE;
02817
02818 case OP_TYPEPOSPLUS:
02819 possessive = TRUE;
02820 min = 1;
02821 max = INT_MAX;
02822 ecode++;
02823 goto REPEATTYPE;
02824
02825 case OP_TYPEPOSQUERY:
02826 possessive = TRUE;
02827 min = 0;
02828 max = 1;
02829 ecode++;
02830 goto REPEATTYPE;
02831
02832 case OP_TYPEPOSUPTO:
02833 possessive = TRUE;
02834 min = 0;
02835 max = GET2(ecode, 1);
02836 ecode += 3;
02837 goto REPEATTYPE;
02838
02839 case OP_TYPESTAR:
02840 case OP_TYPEMINSTAR:
02841 case OP_TYPEPLUS:
02842 case OP_TYPEMINPLUS:
02843 case OP_TYPEQUERY:
02844 case OP_TYPEMINQUERY:
02845 c = *ecode++ - OP_TYPESTAR;
02846 minimize = (c & 1) != 0;
02847 min = rep_min[c];
02848 max = rep_max[c];
02849 if (max == 0) max = INT_MAX;
02850
02851
02852
02853
02854
02855 REPEATTYPE:
02856 ctype = *ecode++;
02857
02858 #ifdef SUPPORT_UCP
02859 if (ctype == OP_PROP || ctype == OP_NOTPROP)
02860 {
02861 prop_fail_result = ctype == OP_NOTPROP;
02862 prop_type = *ecode++;
02863 prop_value = *ecode++;
02864 }
02865 else prop_type = -1;
02866 #endif
02867
02868
02869
02870
02871
02872
02873
02874
02875
02876 if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
02877 if (min > 0)
02878 {
02879 #ifdef SUPPORT_UCP
02880 if (prop_type >= 0)
02881 {
02882 switch(prop_type)
02883 {
02884 case PT_ANY:
02885 if (prop_fail_result) RRETURN(MATCH_NOMATCH);
02886 for (i = 1; i <= min; i++)
02887 {
02888 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
02889 GETCHARINCTEST(c, eptr);
02890 }
02891 break;
02892
02893 case PT_LAMP:
02894 for (i = 1; i <= min; i++)
02895 {
02896 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
02897 GETCHARINCTEST(c, eptr);
02898 prop_chartype = UCD_CHARTYPE(c);
02899 if ((prop_chartype == ucp_Lu ||
02900 prop_chartype == ucp_Ll ||
02901 prop_chartype == ucp_Lt) == prop_fail_result)
02902 RRETURN(MATCH_NOMATCH);
02903 }
02904 break;
02905
02906 case PT_GC:
02907 for (i = 1; i <= min; i++)
02908 {
02909 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
02910 GETCHARINCTEST(c, eptr);
02911 prop_category = UCD_CATEGORY(c);
02912 if ((prop_category == prop_value) == prop_fail_result)
02913 RRETURN(MATCH_NOMATCH);
02914 }
02915 break;
02916
02917 case PT_PC:
02918 for (i = 1; i <= min; i++)
02919 {
02920 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
02921 GETCHARINCTEST(c, eptr);
02922 prop_chartype = UCD_CHARTYPE(c);
02923 if ((prop_chartype == prop_value) == prop_fail_result)
02924 RRETURN(MATCH_NOMATCH);
02925 }
02926 break;
02927
02928 case PT_SC:
02929 for (i = 1; i <= min; i++)
02930 {
02931 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
02932 GETCHARINCTEST(c, eptr);
02933 prop_script = UCD_SCRIPT(c);
02934 if ((prop_script == prop_value) == prop_fail_result)
02935 RRETURN(MATCH_NOMATCH);
02936 }
02937 break;
02938
02939 default:
02940 RRETURN(PCRE_ERROR_INTERNAL);
02941 }
02942 }
02943
02944
02945
02946
02947 else if (ctype == OP_EXTUNI)
02948 {
02949 for (i = 1; i <= min; i++)
02950 {
02951 GETCHARINCTEST(c, eptr);
02952 prop_category = UCD_CATEGORY(c);
02953 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
02954 while (eptr < md->end_subject)
02955 {
02956 int len = 1;
02957 if (!utf8) c = *eptr; else
02958 {
02959 GETCHARLEN(c, eptr, len);
02960 }
02961 prop_category = UCD_CATEGORY(c);
02962 if (prop_category != ucp_M) break;
02963 eptr += len;
02964 }
02965 }
02966 }
02967
02968 else
02969 #endif
02970
02971
02972
02973 #ifdef SUPPORT_UTF8
02974 if (utf8) switch(ctype)
02975 {
02976 case OP_ANY:
02977 for (i = 1; i <= min; i++)
02978 {
02979 if (eptr >= md->end_subject || IS_NEWLINE(eptr))
02980 RRETURN(MATCH_NOMATCH);
02981 eptr++;
02982 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
02983 }
02984 break;
02985
02986 case OP_ALLANY:
02987 for (i = 1; i <= min; i++)
02988 {
02989 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
02990 eptr++;
02991 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
02992 }
02993 break;
02994
02995 case OP_ANYBYTE:
02996 eptr += min;
02997 break;
02998
02999 case OP_ANYNL:
03000 for (i = 1; i <= min; i++)
03001 {
03002 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03003 GETCHARINC(c, eptr);
03004 switch(c)
03005 {
03006 default: RRETURN(MATCH_NOMATCH);
03007 case 0x000d:
03008 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
03009 break;
03010
03011 case 0x000a:
03012 break;
03013
03014 case 0x000b:
03015 case 0x000c:
03016 case 0x0085:
03017 case 0x2028:
03018 case 0x2029:
03019 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
03020 break;
03021 }
03022 }
03023 break;
03024
03025 case OP_NOT_HSPACE:
03026 for (i = 1; i <= min; i++)
03027 {
03028 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03029 GETCHARINC(c, eptr);
03030 switch(c)
03031 {
03032 default: break;
03033 case 0x09:
03034 case 0x20:
03035 case 0xa0:
03036 case 0x1680:
03037 case 0x180e:
03038 case 0x2000:
03039 case 0x2001:
03040 case 0x2002:
03041 case 0x2003:
03042 case 0x2004:
03043 case 0x2005:
03044 case 0x2006:
03045 case 0x2007:
03046 case 0x2008:
03047 case 0x2009:
03048 case 0x200A:
03049 case 0x202f:
03050 case 0x205f:
03051 case 0x3000:
03052 RRETURN(MATCH_NOMATCH);
03053 }
03054 }
03055 break;
03056
03057 case OP_HSPACE:
03058 for (i = 1; i <= min; i++)
03059 {
03060 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03061 GETCHARINC(c, eptr);
03062 switch(c)
03063 {
03064 default: RRETURN(MATCH_NOMATCH);
03065 case 0x09:
03066 case 0x20:
03067 case 0xa0:
03068 case 0x1680:
03069 case 0x180e:
03070 case 0x2000:
03071 case 0x2001:
03072 case 0x2002:
03073 case 0x2003:
03074 case 0x2004:
03075 case 0x2005:
03076 case 0x2006:
03077 case 0x2007:
03078 case 0x2008:
03079 case 0x2009:
03080 case 0x200A:
03081 case 0x202f:
03082 case 0x205f:
03083 case 0x3000:
03084 break;
03085 }
03086 }
03087 break;
03088
03089 case OP_NOT_VSPACE:
03090 for (i = 1; i <= min; i++)
03091 {
03092 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03093 GETCHARINC(c, eptr);
03094 switch(c)
03095 {
03096 default: break;
03097 case 0x0a:
03098 case 0x0b:
03099 case 0x0c:
03100 case 0x0d:
03101 case 0x85:
03102 case 0x2028:
03103 case 0x2029:
03104 RRETURN(MATCH_NOMATCH);
03105 }
03106 }
03107 break;
03108
03109 case OP_VSPACE:
03110 for (i = 1; i <= min; i++)
03111 {
03112 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03113 GETCHARINC(c, eptr);
03114 switch(c)
03115 {
03116 default: RRETURN(MATCH_NOMATCH);
03117 case 0x0a:
03118 case 0x0b:
03119 case 0x0c:
03120 case 0x0d:
03121 case 0x85:
03122 case 0x2028:
03123 case 0x2029:
03124 break;
03125 }
03126 }
03127 break;
03128
03129 case OP_NOT_DIGIT:
03130 for (i = 1; i <= min; i++)
03131 {
03132 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03133 GETCHARINC(c, eptr);
03134 if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
03135 RRETURN(MATCH_NOMATCH);
03136 }
03137 break;
03138
03139 case OP_DIGIT:
03140 for (i = 1; i <= min; i++)
03141 {
03142 if (eptr >= md->end_subject ||
03143 *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
03144 RRETURN(MATCH_NOMATCH);
03145
03146 }
03147 break;
03148
03149 case OP_NOT_WHITESPACE:
03150 for (i = 1; i <= min; i++)
03151 {
03152 if (eptr >= md->end_subject ||
03153 (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0))
03154 RRETURN(MATCH_NOMATCH);
03155 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
03156 }
03157 break;
03158
03159 case OP_WHITESPACE:
03160 for (i = 1; i <= min; i++)
03161 {
03162 if (eptr >= md->end_subject ||
03163 *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
03164 RRETURN(MATCH_NOMATCH);
03165
03166 }
03167 break;
03168
03169 case OP_NOT_WORDCHAR:
03170 for (i = 1; i <= min; i++)
03171 {
03172 if (eptr >= md->end_subject ||
03173 (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
03174 RRETURN(MATCH_NOMATCH);
03175 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
03176 }
03177 break;
03178
03179 case OP_WORDCHAR:
03180 for (i = 1; i <= min; i++)
03181 {
03182 if (eptr >= md->end_subject ||
03183 *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
03184 RRETURN(MATCH_NOMATCH);
03185
03186 }
03187 break;
03188
03189 default:
03190 RRETURN(PCRE_ERROR_INTERNAL);
03191 }
03192
03193 else
03194 #endif
03195
03196
03197
03198
03199
03200 switch(ctype)
03201 {
03202 case OP_ANY:
03203 for (i = 1; i <= min; i++)
03204 {
03205 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
03206 eptr++;
03207 }
03208 break;
03209
03210 case OP_ALLANY:
03211 eptr += min;
03212 break;
03213
03214 case OP_ANYBYTE:
03215 eptr += min;
03216 break;
03217
03218
03219
03220
03221 case OP_ANYNL:
03222 for (i = 1; i <= min; i++)
03223 {
03224 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03225 switch(*eptr++)
03226 {
03227 default: RRETURN(MATCH_NOMATCH);
03228 case 0x000d:
03229 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
03230 break;
03231 case 0x000a:
03232 break;
03233
03234 case 0x000b:
03235 case 0x000c:
03236 case 0x0085:
03237 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
03238 break;
03239 }
03240 }
03241 break;
03242
03243 case OP_NOT_HSPACE:
03244 for (i = 1; i <= min; i++)
03245 {
03246 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03247 switch(*eptr++)
03248 {
03249 default: break;
03250 case 0x09:
03251 case 0x20:
03252 case 0xa0:
03253 RRETURN(MATCH_NOMATCH);
03254 }
03255 }
03256 break;
03257
03258 case OP_HSPACE:
03259 for (i = 1; i <= min; i++)
03260 {
03261 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03262 switch(*eptr++)
03263 {
03264 default: RRETURN(MATCH_NOMATCH);
03265 case 0x09:
03266 case 0x20:
03267 case 0xa0:
03268 break;
03269 }
03270 }
03271 break;
03272
03273 case OP_NOT_VSPACE:
03274 for (i = 1; i <= min; i++)
03275 {
03276 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03277 switch(*eptr++)
03278 {
03279 default: break;
03280 case 0x0a:
03281 case 0x0b:
03282 case 0x0c:
03283 case 0x0d:
03284 case 0x85:
03285 RRETURN(MATCH_NOMATCH);
03286 }
03287 }
03288 break;
03289
03290 case OP_VSPACE:
03291 for (i = 1; i <= min; i++)
03292 {
03293 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03294 switch(*eptr++)
03295 {
03296 default: RRETURN(MATCH_NOMATCH);
03297 case 0x0a:
03298 case 0x0b:
03299 case 0x0c:
03300 case 0x0d:
03301 case 0x85:
03302 break;
03303 }
03304 }
03305 break;
03306
03307 case OP_NOT_DIGIT:
03308 for (i = 1; i <= min; i++)
03309 if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
03310 break;
03311
03312 case OP_DIGIT:
03313 for (i = 1; i <= min; i++)
03314 if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
03315 break;
03316
03317 case OP_NOT_WHITESPACE:
03318 for (i = 1; i <= min; i++)
03319 if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
03320 break;
03321
03322 case OP_WHITESPACE:
03323 for (i = 1; i <= min; i++)
03324 if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
03325 break;
03326
03327 case OP_NOT_WORDCHAR:
03328 for (i = 1; i <= min; i++)
03329 if ((md->ctypes[*eptr++] & ctype_word) != 0)
03330 RRETURN(MATCH_NOMATCH);
03331 break;
03332
03333 case OP_WORDCHAR:
03334 for (i = 1; i <= min; i++)
03335 if ((md->ctypes[*eptr++] & ctype_word) == 0)
03336 RRETURN(MATCH_NOMATCH);
03337 break;
03338
03339 default:
03340 RRETURN(PCRE_ERROR_INTERNAL);
03341 }
03342 }
03343
03344
03345
03346 if (min == max) continue;
03347
03348
03349
03350
03351
03352 if (minimize)
03353 {
03354 #ifdef SUPPORT_UCP
03355 if (prop_type >= 0)
03356 {
03357 switch(prop_type)
03358 {
03359 case PT_ANY:
03360 for (fi = min;; fi++)
03361 {
03362 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
03363 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03364 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03365 GETCHARINC(c, eptr);
03366 if (prop_fail_result) RRETURN(MATCH_NOMATCH);
03367 }
03368
03369
03370 case PT_LAMP:
03371 for (fi = min;; fi++)
03372 {
03373 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
03374 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03375 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03376 GETCHARINC(c, eptr);
03377 prop_chartype = UCD_CHARTYPE(c);
03378 if ((prop_chartype == ucp_Lu ||
03379 prop_chartype == ucp_Ll ||
03380 prop_chartype == ucp_Lt) == prop_fail_result)
03381 RRETURN(MATCH_NOMATCH);
03382 }
03383
03384
03385 case PT_GC:
03386 for (fi = min;; fi++)
03387 {
03388 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
03389 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03390 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03391 GETCHARINC(c, eptr);
03392 prop_category = UCD_CATEGORY(c);
03393 if ((prop_category == prop_value) == prop_fail_result)
03394 RRETURN(MATCH_NOMATCH);
03395 }
03396
03397
03398 case PT_PC:
03399 for (fi = min;; fi++)
03400 {
03401 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
03402 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03403 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03404 GETCHARINC(c, eptr);
03405 prop_chartype = UCD_CHARTYPE(c);
03406 if ((prop_chartype == prop_value) == prop_fail_result)
03407 RRETURN(MATCH_NOMATCH);
03408 }
03409
03410
03411 case PT_SC:
03412 for (fi = min;; fi++)
03413 {
03414 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
03415 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03416 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03417 GETCHARINC(c, eptr);
03418 prop_script = UCD_SCRIPT(c);
03419 if ((prop_script == prop_value) == prop_fail_result)
03420 RRETURN(MATCH_NOMATCH);
03421 }
03422
03423
03424 default:
03425 RRETURN(PCRE_ERROR_INTERNAL);
03426 }
03427 }
03428
03429
03430
03431
03432 else if (ctype == OP_EXTUNI)
03433 {
03434 for (fi = min;; fi++)
03435 {
03436 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
03437 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03438 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
03439 GETCHARINCTEST(c, eptr);
03440 prop_category = UCD_CATEGORY(c);
03441 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
03442 while (eptr < md->end_subject)
03443 {
03444 int len = 1;
03445 if (!utf8) c = *eptr; else
03446 {
03447 GETCHARLEN(c, eptr, len);
03448 }
03449 prop_category = UCD_CATEGORY(c);
03450 if (prop_category != ucp_M) break;
03451 eptr += len;
03452 }
03453 }
03454 }
03455
03456 else
03457 #endif
03458
03459 #ifdef SUPPORT_UTF8
03460
03461 if (utf8)
03462 {
03463 for (fi = min;; fi++)
03464 {
03465 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
03466 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03467 if (fi >= max || eptr >= md->end_subject ||
03468 (ctype == OP_ANY && IS_NEWLINE(eptr)))
03469 RRETURN(MATCH_NOMATCH);
03470
03471 GETCHARINC(c, eptr);
03472 switch(ctype)
03473 {
03474 case OP_ANY:
03475 case OP_ALLANY:
03476 case OP_ANYBYTE:
03477 break;
03478
03479 case OP_ANYNL:
03480 switch(c)
03481 {
03482 default: RRETURN(MATCH_NOMATCH);
03483 case 0x000d:
03484 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
03485 break;
03486 case 0x000a:
03487 break;
03488
03489 case 0x000b:
03490 case 0x000c:
03491 case 0x0085:
03492 case 0x2028:
03493 case 0x2029:
03494 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
03495 break;
03496 }
03497 break;
03498
03499 case OP_NOT_HSPACE:
03500 switch(c)
03501 {
03502 default: break;
03503 case 0x09:
03504 case 0x20:
03505 case 0xa0:
03506 case 0x1680:
03507 case 0x180e:
03508 case 0x2000:
03509 case 0x2001:
03510 case 0x2002:
03511 case 0x2003:
03512 case 0x2004:
03513 case 0x2005:
03514 case 0x2006:
03515 case 0x2007:
03516 case 0x2008:
03517 case 0x2009:
03518 case 0x200A:
03519 case 0x202f:
03520 case 0x205f:
03521 case 0x3000:
03522 RRETURN(MATCH_NOMATCH);
03523 }
03524 break;
03525
03526 case OP_HSPACE:
03527 switch(c)
03528 {
03529 default: RRETURN(MATCH_NOMATCH);
03530 case 0x09:
03531 case 0x20:
03532 case 0xa0:
03533 case 0x1680:
03534 case 0x180e:
03535 case 0x2000:
03536 case 0x2001:
03537 case 0x2002:
03538 case 0x2003:
03539 case 0x2004:
03540 case 0x2005:
03541 case 0x2006:
03542 case 0x2007:
03543 case 0x2008:
03544 case 0x2009:
03545 case 0x200A:
03546 case 0x202f:
03547 case 0x205f:
03548 case 0x3000:
03549 break;
03550 }
03551 break;
03552
03553 case OP_NOT_VSPACE:
03554 switch(c)
03555 {
03556 default: break;
03557 case 0x0a:
03558 case 0x0b:
03559 case 0x0c:
03560 case 0x0d:
03561 case 0x85:
03562 case 0x2028:
03563 case 0x2029:
03564 RRETURN(MATCH_NOMATCH);
03565 }
03566 break;
03567
03568 case OP_VSPACE:
03569 switch(c)
03570 {
03571 default: RRETURN(MATCH_NOMATCH);
03572 case 0x0a:
03573 case 0x0b:
03574 case 0x0c:
03575 case 0x0d:
03576 case 0x85:
03577 case 0x2028:
03578 case 0x2029:
03579 break;
03580 }
03581 break;
03582
03583 case OP_NOT_DIGIT:
03584 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
03585 RRETURN(MATCH_NOMATCH);
03586 break;
03587
03588 case OP_DIGIT:
03589 if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
03590 RRETURN(MATCH_NOMATCH);
03591 break;
03592
03593 case OP_NOT_WHITESPACE:
03594 if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
03595 RRETURN(MATCH_NOMATCH);
03596 break;
03597
03598 case OP_WHITESPACE:
03599 if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
03600 RRETURN(MATCH_NOMATCH);
03601 break;
03602
03603 case OP_NOT_WORDCHAR:
03604 if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
03605 RRETURN(MATCH_NOMATCH);
03606 break;
03607
03608 case OP_WORDCHAR:
03609 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
03610 RRETURN(MATCH_NOMATCH);
03611 break;
03612
03613 default:
03614 RRETURN(PCRE_ERROR_INTERNAL);
03615 }
03616 }
03617 }
03618 else
03619 #endif
03620
03621 {
03622 for (fi = min;; fi++)
03623 {
03624 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
03625 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03626 if (fi >= max || eptr >= md->end_subject ||
03627 (ctype == OP_ANY && IS_NEWLINE(eptr)))
03628 RRETURN(MATCH_NOMATCH);
03629
03630 c = *eptr++;
03631 switch(ctype)
03632 {
03633 case OP_ANY:
03634 case OP_ALLANY:
03635 case OP_ANYBYTE:
03636 break;
03637
03638 case OP_ANYNL:
03639 switch(c)
03640 {
03641 default: RRETURN(MATCH_NOMATCH);
03642 case 0x000d:
03643 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
03644 break;
03645
03646 case 0x000a:
03647 break;
03648
03649 case 0x000b:
03650 case 0x000c:
03651 case 0x0085:
03652 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
03653 break;
03654 }
03655 break;
03656
03657 case OP_NOT_HSPACE:
03658 switch(c)
03659 {
03660 default: break;
03661 case 0x09:
03662 case 0x20:
03663 case 0xa0:
03664 RRETURN(MATCH_NOMATCH);
03665 }
03666 break;
03667
03668 case OP_HSPACE:
03669 switch(c)
03670 {
03671 default: RRETURN(MATCH_NOMATCH);
03672 case 0x09:
03673 case 0x20:
03674 case 0xa0:
03675 break;
03676 }
03677 break;
03678
03679 case OP_NOT_VSPACE:
03680 switch(c)
03681 {
03682 default: break;
03683 case 0x0a:
03684 case 0x0b:
03685 case 0x0c:
03686 case 0x0d:
03687 case 0x85:
03688 RRETURN(MATCH_NOMATCH);
03689 }
03690 break;
03691
03692 case OP_VSPACE:
03693 switch(c)
03694 {
03695 default: RRETURN(MATCH_NOMATCH);
03696 case 0x0a:
03697 case 0x0b:
03698 case 0x0c:
03699 case 0x0d:
03700 case 0x85:
03701 break;
03702 }
03703 break;
03704
03705 case OP_NOT_DIGIT:
03706 if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
03707 break;
03708
03709 case OP_DIGIT:
03710 if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
03711 break;
03712
03713 case OP_NOT_WHITESPACE:
03714 if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
03715 break;
03716
03717 case OP_WHITESPACE:
03718 if ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
03719 break;
03720
03721 case OP_NOT_WORDCHAR:
03722 if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
03723 break;
03724
03725 case OP_WORDCHAR:
03726 if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
03727 break;
03728
03729 default:
03730 RRETURN(PCRE_ERROR_INTERNAL);
03731 }
03732 }
03733 }
03734
03735 }
03736
03737
03738
03739
03740
03741 else
03742 {
03743 pp = eptr;
03744
03745 #ifdef SUPPORT_UCP
03746 if (prop_type >= 0)
03747 {
03748 switch(prop_type)
03749 {
03750 case PT_ANY:
03751 for (i = min; i < max; i++)
03752 {
03753 int len = 1;
03754 if (eptr >= md->end_subject) break;
03755 GETCHARLEN(c, eptr, len);
03756 if (prop_fail_result) break;
03757 eptr+= len;
03758 }
03759 break;
03760
03761 case PT_LAMP:
03762 for (i = min; i < max; i++)
03763 {
03764 int len = 1;
03765 if (eptr >= md->end_subject) break;
03766 GETCHARLEN(c, eptr, len);
03767 prop_chartype = UCD_CHARTYPE(c);
03768 if ((prop_chartype == ucp_Lu ||
03769 prop_chartype == ucp_Ll ||
03770 prop_chartype == ucp_Lt) == prop_fail_result)
03771 break;
03772 eptr+= len;
03773 }
03774 break;
03775
03776 case PT_GC:
03777 for (i = min; i < max; i++)
03778 {
03779 int len = 1;
03780 if (eptr >= md->end_subject) break;
03781 GETCHARLEN(c, eptr, len);
03782 prop_category = UCD_CATEGORY(c);
03783 if ((prop_category == prop_value) == prop_fail_result)
03784 break;
03785 eptr+= len;
03786 }
03787 break;
03788
03789 case PT_PC:
03790 for (i = min; i < max; i++)
03791 {
03792 int len = 1;
03793 if (eptr >= md->end_subject) break;
03794 GETCHARLEN(c, eptr, len);
03795 prop_chartype = UCD_CHARTYPE(c);
03796 if ((prop_chartype == prop_value) == prop_fail_result)
03797 break;
03798 eptr+= len;
03799 }
03800 break;
03801
03802 case PT_SC:
03803 for (i = min; i < max; i++)
03804 {
03805 int len = 1;
03806 if (eptr >= md->end_subject) break;
03807 GETCHARLEN(c, eptr, len);
03808 prop_script = UCD_SCRIPT(c);
03809 if ((prop_script == prop_value) == prop_fail_result)
03810 break;
03811 eptr+= len;
03812 }
03813 break;
03814 }
03815
03816
03817
03818 if (possessive) continue;
03819 for(;;)
03820 {
03821 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
03822 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03823 if (eptr-- == pp) break;
03824 if (utf8) BACKCHAR(eptr);
03825 }
03826 }
03827
03828
03829
03830
03831 else if (ctype == OP_EXTUNI)
03832 {
03833 for (i = min; i < max; i++)
03834 {
03835 if (eptr >= md->end_subject) break;
03836 GETCHARINCTEST(c, eptr);
03837 prop_category = UCD_CATEGORY(c);
03838 if (prop_category == ucp_M) break;
03839 while (eptr < md->end_subject)
03840 {
03841 int len = 1;
03842 if (!utf8) c = *eptr; else
03843 {
03844 GETCHARLEN(c, eptr, len);
03845 }
03846 prop_category = UCD_CATEGORY(c);
03847 if (prop_category != ucp_M) break;
03848 eptr += len;
03849 }
03850 }
03851
03852
03853
03854 if (possessive) continue;
03855 for(;;)
03856 {
03857 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
03858 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
03859 if (eptr-- == pp) break;
03860 for (;;)
03861 {
03862 int len = 1;
03863 if (!utf8) c = *eptr; else
03864 {
03865 BACKCHAR(eptr);
03866 GETCHARLEN(c, eptr, len);
03867 }
03868 prop_category = UCD_CATEGORY(c);
03869 if (prop_category != ucp_M) break;
03870 eptr--;
03871 }
03872 }
03873 }
03874
03875 else
03876 #endif
03877
03878 #ifdef SUPPORT_UTF8
03879
03880
03881 if (utf8)
03882 {
03883 switch(ctype)
03884 {
03885 case OP_ANY:
03886 if (max < INT_MAX)
03887 {
03888 for (i = min; i < max; i++)
03889 {
03890 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
03891 eptr++;
03892 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
03893 }
03894 }
03895
03896
03897
03898 else
03899 {
03900 for (i = min; i < max; i++)
03901 {
03902 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
03903 eptr++;
03904 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
03905 }
03906 }
03907 break;
03908
03909 case OP_ALLANY:
03910 if (max < INT_MAX)
03911 {
03912 for (i = min; i < max; i++)
03913 {
03914 if (eptr >= md->end_subject) break;
03915 eptr++;
03916 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
03917 }
03918 }
03919 else eptr = md->end_subject;
03920 break;
03921
03922
03923
03924 case OP_ANYBYTE:
03925 c = max - min;
03926 if (c > (unsigned int)(md->end_subject - eptr))
03927 c = md->end_subject - eptr;
03928 eptr += c;
03929 break;
03930
03931 case OP_ANYNL:
03932 for (i = min; i < max; i++)
03933 {
03934 int len = 1;
03935 if (eptr >= md->end_subject) break;
03936 GETCHARLEN(c, eptr, len);
03937 if (c == 0x000d)
03938 {
03939 if (++eptr >= md->end_subject) break;
03940 if (*eptr == 0x000a) eptr++;
03941 }
03942 else
03943 {
03944 if (c != 0x000a &&
03945 (md->bsr_anycrlf ||
03946 (c != 0x000b && c != 0x000c &&
03947 c != 0x0085 && c != 0x2028 && c != 0x2029)))
03948 break;
03949 eptr += len;
03950 }
03951 }
03952 break;
03953
03954 case OP_NOT_HSPACE:
03955 case OP_HSPACE:
03956 for (i = min; i < max; i++)
03957 {
03958 BOOL gotspace;
03959 int len = 1;
03960 if (eptr >= md->end_subject) break;
03961 GETCHARLEN(c, eptr, len);
03962 switch(c)
03963 {
03964 default: gotspace = FALSE; break;
03965 case 0x09:
03966 case 0x20:
03967 case 0xa0:
03968 case 0x1680:
03969 case 0x180e:
03970 case 0x2000:
03971 case 0x2001:
03972 case 0x2002:
03973 case 0x2003:
03974 case 0x2004:
03975 case 0x2005:
03976 case 0x2006:
03977 case 0x2007:
03978 case 0x2008:
03979 case 0x2009:
03980 case 0x200A:
03981 case 0x202f:
03982 case 0x205f:
03983 case 0x3000:
03984 gotspace = TRUE;
03985 break;
03986 }
03987 if (gotspace == (ctype == OP_NOT_HSPACE)) break;
03988 eptr += len;
03989 }
03990 break;
03991
03992 case OP_NOT_VSPACE:
03993 case OP_VSPACE:
03994 for (i = min; i < max; i++)
03995 {
03996 BOOL gotspace;
03997 int len = 1;
03998 if (eptr >= md->end_subject) break;
03999 GETCHARLEN(c, eptr, len);
04000 switch(c)
04001 {
04002 default: gotspace = FALSE; break;
04003 case 0x0a:
04004 case 0x0b:
04005 case 0x0c:
04006 case 0x0d:
04007 case 0x85:
04008 case 0x2028:
04009 case 0x2029:
04010 gotspace = TRUE;
04011 break;
04012 }
04013 if (gotspace == (ctype == OP_NOT_VSPACE)) break;
04014 eptr += len;
04015 }
04016 break;
04017
04018 case OP_NOT_DIGIT:
04019 for (i = min; i < max; i++)
04020 {
04021 int len = 1;
04022 if (eptr >= md->end_subject) break;
04023 GETCHARLEN(c, eptr, len);
04024 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
04025 eptr+= len;
04026 }
04027 break;
04028
04029 case OP_DIGIT:
04030 for (i = min; i < max; i++)
04031 {
04032 int len = 1;
04033 if (eptr >= md->end_subject) break;
04034 GETCHARLEN(c, eptr, len);
04035 if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
04036 eptr+= len;
04037 }
04038 break;
04039
04040 case OP_NOT_WHITESPACE:
04041 for (i = min; i < max; i++)
04042 {
04043 int len = 1;
04044 if (eptr >= md->end_subject) break;
04045 GETCHARLEN(c, eptr, len);
04046 if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
04047 eptr+= len;
04048 }
04049 break;
04050
04051 case OP_WHITESPACE:
04052 for (i = min; i < max; i++)
04053 {
04054 int len = 1;
04055 if (eptr >= md->end_subject) break;
04056 GETCHARLEN(c, eptr, len);
04057 if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
04058 eptr+= len;
04059 }
04060 break;
04061
04062 case OP_NOT_WORDCHAR:
04063 for (i = min; i < max; i++)
04064 {
04065 int len = 1;
04066 if (eptr >= md->end_subject) break;
04067 GETCHARLEN(c, eptr, len);
04068 if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
04069 eptr+= len;
04070 }
04071 break;
04072
04073 case OP_WORDCHAR:
04074 for (i = min; i < max; i++)
04075 {
04076 int len = 1;
04077 if (eptr >= md->end_subject) break;
04078 GETCHARLEN(c, eptr, len);
04079 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
04080 eptr+= len;
04081 }
04082 break;
04083
04084 default:
04085 RRETURN(PCRE_ERROR_INTERNAL);
04086 }
04087
04088
04089
04090 if (possessive) continue;
04091 for(;;)
04092 {
04093 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
04094 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
04095 if (eptr-- == pp) break;
04096 BACKCHAR(eptr);
04097 }
04098 }
04099 else
04100 #endif
04101
04102
04103 {
04104 switch(ctype)
04105 {
04106 case OP_ANY:
04107 for (i = min; i < max; i++)
04108 {
04109 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
04110 eptr++;
04111 }
04112 break;
04113
04114 case OP_ALLANY:
04115 case OP_ANYBYTE:
04116 c = max - min;
04117 if (c > (unsigned int)(md->end_subject - eptr))
04118 c = md->end_subject - eptr;
04119 eptr += c;
04120 break;
04121
04122 case OP_ANYNL:
04123 for (i = min; i < max; i++)
04124 {
04125 if (eptr >= md->end_subject) break;
04126 c = *eptr;
04127 if (c == 0x000d)
04128 {
04129 if (++eptr >= md->end_subject) break;
04130 if (*eptr == 0x000a) eptr++;
04131 }
04132 else
04133 {
04134 if (c != 0x000a &&
04135 (md->bsr_anycrlf ||
04136 (c != 0x000b && c != 0x000c && c != 0x0085)))
04137 break;
04138 eptr++;
04139 }
04140 }
04141 break;
04142
04143 case OP_NOT_HSPACE:
04144 for (i = min; i < max; i++)
04145 {
04146 if (eptr >= md->end_subject) break;
04147 c = *eptr;
04148 if (c == 0x09 || c == 0x20 || c == 0xa0) break;
04149 eptr++;
04150 }
04151 break;
04152
04153 case OP_HSPACE:
04154 for (i = min; i < max; i++)
04155 {
04156 if (eptr >= md->end_subject) break;
04157 c = *eptr;
04158 if (c != 0x09 && c != 0x20 && c != 0xa0) break;
04159 eptr++;
04160 }
04161 break;
04162
04163 case OP_NOT_VSPACE:
04164 for (i = min; i < max; i++)
04165 {
04166 if (eptr >= md->end_subject) break;
04167 c = *eptr;
04168 if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
04169 break;
04170 eptr++;
04171 }
04172 break;
04173
04174 case OP_VSPACE:
04175 for (i = min; i < max; i++)
04176 {
04177 if (eptr >= md->end_subject) break;
04178 c = *eptr;
04179 if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
04180 break;
04181 eptr++;
04182 }
04183 break;
04184
04185 case OP_NOT_DIGIT:
04186 for (i = min; i < max; i++)
04187 {
04188 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
04189 break;
04190 eptr++;
04191 }
04192 break;
04193
04194 case OP_DIGIT:
04195 for (i = min; i < max; i++)
04196 {
04197 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
04198 break;
04199 eptr++;
04200 }
04201 break;
04202
04203 case OP_NOT_WHITESPACE:
04204 for (i = min; i < max; i++)
04205 {
04206 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
04207 break;
04208 eptr++;
04209 }
04210 break;
04211
04212 case OP_WHITESPACE:
04213 for (i = min; i < max; i++)
04214 {
04215 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
04216 break;
04217 eptr++;
04218 }
04219 break;
04220
04221 case OP_NOT_WORDCHAR:
04222 for (i = min; i < max; i++)
04223 {
04224 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
04225 break;
04226 eptr++;
04227 }
04228 break;
04229
04230 case OP_WORDCHAR:
04231 for (i = min; i < max; i++)
04232 {
04233 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
04234 break;
04235 eptr++;
04236 }
04237 break;
04238
04239 default:
04240 RRETURN(PCRE_ERROR_INTERNAL);
04241 }
04242
04243
04244
04245 if (possessive) continue;
04246 while (eptr >= pp)
04247 {
04248 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
04249 eptr--;
04250 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
04251 }
04252 }
04253
04254
04255
04256 RRETURN(MATCH_NOMATCH);
04257 }
04258
04259
04260
04261
04262
04263 default:
04264 DPRINTF(("Unknown opcode %d\n", *ecode));
04265 RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
04266 }
04267
04268
04269
04270
04271
04272 }
04273
04274
04275
04276
04277
04278
04279
04280 #ifdef NO_RECURSE
04281 #define LBL(val) case val: goto L_RM##val;
04282 HEAP_RETURN:
04283 switch (frame->Xwhere)
04284 {
04285 LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
04286 LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
04287 LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
04288 LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
04289 LBL(53) LBL(54)
04290 #ifdef SUPPORT_UTF8
04291 LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
04292 LBL(32) LBL(34) LBL(42) LBL(46)
04293 #ifdef SUPPORT_UCP
04294 LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
04295 #endif
04296 #endif
04297 default:
04298 DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
04299 return PCRE_ERROR_INTERNAL;
04300 }
04301 #undef LBL
04302 #endif
04303 }
04304
04305
04306
04307
04308
04309 /* Preprocessor cleanup following the function that ends just above. */
04310 /* The names in the first group are undefined only in NO_RECURSE builds. */
04311 /* NOTE(review): presumably they were #defined earlier as aliases for fields of the heap frame used by the HEAP_RETURN dispatch above; the #defines are not visible here - confirm. */
04312 #ifdef NO_RECURSE
04313 #undef eptr
04314 #undef ecode
04315 #undef mstart
04316 #undef offset_top
04317 #undef ims
04318 #undef eptrb
04319 #undef flags
04320
04321 #undef callpat
04322 #undef charptr
04323 #undef data
04324 #undef next
04325 #undef pp
04326 #undef prev
04327 #undef saved_eptr
04328
04329 #undef new_recursive
04330
04331 #undef cur_is_word
04332 #undef condition
04333 #undef prev_is_word
04334
04335 #undef original_ims
04336 /* Note: max and min in the next group are also #undef'd near the top of this file, right after pcre_internal.h is included. */
04337 #undef ctype
04338 #undef length
04339 #undef max
04340 #undef min
04341 #undef number
04342 #undef offset
04343 #undef op
04344 #undef save_capture_last
04345 #undef save_offset1
04346 #undef save_offset2
04347 #undef save_offset3
04348 #undef stacksave
04349
04350 #undef newptrb
04351
04352 #endif /* NO_RECURSE */
04353
04354 /* fc and fi are undefined unconditionally, i.e. whether or not NO_RECURSE is set. */
04355 /* After this point none of the names above expand as macros, so later code (e.g. pcre_exec's own `ims`, `length`, `flags`) can use them as ordinary identifiers. */
04356 #undef fc
04357 #undef fi
04358
04359
04360
04361
04362
04363
04364
04365
04366
04367
04368
04369
04370
04371
04372
04373
04374
04375
04376
04377
04378
04379
04380
04381
04382
04383
04384
04385
04386
04387
04388 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
04389 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
04390 PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
04391 int offsetcount)
04392 {
04393 int rc, resetcount, ocount;
04394 int first_byte = -1;
04395 int req_byte = -1;
04396 int req_byte2 = -1;
04397 int newline;
04398 unsigned long int ims;
04399 BOOL using_temporary_offsets = FALSE;
04400 BOOL anchored;
04401 BOOL startline;
04402 BOOL firstline;
04403 BOOL first_byte_caseless = FALSE;
04404 BOOL req_byte_caseless = FALSE;
04405 BOOL utf8;
04406 match_data match_block;
04407 match_data *md = &match_block;
04408 const uschar *tables;
04409 const uschar *start_bits = NULL;
04410 USPTR start_match = (USPTR)subject + start_offset;
04411 USPTR end_subject;
04412 USPTR req_byte_ptr = start_match - 1;
04413
04414 pcre_study_data internal_study;
04415 const pcre_study_data *study;
04416
04417 real_pcre internal_re;
04418 const real_pcre *external_re = (const real_pcre *)argument_re;
04419 const real_pcre *re = external_re;
04420
04421
04422
04423 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
04424 if (re == NULL || subject == NULL ||
04425 (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
04426 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
04427
04428
04429
04430
04431 study = NULL;
04432 md->match_limit = MATCH_LIMIT;
04433 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
04434 md->callout_data = NULL;
04435
04436
04437
04438 tables = external_re->tables;
04439
04440 if (extra_data != NULL)
04441 {
04442 register unsigned int flags = extra_data->flags;
04443 if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
04444 study = (const pcre_study_data *)extra_data->study_data;
04445 if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
04446 md->match_limit = extra_data->match_limit;
04447 if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
04448 md->match_limit_recursion = extra_data->match_limit_recursion;
04449 if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
04450 md->callout_data = extra_data->callout_data;
04451 if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
04452 }
04453
04454
04455
04456
04457
04458 if (tables == NULL) tables = _pcre_default_tables;
04459
04460
04461
04462
04463
04464
04465 if (re->magic_number != MAGIC_NUMBER)
04466 {
04467 re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
04468 if (re == NULL) return PCRE_ERROR_BADMAGIC;
04469 if (study != NULL) study = &internal_study;
04470 }
04471
04472
04473
04474 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
04475 startline = (re->flags & PCRE_STARTLINE) != 0;
04476 firstline = (re->options & PCRE_FIRSTLINE) != 0;
04477
04478
04479
04480 md->start_code = (const uschar *)external_re + re->name_table_offset +
04481 re->name_count * re->name_entry_size;
04482
04483 md->start_subject = (USPTR)subject;
04484 md->start_offset = start_offset;
04485 md->end_subject = md->start_subject + length;
04486 end_subject = md->end_subject;
04487
04488 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
04489 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
04490 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
04491
04492 md->notbol = (options & PCRE_NOTBOL) != 0;
04493 md->noteol = (options & PCRE_NOTEOL) != 0;
04494 md->notempty = (options & PCRE_NOTEMPTY) != 0;
04495 md->partial = (options & PCRE_PARTIAL) != 0;
04496 md->hitend = FALSE;
04497
04498 md->recursive = NULL;
04499
04500 md->lcc = tables + lcc_offset;
04501 md->ctypes = tables + ctypes_offset;
04502
04503
04504
04505 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
04506 {
04507 case 0:
04508 if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
04509 md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
04510 else
04511 #ifdef BSR_ANYCRLF
04512 md->bsr_anycrlf = TRUE;
04513 #else
04514 md->bsr_anycrlf = FALSE;
04515 #endif
04516 break;
04517
04518 case PCRE_BSR_ANYCRLF:
04519 md->bsr_anycrlf = TRUE;
04520 break;
04521
04522 case PCRE_BSR_UNICODE:
04523 md->bsr_anycrlf = FALSE;
04524 break;
04525
04526 default: return PCRE_ERROR_BADNEWLINE;
04527 }
04528
04529
04530
04531
04532 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
04533 (pcre_uint32)options) & PCRE_NEWLINE_BITS)
04534 {
04535 case 0: newline = NEWLINE; break;
04536 case PCRE_NEWLINE_CR: newline = '\r'; break;
04537 case PCRE_NEWLINE_LF: newline = '\n'; break;
04538 case PCRE_NEWLINE_CR+
04539 PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
04540 case PCRE_NEWLINE_ANY: newline = -1; break;
04541 case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
04542 default: return PCRE_ERROR_BADNEWLINE;
04543 }
04544
04545 if (newline == -2)
04546 {
04547 md->nltype = NLTYPE_ANYCRLF;
04548 }
04549 else if (newline < 0)
04550 {
04551 md->nltype = NLTYPE_ANY;
04552 }
04553 else
04554 {
04555 md->nltype = NLTYPE_FIXED;
04556 if (newline > 255)
04557 {
04558 md->nllen = 2;
04559 md->nl[0] = (newline >> 8) & 255;
04560 md->nl[1] = newline & 255;
04561 }
04562 else
04563 {
04564 md->nllen = 1;
04565 md->nl[0] = newline;
04566 }
04567 }
04568
04569
04570
04571
04572 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
04573 return PCRE_ERROR_BADPARTIAL;
04574
04575
04576
04577
04578 #ifdef SUPPORT_UTF8
04579 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
04580 {
04581 if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
04582 return PCRE_ERROR_BADUTF8;
04583 if (start_offset > 0 && start_offset < length)
04584 {
04585 int tb = ((uschar *)subject)[start_offset];
04586 if (tb > 127)
04587 {
04588 tb &= 0xc0;
04589 if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
04590 }
04591 }
04592 }
04593 #endif
04594
04595
04596
04597
04598
04599 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
04600
04601
04602
04603
04604
04605
04606 ocount = offsetcount - (offsetcount % 3);
04607
04608 if (re->top_backref > 0 && re->top_backref >= ocount/3)
04609 {
04610 ocount = re->top_backref * 3 + 3;
04611 md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
04612 if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
04613 using_temporary_offsets = TRUE;
04614 DPRINTF(("Got memory to hold back references\n"));
04615 }
04616 else md->offset_vector = offsets;
04617
04618 md->offset_end = ocount;
04619 md->offset_max = (2*ocount)/3;
04620 md->offset_overflow = FALSE;
04621 md->capture_last = -1;
04622
04623
04624
04625
04626
04627 resetcount = 2 + re->top_bracket * 2;
04628 if (resetcount > offsetcount) resetcount = ocount;
04629
04630
04631
04632
04633
04634 if (md->offset_vector != NULL)
04635 {
04636 register int *iptr = md->offset_vector + ocount;
04637 register int *iend = iptr - resetcount/2 + 1;
04638 while (--iptr >= iend) *iptr = -1;
04639 }
04640
04641
04642
04643
04644
04645
04646
04647 if (!anchored)
04648 {
04649 if ((re->flags & PCRE_FIRSTSET) != 0)
04650 {
04651 first_byte = re->first_byte & 255;
04652 if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
04653 first_byte = md->lcc[first_byte];
04654 }
04655 else
04656 if (!startline && study != NULL &&
04657 (study->options & PCRE_STUDY_MAPPED) != 0)
04658 start_bits = study->start_bits;
04659 }
04660
04661
04662
04663
04664 if ((re->flags & PCRE_REQCHSET) != 0)
04665 {
04666 req_byte = re->req_byte & 255;
04667 req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
04668 req_byte2 = (tables + fcc_offset)[req_byte];
04669 }
04670
04671
04672
04673
04674
04675
04676
04677 for(;;)
04678 {
04679 USPTR save_end_subject = end_subject;
04680 USPTR new_start_match;
04681
04682
04683
04684 if (md->offset_vector != NULL)
04685 {
04686 register int *iptr = md->offset_vector;
04687 register int *iend = iptr + resetcount;
04688 while (iptr < iend) *iptr++ = -1;
04689 }
04690
04691
04692
04693
04694
04695
04696
04697 if (firstline)
04698 {
04699 USPTR t = start_match;
04700 #ifdef SUPPORT_UTF8
04701 if (utf8)
04702 {
04703 while (t < md->end_subject && !IS_NEWLINE(t))
04704 {
04705 t++;
04706 while (t < end_subject && (*t & 0xc0) == 0x80) t++;
04707 }
04708 }
04709 else
04710 #endif
04711 while (t < md->end_subject && !IS_NEWLINE(t)) t++;
04712 end_subject = t;
04713 }
04714
04715
04716
04717 if (first_byte >= 0)
04718 {
04719 if (first_byte_caseless)
04720 while (start_match < end_subject && md->lcc[*start_match] != first_byte)
04721 start_match++;
04722 else
04723 while (start_match < end_subject && *start_match != first_byte)
04724 start_match++;
04725 }
04726
04727
04728
04729 else if (startline)
04730 {
04731 if (start_match > md->start_subject + start_offset)
04732 {
04733 #ifdef SUPPORT_UTF8
04734 if (utf8)
04735 {
04736 while (start_match < end_subject && !WAS_NEWLINE(start_match))
04737 {
04738 start_match++;
04739 while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
04740 start_match++;
04741 }
04742 }
04743 else
04744 #endif
04745 while (start_match < end_subject && !WAS_NEWLINE(start_match))
04746 start_match++;
04747
04748
04749
04750
04751
04752 if (start_match[-1] == '\r' &&
04753 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
04754 start_match < end_subject &&
04755 *start_match == '\n')
04756 start_match++;
04757 }
04758 }
04759
04760
04761
04762 else if (start_bits != NULL)
04763 {
04764 while (start_match < end_subject)
04765 {
04766 register unsigned int c = *start_match;
04767 if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
04768 else break;
04769 }
04770 }
04771
04772
04773
04774 end_subject = save_end_subject;
04775
04776 #ifdef DEBUG
04777 printf(">>>> Match against: ");
04778 pchars(start_match, end_subject - start_match, TRUE, md);
04779 printf("\n");
04780 #endif
04781
04782
04783
04784
04785
04786
04787
04788
04789
04790
04791
04792
04793
04794
04795
04796
04797
04798 if (req_byte >= 0 &&
04799 end_subject - start_match < REQ_BYTE_MAX &&
04800 !md->partial)
04801 {
04802 register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
04803
04804
04805
04806
04807 if (p > req_byte_ptr)
04808 {
04809 if (req_byte_caseless)
04810 {
04811 while (p < end_subject)
04812 {
04813 register int pp = *p++;
04814 if (pp == req_byte || pp == req_byte2) { p--; break; }
04815 }
04816 }
04817 else
04818 {
04819 while (p < end_subject)
04820 {
04821 if (*p++ == req_byte) { p--; break; }
04822 }
04823 }
04824
04825
04826
04827
04828 if (p >= end_subject)
04829 {
04830 rc = MATCH_NOMATCH;
04831 break;
04832 }
04833
04834
04835
04836
04837
04838 req_byte_ptr = p;
04839 }
04840 }
04841
04842
04843
04844 md->start_match_ptr = start_match;
04845 md->match_call_count = 0;
04846 rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
04847
04848 switch(rc)
04849 {
04850
04851
04852
04853 case MATCH_NOMATCH:
04854 case MATCH_PRUNE:
04855 case MATCH_THEN:
04856 new_start_match = start_match + 1;
04857 #ifdef SUPPORT_UTF8
04858 if (utf8)
04859 while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
04860 new_start_match++;
04861 #endif
04862 break;
04863
04864
04865
04866 case MATCH_SKIP:
04867 new_start_match = md->start_match_ptr;
04868 break;
04869
04870
04871
04872 case MATCH_COMMIT:
04873 rc = MATCH_NOMATCH;
04874 goto ENDLOOP;
04875
04876
04877
04878 default:
04879 goto ENDLOOP;
04880 }
04881
04882
04883
04884
04885 rc = MATCH_NOMATCH;
04886
04887
04888
04889
04890
04891 if (firstline && IS_NEWLINE(start_match)) break;
04892
04893
04894
04895 start_match = new_start_match;
04896
04897
04898
04899
04900 if (anchored || start_match > end_subject) break;
04901
04902
04903
04904
04905
04906 if (start_match[-1] == '\r' &&
04907 start_match < end_subject &&
04908 *start_match == '\n' &&
04909 (re->flags & PCRE_HASCRORLF) == 0 &&
04910 (md->nltype == NLTYPE_ANY ||
04911 md->nltype == NLTYPE_ANYCRLF ||
04912 md->nllen == 2))
04913 start_match++;
04914
04915 }
04916
04917
04918
04919
04920
04921
04922
04923
04924
04925
04926
04927
04928
04929
04930
04931
04932
04933
04934
04935
04936
04937 ENDLOOP:
04938
04939 if (rc == MATCH_MATCH)
04940 {
04941 if (using_temporary_offsets)
04942 {
04943 if (offsetcount >= 4)
04944 {
04945 memcpy(offsets + 2, md->offset_vector + 2,
04946 (offsetcount - 2) * sizeof(int));
04947 DPRINTF(("Copied offsets from temporary memory\n"));
04948 }
04949 if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
04950 DPRINTF(("Freeing temporary memory\n"));
04951 (pcre_free)(md->offset_vector);
04952 }
04953
04954
04955
04956
04957 rc = md->offset_overflow? 0 : md->end_offset_top/2;
04958
04959
04960
04961
04962
04963 if (offsetcount < 2) rc = 0; else
04964 {
04965 offsets[0] = md->start_match_ptr - md->start_subject;
04966 offsets[1] = md->end_match_ptr - md->start_subject;
04967 }
04968
04969 DPRINTF((">>>> returning %d\n", rc));
04970 return rc;
04971 }
04972
04973
04974
04975
04976 if (using_temporary_offsets)
04977 {
04978 DPRINTF(("Freeing temporary memory\n"));
04979 (pcre_free)(md->offset_vector);
04980 }
04981
04982 if (rc != MATCH_NOMATCH)
04983 {
04984 DPRINTF((">>>> error: returning %d\n", rc));
04985 return rc;
04986 }
04987 else if (md->partial && md->hitend)
04988 {
04989 DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
04990 return PCRE_ERROR_PARTIAL;
04991 }
04992 else
04993 {
04994 DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
04995 return PCRE_ERROR_NOMATCH;
04996 }
04997 }
04998
04999