00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045 #ifdef HAVE_CONFIG_H
00046 #include "config.h"
00047 #endif
00048
00049 #define NLBLOCK cd
00050 #define PSSTART start_pattern
00051 #define PSEND end_pattern
00052
00053 #include "pcre_internal.h"
00054
00055
00056
00057
00058
00059 #ifdef DEBUG
00060 #include "pcre_printint.src"
00061 #endif
00062
00063
00064
00065
/* Set bit number b in the bit map a, which is an array of bytes holding eight
bits each, least significant bit first. Both arguments are parenthesized so
that expression arguments (for example SETBIT(map, c + 1)) expand correctly.
Note that b is evaluated twice, so it must not have side effects. */

#define SETBIT(a,b) ((a)[(b)/8] |= (1 << ((b)%8)))
00067
00068
00069
00070
00071
00072
/* A value 20 below INT_MAX. NOTE(review): the uses of this macro are outside
this section; presumably it is the ceiling used when testing a running length
for overflow, with the margin allowing small increments before the test -
confirm against the callers. */

00073 #define OFLOW_MAX (INT_MAX - 20)
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
/* Size of a compile-time work area. NOTE(review): neither the units nor the
code that uses this value are visible in this section - confirm before
changing it. */

00092 #define COMPILE_WORK_SIZE (4096)
00093
00094
00095
00096
00097
00098
00099
/* Table used by check_escape() for the character that follows a backslash.
In the non-EBCDIC build it is indexed by (character - '0') and covers '0' to
'z'; in the EBCDIC build it is indexed by (character - 0x48) and covers 0x48
to 0xFF. A zero entry means that check_escape() must deal with the character
itself; a non-zero entry is returned as the value of the escape. Positive
entries are literal character values; negative entries are -ESC_xxx codes
(defined outside this file section). */

00100 #ifndef EBCDIC
00101 static const short int escapes[] = {
00102 0, 0, 0, 0, 0, 0, 0, 0,  /* 0 - 7 */
00103 0, 0, ':', ';', '<', '=', '>', '?',  /* 8 - ? */
00104 '@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E, 0, -ESC_G,  /* @ - G */
00105 -ESC_H, 0, 0, -ESC_K, 0, 0, 0, 0,  /* H - O */
00106 -ESC_P, -ESC_Q, -ESC_R, -ESC_S, 0, 0, -ESC_V, -ESC_W,  /* P - W */
00107 -ESC_X, 0, -ESC_Z, '[', '\\', ']', '^', '_',  /* X - _ */
00108 '`', 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0,  /* ` - g */
00109 -ESC_h, 0, 0, -ESC_k, 0, 0, ESC_n, 0,  /* h - o */
00110 -ESC_p, 0, ESC_r, -ESC_s, ESC_tee, 0, -ESC_v, -ESC_w,  /* p - w */
00111 0, 0, -ESC_z  /* x - z */
00112 };
00113
00114 #else
00115 static const short int escapes[] = {
00116 0, 0, 0, '.', '<', '(', '+', '|',  /* 48 */
00117 '&', 0, 0, 0, 0, 0, 0, 0,  /* 50 */
00118 0, 0, '!', '$', '*', ')', ';', '~',  /* 58 */
00119 '-', '/', 0, 0, 0, 0, 0, 0,  /* 60 */
00120 0, 0, '|', ',', '%', '_', '>', '?',  /* 68 */
00121 0, 0, 0, 0, 0, 0, 0, 0,  /* 70 */
00122 0, '`', ':', '#', '@', '\'', '=', '"',  /* 78 */
00123 0, 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0,  /* 80 */
00124 -ESC_h, 0, 0, '{', 0, 0, 0, 0,  /* 88 */
00125 0, 0, -ESC_k, 'l', 0, ESC_n, 0, -ESC_p,  /* 90 */
00126 0, ESC_r, 0, '}', 0, 0, 0, 0,  /* 98 */
00127 0, '~', -ESC_s, ESC_tee, 0,-ESC_v, -ESC_w, 0,  /* A0 */
00128 0,-ESC_z, 0, 0, 0, '[', 0, 0,  /* A8 */
00129 0, 0, 0, 0, 0, 0, 0, 0,  /* B0 */
00130 0, 0, 0, 0, 0, ']', '=', '-',  /* B8 */
00131 '{',-ESC_A, -ESC_B, -ESC_C, -ESC_D,-ESC_E, 0, -ESC_G,  /* C0 */
00132 -ESC_H, 0, 0, 0, 0, 0, 0, 0,  /* C8 */
00133 '}', 0, -ESC_K, 0, 0, 0, 0, -ESC_P,  /* D0 */
00134 -ESC_Q,-ESC_R, 0, 0, 0, 0, 0, 0,  /* D8 */
00135 '\\', 0, -ESC_S, 0, 0,-ESC_V, -ESC_W, -ESC_X,  /* E0 */
00136 0,-ESC_Z, 0, 0, 0, 0, 0, 0,  /* E8 */
00137 0, 0, 0, 0, 0, 0, 0, 0,  /* F0 */
00138 0, 0, 0, 0, 0, 0, 0, 0  /* F8 */
00139 };
00140 #endif
00141
00142
00143
00144
00145
00146
/* Table of the names that may appear in a (*VERB) item. The names are held as
one string with NUL separators (verbnames); the parallel verbs[] vector gives,
in the same order, the length of each name and an opcode for it. Note that "F"
and "FAIL" both map to OP_FAIL. NOTE(review): the code that searches this
table is outside this section. */

00147 typedef struct verbitem {
00148 int len;  /* Length of the name in verbnames */
00149 int op;  /* Opcode associated with the name */
00150 } verbitem;
00151
00152 static const char verbnames[] =
00153 "ACCEPT\0"
00154 "COMMIT\0"
00155 "F\0"
00156 "FAIL\0"
00157 "PRUNE\0"
00158 "SKIP\0"
00159 "THEN";
00160
00161 static const verbitem verbs[] = {
00162 { 6, OP_ACCEPT },
00163 { 6, OP_COMMIT },
00164 { 1, OP_FAIL },
00165 { 4, OP_FAIL },
00166 { 5, OP_PRUNE },
00167 { 4, OP_SKIP },
00168 { 4, OP_THEN }
00169 };
00170
/* Number of entries in verbs[] */

00171 static const int verbcount = sizeof(verbs)/sizeof(verbitem);
00172
00173
00174
00175
00176
00177
00178
00179
/* The names of the POSIX character classes, held as a single string with NUL
separators, together with a parallel vector of the name lengths. The length
vector has one extra entry, a zero, after the last name. NOTE(review): the
zero presumably terminates a search loop that is outside this section -
confirm. */

00180 static const char posix_names[] =
00181 "alpha\0" "lower\0" "upper\0" "alnum\0" "ascii\0" "blank\0"
00182 "cntrl\0" "digit\0" "graph\0" "print\0" "punct\0" "space\0"
00183 "word\0" "xdigit";
00184
00185 static const uschar posix_name_lengths[] = {
00186 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
00197
/* Three integers for each POSIX class, one row per name and in the same order
as posix_names (14 rows for 14 names). The first two entries of a row are
cbit_xxx values, with -1 in the second position when there is no second value;
the third entry is a small integer. NOTE(review): the code that interprets
these rows is outside this section. Presumably the cbit_xxx values are offsets
of bit maps in the character tables and the third value selects a special
adjustment for classes that are not a plain copy of one map - confirm against
the class-compiling code before editing. */

00198 static const int posix_class_maps[] = {
00199 cbit_word, cbit_digit, -2,  /* alpha */
00200 cbit_lower, -1, 0,  /* lower */
00201 cbit_upper, -1, 0,  /* upper */
00202 cbit_word, -1, 2,  /* alnum */
00203 cbit_print, cbit_cntrl, 0,  /* ascii */
00204 cbit_space, -1, 1,  /* blank */
00205 cbit_cntrl, -1, 0,  /* cntrl */
00206 cbit_digit, -1, 0,  /* digit */
00207 cbit_graph, -1, 0,  /* graph */
00208 cbit_print, -1, 0,  /* print */
00209 cbit_punct, -1, 0,  /* punct */
00210 cbit_space, -1, 0,  /* space */
00211 cbit_word, -1, 0,  /* word */
00212 cbit_xdigit,-1, 0  /* xdigit */
00213 };
00214
00215
/* STRING(a) turns its argument, exactly as written, into a string literal.
XSTRING(s) adds one level of macro expansion first, so that the value of a
macro is what gets turned into a string. XSTRING is used in error_texts to
embed the values of MAX_NAME_SIZE and MAX_NAME_COUNT in two messages. */

00216 #define STRING(a) # a
00217 #define XSTRING(s) STRING(s)
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227
00228
/* The texts of the compile-time error messages, held as one string with NUL
separators. find_error_text() returns message n by skipping n NUL-terminated
strings from the start, so the position of a message in this list is its
number: the ERRnn values used in this file (for example ERR1, ERR34, ERR61)
select the message with index nn. New messages must therefore be added at the
end, and existing messages must not be removed or reordered. The messages are
laid out in groups of five; the comment before each group gives the index of
its first message. */

00229 static const char error_texts[] =
00230 "no error\0"
00231 "\\ at end of pattern\0"
00232 "\\c at end of pattern\0"
00233 "unrecognized character follows \\\0"
00234 "numbers out of order in {} quantifier\0"
00235
/* 5 */
00236 "number too big in {} quantifier\0"
00237 "missing terminating ] for character class\0"
00238 "invalid escape sequence in character class\0"
00239 "range out of order in character class\0"
00240 "nothing to repeat\0"
00241
/* 10 */
00242 "operand of unlimited repeat could match the empty string\0"
00243 "internal error: unexpected repeat\0"
00244 "unrecognized character after (? or (?-\0"
00245 "POSIX named classes are supported only within a class\0"
00246 "missing )\0"
00247
/* 15 */
00248 "reference to non-existent subpattern\0"
00249 "erroffset passed as NULL\0"
00250 "unknown option bit(s) set\0"
00251 "missing ) after comment\0"
00252 "parentheses nested too deeply\0"
00253
/* 20 */
00254 "regular expression is too large\0"
00255 "failed to get memory\0"
00256 "unmatched parentheses\0"
00257 "internal error: code overflow\0"
00258 "unrecognized character after (?<\0"
00259
/* 25 */
00260 "lookbehind assertion is not fixed length\0"
00261 "malformed number or name after (?(\0"
00262 "conditional group contains more than two branches\0"
00263 "assertion expected after (?(\0"
00264 "(?R or (?[+-]digits must be followed by )\0"
00265
/* 30 */
00266 "unknown POSIX class name\0"
00267 "POSIX collating elements are not supported\0"
00268 "this version of PCRE is not compiled with PCRE_UTF8 support\0"
00269 "spare error\0"
00270 "character value in \\x{...} sequence is too large\0"
00271
/* 35 */
00272 "invalid condition (?(0)\0"
00273 "\\C not allowed in lookbehind assertion\0"
00274 "PCRE does not support \\L, \\l, \\N, \\U, or \\u\0"
00275 "number after (?C is > 255\0"
00276 "closing ) for (?C expected\0"
00277
/* 40 */
00278 "recursive call could loop indefinitely\0"
00279 "unrecognized character after (?P\0"
00280 "syntax error in subpattern name (missing terminator)\0"
00281 "two named subpatterns have the same name\0"
00282 "invalid UTF-8 string\0"
00283
/* 45 */
00284 "support for \\P, \\p, and \\X has not been compiled\0"
00285 "malformed \\P or \\p sequence\0"
00286 "unknown property name after \\P or \\p\0"
00287 "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)\0"
00288 "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0"
00289
/* 50 */
00290 "repeated subpattern is too long\0"
00291 "octal value is greater than \\377 (not in UTF-8 mode)\0"
00292 "internal error: overran compiling workspace\0"
00293 "internal error: previously-checked referenced subpattern not found\0"
00294 "DEFINE group contains more than one branch\0"
00295
/* 55 */
00296 "repeating a DEFINE group is not allowed\0"
00297 "inconsistent NEWLINE options\0"
00298 "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
00299 "a numbered reference must not be zero\0"
00300 "(*VERB) with an argument is not supported\0"
00301
/* 60 */
00302 "(*VERB) not recognized\0"
00303 "number is too big\0"
00304 "subpattern name expected\0"
00305 "digit expected after (?+\0"
00306 "] is an invalid data character in JavaScript compatibility mode";
00307
00308
00309
00310
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
/* digitab is a 256-entry table, indexed by character value, that this file
tests with the ctype_digit and ctype_xdigit masks to recognize decimal and
hexadecimal digits in a pattern without depending on the run-time character
tables. Entries are 0x0c for the decimal digits, 0x08 for the letters a-f and
A-F, and zero for everything else. NOTE(review): this implies that ctype_digit
is 0x04 and ctype_xdigit is 0x08; those macros are defined outside this
section - confirm. There is one version of the table for ASCII-based character
codes and one for EBCDIC. */

00325 #ifndef EBCDIC
00326 static const unsigned char digitab[] =
00327 {
00328 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00329 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00330 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00331 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00332 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00333 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00334 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,  /* 0 - 7 */
00335 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00,  /* 8 - ? */
00336 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00,  /* @ - G */
00337 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00338 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00339 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00340 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00,  /* ` - g */
00341 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00342 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00343 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00344 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00345 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00346 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00347 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00348 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00349 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00350 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00351 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00352 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00353 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00354 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00355 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00356 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00357 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00358 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00359 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
00360
00361 #else
/* EBCDIC version: a-f are at 0x81-0x86, A-F at 0xC1-0xC6, 0-9 at 0xF0-0xF9. */
00362 static const unsigned char digitab[] =
00363 {
00364 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00365 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00366 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00367 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00368 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00369 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00370 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00371 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00372 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00373 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00374 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00375 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00376 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00377 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00378 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00379 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00380 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00,  /* 80 */
00381 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00382 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00383 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00384 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00385 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00386 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00387 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00388 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00,  /* C0 */
00389 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00390 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00391 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00392 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00393 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00394 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,  /* F0 */
00395 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00};
00396
/* A 256-entry table of flag bits for EBCDIC character codes. In this section
it is used only by check_escape(), which treats a character as a candidate
for the escapes[] table when any of the bits in 0x0E is set for it.
NOTE(review): the meaning of the individual bits is not visible here;
presumably they follow the ctype_xxx bit assignments of the character tables -
confirm before editing. */

00397 static const unsigned char ebcdic_chartab[] = {
00398 0x80,0x00,0x00,0x00,0x00,0x01,0x00,0x00,
00399 0x00,0x00,0x00,0x00,0x01,0x01,0x00,0x00,
00400 0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,
00401 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00402 0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,
00403 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00404 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00405 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00406 0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00407 0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80,
00408 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00409 0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00,
00410 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00411 0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80,
00412 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00413 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00414 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12,
00415 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00,
00416 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
00417 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00,
00418 0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12,
00419 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00,
00420 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00421 0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00,
00422 0x80,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12,
00423 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00,
00424 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
00425 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00,
00426 0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12,
00427 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00,
00428 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,
00429 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00};
00430 #endif
00431
00432
00433
00434
/* Forward declaration of compile_regex(), so that functions that come before
its definition in this file can call it. The definition, and the meaning of
the individual arguments, are outside this section. */

00435 static BOOL
00436 compile_regex(int, int, uschar **, const uschar **, int *, BOOL, BOOL, int,
00437 int *, int *, branch_chain *, compile_data *, int *);
00438
00439
00440
00441
00442
00443
00444
00445
00446
00447
00448
00449
00450
00451
00452
00453
00454 static const char *
00455 find_error_text(int n)
00456 {
00457 const char *s = error_texts;
00458 for (; n > 0; n--) while (*s++ != 0) {};
00459 return s;
00460 }
00461
00462
00463
00464
00465
00466
00467
00468
00469
00470
00471
00472
00473
00474
00475
00476
00477
00478
00479
00480
00481
00482
00483
00484
00485
00486
00487 static int
00488 check_escape(const uschar **ptrptr, int *errorcodeptr, int bracount,
00489 int options, BOOL isclass)
00490 {
00491 BOOL utf8 = (options & PCRE_UTF8) != 0;
00492 const uschar *ptr = *ptrptr + 1;
00493 int c, i;
00494
00495 GETCHARINCTEST(c, ptr);
00496 ptr--;
00497
00498
00499
00500 if (c == 0) *errorcodeptr = ERR1;
00501
00502
00503
00504
00505
00506 #ifndef EBCDIC
00507 else if (c < '0' || c > 'z') {}
00508 else if ((i = escapes[c - '0']) != 0) c = i;
00509
00510 #else
00511 else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}
00512 else if ((i = escapes[c - 0x48]) != 0) c = i;
00513 #endif
00514
00515
00516
00517 else
00518 {
00519 const uschar *oldptr;
00520 BOOL braced, negated;
00521
00522 switch (c)
00523 {
00524
00525
00526
00527 case 'l':
00528 case 'L':
00529 case 'N':
00530 case 'u':
00531 case 'U':
00532 *errorcodeptr = ERR37;
00533 break;
00534
00535
00536
00537
00538
00539
00540
00541
00542
00543
00544
00545
00546
00547
00548
00549
00550
00551 case 'g':
00552 if (ptr[1] == '<' || ptr[1] == '\'')
00553 {
00554 c = -ESC_g;
00555 break;
00556 }
00557
00558
00559
00560 if (ptr[1] == '{')
00561 {
00562 const uschar *p;
00563 for (p = ptr+2; *p != 0 && *p != '}'; p++)
00564 if (*p != '-' && (digitab[*p] & ctype_digit) == 0) break;
00565 if (*p != 0 && *p != '}')
00566 {
00567 c = -ESC_k;
00568 break;
00569 }
00570 braced = TRUE;
00571 ptr++;
00572 }
00573 else braced = FALSE;
00574
00575 if (ptr[1] == '-')
00576 {
00577 negated = TRUE;
00578 ptr++;
00579 }
00580 else negated = FALSE;
00581
00582 c = 0;
00583 while ((digitab[ptr[1]] & ctype_digit) != 0)
00584 c = c * 10 + *(++ptr) - '0';
00585
00586 if (c < 0)
00587 {
00588 *errorcodeptr = ERR61;
00589 break;
00590 }
00591
00592 if (braced && *(++ptr) != '}')
00593 {
00594 *errorcodeptr = ERR57;
00595 break;
00596 }
00597
00598 if (c == 0)
00599 {
00600 *errorcodeptr = ERR58;
00601 break;
00602 }
00603
00604 if (negated)
00605 {
00606 if (c > bracount)
00607 {
00608 *errorcodeptr = ERR15;
00609 break;
00610 }
00611 c = bracount - (c - 1);
00612 }
00613
00614 c = -(ESC_REF + c);
00615 break;
00616
00617
00618
00619
00620
00621
00622
00623
00624
00625
00626
00627
00628
00629 case '1': case '2': case '3': case '4': case '5':
00630 case '6': case '7': case '8': case '9':
00631
00632 if (!isclass)
00633 {
00634 oldptr = ptr;
00635 c -= '0';
00636 while ((digitab[ptr[1]] & ctype_digit) != 0)
00637 c = c * 10 + *(++ptr) - '0';
00638 if (c < 0)
00639 {
00640 *errorcodeptr = ERR61;
00641 break;
00642 }
00643 if (c < 10 || c <= bracount)
00644 {
00645 c = -(ESC_REF + c);
00646 break;
00647 }
00648 ptr = oldptr;
00649 }
00650
00651
00652
00653
00654
00655 if ((c = *ptr) >= '8')
00656 {
00657 ptr--;
00658 c = 0;
00659 break;
00660 }
00661
00662
00663
00664
00665
00666
00667
00668 case '0':
00669 c -= '0';
00670 while(i++ < 2 && ptr[1] >= '0' && ptr[1] <= '7')
00671 c = c * 8 + *(++ptr) - '0';
00672 if (!utf8 && c > 255) *errorcodeptr = ERR51;
00673 break;
00674
00675
00676
00677
00678
00679 case 'x':
00680 if (ptr[1] == '{')
00681 {
00682 const uschar *pt = ptr + 2;
00683 int count = 0;
00684
00685 c = 0;
00686 while ((digitab[*pt] & ctype_xdigit) != 0)
00687 {
00688 register int cc = *pt++;
00689 if (c == 0 && cc == '0') continue;
00690 count++;
00691
00692 #ifndef EBCDIC
00693 if (cc >= 'a') cc -= 32;
00694 c = (c << 4) + cc - ((cc < 'A')? '0' : ('A' - 10));
00695 #else
00696 if (cc >= 'a' && cc <= 'z') cc += 64;
00697 c = (c << 4) + cc - ((cc >= '0')? '0' : ('A' - 10));
00698 #endif
00699 }
00700
00701 if (*pt == '}')
00702 {
00703 if (c < 0 || count > (utf8? 8 : 2)) *errorcodeptr = ERR34;
00704 ptr = pt;
00705 break;
00706 }
00707
00708
00709
00710 }
00711
00712
00713
00714 c = 0;
00715 while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0)
00716 {
00717 int cc;
00718 cc = *(++ptr);
00719 #ifndef EBCDIC
00720 if (cc >= 'a') cc -= 32;
00721 c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));
00722 #else
00723 if (cc <= 'z') cc += 64;
00724 c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10));
00725 #endif
00726 }
00727 break;
00728
00729
00730
00731
00732
00733 case 'c':
00734 c = *(++ptr);
00735 if (c == 0)
00736 {
00737 *errorcodeptr = ERR2;
00738 break;
00739 }
00740
00741 #ifndef EBCDIC
00742 if (c >= 'a' && c <= 'z') c -= 32;
00743 c ^= 0x40;
00744 #else
00745 if (c >= 'a' && c <= 'z') c += 64;
00746 c ^= 0xC0;
00747 #endif
00748 break;
00749
00750
00751
00752
00753
00754
00755
00756 default:
00757 if ((options & PCRE_EXTRA) != 0) switch(c)
00758 {
00759 default:
00760 *errorcodeptr = ERR3;
00761 break;
00762 }
00763 break;
00764 }
00765 }
00766
00767 *ptrptr = ptr;
00768 return c;
00769 }
00770
00771
00772
00773 #ifdef SUPPORT_UCP
00774
00775
00776
00777
00778
00779
00780
00781
00782
00783
00784
00785
00786
00787
00788
00789
00790
00791
00792 static int
00793 get_ucp(const uschar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr)
00794 {
00795 int c, i, bot, top;
00796 const uschar *ptr = *ptrptr;
00797 char name[32];
00798
00799 c = *(++ptr);
00800 if (c == 0) goto ERROR_RETURN;
00801
00802 *negptr = FALSE;
00803
00804
00805
00806
00807 if (c == '{')
00808 {
00809 if (ptr[1] == '^')
00810 {
00811 *negptr = TRUE;
00812 ptr++;
00813 }
00814 for (i = 0; i < (int)sizeof(name) - 1; i++)
00815 {
00816 c = *(++ptr);
00817 if (c == 0) goto ERROR_RETURN;
00818 if (c == '}') break;
00819 name[i] = c;
00820 }
00821 if (c !='}') goto ERROR_RETURN;
00822 name[i] = 0;
00823 }
00824
00825
00826
00827 else
00828 {
00829 name[0] = c;
00830 name[1] = 0;
00831 }
00832
00833 *ptrptr = ptr;
00834
00835
00836
00837 bot = 0;
00838 top = _pcre_utt_size;
00839
00840 while (bot < top)
00841 {
00842 i = (bot + top) >> 1;
00843 c = strcmp(name, _pcre_utt_names + _pcre_utt[i].name_offset);
00844 if (c == 0)
00845 {
00846 *dptr = _pcre_utt[i].value;
00847 return _pcre_utt[i].type;
00848 }
00849 if (c > 0) bot = i + 1; else top = i;
00850 }
00851
00852 *errorcodeptr = ERR47;
00853 *ptrptr = ptr;
00854 return -1;
00855
00856 ERROR_RETURN:
00857 *errorcodeptr = ERR46;
00858 *ptrptr = ptr;
00859 return -1;
00860 }
00861 #endif
00862
00863
00864
00865
00866
00867
00868
00869
00870
00871
00872
00873
00874
00875
00876
00877
00878
00879
00880
00881 static BOOL
00882 is_counted_repeat(const uschar *p)
00883 {
00884 if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
00885 while ((digitab[*p] & ctype_digit) != 0) p++;
00886 if (*p == '}') return TRUE;
00887
00888 if (*p++ != ',') return FALSE;
00889 if (*p == '}') return TRUE;
00890
00891 if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
00892 while ((digitab[*p] & ctype_digit) != 0) p++;
00893
00894 return (*p == '}');
00895 }
00896
00897
00898
00899
00900
00901
00902
00903
00904
00905
00906
00907
00908
00909
00910
00911
00912
00913
00914
00915
00916
00917
00918 static const uschar *
00919 read_repeat_counts(const uschar *p, int *minp, int *maxp, int *errorcodeptr)
00920 {
00921 int min = 0;
00922 int max = -1;
00923
00924
00925
00926
00927 while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';
00928 if (min < 0 || min > 65535)
00929 {
00930 *errorcodeptr = ERR5;
00931 return p;
00932 }
00933
00934
00935
00936
00937 if (*p == '}') max = min; else
00938 {
00939 if (*(++p) != '}')
00940 {
00941 max = 0;
00942 while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';
00943 if (max < 0 || max > 65535)
00944 {
00945 *errorcodeptr = ERR5;
00946 return p;
00947 }
00948 if (max < min)
00949 {
00950 *errorcodeptr = ERR4;
00951 return p;
00952 }
00953 }
00954 }
00955
00956
00957
00958
00959 *minp = min;
00960 *maxp = max;
00961 return p;
00962 }
00963
00964
00965
00966
00967
00968
00969
00970
00971
00972
00973
00974
00975
00976
00977
00978
00979
00980
00981
00982
00983
00984
00985
00986
00987 static int
00988 find_parens(const uschar *ptr, compile_data *cd, const uschar *name, int lorn,
00989 BOOL xmode)
00990 {
00991 const uschar *thisname;
00992 int count = cd->bracount;
00993
00994 for (; *ptr != 0; ptr++)
00995 {
00996 int term;
00997
00998
00999
01000 if (*ptr == '\\')
01001 {
01002 if (*(++ptr) == 0) return -1;
01003 if (*ptr == 'Q') for (;;)
01004 {
01005 while (*(++ptr) != 0 && *ptr != '\\') {};
01006 if (*ptr == 0) return -1;
01007 if (*(++ptr) == 'E') break;
01008 }
01009 continue;
01010 }
01011
01012
01013
01014
01015
01016
01017 if (*ptr == '[')
01018 {
01019 BOOL negate_class = FALSE;
01020 for (;;)
01021 {
01022 int c = *(++ptr);
01023 if (c == '\\')
01024 {
01025 if (ptr[1] == 'E') ptr++;
01026 else if (strncmp((const char *)ptr+1, "Q\\E", 3) == 0) ptr += 3;
01027 else break;
01028 }
01029 else if (!negate_class && c == '^')
01030 negate_class = TRUE;
01031 else break;
01032 }
01033
01034
01035
01036
01037 if (ptr[1] == ']' && (cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0)
01038 ptr++;
01039
01040 while (*(++ptr) != ']')
01041 {
01042 if (*ptr == 0) return -1;
01043 if (*ptr == '\\')
01044 {
01045 if (*(++ptr) == 0) return -1;
01046 if (*ptr == 'Q') for (;;)
01047 {
01048 while (*(++ptr) != 0 && *ptr != '\\') {};
01049 if (*ptr == 0) return -1;
01050 if (*(++ptr) == 'E') break;
01051 }
01052 continue;
01053 }
01054 }
01055 continue;
01056 }
01057
01058
01059
01060 if (xmode && *ptr == '#')
01061 {
01062 while (*(++ptr) != 0 && *ptr != '\n') {};
01063 if (*ptr == 0) return -1;
01064 continue;
01065 }
01066
01067
01068
01069 if (*ptr != '(') continue;
01070 if (ptr[1] != '?' && ptr[1] != '*')
01071 {
01072 count++;
01073 if (name == NULL && count == lorn) return count;
01074 continue;
01075 }
01076
01077 ptr += 2;
01078 if (*ptr == 'P') ptr++;
01079
01080
01081
01082 if ((*ptr != '<' || ptr[1] == '!' || ptr[1] == '=') &&
01083 *ptr != '\'')
01084 continue;
01085
01086 count++;
01087
01088 if (name == NULL && count == lorn) return count;
01089 term = *ptr++;
01090 if (term == '<') term = '>';
01091 thisname = ptr;
01092 while (*ptr != term) ptr++;
01093 if (name != NULL && lorn == ptr - thisname &&
01094 strncmp((const char *)name, (const char *)thisname, lorn) == 0)
01095 return count;
01096 }
01097
01098 return -1;
01099 }
01100
01101
01102
01103
01104
01105
01106
01107
01108
01109
01110
01111
01112
01113
01114
01115
01116
01117
01118
01119
01120
01121
01122
01123 static const uschar*
01124 first_significant_code(const uschar *code, int *options, int optbit,
01125 BOOL skipassert)
01126 {
01127 for (;;)
01128 {
01129 switch ((int)*code)
01130 {
01131 case OP_OPT:
01132 if (optbit > 0 && ((int)code[1] & optbit) != (*options & optbit))
01133 *options = (int)code[1];
01134 code += 2;
01135 break;
01136
01137 case OP_ASSERT_NOT:
01138 case OP_ASSERTBACK:
01139 case OP_ASSERTBACK_NOT:
01140 if (!skipassert) return code;
01141 do code += GET(code, 1); while (*code == OP_ALT);
01142 code += _pcre_OP_lengths[*code];
01143 break;
01144
01145 case OP_WORD_BOUNDARY:
01146 case OP_NOT_WORD_BOUNDARY:
01147 if (!skipassert) return code;
01148
01149
01150 case OP_CALLOUT:
01151 case OP_CREF:
01152 case OP_RREF:
01153 case OP_DEF:
01154 code += _pcre_OP_lengths[*code];
01155 break;
01156
01157 default:
01158 return code;
01159 }
01160 }
01161
01162 }
01163
01164
01165
01166
01167
01168
01169
01170
01171
01172
01173
01174
01175
01176
01177
01178
01179
01180
01181
01182
01183 static int
01184 find_fixedlength(uschar *code, int options)
01185 {
01186 int length = -1;
01187
01188 register int branchlength = 0;
01189 register uschar *cc = code + 1 + LINK_SIZE;
01190
01191
01192
01193
01194 for (;;)
01195 {
01196 int d;
01197 register int op = *cc;
01198 switch (op)
01199 {
01200 case OP_CBRA:
01201 case OP_BRA:
01202 case OP_ONCE:
01203 case OP_COND:
01204 d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), options);
01205 if (d < 0) return d;
01206 branchlength += d;
01207 do cc += GET(cc, 1); while (*cc == OP_ALT);
01208 cc += 1 + LINK_SIZE;
01209 break;
01210
01211
01212
01213
01214
01215 case OP_ALT:
01216 case OP_KET:
01217 case OP_KETRMAX:
01218 case OP_KETRMIN:
01219 case OP_END:
01220 if (length < 0) length = branchlength;
01221 else if (length != branchlength) return -1;
01222 if (*cc != OP_ALT) return length;
01223 cc += 1 + LINK_SIZE;
01224 branchlength = 0;
01225 break;
01226
01227
01228
01229 case OP_ASSERT:
01230 case OP_ASSERT_NOT:
01231 case OP_ASSERTBACK:
01232 case OP_ASSERTBACK_NOT:
01233 do cc += GET(cc, 1); while (*cc == OP_ALT);
01234
01235
01236
01237
01238 case OP_REVERSE:
01239 case OP_CREF:
01240 case OP_RREF:
01241 case OP_DEF:
01242 case OP_OPT:
01243 case OP_CALLOUT:
01244 case OP_SOD:
01245 case OP_SOM:
01246 case OP_EOD:
01247 case OP_EODN:
01248 case OP_CIRC:
01249 case OP_DOLL:
01250 case OP_NOT_WORD_BOUNDARY:
01251 case OP_WORD_BOUNDARY:
01252 cc += _pcre_OP_lengths[*cc];
01253 break;
01254
01255
01256
01257 case OP_CHAR:
01258 case OP_CHARNC:
01259 case OP_NOT:
01260 branchlength++;
01261 cc += 2;
01262 #ifdef SUPPORT_UTF8
01263 if ((options & PCRE_UTF8) != 0)
01264 {
01265 while ((*cc & 0xc0) == 0x80) cc++;
01266 }
01267 #endif
01268 break;
01269
01270
01271
01272
01273 case OP_EXACT:
01274 branchlength += GET2(cc,1);
01275 cc += 4;
01276 #ifdef SUPPORT_UTF8
01277 if ((options & PCRE_UTF8) != 0)
01278 {
01279 while((*cc & 0x80) == 0x80) cc++;
01280 }
01281 #endif
01282 break;
01283
01284 case OP_TYPEEXACT:
01285 branchlength += GET2(cc,1);
01286 if (cc[3] == OP_PROP || cc[3] == OP_NOTPROP) cc += 2;
01287 cc += 4;
01288 break;
01289
01290
01291
01292 case OP_PROP:
01293 case OP_NOTPROP:
01294 cc += 2;
01295
01296
01297 case OP_NOT_DIGIT:
01298 case OP_DIGIT:
01299 case OP_NOT_WHITESPACE:
01300 case OP_WHITESPACE:
01301 case OP_NOT_WORDCHAR:
01302 case OP_WORDCHAR:
01303 case OP_ANY:
01304 case OP_ALLANY:
01305 branchlength++;
01306 cc++;
01307 break;
01308
01309
01310
01311 case OP_ANYBYTE:
01312 return -2;
01313
01314
01315
01316 #ifdef SUPPORT_UTF8
01317 case OP_XCLASS:
01318 cc += GET(cc, 1) - 33;
01319
01320 #endif
01321
01322 case OP_CLASS:
01323 case OP_NCLASS:
01324 cc += 33;
01325
01326 switch (*cc)
01327 {
01328 case OP_CRSTAR:
01329 case OP_CRMINSTAR:
01330 case OP_CRQUERY:
01331 case OP_CRMINQUERY:
01332 return -1;
01333
01334 case OP_CRRANGE:
01335 case OP_CRMINRANGE:
01336 if (GET2(cc,1) != GET2(cc,3)) return -1;
01337 branchlength += GET2(cc,1);
01338 cc += 5;
01339 break;
01340
01341 default:
01342 branchlength++;
01343 }
01344 break;
01345
01346
01347
01348 default:
01349 return -1;
01350 }
01351 }
01352
01353 }
01354
01355
01356
01357
01358
01359
01360
01361
01362
01363
01364
01365
01366
01367
01368
01369
01370
01371
01372
01373 static const uschar *
01374 find_bracket(const uschar *code, BOOL utf8, int number)
01375 {
01376 for (;;)
01377 {
01378 register int c = *code;
01379 if (c == OP_END) return NULL;
01380
01381
01382
01383
01384
01385 if (c == OP_XCLASS) code += GET(code, 1);
01386
01387
01388
01389 else if (c == OP_CBRA)
01390 {
01391 int n = GET2(code, 1+LINK_SIZE);
01392 if (n == number) return (uschar *)code;
01393 code += _pcre_OP_lengths[c];
01394 }
01395
01396
01397
01398
01399
01400 else
01401 {
01402 switch(c)
01403 {
01404 case OP_TYPESTAR:
01405 case OP_TYPEMINSTAR:
01406 case OP_TYPEPLUS:
01407 case OP_TYPEMINPLUS:
01408 case OP_TYPEQUERY:
01409 case OP_TYPEMINQUERY:
01410 case OP_TYPEPOSSTAR:
01411 case OP_TYPEPOSPLUS:
01412 case OP_TYPEPOSQUERY:
01413 if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
01414 break;
01415
01416 case OP_TYPEUPTO:
01417 case OP_TYPEMINUPTO:
01418 case OP_TYPEEXACT:
01419 case OP_TYPEPOSUPTO:
01420 if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;
01421 break;
01422 }
01423
01424
01425
01426 code += _pcre_OP_lengths[c];
01427
01428
01429
01430
01431
01432 #ifdef SUPPORT_UTF8
01433 if (utf8) switch(c)
01434 {
01435 case OP_CHAR:
01436 case OP_CHARNC:
01437 case OP_EXACT:
01438 case OP_UPTO:
01439 case OP_MINUPTO:
01440 case OP_POSUPTO:
01441 case OP_STAR:
01442 case OP_MINSTAR:
01443 case OP_POSSTAR:
01444 case OP_PLUS:
01445 case OP_MINPLUS:
01446 case OP_POSPLUS:
01447 case OP_QUERY:
01448 case OP_MINQUERY:
01449 case OP_POSQUERY:
01450 if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
01451 break;
01452 }
01453 #else
01454 (void)(utf8);
01455 #endif
01456 }
01457 }
01458 }
01459
01460
01461
01462
01463
01464
01465
01466
01467
01468
01469
01470
01471
01472
01473
01474
01475
01476 static const uschar *
01477 find_recurse(const uschar *code, BOOL utf8)
01478 {
01479 for (;;)
01480 {
01481 register int c = *code;
01482 if (c == OP_END) return NULL;
01483 if (c == OP_RECURSE) return code;
01484
01485
01486
01487
01488
01489 if (c == OP_XCLASS) code += GET(code, 1);
01490
01491
01492
01493
01494
01495 else
01496 {
01497 switch(c)
01498 {
01499 case OP_TYPESTAR:
01500 case OP_TYPEMINSTAR:
01501 case OP_TYPEPLUS:
01502 case OP_TYPEMINPLUS:
01503 case OP_TYPEQUERY:
01504 case OP_TYPEMINQUERY:
01505 case OP_TYPEPOSSTAR:
01506 case OP_TYPEPOSPLUS:
01507 case OP_TYPEPOSQUERY:
01508 if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
01509 break;
01510
01511 case OP_TYPEPOSUPTO:
01512 case OP_TYPEUPTO:
01513 case OP_TYPEMINUPTO:
01514 case OP_TYPEEXACT:
01515 if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;
01516 break;
01517 }
01518
01519
01520
01521 code += _pcre_OP_lengths[c];
01522
01523
01524
01525
01526
01527 #ifdef SUPPORT_UTF8
01528 if (utf8) switch(c)
01529 {
01530 case OP_CHAR:
01531 case OP_CHARNC:
01532 case OP_EXACT:
01533 case OP_UPTO:
01534 case OP_MINUPTO:
01535 case OP_POSUPTO:
01536 case OP_STAR:
01537 case OP_MINSTAR:
01538 case OP_POSSTAR:
01539 case OP_PLUS:
01540 case OP_MINPLUS:
01541 case OP_POSPLUS:
01542 case OP_QUERY:
01543 case OP_MINQUERY:
01544 case OP_POSQUERY:
01545 if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
01546 break;
01547 }
01548 #else
01549 (void)(utf8);
01550 #endif
01551 }
01552 }
01553 }
01554
01555
01556
01557
01558
01559
01560
01561
01562
01563
01564
01565
01566
01567
01568
01569
01570
01571
01572
01573
01574
01575
01576
01577 static BOOL
01578 could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8)
01579 {
01580 register int c;
01581 for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE);
01582 code < endcode;
01583 code = first_significant_code(code + _pcre_OP_lengths[c], NULL, 0, TRUE))
01584 {
01585 const uschar *ccode;
01586
01587 c = *code;
01588
01589
01590
01591
01592 if (c == OP_ASSERT)
01593 {
01594 do code += GET(code, 1); while (*code == OP_ALT);
01595 c = *code;
01596 continue;
01597 }
01598
01599
01600
01601 if (c == OP_BRAZERO || c == OP_BRAMINZERO || c == OP_SKIPZERO)
01602 {
01603 code += _pcre_OP_lengths[c];
01604 do code += GET(code, 1); while (*code == OP_ALT);
01605 c = *code;
01606 continue;
01607 }
01608
01609
01610
01611 if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE || c == OP_COND)
01612 {
01613 BOOL empty_branch;
01614 if (GET(code, 1) == 0) return TRUE;
01615
01616
01617
01618 empty_branch = FALSE;
01619 do
01620 {
01621 if (!empty_branch && could_be_empty_branch(code, endcode, utf8))
01622 empty_branch = TRUE;
01623 code += GET(code, 1);
01624 }
01625 while (*code == OP_ALT);
01626 if (!empty_branch) return FALSE;
01627 c = *code;
01628 continue;
01629 }
01630
01631
01632
01633 switch (c)
01634 {
01635
01636
01637
01638
01639
01640
01641 #ifdef SUPPORT_UTF8
01642 case OP_XCLASS:
01643 ccode = code += GET(code, 1);
01644 goto CHECK_CLASS_REPEAT;
01645 #endif
01646
01647 case OP_CLASS:
01648 case OP_NCLASS:
01649 ccode = code + 33;
01650
01651 #ifdef SUPPORT_UTF8
01652 CHECK_CLASS_REPEAT:
01653 #endif
01654
01655 switch (*ccode)
01656 {
01657 case OP_CRSTAR:
01658 case OP_CRMINSTAR:
01659 case OP_CRQUERY:
01660 case OP_CRMINQUERY:
01661 break;
01662
01663 default:
01664 case OP_CRPLUS:
01665 case OP_CRMINPLUS:
01666 return FALSE;
01667
01668 case OP_CRRANGE:
01669 case OP_CRMINRANGE:
01670 if (GET2(ccode, 1) > 0) return FALSE;
01671 break;
01672 }
01673 break;
01674
01675
01676
01677 case OP_PROP:
01678 case OP_NOTPROP:
01679 case OP_EXTUNI:
01680 case OP_NOT_DIGIT:
01681 case OP_DIGIT:
01682 case OP_NOT_WHITESPACE:
01683 case OP_WHITESPACE:
01684 case OP_NOT_WORDCHAR:
01685 case OP_WORDCHAR:
01686 case OP_ANY:
01687 case OP_ALLANY:
01688 case OP_ANYBYTE:
01689 case OP_CHAR:
01690 case OP_CHARNC:
01691 case OP_NOT:
01692 case OP_PLUS:
01693 case OP_MINPLUS:
01694 case OP_POSPLUS:
01695 case OP_EXACT:
01696 case OP_NOTPLUS:
01697 case OP_NOTMINPLUS:
01698 case OP_NOTPOSPLUS:
01699 case OP_NOTEXACT:
01700 case OP_TYPEPLUS:
01701 case OP_TYPEMINPLUS:
01702 case OP_TYPEPOSPLUS:
01703 case OP_TYPEEXACT:
01704 return FALSE;
01705
01706
01707
01708
01709 case OP_TYPESTAR:
01710 case OP_TYPEMINSTAR:
01711 case OP_TYPEPOSSTAR:
01712 case OP_TYPEQUERY:
01713 case OP_TYPEMINQUERY:
01714 case OP_TYPEPOSQUERY:
01715 if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
01716 break;
01717
01718
01719
01720 case OP_TYPEUPTO:
01721 case OP_TYPEMINUPTO:
01722 case OP_TYPEPOSUPTO:
01723 if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;
01724 break;
01725
01726
01727
01728 case OP_KET:
01729 case OP_KETRMAX:
01730 case OP_KETRMIN:
01731 case OP_ALT:
01732 return TRUE;
01733
01734
01735
01736
01737 #ifdef SUPPORT_UTF8
01738 case OP_STAR:
01739 case OP_MINSTAR:
01740 case OP_POSSTAR:
01741 case OP_QUERY:
01742 case OP_MINQUERY:
01743 case OP_POSQUERY:
01744 case OP_UPTO:
01745 case OP_MINUPTO:
01746 case OP_POSUPTO:
01747 if (utf8) while ((code[2] & 0xc0) == 0x80) code++;
01748 break;
01749 #endif
01750 }
01751 }
01752
01753 return TRUE;
01754 }
01755
01756
01757
01758
01759
01760
01761
01762
01763
01764
01765
01766
01767
01768
01769
01770
01771
01772
01773
01774
01775
01776 static BOOL
01777 could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,
01778 BOOL utf8)
01779 {
01780 while (bcptr != NULL && bcptr->current >= code)
01781 {
01782 if (!could_be_empty_branch(bcptr->current, endcode, utf8)) return FALSE;
01783 bcptr = bcptr->outer;
01784 }
01785 return TRUE;
01786 }
01787
01788
01789
01790
01791
01792
01793
01794
01795
01796
01797
01798
01799
01800
01801
01802
01803
01804
01805
01806
01807
01808
01809
01810
01811
01812
01813
01814
01815
01816
01817
01818
01819
01820
01821 static BOOL
01822 check_posix_syntax(const uschar *ptr, const uschar **endptr)
01823 {
01824 int terminator;
01825 terminator = *(++ptr);
01826 for (++ptr; *ptr != 0; ptr++)
01827 {
01828 if (*ptr == '\\' && ptr[1] == ']') ptr++; else
01829 {
01830 if (*ptr == ']') return FALSE;
01831 if (*ptr == terminator && ptr[1] == ']')
01832 {
01833 *endptr = ptr;
01834 return TRUE;
01835 }
01836 }
01837 }
01838 return FALSE;
01839 }
01840
01841
01842
01843
01844
01845
01846
01847
01848
01849
01850
01851
01852
01853
01854
01855
01856
01857
01858 static int
01859 check_posix_name(const uschar *ptr, int len)
01860 {
01861 const char *pn = posix_names;
01862 register int yield = 0;
01863 while (posix_name_lengths[yield] != 0)
01864 {
01865 if (len == posix_name_lengths[yield] &&
01866 strncmp((const char *)ptr, pn, len) == 0) return yield;
01867 pn += posix_name_lengths[yield] + 1;
01868 yield++;
01869 }
01870 return -1;
01871 }
01872
01873
01874
01875
01876
01877
01878
01879
01880
01881
01882
01883
01884
01885
01886
01887
01888
01889
01890
01891
01892
01893
01894
01895
01896
01897
01898
01899
01900
01901
01902
01903
01904
01905 static void
01906 adjust_recurse(uschar *group, int adjust, BOOL utf8, compile_data *cd,
01907 uschar *save_hwm)
01908 {
01909 uschar *ptr = group;
01910
01911 while ((ptr = (uschar *)find_recurse(ptr, utf8)) != NULL)
01912 {
01913 int offset;
01914 uschar *hc;
01915
01916
01917
01918
01919 for (hc = save_hwm; hc < cd->hwm; hc += LINK_SIZE)
01920 {
01921 offset = GET(hc, 0);
01922 if (cd->start_code + offset == ptr + 1)
01923 {
01924 PUT(hc, 0, offset + adjust);
01925 break;
01926 }
01927 }
01928
01929
01930
01931
01932 if (hc >= cd->hwm)
01933 {
01934 offset = GET(ptr, 1);
01935 if (cd->start_code + offset >= group) PUT(ptr, 1, offset + adjust);
01936 }
01937
01938 ptr += 1 + LINK_SIZE;
01939 }
01940 }
01941
01942
01943
01944
01945
01946
01947
01948
01949
01950
01951
01952
01953
01954
01955
01956
01957
01958
01959 static uschar *
01960 auto_callout(uschar *code, const uschar *ptr, compile_data *cd)
01961 {
01962 *code++ = OP_CALLOUT;
01963 *code++ = 255;
01964 PUT(code, 0, ptr - cd->start_pattern);
01965 PUT(code, LINK_SIZE, 0);
01966 return code + 2*LINK_SIZE;
01967 }
01968
01969
01970
01971
01972
01973
01974
01975
01976
01977
01978
01979
01980
01981
01982
01983
01984
01985
01986
01987 static void
01988 complete_callout(uschar *previous_callout, const uschar *ptr, compile_data *cd)
01989 {
01990 int length = ptr - cd->start_pattern - GET(previous_callout, 2);
01991 PUT(previous_callout, 2 + LINK_SIZE, length);
01992 }
01993
01994
01995
01996 #ifdef SUPPORT_UCP
01997
01998
01999
02000
02001
02002
02003
02004
02005
02006
02007
02008
02009
02010
02011
02012
02013
02014
02015 static BOOL
02016 get_othercase_range(unsigned int *cptr, unsigned int d, unsigned int *ocptr,
02017 unsigned int *odptr)
02018 {
02019 unsigned int c, othercase, next;
02020
02021 for (c = *cptr; c <= d; c++)
02022 { if ((othercase = UCD_OTHERCASE(c)) != c) break; }
02023
02024 if (c > d) return FALSE;
02025
02026 *ocptr = othercase;
02027 next = othercase + 1;
02028
02029 for (++c; c <= d; c++)
02030 {
02031 if (UCD_OTHERCASE(c) != next) break;
02032 next++;
02033 }
02034
02035 *odptr = next - 1;
02036 *cptr = c;
02037
02038 return TRUE;
02039 }
02040 #endif
02041
02042
02043
02044
02045
02046
02047
02048
02049
02050
02051
02052 /* Look at the pattern item that follows the current item and report */
02053 /* whether it is certain not to match what the current item matches. */
02054 /* */
02055 /* op_code    opcode of the current item (OP_CHAR, OP_DIGIT, ...) */
02056 /* item       character of the current item; read only for OP_CHAR, */
02057 /*            OP_CHARNC and OP_NOT */
02058 /* utf8       TRUE to read characters as UTF-8 */
02059 /* utf8_char  bytes from which item is re-read when utf8 && item > 127 */
02060 /* ptr        pattern position at which the following item is sought */
02061 /* options    option bits; PCRE_EXTENDED and PCRE_CASELESS are tested */
02062 /* cd         compile data; ctypes, fcc, bracount and nllen are read */
02063 /* Returns:   TRUE if a mismatch is certain; FALSE if not, or unknown */
02064 static BOOL
02065 check_auto_possessive(int op_code, int item, BOOL utf8, uschar *utf8_char,
02066 const uschar *ptr, int options, compile_data *cd)
02067 {
02068 int next;
02069
02070 /* In extended mode, step over whitespace and #-comments first. */
02071
02072 if ((options & PCRE_EXTENDED) != 0)
02073 {
02074 for (;;)
02075 {
02076 while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
02077 if (*ptr == '#')
02078 {
02079 while (*(++ptr) != 0)
02080 if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
02081 }
02082 else break;
02083 }
02084 }
02085
02086
02087 /* An escape: check_escape() result is kept in next; a value >= 0 is */
02088 /* used below as a character, a value < 0 as a negated ESC_ code. */
02089 if (*ptr == '\\')
02090 {
02091 int temperrorcode = 0;
02092 next = check_escape(&ptr, &temperrorcode, cd->bracount, options, FALSE);
02093 if (temperrorcode != 0) return FALSE;
02094 ptr++;
02095 }
02096 /* Otherwise only a literal that is not a metacharacter is accepted. */
02097 else if ((cd->ctypes[*ptr] & ctype_meta) == 0)
02098 {
02099 #ifdef SUPPORT_UTF8
02100 if (utf8) { GETCHARINC(next, ptr); } else
02101 #endif
02102 next = *ptr++;
02103 }
02104
02105 else return FALSE;
02106
02107 /* Skip whitespace and comments again, now after the following item. */
02108
02109 if ((options & PCRE_EXTENDED) != 0)
02110 {
02111 for (;;)
02112 {
02113 while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
02114 if (*ptr == '#')
02115 {
02116 while (*(++ptr) != 0)
02117 if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
02118 }
02119 else break;
02120 }
02121 }
02122
02123 /* A following item that may match zero times settles nothing. */
02124
02125 if (*ptr == '*' || *ptr == '?' || strncmp((char *)ptr, "{0,", 3) == 0)
02126 return FALSE;
02127
02128
02129 /* next >= 0: the following item is a character. Compare it with the */
02130 /* current item, selected by op_code. Every arm of this switch returns, */
02131 /* and opcodes that are not listed give FALSE. */
02132
02133
02134
02135
02136 if (next >= 0) switch(op_code)
02137 {
02138 case OP_CHAR:
02139 #ifdef SUPPORT_UTF8
02140 if (utf8 && item > 127) { GETCHAR(item, utf8_char); }
02141 #else
02142 (void)(utf8_char);
02143 #endif
02144 return item != next;
02145
02146 /* Caseless character: TRUE only if item differs both from next and */
02147 /* from the other case of next. */
02148
02149
02150 case OP_CHARNC:
02151 #ifdef SUPPORT_UTF8
02152 if (utf8 && item > 127) { GETCHAR(item, utf8_char); }
02153 #endif
02154 if (item == next) return FALSE;
02155 #ifdef SUPPORT_UTF8
02156 if (utf8)
02157 {
02158 unsigned int othercase;
02159 if (next < 128) othercase = cd->fcc[next]; else
02160 #ifdef SUPPORT_UCP
02161 othercase = UCD_OTHERCASE((unsigned int)next);
02162 #else
02163 othercase = NOTACHAR;
02164 #endif
02165 return (unsigned int)item != othercase;
02166 }
02167 else
02168 #endif
02169 return (item != cd->fcc[next]);
02170
02171 /* Negated character: TRUE only if next is the excluded character */
02172 /* or, when PCRE_CASELESS is set, its other case. */
02173 case OP_NOT:
02174 if (item == next) return TRUE;
02175 if ((options & PCRE_CASELESS) == 0) return FALSE;
02176 #ifdef SUPPORT_UTF8
02177 if (utf8)
02178 {
02179 unsigned int othercase;
02180 if (next < 128) othercase = cd->fcc[next]; else
02181 #ifdef SUPPORT_UCP
02182 othercase = UCD_OTHERCASE(next);
02183 #else
02184 othercase = NOTACHAR;
02185 #endif
02186 return (unsigned int)item == othercase;
02187 }
02188 else
02189 #endif
02190 return (item == cd->fcc[next]);
02191 /* In the type tests below, next > 127 counts as outside the type. */
02192 case OP_DIGIT:
02193 return next > 127 || (cd->ctypes[next] & ctype_digit) == 0;
02194
02195 case OP_NOT_DIGIT:
02196 return next <= 127 && (cd->ctypes[next] & ctype_digit) != 0;
02197
02198 case OP_WHITESPACE:
02199 return next > 127 || (cd->ctypes[next] & ctype_space) == 0;
02200
02201 case OP_NOT_WHITESPACE:
02202 return next <= 127 && (cd->ctypes[next] & ctype_space) != 0;
02203
02204 case OP_WORDCHAR:
02205 return next > 127 || (cd->ctypes[next] & ctype_word) == 0;
02206
02207 case OP_NOT_WORDCHAR:
02208 return next <= 127 && (cd->ctypes[next] & ctype_word) != 0;
02209
02210 case OP_HSPACE:
02211 case OP_NOT_HSPACE:
02212 switch(next)
02213 {
02214 case 0x09:
02215 case 0x20:
02216 case 0xa0:
02217 case 0x1680:
02218 case 0x180e:
02219 case 0x2000:
02220 case 0x2001:
02221 case 0x2002:
02222 case 0x2003:
02223 case 0x2004:
02224 case 0x2005:
02225 case 0x2006:
02226 case 0x2007:
02227 case 0x2008:
02228 case 0x2009:
02229 case 0x200A:
02230 case 0x202f:
02231 case 0x205f:
02232 case 0x3000:
02233 return op_code != OP_HSPACE;
02234 default:
02235 return op_code == OP_HSPACE;
02236 }
02237
02238 case OP_VSPACE:
02239 case OP_NOT_VSPACE:
02240 switch(next)
02241 {
02242 case 0x0a:
02243 case 0x0b:
02244 case 0x0c:
02245 case 0x0d:
02246 case 0x85:
02247 case 0x2028:
02248 case 0x2029:
02249 return op_code != OP_VSPACE;
02250 default:
02251 return op_code == OP_VSPACE;
02252 }
02253
02254 default:
02255 return FALSE;
02256 }
02257
02258 /* Reached only when next < 0, that is, when the following item is an */
02259 /* escape code rather than a character; -next is the ESC_ value. */
02260
02261 switch(op_code)
02262 {
02263 case OP_CHAR:
02264 case OP_CHARNC:
02265 #ifdef SUPPORT_UTF8
02266 if (utf8 && item > 127) { GETCHAR(item, utf8_char); }
02267 #endif
02268 switch(-next)
02269 {
02270 case ESC_d:
02271 return item > 127 || (cd->ctypes[item] & ctype_digit) == 0;
02272
02273 case ESC_D:
02274 return item <= 127 && (cd->ctypes[item] & ctype_digit) != 0;
02275
02276 case ESC_s:
02277 return item > 127 || (cd->ctypes[item] & ctype_space) == 0;
02278
02279 case ESC_S:
02280 return item <= 127 && (cd->ctypes[item] & ctype_space) != 0;
02281
02282 case ESC_w:
02283 return item > 127 || (cd->ctypes[item] & ctype_word) == 0;
02284
02285 case ESC_W:
02286 return item <= 127 && (cd->ctypes[item] & ctype_word) != 0;
02287
02288 case ESC_h:
02289 case ESC_H:
02290 switch(item)
02291 {
02292 case 0x09:
02293 case 0x20:
02294 case 0xa0:
02295 case 0x1680:
02296 case 0x180e:
02297 case 0x2000:
02298 case 0x2001:
02299 case 0x2002:
02300 case 0x2003:
02301 case 0x2004:
02302 case 0x2005:
02303 case 0x2006:
02304 case 0x2007:
02305 case 0x2008:
02306 case 0x2009:
02307 case 0x200A:
02308 case 0x202f:
02309 case 0x205f:
02310 case 0x3000:
02311 return -next != ESC_h;
02312 default:
02313 return -next == ESC_h;
02314 }
02315
02316 case ESC_v:
02317 case ESC_V:
02318 switch(item)
02319 {
02320 case 0x0a:
02321 case 0x0b:
02322 case 0x0c:
02323 case 0x0d:
02324 case 0x85:
02325 case 0x2028:
02326 case 0x2029:
02327 return -next != ESC_v;
02328 default:
02329 return -next == ESC_v;
02330 }
02331
02332 default:
02333 return FALSE;
02334 }
02335 /* Current item is a type: TRUE only for the listed following escapes. */
02336 case OP_DIGIT:
02337 return next == -ESC_D || next == -ESC_s || next == -ESC_W ||
02338 next == -ESC_h || next == -ESC_v;
02339
02340 case OP_NOT_DIGIT:
02341 return next == -ESC_d;
02342
02343 case OP_WHITESPACE:
02344 return next == -ESC_S || next == -ESC_d || next == -ESC_w;
02345
02346 case OP_NOT_WHITESPACE:
02347 return next == -ESC_s || next == -ESC_h || next == -ESC_v;
02348
02349 case OP_HSPACE:
02350 return next == -ESC_S || next == -ESC_H || next == -ESC_d || next == -ESC_w;
02351
02352 case OP_NOT_HSPACE:
02353 return next == -ESC_h;
02354
02355
02356 case OP_VSPACE:
02357 return next == -ESC_V || next == -ESC_d || next == -ESC_w;
02358
02359 case OP_NOT_VSPACE:
02360 return next == -ESC_v;
02361
02362 case OP_WORDCHAR:
02363 return next == -ESC_W || next == -ESC_s || next == -ESC_h || next == -ESC_v;
02364
02365 case OP_NOT_WORDCHAR:
02366 return next == -ESC_w || next == -ESC_d;
02367
02368 default:
02369 return FALSE;
02370 }
02371
02372
02373 }
02374
02375
02376
02377
02378
02379
02380
02381
02382
02383
02384
02385
02386
02387
02388
02389
02390
02391
02392
02393
02394
02395
02396
02397
02398
02399
02400
02401
02402
02403 static BOOL
02404 compile_branch(int *optionsptr, uschar **codeptr, const uschar **ptrptr,
02405 int *errorcodeptr, int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr,
02406 compile_data *cd, int *lengthptr)
02407 {
02408 int repeat_type, op_type;
02409 int repeat_min = 0, repeat_max = 0;
02410 int bravalue = 0;
02411 int greedy_default, greedy_non_default;
02412 int firstbyte, reqbyte;
02413 int zeroreqbyte, zerofirstbyte;
02414 int req_caseopt, reqvary, tempreqvary;
02415 int options = *optionsptr;
02416 int after_manual_callout = 0;
02417 int length_prevgroup = 0;
02418 register int c;
02419 register uschar *code = *codeptr;
02420 uschar *last_code = code;
02421 uschar *orig_code = code;
02422 uschar *tempcode;
02423 BOOL inescq = FALSE;
02424 BOOL groupsetfirstbyte = FALSE;
02425 const uschar *ptr = *ptrptr;
02426 const uschar *tempptr;
02427 uschar *previous = NULL;
02428 uschar *previous_callout = NULL;
02429 uschar *save_hwm = NULL;
02430 uschar classbits[32];
02431
02432 #ifdef SUPPORT_UTF8
02433 BOOL class_utf8;
02434 BOOL utf8 = (options & PCRE_UTF8) != 0;
02435 uschar *class_utf8data;
02436 uschar *class_utf8data_base;
02437 uschar utf8_char[6];
02438 #else
02439 BOOL utf8 = FALSE;
02440 uschar *utf8_char = NULL;
02441 #endif
02442
02443 #ifdef DEBUG
02444 if (lengthptr != NULL) DPRINTF((">> start branch\n"));
02445 #endif
02446
02447
02448
02449 greedy_default = ((options & PCRE_UNGREEDY) != 0);
02450 greedy_non_default = greedy_default ^ 1;
02451
02452
02453
02454
02455
02456
02457
02458
02459
02460
02461
02462 firstbyte = reqbyte = zerofirstbyte = zeroreqbyte = REQ_UNSET;
02463
02464
02465
02466
02467
02468
02469 req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
02470
02471
02472
02473 for (;; ptr++)
02474 {
02475 BOOL negate_class;
02476 BOOL should_flip_negation;
02477 BOOL possessive_quantifier;
02478 BOOL is_quantifier;
02479 BOOL is_recurse;
02480 BOOL reset_bracount;
02481 int class_charcount;
02482 int class_lastchar;
02483 int newoptions;
02484 int recno;
02485 int refsign;
02486 int skipbytes;
02487 int subreqbyte;
02488 int subfirstbyte;
02489 int terminator;
02490 int mclength;
02491 uschar mcbuffer[8];
02492
02493
02494
02495 c = *ptr;
02496
02497
02498
02499
02500 if (lengthptr != NULL)
02501 {
02502 #ifdef DEBUG
02503 if (code > cd->hwm) cd->hwm = code;
02504 #endif
02505 if (code > cd->start_workspace + COMPILE_WORK_SIZE)
02506 {
02507 *errorcodeptr = ERR52;
02508 goto FAILED;
02509 }
02510
02511
02512
02513
02514
02515
02516
02517 if (code < last_code) code = last_code;
02518
02519
02520
02521 if (OFLOW_MAX - *lengthptr < code - last_code)
02522 {
02523 *errorcodeptr = ERR20;
02524 goto FAILED;
02525 }
02526
02527 *lengthptr += code - last_code;
02528 DPRINTF(("length=%d added %d c=%c\n", *lengthptr, code - last_code, c));
02529
02530
02531
02532
02533
02534 if (previous != NULL)
02535 {
02536 if (previous > orig_code)
02537 {
02538 memmove(orig_code, previous, code - previous);
02539 code -= previous - orig_code;
02540 previous = orig_code;
02541 }
02542 }
02543 else code = orig_code;
02544
02545
02546
02547
02548 last_code = code;
02549 }
02550
02551
02552
02553
02554 else if (cd->hwm > cd->start_workspace + COMPILE_WORK_SIZE)
02555 {
02556 *errorcodeptr = ERR52;
02557 goto FAILED;
02558 }
02559
02560
02561
02562 if (inescq && c != 0)
02563 {
02564 if (c == '\\' && ptr[1] == 'E')
02565 {
02566 inescq = FALSE;
02567 ptr++;
02568 continue;
02569 }
02570 else
02571 {
02572 if (previous_callout != NULL)
02573 {
02574 if (lengthptr == NULL)
02575 complete_callout(previous_callout, ptr, cd);
02576 previous_callout = NULL;
02577 }
02578 if ((options & PCRE_AUTO_CALLOUT) != 0)
02579 {
02580 previous_callout = code;
02581 code = auto_callout(code, ptr, cd);
02582 }
02583 goto NORMAL_CHAR;
02584 }
02585 }
02586
02587
02588
02589
02590 is_quantifier = c == '*' || c == '+' || c == '?' ||
02591 (c == '{' && is_counted_repeat(ptr+1));
02592
02593 if (!is_quantifier && previous_callout != NULL &&
02594 after_manual_callout-- <= 0)
02595 {
02596 if (lengthptr == NULL)
02597 complete_callout(previous_callout, ptr, cd);
02598 previous_callout = NULL;
02599 }
02600
02601
02602
02603 if ((options & PCRE_EXTENDED) != 0)
02604 {
02605 if ((cd->ctypes[c] & ctype_space) != 0) continue;
02606 if (c == '#')
02607 {
02608 while (*(++ptr) != 0)
02609 {
02610 if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; }
02611 }
02612 if (*ptr != 0) continue;
02613
02614
02615 c = 0;
02616 }
02617 }
02618
02619
02620
02621 if ((options & PCRE_AUTO_CALLOUT) != 0 && !is_quantifier)
02622 {
02623 previous_callout = code;
02624 code = auto_callout(code, ptr, cd);
02625 }
02626
02627 switch(c)
02628 {
02629
02630 case 0:
02631 case '|':
02632 case ')':
02633 *firstbyteptr = firstbyte;
02634 *reqbyteptr = reqbyte;
02635 *codeptr = code;
02636 *ptrptr = ptr;
02637 if (lengthptr != NULL)
02638 {
02639 if (OFLOW_MAX - *lengthptr < code - last_code)
02640 {
02641 *errorcodeptr = ERR20;
02642 goto FAILED;
02643 }
02644 *lengthptr += code - last_code;
02645 DPRINTF((">> end branch\n"));
02646 }
02647 return TRUE;
02648
02649
02650
02651
02652
02653
02654 case '^':
02655 if ((options & PCRE_MULTILINE) != 0)
02656 {
02657 if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
02658 }
02659 previous = NULL;
02660 *code++ = OP_CIRC;
02661 break;
02662
02663 case '$':
02664 previous = NULL;
02665 *code++ = OP_DOLL;
02666 break;
02667
02668
02669
02670
02671 case '.':
02672 if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
02673 zerofirstbyte = firstbyte;
02674 zeroreqbyte = reqbyte;
02675 previous = code;
02676 *code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY;
02677 break;
02678
02679
02680
02681
02682
02683
02684
02685
02686
02687
02688
02689
02690
02691
02692
02693
02694
02695 case ']':
02696 if ((cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
02697 {
02698 *errorcodeptr = ERR64;
02699 goto FAILED;
02700 }
02701 goto NORMAL_CHAR;
02702
02703 case '[':
02704 previous = code;
02705
02706
02707
02708
02709 if ((ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
02710 check_posix_syntax(ptr, &tempptr))
02711 {
02712 *errorcodeptr = (ptr[1] == ':')? ERR13 : ERR31;
02713 goto FAILED;
02714 }
02715
02716
02717
02718
02719
02720 negate_class = FALSE;
02721 for (;;)
02722 {
02723 c = *(++ptr);
02724 if (c == '\\')
02725 {
02726 if (ptr[1] == 'E') ptr++;
02727 else if (strncmp((const char *)ptr+1, "Q\\E", 3) == 0) ptr += 3;
02728 else break;
02729 }
02730 else if (!negate_class && c == '^')
02731 negate_class = TRUE;
02732 else break;
02733 }
02734
02735
02736
02737
02738
02739
02740 if (c ==']' && (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
02741 {
02742 *code++ = negate_class? OP_ALLANY : OP_FAIL;
02743 if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
02744 zerofirstbyte = firstbyte;
02745 break;
02746 }
02747
02748
02749
02750
02751
02752 should_flip_negation = FALSE;
02753
02754
02755
02756
02757
02758 class_charcount = 0;
02759 class_lastchar = -1;
02760
02761
02762
02763
02764
02765
02766 memset(classbits, 0, 32 * sizeof(uschar));
02767
02768 #ifdef SUPPORT_UTF8
02769 class_utf8 = FALSE;
02770 class_utf8data = code + LINK_SIZE + 2;
02771 class_utf8data_base = class_utf8data;
02772 #endif
02773
02774
02775
02776
02777
02778 if (c != 0) do
02779 {
02780 const uschar *oldptr;
02781
02782 #ifdef SUPPORT_UTF8
02783 if (utf8 && c > 127)
02784 {
02785 GETCHARLEN(c, ptr, ptr);
02786 }
02787
02788
02789
02790
02791
02792
02793 if (lengthptr != NULL)
02794 {
02795 *lengthptr += class_utf8data - class_utf8data_base;
02796 class_utf8data = class_utf8data_base;
02797 }
02798
02799 #endif
02800
02801
02802
02803 if (inescq)
02804 {
02805 if (c == '\\' && ptr[1] == 'E')
02806 {
02807 inescq = FALSE;
02808 ptr++;
02809 continue;
02810 }
02811 goto CHECK_RANGE;
02812 }
02813
02814
02815
02816
02817
02818
02819
02820 if (c == '[' &&
02821 (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
02822 check_posix_syntax(ptr, &tempptr))
02823 {
02824 BOOL local_negate = FALSE;
02825 int posix_class, taboffset, tabopt;
02826 register const uschar *cbits = cd->cbits;
02827 uschar pbits[32];
02828
02829 if (ptr[1] != ':')
02830 {
02831 *errorcodeptr = ERR31;
02832 goto FAILED;
02833 }
02834
02835 ptr += 2;
02836 if (*ptr == '^')
02837 {
02838 local_negate = TRUE;
02839 should_flip_negation = TRUE;
02840 ptr++;
02841 }
02842
02843 posix_class = check_posix_name(ptr, tempptr - ptr);
02844 if (posix_class < 0)
02845 {
02846 *errorcodeptr = ERR30;
02847 goto FAILED;
02848 }
02849
02850
02851
02852
02853
02854 if ((options & PCRE_CASELESS) != 0 && posix_class <= 2)
02855 posix_class = 0;
02856
02857
02858
02859
02860
02861
02862 posix_class *= 3;
02863
02864
02865
02866 memcpy(pbits, cbits + posix_class_maps[posix_class],
02867 32 * sizeof(uschar));
02868
02869
02870
02871 taboffset = posix_class_maps[posix_class + 1];
02872 tabopt = posix_class_maps[posix_class + 2];
02873
02874 if (taboffset >= 0)
02875 {
02876 if (tabopt >= 0)
02877 for (c = 0; c < 32; c++) pbits[c] |= cbits[c + taboffset];
02878 else
02879 for (c = 0; c < 32; c++) pbits[c] &= ~cbits[c + taboffset];
02880 }
02881
02882
02883
02884
02885 if (tabopt < 0) tabopt = -tabopt;
02886 if (tabopt == 1) pbits[1] &= ~0x3c;
02887 else if (tabopt == 2) pbits[11] &= 0x7f;
02888
02889
02890
02891
02892 if (local_negate)
02893 for (c = 0; c < 32; c++) classbits[c] |= ~pbits[c];
02894 else
02895 for (c = 0; c < 32; c++) classbits[c] |= pbits[c];
02896
02897 ptr = tempptr + 1;
02898 class_charcount = 10;
02899 continue;
02900 }
02901
02902
02903
02904
02905
02906
02907
02908
02909 if (c == '\\')
02910 {
02911 c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
02912 if (*errorcodeptr != 0) goto FAILED;
02913
02914 if (-c == ESC_b) c = '\b';
02915 else if (-c == ESC_X) c = 'X';
02916 else if (-c == ESC_R) c = 'R';
02917 else if (-c == ESC_Q)
02918 {
02919 if (ptr[1] == '\\' && ptr[2] == 'E')
02920 {
02921 ptr += 2;
02922 }
02923 else inescq = TRUE;
02924 continue;
02925 }
02926 else if (-c == ESC_E) continue;
02927
02928 if (c < 0)
02929 {
02930 register const uschar *cbits = cd->cbits;
02931 class_charcount += 2;
02932
02933
02934
02935 if (lengthptr == NULL) switch (-c)
02936 {
02937 case ESC_d:
02938 for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_digit];
02939 continue;
02940
02941 case ESC_D:
02942 should_flip_negation = TRUE;
02943 for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit];
02944 continue;
02945
02946 case ESC_w:
02947 for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_word];
02948 continue;
02949
02950 case ESC_W:
02951 should_flip_negation = TRUE;
02952 for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word];
02953 continue;
02954
02955 case ESC_s:
02956 for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_space];
02957 classbits[1] &= ~0x08;
02958 continue;
02959
02960 case ESC_S:
02961 should_flip_negation = TRUE;
02962 for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];
02963 classbits[1] |= 0x08;
02964 continue;
02965
02966 default:
02967 break;
02968 }
02969
02970
02971
02972 else if (c == -ESC_d || c == -ESC_D || c == -ESC_w ||
02973 c == -ESC_W || c == -ESC_s || c == -ESC_S) continue;
02974
02975
02976
02977
02978 if (-c == ESC_h)
02979 {
02980 SETBIT(classbits, 0x09);
02981 SETBIT(classbits, 0x20);
02982 SETBIT(classbits, 0xa0);
02983 #ifdef SUPPORT_UTF8
02984 if (utf8)
02985 {
02986 class_utf8 = TRUE;
02987 *class_utf8data++ = XCL_SINGLE;
02988 class_utf8data += _pcre_ord2utf8(0x1680, class_utf8data);
02989 *class_utf8data++ = XCL_SINGLE;
02990 class_utf8data += _pcre_ord2utf8(0x180e, class_utf8data);
02991 *class_utf8data++ = XCL_RANGE;
02992 class_utf8data += _pcre_ord2utf8(0x2000, class_utf8data);
02993 class_utf8data += _pcre_ord2utf8(0x200A, class_utf8data);
02994 *class_utf8data++ = XCL_SINGLE;
02995 class_utf8data += _pcre_ord2utf8(0x202f, class_utf8data);
02996 *class_utf8data++ = XCL_SINGLE;
02997 class_utf8data += _pcre_ord2utf8(0x205f, class_utf8data);
02998 *class_utf8data++ = XCL_SINGLE;
02999 class_utf8data += _pcre_ord2utf8(0x3000, class_utf8data);
03000 }
03001 #endif
03002 continue;
03003 }
03004
03005 if (-c == ESC_H)
03006 {
03007 for (c = 0; c < 32; c++)
03008 {
03009 int x = 0xff;
03010 switch (c)
03011 {
03012 case 0x09/8: x ^= 1 << (0x09%8); break;
03013 case 0x20/8: x ^= 1 << (0x20%8); break;
03014 case 0xa0/8: x ^= 1 << (0xa0%8); break;
03015 default: break;
03016 }
03017 classbits[c] |= x;
03018 }
03019
03020 #ifdef SUPPORT_UTF8
03021 if (utf8)
03022 {
03023 class_utf8 = TRUE;
03024 *class_utf8data++ = XCL_RANGE;
03025 class_utf8data += _pcre_ord2utf8(0x0100, class_utf8data);
03026 class_utf8data += _pcre_ord2utf8(0x167f, class_utf8data);
03027 *class_utf8data++ = XCL_RANGE;
03028 class_utf8data += _pcre_ord2utf8(0x1681, class_utf8data);
03029 class_utf8data += _pcre_ord2utf8(0x180d, class_utf8data);
03030 *class_utf8data++ = XCL_RANGE;
03031 class_utf8data += _pcre_ord2utf8(0x180f, class_utf8data);
03032 class_utf8data += _pcre_ord2utf8(0x1fff, class_utf8data);
03033 *class_utf8data++ = XCL_RANGE;
03034 class_utf8data += _pcre_ord2utf8(0x200B, class_utf8data);
03035 class_utf8data += _pcre_ord2utf8(0x202e, class_utf8data);
03036 *class_utf8data++ = XCL_RANGE;
03037 class_utf8data += _pcre_ord2utf8(0x2030, class_utf8data);
03038 class_utf8data += _pcre_ord2utf8(0x205e, class_utf8data);
03039 *class_utf8data++ = XCL_RANGE;
03040 class_utf8data += _pcre_ord2utf8(0x2060, class_utf8data);
03041 class_utf8data += _pcre_ord2utf8(0x2fff, class_utf8data);
03042 *class_utf8data++ = XCL_RANGE;
03043 class_utf8data += _pcre_ord2utf8(0x3001, class_utf8data);
03044 class_utf8data += _pcre_ord2utf8(0x7fffffff, class_utf8data);
03045 }
03046 #endif
03047 continue;
03048 }
03049
03050 if (-c == ESC_v)
03051 {
03052 SETBIT(classbits, 0x0a);
03053 SETBIT(classbits, 0x0b);
03054 SETBIT(classbits, 0x0c);
03055 SETBIT(classbits, 0x0d);
03056 SETBIT(classbits, 0x85);
03057 #ifdef SUPPORT_UTF8
03058 if (utf8)
03059 {
03060 class_utf8 = TRUE;
03061 *class_utf8data++ = XCL_RANGE;
03062 class_utf8data += _pcre_ord2utf8(0x2028, class_utf8data);
03063 class_utf8data += _pcre_ord2utf8(0x2029, class_utf8data);
03064 }
03065 #endif
03066 continue;
03067 }
03068
03069 if (-c == ESC_V)
03070 {
03071 for (c = 0; c < 32; c++)
03072 {
03073 int x = 0xff;
03074 switch (c)
03075 {
03076 case 0x0a/8: x ^= 1 << (0x0a%8);
03077 x ^= 1 << (0x0b%8);
03078 x ^= 1 << (0x0c%8);
03079 x ^= 1 << (0x0d%8);
03080 break;
03081 case 0x85/8: x ^= 1 << (0x85%8); break;
03082 default: break;
03083 }
03084 classbits[c] |= x;
03085 }
03086
03087 #ifdef SUPPORT_UTF8
03088 if (utf8)
03089 {
03090 class_utf8 = TRUE;
03091 *class_utf8data++ = XCL_RANGE;
03092 class_utf8data += _pcre_ord2utf8(0x0100, class_utf8data);
03093 class_utf8data += _pcre_ord2utf8(0x2027, class_utf8data);
03094 *class_utf8data++ = XCL_RANGE;
03095 class_utf8data += _pcre_ord2utf8(0x2029, class_utf8data);
03096 class_utf8data += _pcre_ord2utf8(0x7fffffff, class_utf8data);
03097 }
03098 #endif
03099 continue;
03100 }
03101
03102
03103
03104 #ifdef SUPPORT_UCP
03105 if (-c == ESC_p || -c == ESC_P)
03106 {
03107 BOOL negated;
03108 int pdata;
03109 int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr);
03110 if (ptype < 0) goto FAILED;
03111 class_utf8 = TRUE;
03112 *class_utf8data++ = ((-c == ESC_p) != negated)?
03113 XCL_PROP : XCL_NOTPROP;
03114 *class_utf8data++ = ptype;
03115 *class_utf8data++ = pdata;
03116 class_charcount -= 2;
03117 continue;
03118 }
03119 #endif
03120
03121
03122
03123
03124 if ((options & PCRE_EXTRA) != 0)
03125 {
03126 *errorcodeptr = ERR7;
03127 goto FAILED;
03128 }
03129
03130 class_charcount -= 2;
03131 c = *ptr;
03132 }
03133
03134
03135
03136
03137 }
03138
03139
03140
03141
03142
03143
03144 CHECK_RANGE:
03145 while (ptr[1] == '\\' && ptr[2] == 'E')
03146 {
03147 inescq = FALSE;
03148 ptr += 2;
03149 }
03150
03151 oldptr = ptr;
03152
03153
03154
03155 if (c == '\r' || c == '\n') cd->external_flags |= PCRE_HASCRORLF;
03156
03157
03158
03159 if (!inescq && ptr[1] == '-')
03160 {
03161 int d;
03162 ptr += 2;
03163 while (*ptr == '\\' && ptr[1] == 'E') ptr += 2;
03164
03165
03166
03167
03168 while (*ptr == '\\' && ptr[1] == 'Q')
03169 {
03170 ptr += 2;
03171 if (*ptr == '\\' && ptr[1] == 'E') { ptr += 2; continue; }
03172 inescq = TRUE;
03173 break;
03174 }
03175
03176 if (*ptr == 0 || (!inescq && *ptr == ']'))
03177 {
03178 ptr = oldptr;
03179 goto LONE_SINGLE_CHARACTER;
03180 }
03181
03182 #ifdef SUPPORT_UTF8
03183 if (utf8)
03184 {
03185 GETCHARLEN(d, ptr, ptr);
03186 }
03187 else
03188 #endif
03189 d = *ptr;
03190
03191
03192
03193
03194
03195 if (!inescq && d == '\\')
03196 {
03197 d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
03198 if (*errorcodeptr != 0) goto FAILED;
03199
03200
03201
03202
03203 if (d < 0)
03204 {
03205 if (d == -ESC_b) d = '\b';
03206 else if (d == -ESC_X) d = 'X';
03207 else if (d == -ESC_R) d = 'R'; else
03208 {
03209 ptr = oldptr;
03210 goto LONE_SINGLE_CHARACTER;
03211 }
03212 }
03213 }
03214
03215
03216
03217
03218 if (d < c)
03219 {
03220 *errorcodeptr = ERR8;
03221 goto FAILED;
03222 }
03223
03224 if (d == c) goto LONE_SINGLE_CHARACTER;
03225
03226
03227
03228 if (d == '\r' || d == '\n') cd->external_flags |= PCRE_HASCRORLF;
03229
03230
03231
03232
03233
03234
03235 #ifdef SUPPORT_UTF8
03236 if (utf8 && (d > 255 || ((options & PCRE_CASELESS) != 0 && d > 127)))
03237 {
03238 class_utf8 = TRUE;
03239
03240
03241
03242
03243
03244 #ifdef SUPPORT_UCP
03245 if ((options & PCRE_CASELESS) != 0)
03246 {
03247 unsigned int occ, ocd;
03248 unsigned int cc = c;
03249 unsigned int origd = d;
03250 while (get_othercase_range(&cc, origd, &occ, &ocd))
03251 {
03252 if (occ >= (unsigned int)c &&
03253 ocd <= (unsigned int)d)
03254 continue;
03255
03256 if (occ < (unsigned int)c &&
03257 ocd >= (unsigned int)c - 1)
03258 {
03259 c = occ;
03260 continue;
03261 }
03262 if (ocd > (unsigned int)d &&
03263 occ <= (unsigned int)d + 1)
03264 {
03265 d = ocd;
03266 continue;
03267 }
03268
03269 if (occ == ocd)
03270 {
03271 *class_utf8data++ = XCL_SINGLE;
03272 }
03273 else
03274 {
03275 *class_utf8data++ = XCL_RANGE;
03276 class_utf8data += _pcre_ord2utf8(occ, class_utf8data);
03277 }
03278 class_utf8data += _pcre_ord2utf8(ocd, class_utf8data);
03279 }
03280 }
03281 #endif
03282
03283
03284
03285
03286 *class_utf8data++ = XCL_RANGE;
03287 class_utf8data += _pcre_ord2utf8(c, class_utf8data);
03288 class_utf8data += _pcre_ord2utf8(d, class_utf8data);
03289
03290
03291
03292
03293
03294 #ifdef SUPPORT_UCP
03295 continue;
03296 #else
03297 if ((options & PCRE_CASELESS) == 0 || c > 127) continue;
03298
03299
03300
03301 d = 127;
03302
03303 #endif
03304 }
03305 #endif
03306
03307
03308
03309
03310
03311 class_charcount += d - c + 1;
03312 class_lastchar = d;
03313
03314
03315
03316 if (lengthptr == NULL) for (; c <= d; c++)
03317 {
03318 classbits[c/8] |= (1 << (c&7));
03319 if ((options & PCRE_CASELESS) != 0)
03320 {
03321 int uc = cd->fcc[c];
03322 classbits[uc/8] |= (1 << (uc&7));
03323 }
03324 }
03325
03326 continue;
03327 }
03328
03329
03330
03331
03332
03333 LONE_SINGLE_CHARACTER:
03334
03335
03336
03337 #ifdef SUPPORT_UTF8
03338 if (utf8 && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127)))
03339 {
03340 class_utf8 = TRUE;
03341 *class_utf8data++ = XCL_SINGLE;
03342 class_utf8data += _pcre_ord2utf8(c, class_utf8data);
03343
03344 #ifdef SUPPORT_UCP
03345 if ((options & PCRE_CASELESS) != 0)
03346 {
03347 unsigned int othercase;
03348 if ((othercase = UCD_OTHERCASE(c)) != c)
03349 {
03350 *class_utf8data++ = XCL_SINGLE;
03351 class_utf8data += _pcre_ord2utf8(othercase, class_utf8data);
03352 }
03353 }
03354 #endif
03355
03356 }
03357 else
03358 #endif
03359
03360
03361 {
03362 classbits[c/8] |= (1 << (c&7));
03363 if ((options & PCRE_CASELESS) != 0)
03364 {
03365 c = cd->fcc[c];
03366 classbits[c/8] |= (1 << (c&7));
03367 }
03368 class_charcount++;
03369 class_lastchar = c;
03370 }
03371 }
03372
03373
03374
03375 while ((c = *(++ptr)) != 0 && (c != ']' || inescq));
03376
03377 if (c == 0)
03378 {
03379 *errorcodeptr = ERR6;
03380 goto FAILED;
03381 }
03382
03383
03384
03385
03386
03387
03388 #if 0
03389
03390
03391 if (negate_class)
03392 {
03393 if ((classbits[1] & 0x24) != 0x24) cd->external_flags |= PCRE_HASCRORLF;
03394 }
03395 else
03396 {
03397 if ((classbits[1] & 0x24) != 0) cd->external_flags |= PCRE_HASCRORLF;
03398 }
03399 #endif
03400
03401
03402
03403
03404
03405
03406
03407
03408
03409
03410
03411
03412
03413
03414
03415
03416
03417
03418
03419 #ifdef SUPPORT_UTF8
03420 if (class_charcount == 1 && !class_utf8 &&
03421 (!utf8 || !negate_class || class_lastchar < 128))
03422 #else
03423 if (class_charcount == 1)
03424 #endif
03425 {
03426 zeroreqbyte = reqbyte;
03427
03428
03429
03430 if (negate_class)
03431 {
03432 if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
03433 zerofirstbyte = firstbyte;
03434 *code++ = OP_NOT;
03435 *code++ = class_lastchar;
03436 break;
03437 }
03438
03439
03440
03441
03442 #ifdef SUPPORT_UTF8
03443 if (utf8 && class_lastchar > 127)
03444 mclength = _pcre_ord2utf8(class_lastchar, mcbuffer);
03445 else
03446 #endif
03447 {
03448 mcbuffer[0] = class_lastchar;
03449 mclength = 1;
03450 }
03451 goto ONE_CHAR;
03452 }
03453
03454
03455
03456
03457
03458
03459 if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
03460 zerofirstbyte = firstbyte;
03461 zeroreqbyte = reqbyte;
03462
03463
03464
03465
03466
03467
03468
03469
03470 #ifdef SUPPORT_UTF8
03471 if (class_utf8 && !should_flip_negation)
03472 {
03473 *class_utf8data++ = XCL_END;
03474 *code++ = OP_XCLASS;
03475 code += LINK_SIZE;
03476 *code = negate_class? XCL_NOT : 0;
03477
03478
03479
03480
03481 if (class_charcount > 0)
03482 {
03483 *code++ |= XCL_MAP;
03484 memmove(code + 32, code, class_utf8data - code);
03485 memcpy(code, classbits, 32);
03486 code = class_utf8data + 32;
03487 }
03488 else code = class_utf8data;
03489
03490
03491
03492 PUT(previous, 1, code - previous);
03493 break;
03494 }
03495 #endif
03496
03497
03498
03499
03500
03501
03502 *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
03503 if (negate_class)
03504 {
03505 if (lengthptr == NULL)
03506 for (c = 0; c < 32; c++) code[c] = ~classbits[c];
03507 }
03508 else
03509 {
03510 memcpy(code, classbits, 32);
03511 }
03512 code += 32;
03513 break;
03514
03515
03516
03517
03518
03519
03520 case '{':
03521 if (!is_quantifier) goto NORMAL_CHAR;
03522 ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorcodeptr);
03523 if (*errorcodeptr != 0) goto FAILED;
03524 goto REPEAT;
03525
03526 case '*':
03527 repeat_min = 0;
03528 repeat_max = -1;
03529 goto REPEAT;
03530
03531 case '+':
03532 repeat_min = 1;
03533 repeat_max = -1;
03534 goto REPEAT;
03535
03536 case '?':
03537 repeat_min = 0;
03538 repeat_max = 1;
03539
03540 REPEAT:
03541 if (previous == NULL)
03542 {
03543 *errorcodeptr = ERR9;
03544 goto FAILED;
03545 }
03546
03547 if (repeat_min == 0)
03548 {
03549 firstbyte = zerofirstbyte;
03550 reqbyte = zeroreqbyte;
03551 }
03552
03553
03554
03555 reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY;
03556
03557 op_type = 0;
03558 possessive_quantifier = FALSE;
03559
03560
03561
03562
03563 tempcode = previous;
03564
03565
03566
03567
03568
03569
03570
03571 if (ptr[1] == '+')
03572 {
03573 repeat_type = 0;
03574 possessive_quantifier = TRUE;
03575 ptr++;
03576 }
03577 else if (ptr[1] == '?')
03578 {
03579 repeat_type = greedy_non_default;
03580 ptr++;
03581 }
03582 else repeat_type = greedy_default;
03583
03584
03585
03586
03587
03588
03589
03590 if (*previous == OP_CHAR || *previous == OP_CHARNC)
03591 {
03592
03593
03594
03595
03596
03597 #ifdef SUPPORT_UTF8
03598 if (utf8 && (code[-1] & 0x80) != 0)
03599 {
03600 uschar *lastchar = code - 1;
03601 while((*lastchar & 0xc0) == 0x80) lastchar--;
03602 c = code - lastchar;
03603 memcpy(utf8_char, lastchar, c);
03604 c |= 0x80;
03605 }
03606 else
03607 #endif
03608
03609
03610
03611
03612 {
03613 c = code[-1];
03614 if (repeat_min > 1) reqbyte = c | req_caseopt | cd->req_varyopt;
03615 }
03616
03617
03618
03619
03620
03621
03622 if (!possessive_quantifier &&
03623 repeat_max < 0 &&
03624 check_auto_possessive(*previous, c, utf8, utf8_char, ptr + 1,
03625 options, cd))
03626 {
03627 repeat_type = 0;
03628 possessive_quantifier = TRUE;
03629 }
03630
03631 goto OUTPUT_SINGLE_REPEAT;
03632 }
03633
03634
03635
03636
03637
03638
03639
03640 else if (*previous == OP_NOT)
03641 {
03642 op_type = OP_NOTSTAR - OP_STAR;
03643 c = previous[1];
03644 if (!possessive_quantifier &&
03645 repeat_max < 0 &&
03646 check_auto_possessive(OP_NOT, c, utf8, NULL, ptr + 1, options, cd))
03647 {
03648 repeat_type = 0;
03649 possessive_quantifier = TRUE;
03650 }
03651 goto OUTPUT_SINGLE_REPEAT;
03652 }
03653
03654
03655
03656
03657
03658
03659
03660
03661 else if (*previous < OP_EODN)
03662 {
03663 uschar *oldcode;
03664 int prop_type, prop_value;
03665 op_type = OP_TYPESTAR - OP_STAR;
03666 c = *previous;
03667
03668 if (!possessive_quantifier &&
03669 repeat_max < 0 &&
03670 check_auto_possessive(c, 0, utf8, NULL, ptr + 1, options, cd))
03671 {
03672 repeat_type = 0;
03673 possessive_quantifier = TRUE;
03674 }
03675
03676 OUTPUT_SINGLE_REPEAT:
03677 if (*previous == OP_PROP || *previous == OP_NOTPROP)
03678 {
03679 prop_type = previous[1];
03680 prop_value = previous[2];
03681 }
03682 else prop_type = prop_value = -1;
03683
03684 oldcode = code;
03685 code = previous;
03686
03687
03688
03689
03690 if (repeat_max == 0) goto END_REPEAT;
03691
03692
03693
03694
03695 if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL;
03696
03697
03698
03699 repeat_type += op_type;
03700
03701
03702
03703
03704 if (repeat_min == 0)
03705 {
03706 if (repeat_max == -1) *code++ = OP_STAR + repeat_type;
03707 else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type;
03708 else
03709 {
03710 *code++ = OP_UPTO + repeat_type;
03711 PUT2INC(code, 0, repeat_max);
03712 }
03713 }
03714
03715
03716
03717
03718
03719
03720 else if (repeat_min == 1)
03721 {
03722 if (repeat_max == -1)
03723 *code++ = OP_PLUS + repeat_type;
03724 else
03725 {
03726 code = oldcode;
03727 if (repeat_max == 1) goto END_REPEAT;
03728 *code++ = OP_UPTO + repeat_type;
03729 PUT2INC(code, 0, repeat_max - 1);
03730 }
03731 }
03732
03733
03734
03735
03736 else
03737 {
03738 *code++ = OP_EXACT + op_type;
03739 PUT2INC(code, 0, repeat_min);
03740
03741
03742
03743
03744
03745
03746
03747 if (repeat_max < 0)
03748 {
03749 #ifdef SUPPORT_UTF8
03750 if (utf8 && c >= 128)
03751 {
03752 memcpy(code, utf8_char, c & 7);
03753 code += c & 7;
03754 }
03755 else
03756 #endif
03757 {
03758 *code++ = c;
03759 if (prop_type >= 0)
03760 {
03761 *code++ = prop_type;
03762 *code++ = prop_value;
03763 }
03764 }
03765 *code++ = OP_STAR + repeat_type;
03766 }
03767
03768
03769
03770
03771
03772 else if (repeat_max != repeat_min)
03773 {
03774 #ifdef SUPPORT_UTF8
03775 if (utf8 && c >= 128)
03776 {
03777 memcpy(code, utf8_char, c & 7);
03778 code += c & 7;
03779 }
03780 else
03781 #endif
03782 *code++ = c;
03783 if (prop_type >= 0)
03784 {
03785 *code++ = prop_type;
03786 *code++ = prop_value;
03787 }
03788 repeat_max -= repeat_min;
03789
03790 if (repeat_max == 1)
03791 {
03792 *code++ = OP_QUERY + repeat_type;
03793 }
03794 else
03795 {
03796 *code++ = OP_UPTO + repeat_type;
03797 PUT2INC(code, 0, repeat_max);
03798 }
03799 }
03800 }
03801
03802
03803
03804 #ifdef SUPPORT_UTF8
03805 if (utf8 && c >= 128)
03806 {
03807 memcpy(code, utf8_char, c & 7);
03808 code += c & 7;
03809 }
03810 else
03811 #endif
03812 *code++ = c;
03813
03814
03815
03816
03817 #ifdef SUPPORT_UCP
03818 if (prop_type >= 0)
03819 {
03820 *code++ = prop_type;
03821 *code++ = prop_value;
03822 }
03823 #endif
03824 }
03825
03826
03827
03828
03829 else if (*previous == OP_CLASS ||
03830 *previous == OP_NCLASS ||
03831 #ifdef SUPPORT_UTF8
03832 *previous == OP_XCLASS ||
03833 #endif
03834 *previous == OP_REF)
03835 {
03836 if (repeat_max == 0)
03837 {
03838 code = previous;
03839 goto END_REPEAT;
03840 }
03841
03842
03843
03844
03845 if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL;
03846
03847 if (repeat_min == 0 && repeat_max == -1)
03848 *code++ = OP_CRSTAR + repeat_type;
03849 else if (repeat_min == 1 && repeat_max == -1)
03850 *code++ = OP_CRPLUS + repeat_type;
03851 else if (repeat_min == 0 && repeat_max == 1)
03852 *code++ = OP_CRQUERY + repeat_type;
03853 else
03854 {
03855 *code++ = OP_CRRANGE + repeat_type;
03856 PUT2INC(code, 0, repeat_min);
03857 if (repeat_max == -1) repeat_max = 0;
03858 PUT2INC(code, 0, repeat_max);
03859 }
03860 }
03861
03862
03863
03864
03865 else if (*previous == OP_BRA || *previous == OP_CBRA ||
03866 *previous == OP_ONCE || *previous == OP_COND)
03867 {
03868 register int i;
03869 int ketoffset = 0;
03870 int len = code - previous;
03871 uschar *bralink = NULL;
03872
03873
03874
03875 if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_DEF)
03876 {
03877 *errorcodeptr = ERR55;
03878 goto FAILED;
03879 }
03880
03881
03882
03883
03884
03885
03886
03887 if (repeat_max == -1)
03888 {
03889 register uschar *ket = previous;
03890 do ket += GET(ket, 1); while (*ket != OP_KET);
03891 ketoffset = code - ket;
03892 }
03893
03894
03895
03896
03897
03898
03899
03900
03901 if (repeat_min == 0)
03902 {
03903
03904
03905
03906
03907
03908
03909
03910
03911
03912
03913
03914
03915
03916
03917
03918
03919
03920
03921
03922
03923
03924 if (repeat_max <= 1)
03925 {
03926 *code = OP_END;
03927 adjust_recurse(previous, 1, utf8, cd, save_hwm);
03928 memmove(previous+1, previous, len);
03929 code++;
03930 if (repeat_max == 0)
03931 {
03932 *previous++ = OP_SKIPZERO;
03933 goto END_REPEAT;
03934 }
03935 *previous++ = OP_BRAZERO + repeat_type;
03936 }
03937
03938
03939
03940
03941
03942
03943
03944
03945
03946 else
03947 {
03948 int offset;
03949 *code = OP_END;
03950 adjust_recurse(previous, 2 + LINK_SIZE, utf8, cd, save_hwm);
03951 memmove(previous + 2 + LINK_SIZE, previous, len);
03952 code += 2 + LINK_SIZE;
03953 *previous++ = OP_BRAZERO + repeat_type;
03954 *previous++ = OP_BRA;
03955
03956
03957
03958
03959 offset = (bralink == NULL)? 0 : previous - bralink;
03960 bralink = previous;
03961 PUTINC(previous, 0, offset);
03962 }
03963
03964 repeat_max--;
03965 }
03966
03967
03968
03969
03970
03971
03972
03973
03974 else
03975 {
03976 if (repeat_min > 1)
03977 {
03978
03979
03980
03981
03982 if (lengthptr != NULL)
03983 {
03984 int delta = (repeat_min - 1)*length_prevgroup;
03985 if ((double)(repeat_min - 1)*(double)length_prevgroup >
03986 (double)INT_MAX ||
03987 OFLOW_MAX - *lengthptr < delta)
03988 {
03989 *errorcodeptr = ERR20;
03990 goto FAILED;
03991 }
03992 *lengthptr += delta;
03993 }
03994
03995
03996
03997 else
03998 {
03999 if (groupsetfirstbyte && reqbyte < 0) reqbyte = firstbyte;
04000 for (i = 1; i < repeat_min; i++)
04001 {
04002 uschar *hc;
04003 uschar *this_hwm = cd->hwm;
04004 memcpy(code, previous, len);
04005 for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
04006 {
04007 PUT(cd->hwm, 0, GET(hc, 0) + len);
04008 cd->hwm += LINK_SIZE;
04009 }
04010 save_hwm = this_hwm;
04011 code += len;
04012 }
04013 }
04014 }
04015
04016 if (repeat_max > 0) repeat_max -= repeat_min;
04017 }
04018
04019
04020
04021
04022
04023
04024
04025
04026 if (repeat_max >= 0)
04027 {
04028
04029
04030
04031
04032
04033
04034 if (lengthptr != NULL && repeat_max > 0)
04035 {
04036 int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) -
04037 2 - 2*LINK_SIZE;
04038 if ((double)repeat_max *
04039 (double)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)
04040 > (double)INT_MAX ||
04041 OFLOW_MAX - *lengthptr < delta)
04042 {
04043 *errorcodeptr = ERR20;
04044 goto FAILED;
04045 }
04046 *lengthptr += delta;
04047 }
04048
04049
04050
04051 else for (i = repeat_max - 1; i >= 0; i--)
04052 {
04053 uschar *hc;
04054 uschar *this_hwm = cd->hwm;
04055
04056 *code++ = OP_BRAZERO + repeat_type;
04057
04058
04059
04060
04061 if (i != 0)
04062 {
04063 int offset;
04064 *code++ = OP_BRA;
04065 offset = (bralink == NULL)? 0 : code - bralink;
04066 bralink = code;
04067 PUTINC(code, 0, offset);
04068 }
04069
04070 memcpy(code, previous, len);
04071 for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
04072 {
04073 PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));
04074 cd->hwm += LINK_SIZE;
04075 }
04076 save_hwm = this_hwm;
04077 code += len;
04078 }
04079
04080
04081
04082
04083 while (bralink != NULL)
04084 {
04085 int oldlinkoffset;
04086 int offset = code - bralink + 1;
04087 uschar *bra = code - offset;
04088 oldlinkoffset = GET(bra, 1);
04089 bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
04090 *code++ = OP_KET;
04091 PUTINC(code, 0, offset);
04092 PUT(bra, 1, offset);
04093 }
04094 }
04095
04096
04097
04098
04099
04100
04101
04102
04103
04104
04105
04106
04107 else
04108 {
04109 uschar *ketcode = code - ketoffset;
04110 uschar *bracode = ketcode - GET(ketcode, 1);
04111 *ketcode = OP_KETRMAX + repeat_type;
04112 if (lengthptr == NULL && *bracode != OP_ONCE)
04113 {
04114 uschar *scode = bracode;
04115 do
04116 {
04117 if (could_be_empty_branch(scode, ketcode, utf8))
04118 {
04119 *bracode += OP_SBRA - OP_BRA;
04120 break;
04121 }
04122 scode += GET(scode, 1);
04123 }
04124 while (*scode == OP_ALT);
04125 }
04126 }
04127 }
04128
04129
04130
04131
04132
04133
04134 else if (*previous == OP_FAIL) goto END_REPEAT;
04135
04136
04137
04138 else
04139 {
04140 *errorcodeptr = ERR11;
04141 goto FAILED;
04142 }
04143
04144
04145
04146
04147
04148
04149
04150
04151
04152
04153
04154
04155
04156
04157 if (possessive_quantifier)
04158 {
04159 int len;
04160 if (*tempcode == OP_EXACT || *tempcode == OP_TYPEEXACT ||
04161 *tempcode == OP_NOTEXACT)
04162 tempcode += _pcre_OP_lengths[*tempcode] +
04163 ((*tempcode == OP_TYPEEXACT &&
04164 (tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP))? 2:0);
04165 len = code - tempcode;
04166 if (len > 0) switch (*tempcode)
04167 {
04168 case OP_STAR: *tempcode = OP_POSSTAR; break;
04169 case OP_PLUS: *tempcode = OP_POSPLUS; break;
04170 case OP_QUERY: *tempcode = OP_POSQUERY; break;
04171 case OP_UPTO: *tempcode = OP_POSUPTO; break;
04172
04173 case OP_TYPESTAR: *tempcode = OP_TYPEPOSSTAR; break;
04174 case OP_TYPEPLUS: *tempcode = OP_TYPEPOSPLUS; break;
04175 case OP_TYPEQUERY: *tempcode = OP_TYPEPOSQUERY; break;
04176 case OP_TYPEUPTO: *tempcode = OP_TYPEPOSUPTO; break;
04177
04178 case OP_NOTSTAR: *tempcode = OP_NOTPOSSTAR; break;
04179 case OP_NOTPLUS: *tempcode = OP_NOTPOSPLUS; break;
04180 case OP_NOTQUERY: *tempcode = OP_NOTPOSQUERY; break;
04181 case OP_NOTUPTO: *tempcode = OP_NOTPOSUPTO; break;
04182
04183 default:
04184 memmove(tempcode + 1+LINK_SIZE, tempcode, len);
04185 code += 1 + LINK_SIZE;
04186 len += 1 + LINK_SIZE;
04187 tempcode[0] = OP_ONCE;
04188 *code++ = OP_KET;
04189 PUTINC(code, 0, len);
04190 PUT(tempcode, 1, len);
04191 break;
04192 }
04193 }
04194
04195
04196
04197
04198
04199 END_REPEAT:
04200 previous = NULL;
04201 cd->req_varyopt |= reqvary;
04202 break;
04203
04204
04205
04206
04207
04208
04209
04210 case '(':
04211 newoptions = options;
04212 skipbytes = 0;
04213 bravalue = OP_CBRA;
04214 save_hwm = cd->hwm;
04215 reset_bracount = FALSE;
04216
04217
04218
04219 if (*(++ptr) == '*' && (cd->ctypes[ptr[1]] & ctype_letter) != 0)
04220 {
04221 int i, namelen;
04222 const char *vn = verbnames;
04223 const uschar *name = ++ptr;
04224 previous = NULL;
04225 while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};
04226 if (*ptr == ':')
04227 {
04228 *errorcodeptr = ERR59;
04229 goto FAILED;
04230 }
04231 if (*ptr != ')')
04232 {
04233 *errorcodeptr = ERR60;
04234 goto FAILED;
04235 }
04236 namelen = ptr - name;
04237 for (i = 0; i < verbcount; i++)
04238 {
04239 if (namelen == verbs[i].len &&
04240 strncmp((char *)name, vn, namelen) == 0)
04241 {
04242 *code = verbs[i].op;
04243 if (*code++ == OP_ACCEPT) cd->had_accept = TRUE;
04244 break;
04245 }
04246 vn += verbs[i].len + 1;
04247 }
04248 if (i < verbcount) continue;
04249 *errorcodeptr = ERR60;
04250 goto FAILED;
04251 }
04252
04253
04254
04255
04256 else if (*ptr == '?')
04257 {
04258 int i, set, unset, namelen;
04259 int *optset;
04260 const uschar *name;
04261 uschar *slot;
04262
04263 switch (*(++ptr))
04264 {
04265 case '#':
04266 ptr++;
04267 while (*ptr != 0 && *ptr != ')') ptr++;
04268 if (*ptr == 0)
04269 {
04270 *errorcodeptr = ERR18;
04271 goto FAILED;
04272 }
04273 continue;
04274
04275
04276
04277 case '|':
04278 reset_bracount = TRUE;
04279
04280
04281
04282 case ':':
04283 bravalue = OP_BRA;
04284 ptr++;
04285 break;
04286
04287
04288
04289 case '(':
04290 bravalue = OP_COND;
04291
04292
04293
04294
04295
04296
04297
04298
04299
04300
04301
04302
04303
04304
04305
04306
04307
04308
04309 if (ptr[1] == '?' && (ptr[2] == '=' || ptr[2] == '!' || ptr[2] == '<'))
04310 break;
04311
04312
04313
04314
04315 code[1+LINK_SIZE] = OP_CREF;
04316 skipbytes = 3;
04317 refsign = -1;
04318
04319
04320
04321 if (ptr[1] == 'R' && ptr[2] == '&')
04322 {
04323 terminator = -1;
04324 ptr += 2;
04325 code[1+LINK_SIZE] = OP_RREF;
04326 }
04327
04328
04329
04330
04331 else if (ptr[1] == '<')
04332 {
04333 terminator = '>';
04334 ptr++;
04335 }
04336 else if (ptr[1] == '\'')
04337 {
04338 terminator = '\'';
04339 ptr++;
04340 }
04341 else
04342 {
04343 terminator = 0;
04344 if (ptr[1] == '-' || ptr[1] == '+') refsign = *(++ptr);
04345 }
04346
04347
04348
04349 if ((cd->ctypes[ptr[1]] & ctype_word) == 0)
04350 {
04351 ptr += 1;
04352 *errorcodeptr = ERR28;
04353 goto FAILED;
04354 }
04355
04356
04357
04358 recno = 0;
04359 name = ++ptr;
04360 while ((cd->ctypes[*ptr] & ctype_word) != 0)
04361 {
04362 if (recno >= 0)
04363 recno = ((digitab[*ptr] & ctype_digit) != 0)?
04364 recno * 10 + *ptr - '0' : -1;
04365 ptr++;
04366 }
04367 namelen = ptr - name;
04368
04369 if ((terminator > 0 && *ptr++ != terminator) || *ptr++ != ')')
04370 {
04371 ptr--;
04372 *errorcodeptr = ERR26;
04373 goto FAILED;
04374 }
04375
04376
04377
04378 if (lengthptr != NULL) break;
04379
04380
04381
04382
04383
04384 if (refsign > 0)
04385 {
04386 if (recno <= 0)
04387 {
04388 *errorcodeptr = ERR58;
04389 goto FAILED;
04390 }
04391 recno = (refsign == '-')?
04392 cd->bracount - recno + 1 : recno +cd->bracount;
04393 if (recno <= 0 || recno > cd->final_bracount)
04394 {
04395 *errorcodeptr = ERR15;
04396 goto FAILED;
04397 }
04398 PUT2(code, 2+LINK_SIZE, recno);
04399 break;
04400 }
04401
04402
04403
04404
04405 slot = cd->name_table;
04406 for (i = 0; i < cd->names_found; i++)
04407 {
04408 if (strncmp((char *)name, (char *)slot+2, namelen) == 0) break;
04409 slot += cd->name_entry_size;
04410 }
04411
04412
04413
04414 if (i < cd->names_found)
04415 {
04416 recno = GET2(slot, 0);
04417 PUT2(code, 2+LINK_SIZE, recno);
04418 }
04419
04420
04421
04422 else if ((i = find_parens(ptr, cd, name, namelen,
04423 (options & PCRE_EXTENDED) != 0)) > 0)
04424 {
04425 PUT2(code, 2+LINK_SIZE, i);
04426 }
04427
04428
04429
04430
04431
04432
04433
04434 else if (terminator != 0)
04435 {
04436 *errorcodeptr = ERR15;
04437 goto FAILED;
04438 }
04439
04440
04441
04442
04443 else if (*name == 'R')
04444 {
04445 recno = 0;
04446 for (i = 1; i < namelen; i++)
04447 {
04448 if ((digitab[name[i]] & ctype_digit) == 0)
04449 {
04450 *errorcodeptr = ERR15;
04451 goto FAILED;
04452 }
04453 recno = recno * 10 + name[i] - '0';
04454 }
04455 if (recno == 0) recno = RREF_ANY;
04456 code[1+LINK_SIZE] = OP_RREF;
04457 PUT2(code, 2+LINK_SIZE, recno);
04458 }
04459
04460
04461
04462
04463 else if (namelen == 6 && strncmp((char *)name, "DEFINE", 6) == 0)
04464 {
04465 code[1+LINK_SIZE] = OP_DEF;
04466 skipbytes = 1;
04467 }
04468
04469
04470
04471
04472 else if (recno > 0 && recno <= cd->final_bracount)
04473 {
04474 PUT2(code, 2+LINK_SIZE, recno);
04475 }
04476
04477
04478
04479 else
04480 {
04481 *errorcodeptr = (recno == 0)? ERR35: ERR15;
04482 goto FAILED;
04483 }
04484 break;
04485
04486
04487
04488 case '=':
04489 bravalue = OP_ASSERT;
04490 ptr++;
04491 break;
04492
04493
04494
04495 case '!':
04496 ptr++;
04497 if (*ptr == ')')
04498 {
04499 *code++ = OP_FAIL;
04500 previous = NULL;
04501 continue;
04502 }
04503 bravalue = OP_ASSERT_NOT;
04504 break;
04505
04506
04507
04508 case '<':
04509 switch (ptr[1])
04510 {
04511 case '=':
04512 bravalue = OP_ASSERTBACK;
04513 ptr += 2;
04514 break;
04515
04516 case '!':
04517 bravalue = OP_ASSERTBACK_NOT;
04518 ptr += 2;
04519 break;
04520
04521 default:
04522 if ((cd->ctypes[ptr[1]] & ctype_word) != 0) goto DEFINE_NAME;
04523 ptr++;
04524 *errorcodeptr = ERR24;
04525 goto FAILED;
04526 }
04527 break;
04528
04529
04530
04531 case '>':
04532 bravalue = OP_ONCE;
04533 ptr++;
04534 break;
04535
04536
04537
04538 case 'C':
04539 previous_callout = code;
04540 after_manual_callout = 1;
04541 *code++ = OP_CALLOUT;
04542 {
04543 int n = 0;
04544 while ((digitab[*(++ptr)] & ctype_digit) != 0)
04545 n = n * 10 + *ptr - '0';
04546 if (*ptr != ')')
04547 {
04548 *errorcodeptr = ERR39;
04549 goto FAILED;
04550 }
04551 if (n > 255)
04552 {
04553 *errorcodeptr = ERR38;
04554 goto FAILED;
04555 }
04556 *code++ = n;
04557 PUT(code, 0, ptr - cd->start_pattern + 1);
04558 PUT(code, LINK_SIZE, 0);
04559 code += 2 * LINK_SIZE;
04560 }
04561 previous = NULL;
04562 continue;
04563
04564
04565
04566 case 'P':
04567 if (*(++ptr) == '=' || *ptr == '>')
04568 {
04569 is_recurse = *ptr == '>';
04570 terminator = ')';
04571 goto NAMED_REF_OR_RECURSE;
04572 }
04573 else if (*ptr != '<')
04574 {
04575 *errorcodeptr = ERR41;
04576 goto FAILED;
04577 }
04578
04579
04580
04581
04582 DEFINE_NAME:
04583 case '\'':
04584 {
04585 terminator = (*ptr == '<')? '>' : '\'';
04586 name = ++ptr;
04587
04588 while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
04589 namelen = ptr - name;
04590
04591
04592
04593 if (lengthptr != NULL)
04594 {
04595 if (*ptr != terminator)
04596 {
04597 *errorcodeptr = ERR42;
04598 goto FAILED;
04599 }
04600 if (cd->names_found >= MAX_NAME_COUNT)
04601 {
04602 *errorcodeptr = ERR49;
04603 goto FAILED;
04604 }
04605 if (namelen + 3 > cd->name_entry_size)
04606 {
04607 cd->name_entry_size = namelen + 3;
04608 if (namelen > MAX_NAME_SIZE)
04609 {
04610 *errorcodeptr = ERR48;
04611 goto FAILED;
04612 }
04613 }
04614 }
04615
04616
04617
04618 else
04619 {
04620 slot = cd->name_table;
04621 for (i = 0; i < cd->names_found; i++)
04622 {
04623 int crc = memcmp(name, slot+2, namelen);
04624 if (crc == 0)
04625 {
04626 if (slot[2+namelen] == 0)
04627 {
04628 if ((options & PCRE_DUPNAMES) == 0)
04629 {
04630 *errorcodeptr = ERR43;
04631 goto FAILED;
04632 }
04633 }
04634 else crc = -1;
04635 }
04636 if (crc < 0)
04637 {
04638 memmove(slot + cd->name_entry_size, slot,
04639 (cd->names_found - i) * cd->name_entry_size);
04640 break;
04641 }
04642 slot += cd->name_entry_size;
04643 }
04644
04645 PUT2(slot, 0, cd->bracount + 1);
04646 memcpy(slot + 2, name, namelen);
04647 slot[2+namelen] = 0;
04648 }
04649 }
04650
04651
04652
04653 ptr++;
04654 cd->names_found++;
04655 goto NUMBERED_GROUP;
04656
04657
04658
04659 case '&':
04660 terminator = ')';
04661 is_recurse = TRUE;
04662
04663
04664
04665
04666
04667
04668
04669
04670 NAMED_REF_OR_RECURSE:
04671 name = ++ptr;
04672 while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
04673 namelen = ptr - name;
04674
04675
04676
04677
04678 if (lengthptr != NULL)
04679 {
04680 if (namelen == 0)
04681 {
04682 *errorcodeptr = ERR62;
04683 goto FAILED;
04684 }
04685 if (*ptr != terminator)
04686 {
04687 *errorcodeptr = ERR42;
04688 goto FAILED;
04689 }
04690 if (namelen > MAX_NAME_SIZE)
04691 {
04692 *errorcodeptr = ERR48;
04693 goto FAILED;
04694 }
04695 recno = 0;
04696 }
04697
04698
04699
04700
04701
04702
04703 else
04704 {
04705 slot = cd->name_table;
04706 for (i = 0; i < cd->names_found; i++)
04707 {
04708 if (strncmp((char *)name, (char *)slot+2, namelen) == 0 &&
04709 slot[2+namelen] == 0)
04710 break;
04711 slot += cd->name_entry_size;
04712 }
04713
04714 if (i < cd->names_found)
04715 {
04716 recno = GET2(slot, 0);
04717 }
04718 else if ((recno =
04719 find_parens(ptr, cd, name, namelen,
04720 (options & PCRE_EXTENDED) != 0)) <= 0)
04721 {
04722 *errorcodeptr = ERR15;
04723 goto FAILED;
04724 }
04725 }
04726
04727
04728
04729
04730 if (is_recurse) goto HANDLE_RECURSION;
04731 else goto HANDLE_REFERENCE;
04732
04733
04734
04735 case 'R':
04736 ptr++;
04737
04738
04739
04740
04741 case '-': case '+':
04742 case '0': case '1': case '2': case '3': case '4':
04743 case '5': case '6': case '7': case '8': case '9':
04744 {
04745 const uschar *called;
04746 terminator = ')';
04747
04748
04749
04750
04751
04752
04753
04754 HANDLE_NUMERICAL_RECURSION:
04755
04756 if ((refsign = *ptr) == '+')
04757 {
04758 ptr++;
04759 if ((digitab[*ptr] & ctype_digit) == 0)
04760 {
04761 *errorcodeptr = ERR63;
04762 goto FAILED;
04763 }
04764 }
04765 else if (refsign == '-')
04766 {
04767 if ((digitab[ptr[1]] & ctype_digit) == 0)
04768 goto OTHER_CHAR_AFTER_QUERY;
04769 ptr++;
04770 }
04771
04772 recno = 0;
04773 while((digitab[*ptr] & ctype_digit) != 0)
04774 recno = recno * 10 + *ptr++ - '0';
04775
04776 if (*ptr != terminator)
04777 {
04778 *errorcodeptr = ERR29;
04779 goto FAILED;
04780 }
04781
04782 if (refsign == '-')
04783 {
04784 if (recno == 0)
04785 {
04786 *errorcodeptr = ERR58;
04787 goto FAILED;
04788 }
04789 recno = cd->bracount - recno + 1;
04790 if (recno <= 0)
04791 {
04792 *errorcodeptr = ERR15;
04793 goto FAILED;
04794 }
04795 }
04796 else if (refsign == '+')
04797 {
04798 if (recno == 0)
04799 {
04800 *errorcodeptr = ERR58;
04801 goto FAILED;
04802 }
04803 recno += cd->bracount;
04804 }
04805
04806
04807
04808 HANDLE_RECURSION:
04809
04810 previous = code;
04811 called = cd->start_code;
04812
04813
04814
04815
04816
04817
04818
04819
04820 if (lengthptr == NULL)
04821 {
04822 *code = OP_END;
04823 if (recno != 0) called = find_bracket(cd->start_code, utf8, recno);
04824
04825
04826
04827 if (called == NULL)
04828 {
04829 if (find_parens(ptr, cd, NULL, recno,
04830 (options & PCRE_EXTENDED) != 0) < 0)
04831 {
04832 *errorcodeptr = ERR15;
04833 goto FAILED;
04834 }
04835 called = cd->start_code + recno;
04836 PUTINC(cd->hwm, 0, code + 2 + LINK_SIZE - cd->start_code);
04837 }
04838
04839
04840
04841
04842
04843 else if (GET(called, 1) == 0 &&
04844 could_be_empty(called, code, bcptr, utf8))
04845 {
04846 *errorcodeptr = ERR40;
04847 goto FAILED;
04848 }
04849 }
04850
04851
04852
04853
04854
04855 *code = OP_ONCE;
04856 PUT(code, 1, 2 + 2*LINK_SIZE);
04857 code += 1 + LINK_SIZE;
04858
04859 *code = OP_RECURSE;
04860 PUT(code, 1, called - cd->start_code);
04861 code += 1 + LINK_SIZE;
04862
04863 *code = OP_KET;
04864 PUT(code, 1, 2 + 2*LINK_SIZE);
04865 code += 1 + LINK_SIZE;
04866
04867 length_prevgroup = 3 + 3*LINK_SIZE;
04868 }
04869
04870
04871
04872 if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
04873 continue;
04874
04875
04876
04877 default:
04878 OTHER_CHAR_AFTER_QUERY:
04879 set = unset = 0;
04880 optset = &set;
04881
04882 while (*ptr != ')' && *ptr != ':')
04883 {
04884 switch (*ptr++)
04885 {
04886 case '-': optset = &unset; break;
04887
04888 case 'J':
04889 *optset |= PCRE_DUPNAMES;
04890 cd->external_flags |= PCRE_JCHANGED;
04891 break;
04892
04893 case 'i': *optset |= PCRE_CASELESS; break;
04894 case 'm': *optset |= PCRE_MULTILINE; break;
04895 case 's': *optset |= PCRE_DOTALL; break;
04896 case 'x': *optset |= PCRE_EXTENDED; break;
04897 case 'U': *optset |= PCRE_UNGREEDY; break;
04898 case 'X': *optset |= PCRE_EXTRA; break;
04899
04900 default: *errorcodeptr = ERR12;
04901 ptr--;
04902 goto FAILED;
04903 }
04904 }
04905
04906
04907
04908 newoptions = (options | set) & (~unset);
04909
04910
04911
04912
04913
04914
04915
04916
04917
04918
04919
04920
04921
04922
04923
04924
04925
04926
04927
04928
04929
04930
04931
04932 if (*ptr == ')')
04933 {
04934 if (code == cd->start_code + 1 + LINK_SIZE &&
04935 (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))
04936 {
04937 cd->external_options = newoptions;
04938 }
04939 else
04940 {
04941 if ((options & PCRE_IMS) != (newoptions & PCRE_IMS))
04942 {
04943 *code++ = OP_OPT;
04944 *code++ = newoptions & PCRE_IMS;
04945 }
04946 greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
04947 greedy_non_default = greedy_default ^ 1;
04948 req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
04949 }
04950
04951
04952
04953
04954
04955
04956 *optionsptr = options = newoptions;
04957 previous = NULL;
04958 continue;
04959 }
04960
04961
04962
04963
04964
04965
04966 bravalue = OP_BRA;
04967 ptr++;
04968 }
04969 }
04970
04971
04972
04973
04974
04975 else if ((options & PCRE_NO_AUTO_CAPTURE) != 0)
04976 {
04977 bravalue = OP_BRA;
04978 }
04979
04980
04981
04982 else
04983 {
04984 NUMBERED_GROUP:
04985 cd->bracount += 1;
04986 PUT2(code, 1+LINK_SIZE, cd->bracount);
04987 skipbytes = 2;
04988 }
04989
04990
04991
04992
04993
04994
04995
04996 previous = (bravalue >= OP_ONCE)? code : NULL;
04997 *code = bravalue;
04998 tempcode = code;
04999 tempreqvary = cd->req_varyopt;
05000 length_prevgroup = 0;
05001
05002 if (!compile_regex(
05003 newoptions,
05004 options & PCRE_IMS,
05005 &tempcode,
05006 &ptr,
05007 errorcodeptr,
05008 (bravalue == OP_ASSERTBACK ||
05009 bravalue == OP_ASSERTBACK_NOT),
05010 reset_bracount,
05011 skipbytes,
05012 &subfirstbyte,
05013 &subreqbyte,
05014 bcptr,
05015 cd,
05016 (lengthptr == NULL)? NULL :
05017 &length_prevgroup
05018 ))
05019 goto FAILED;
05020
05021
05022
05023
05024
05025
05026
05027
05028
05029
05030
05031 if (bravalue == OP_COND && lengthptr == NULL)
05032 {
05033 uschar *tc = code;
05034 int condcount = 0;
05035
05036 do {
05037 condcount++;
05038 tc += GET(tc,1);
05039 }
05040 while (*tc != OP_KET);
05041
05042
05043
05044
05045 if (code[LINK_SIZE+1] == OP_DEF)
05046 {
05047 if (condcount > 1)
05048 {
05049 *errorcodeptr = ERR54;
05050 goto FAILED;
05051 }
05052 bravalue = OP_DEF;
05053 }
05054
05055
05056
05057
05058
05059 else
05060 {
05061 if (condcount > 2)
05062 {
05063 *errorcodeptr = ERR27;
05064 goto FAILED;
05065 }
05066 if (condcount == 1) subfirstbyte = subreqbyte = REQ_NONE;
05067 }
05068 }
05069
05070
05071
05072 if (*ptr != ')')
05073 {
05074 *errorcodeptr = ERR14;
05075 goto FAILED;
05076 }
05077
05078
05079
05080
05081
05082
05083 if (lengthptr != NULL)
05084 {
05085 if (OFLOW_MAX - *lengthptr < length_prevgroup - 2 - 2*LINK_SIZE)
05086 {
05087 *errorcodeptr = ERR20;
05088 goto FAILED;
05089 }
05090 *lengthptr += length_prevgroup - 2 - 2*LINK_SIZE;
05091 *code++ = OP_BRA;
05092 PUTINC(code, 0, 1 + LINK_SIZE);
05093 *code++ = OP_KET;
05094 PUTINC(code, 0, 1 + LINK_SIZE);
05095 break;
05096 }
05097
05098
05099
05100 code = tempcode;
05101
05102
05103
05104
05105 if (bravalue == OP_DEF) break;
05106
05107
05108
05109
05110
05111
05112
05113
05114 zeroreqbyte = reqbyte;
05115 zerofirstbyte = firstbyte;
05116 groupsetfirstbyte = FALSE;
05117
05118 if (bravalue >= OP_ONCE)
05119 {
05120
05121
05122
05123
05124
05125
05126 if (firstbyte == REQ_UNSET)
05127 {
05128 if (subfirstbyte >= 0)
05129 {
05130 firstbyte = subfirstbyte;
05131 groupsetfirstbyte = TRUE;
05132 }
05133 else firstbyte = REQ_NONE;
05134 zerofirstbyte = REQ_NONE;
05135 }
05136
05137
05138
05139
05140
05141 else if (subfirstbyte >= 0 && subreqbyte < 0)
05142 subreqbyte = subfirstbyte | tempreqvary;
05143
05144
05145
05146
05147 if (subreqbyte >= 0) reqbyte = subreqbyte;
05148 }
05149
05150
05151
05152
05153
05154
05155
05156
05157
05158 else if (bravalue == OP_ASSERT && subreqbyte >= 0) reqbyte = subreqbyte;
05159 break;
05160
05161
05162
05163
05164
05165
05166
05167
05168
05169
05170 case '\\':
05171 tempptr = ptr;
05172 c = check_escape(&ptr, errorcodeptr, cd->bracount, options, FALSE);
05173 if (*errorcodeptr != 0) goto FAILED;
05174
05175 if (c < 0)
05176 {
05177 if (-c == ESC_Q)
05178 {
05179 if (ptr[1] == '\\' && ptr[2] == 'E') ptr += 2;
05180 else inescq = TRUE;
05181 continue;
05182 }
05183
05184 if (-c == ESC_E) continue;
05185
05186
05187
05188
05189 if (firstbyte == REQ_UNSET && -c > ESC_b && -c < ESC_Z)
05190 firstbyte = REQ_NONE;
05191
05192
05193
05194 zerofirstbyte = firstbyte;
05195 zeroreqbyte = reqbyte;
05196
05197
05198
05199
05200
05201
05202
05203
05204 if (-c == ESC_g)
05205 {
05206 const uschar *p;
05207 save_hwm = cd->hwm;
05208 terminator = (*(++ptr) == '<')? '>' : '\'';
05209
05210
05211
05212
05213
05214
05215 skipbytes = 0;
05216 reset_bracount = FALSE;
05217
05218
05219
05220 if (ptr[1] != '+' && ptr[1] != '-')
05221 {
05222 BOOL isnumber = TRUE;
05223 for (p = ptr + 1; *p != 0 && *p != terminator; p++)
05224 {
05225 if ((cd->ctypes[*p] & ctype_digit) == 0) isnumber = FALSE;
05226 if ((cd->ctypes[*p] & ctype_word) == 0) break;
05227 }
05228 if (*p != terminator)
05229 {
05230 *errorcodeptr = ERR57;
05231 break;
05232 }
05233 if (isnumber)
05234 {
05235 ptr++;
05236 goto HANDLE_NUMERICAL_RECURSION;
05237 }
05238 is_recurse = TRUE;
05239 goto NAMED_REF_OR_RECURSE;
05240 }
05241
05242
05243
05244 p = ptr + 2;
05245 while ((digitab[*p] & ctype_digit) != 0) p++;
05246 if (*p != terminator)
05247 {
05248 *errorcodeptr = ERR57;
05249 break;
05250 }
05251 ptr++;
05252 goto HANDLE_NUMERICAL_RECURSION;
05253 }
05254
05255
05256
05257
05258 if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\'' || ptr[1] == '{'))
05259 {
05260 is_recurse = FALSE;
05261 terminator = (*(++ptr) == '<')? '>' : (*ptr == '\'')? '\'' : '}';
05262 goto NAMED_REF_OR_RECURSE;
05263 }
05264
05265
05266
05267
05268
05269 if (-c >= ESC_REF)
05270 {
05271 recno = -c - ESC_REF;
05272
05273 HANDLE_REFERENCE:
05274 if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
05275 previous = code;
05276 *code++ = OP_REF;
05277 PUT2INC(code, 0, recno);
05278 cd->backref_map |= (recno < 32)? (1 << recno) : 1;
05279 if (recno > cd->top_backref) cd->top_backref = recno;
05280 }
05281
05282
05283
05284 #ifdef SUPPORT_UCP
05285 else if (-c == ESC_P || -c == ESC_p)
05286 {
05287 BOOL negated;
05288 int pdata;
05289 int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr);
05290 if (ptype < 0) goto FAILED;
05291 previous = code;
05292 *code++ = ((-c == ESC_p) != negated)? OP_PROP : OP_NOTPROP;
05293 *code++ = ptype;
05294 *code++ = pdata;
05295 }
05296 #else
05297
05298
05299
05300
05301 else if (-c == ESC_X || -c == ESC_P || -c == ESC_p)
05302 {
05303 *errorcodeptr = ERR45;
05304 goto FAILED;
05305 }
05306 #endif
05307
05308
05309
05310
05311 else
05312 {
05313 previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;
05314 *code++ = -c;
05315 }
05316 continue;
05317 }
05318
05319
05320
05321
05322
05323 #ifdef SUPPORT_UTF8
05324 if (utf8 && c > 127)
05325 mclength = _pcre_ord2utf8(c, mcbuffer);
05326 else
05327 #endif
05328
05329 {
05330 mcbuffer[0] = c;
05331 mclength = 1;
05332 }
05333 goto ONE_CHAR;
05334
05335
05336
05337
05338
05339
05340
05341 default:
05342 NORMAL_CHAR:
05343 mclength = 1;
05344 mcbuffer[0] = c;
05345
05346 #ifdef SUPPORT_UTF8
05347 if (utf8 && c >= 0xc0)
05348 {
05349 while ((ptr[1] & 0xc0) == 0x80)
05350 mcbuffer[mclength++] = *(++ptr);
05351 }
05352 #endif
05353
05354
05355
05356
05357 ONE_CHAR:
05358 previous = code;
05359 *code++ = ((options & PCRE_CASELESS) != 0)? OP_CHARNC : OP_CHAR;
05360 for (c = 0; c < mclength; c++) *code++ = mcbuffer[c];
05361
05362
05363
05364 if (mcbuffer[0] == '\r' || mcbuffer[0] == '\n')
05365 cd->external_flags |= PCRE_HASCRORLF;
05366
05367
05368
05369
05370
05371
05372 if (firstbyte == REQ_UNSET)
05373 {
05374 zerofirstbyte = REQ_NONE;
05375 zeroreqbyte = reqbyte;
05376
05377
05378
05379
05380 if (mclength == 1 || req_caseopt == 0)
05381 {
05382 firstbyte = mcbuffer[0] | req_caseopt;
05383 if (mclength != 1) reqbyte = code[-1] | cd->req_varyopt;
05384 }
05385 else firstbyte = reqbyte = REQ_NONE;
05386 }
05387
05388
05389
05390
05391 else
05392 {
05393 zerofirstbyte = firstbyte;
05394 zeroreqbyte = reqbyte;
05395 if (mclength == 1 || req_caseopt == 0)
05396 reqbyte = code[-1] | req_caseopt | cd->req_varyopt;
05397 }
05398
05399 break;
05400 }
05401 }
05402
05403
05404
05405
05406
05407
05408 FAILED:
05409 *ptrptr = ptr;
05410 return FALSE;
05411 }
05412
05413
05414
05415
05416
05417
05418
05419
05420
05421
05422
05423
05424
05425
05426
05427
05428
05429
05430
05431
05432
05433
05434
05435
05436
05437
05438
05439
05440
05441
05442
05443
05444
05445
05446
05447
05448
05449
05450
05451 static BOOL
05452 compile_regex(int options, int oldims, uschar **codeptr, const uschar **ptrptr,
05453 int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,
05454 int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr, compile_data *cd,
05455 int *lengthptr)
05456 {
05457 const uschar *ptr = *ptrptr;
05458 uschar *code = *codeptr;
05459 uschar *last_branch = code;
05460 uschar *start_bracket = code;
05461 uschar *reverse_count = NULL;
05462 int firstbyte, reqbyte;
05463 int branchfirstbyte, branchreqbyte;
05464 int length;
05465 int orig_bracount;
05466 int max_bracount;
05467 branch_chain bc;
05468
05469 bc.outer = bcptr;
05470 bc.current = code;
05471
05472 firstbyte = reqbyte = REQ_UNSET;
05473
05474
05475
05476
05477
05478
05479
05480
05481 length = 2 + 2*LINK_SIZE + skipbytes;
05482
05483
05484
05485
05486
05487
05488
05489
05490 PUT(code, 1, 0);
05491 code += 1 + LINK_SIZE + skipbytes;
05492
05493
05494
05495 orig_bracount = max_bracount = cd->bracount;
05496 for (;;)
05497 {
05498
05499
05500
05501 if (reset_bracount) cd->bracount = orig_bracount;
05502
05503
05504
05505 if ((options & PCRE_IMS) != oldims)
05506 {
05507 *code++ = OP_OPT;
05508 *code++ = options & PCRE_IMS;
05509 length += 2;
05510 }
05511
05512
05513
05514 if (lookbehind)
05515 {
05516 *code++ = OP_REVERSE;
05517 reverse_count = code;
05518 PUTINC(code, 0, 0);
05519 length += 1 + LINK_SIZE;
05520 }
05521
05522
05523
05524
05525 if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstbyte,
05526 &branchreqbyte, &bc, cd, (lengthptr == NULL)? NULL : &length))
05527 {
05528 *ptrptr = ptr;
05529 return FALSE;
05530 }
05531
05532
05533
05534
05535 if (cd->bracount > max_bracount) max_bracount = cd->bracount;
05536
05537
05538
05539 if (lengthptr == NULL)
05540 {
05541
05542
05543
05544 if (*last_branch != OP_ALT)
05545 {
05546 firstbyte = branchfirstbyte;
05547 reqbyte = branchreqbyte;
05548 }
05549
05550
05551
05552
05553
05554
05555 else
05556 {
05557
05558
05559
05560
05561 if (firstbyte >= 0 && firstbyte != branchfirstbyte)
05562 {
05563 if (reqbyte < 0) reqbyte = firstbyte;
05564 firstbyte = REQ_NONE;
05565 }
05566
05567
05568
05569
05570 if (firstbyte < 0 && branchfirstbyte >= 0 && branchreqbyte < 0)
05571 branchreqbyte = branchfirstbyte;
05572
05573
05574
05575 if ((reqbyte & ~REQ_VARY) != (branchreqbyte & ~REQ_VARY))
05576 reqbyte = REQ_NONE;
05577 else reqbyte |= branchreqbyte;
05578 }
05579
05580
05581
05582
05583
05584 if (lookbehind)
05585 {
05586 int fixed_length;
05587 *code = OP_END;
05588 fixed_length = find_fixedlength(last_branch, options);
05589 DPRINTF(("fixed length = %d\n", fixed_length));
05590 if (fixed_length < 0)
05591 {
05592 *errorcodeptr = (fixed_length == -2)? ERR36 : ERR25;
05593 *ptrptr = ptr;
05594 return FALSE;
05595 }
05596 PUT(reverse_count, 0, fixed_length);
05597 }
05598 }
05599
05600
05601
05602
05603
05604
05605
05606
05607
05608
05609 if (*ptr != '|')
05610 {
05611 if (lengthptr == NULL)
05612 {
05613 int branch_length = code - last_branch;
05614 do
05615 {
05616 int prev_length = GET(last_branch, 1);
05617 PUT(last_branch, 1, branch_length);
05618 branch_length = prev_length;
05619 last_branch -= branch_length;
05620 }
05621 while (branch_length > 0);
05622 }
05623
05624
05625
05626 *code = OP_KET;
05627 PUT(code, 1, code - start_bracket);
05628 code += 1 + LINK_SIZE;
05629
05630
05631
05632 if ((options & PCRE_IMS) != oldims && *ptr == ')')
05633 {
05634 *code++ = OP_OPT;
05635 *code++ = oldims;
05636 length += 2;
05637 }
05638
05639
05640
05641 cd->bracount = max_bracount;
05642
05643
05644
05645 *codeptr = code;
05646 *ptrptr = ptr;
05647 *firstbyteptr = firstbyte;
05648 *reqbyteptr = reqbyte;
05649 if (lengthptr != NULL)
05650 {
05651 if (OFLOW_MAX - *lengthptr < length)
05652 {
05653 *errorcodeptr = ERR20;
05654 return FALSE;
05655 }
05656 *lengthptr += length;
05657 }
05658 return TRUE;
05659 }
05660
05661
05662
05663
05664
05665
05666
05667
05668
05669
05670 if (lengthptr != NULL)
05671 {
05672 code = *codeptr + 1 + LINK_SIZE + skipbytes;
05673 length += 1 + LINK_SIZE;
05674 }
05675 else
05676 {
05677 *code = OP_ALT;
05678 PUT(code, 1, code - last_branch);
05679 bc.current = last_branch = code;
05680 code += 1 + LINK_SIZE;
05681 }
05682
05683 ptr++;
05684 }
05685
05686 }
05687
05688
05689
05690
05691
05692
05693
05694
05695
05696
05697
05698
05699
05700
05701
05702
05703
05704
05705
05706
05707
05708
05709
05710
05711
05712
05713
05714
05715
05716
05717
05718
05719
05720
05721
05722
05723
05724
05725
05726
05727
05728
05729 static BOOL
05730 is_anchored(register const uschar *code, int *options, unsigned int bracket_map,
05731 unsigned int backref_map)
05732 {
05733 do {
05734 const uschar *scode = first_significant_code(code + _pcre_OP_lengths[*code],
05735 options, PCRE_MULTILINE, FALSE);
05736 register int op = *scode;
05737
05738
05739
05740 if (op == OP_BRA)
05741 {
05742 if (!is_anchored(scode, options, bracket_map, backref_map)) return FALSE;
05743 }
05744
05745
05746
05747 else if (op == OP_CBRA)
05748 {
05749 int n = GET2(scode, 1+LINK_SIZE);
05750 int new_map = bracket_map | ((n < 32)? (1 << n) : 1);
05751 if (!is_anchored(scode, options, new_map, backref_map)) return FALSE;
05752 }
05753
05754
05755
05756 else if (op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
05757 {
05758 if (!is_anchored(scode, options, bracket_map, backref_map)) return FALSE;
05759 }
05760
05761
05762
05763
05764 else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR ||
05765 op == OP_TYPEPOSSTAR))
05766 {
05767 if (scode[1] != OP_ALLANY || (bracket_map & backref_map) != 0)
05768 return FALSE;
05769 }
05770
05771
05772
05773 else if (op != OP_SOD && op != OP_SOM &&
05774 ((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC))
05775 return FALSE;
05776 code += GET(code, 1);
05777 }
05778 while (*code == OP_ALT);
05779 return TRUE;
05780 }
05781
05782
05783
05784
05785
05786
05787
05788
05789
05790
05791
05792
05793
05794
05795
05796
05797
05798
05799
05800
05801
05802
05803
05804
05805 static BOOL
05806 is_startline(const uschar *code, unsigned int bracket_map,
05807 unsigned int backref_map)
05808 {
05809 do {
05810 const uschar *scode = first_significant_code(code + _pcre_OP_lengths[*code],
05811 NULL, 0, FALSE);
05812 register int op = *scode;
05813
05814
05815
05816 if (op == OP_BRA)
05817 {
05818 if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
05819 }
05820
05821
05822
05823 else if (op == OP_CBRA)
05824 {
05825 int n = GET2(scode, 1+LINK_SIZE);
05826 int new_map = bracket_map | ((n < 32)? (1 << n) : 1);
05827 if (!is_startline(scode, new_map, backref_map)) return FALSE;
05828 }
05829
05830
05831
05832 else if (op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
05833 { if (!is_startline(scode, bracket_map, backref_map)) return FALSE; }
05834
05835
05836
05837
05838 else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR || op == OP_TYPEPOSSTAR)
05839 {
05840 if (scode[1] != OP_ANY || (bracket_map & backref_map) != 0) return FALSE;
05841 }
05842
05843
05844
05845 else if (op != OP_CIRC) return FALSE;
05846
05847
05848
05849 code += GET(code, 1);
05850 }
05851 while (*code == OP_ALT);
05852 return TRUE;
05853 }
05854
05855
05856
05857
05858
05859
05860
05861
05862
05863
05864
05865
05866
05867
05868
05869
05870
05871
05872
05873
05874
05875
05876
05877 static int
05878 find_firstassertedchar(const uschar *code, int *options, BOOL inassert)
05879 {
05880 register int c = -1;
05881 do {
05882 int d;
05883 const uschar *scode =
05884 first_significant_code(code + 1+LINK_SIZE, options, PCRE_CASELESS, TRUE);
05885 register int op = *scode;
05886
05887 switch(op)
05888 {
05889 default:
05890 return -1;
05891
05892 case OP_BRA:
05893 case OP_CBRA:
05894 case OP_ASSERT:
05895 case OP_ONCE:
05896 case OP_COND:
05897 if ((d = find_firstassertedchar(scode, options, op == OP_ASSERT)) < 0)
05898 return -1;
05899 if (c < 0) c = d; else if (c != d) return -1;
05900 break;
05901
05902 case OP_EXACT:
05903 scode += 2;
05904
05905 case OP_CHAR:
05906 case OP_CHARNC:
05907 case OP_PLUS:
05908 case OP_MINPLUS:
05909 case OP_POSPLUS:
05910 if (!inassert) return -1;
05911 if (c < 0)
05912 {
05913 c = scode[1];
05914 if ((*options & PCRE_CASELESS) != 0) c |= REQ_CASELESS;
05915 }
05916 else if (c != scode[1]) return -1;
05917 break;
05918 }
05919
05920 code += GET(code, 1);
05921 }
05922 while (*code == OP_ALT);
05923 return c;
05924 }
05925
05926
05927
05928
05929
05930
05931
05932
05933
05934
05935
05936
05937
05938
05939
05940
05941
05942
05943
05944
05945
05946
05947
05948
05949
05950 PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
05951 pcre_compile(const char *pattern, int options, const char **errorptr,
05952 int *erroroffset, const unsigned char *tables)
05953 {
05954 return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
05955 }
05956
05957
05958 PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
05959 pcre_compile2(const char *pattern, int options, int *errorcodeptr,
05960 const char **errorptr, int *erroroffset, const unsigned char *tables)
05961 {
05962 real_pcre *re;
05963 int length = 1;
05964 int firstbyte, reqbyte, newline;
05965 int errorcode = 0;
05966 int skipatstart = 0;
05967 #ifdef SUPPORT_UTF8
05968 BOOL utf8;
05969 #endif
05970 size_t size;
05971 uschar *code;
05972 const uschar *codestart;
05973 const uschar *ptr;
05974 compile_data compile_block;
05975 compile_data *cd = &compile_block;
05976
05977
05978
05979
05980
05981
05982
05983 uschar cworkspace[COMPILE_WORK_SIZE];
05984
05985
05986
05987 ptr = (const uschar *)pattern;
05988
05989
05990
05991
05992
05993 if (errorptr == NULL)
05994 {
05995 if (errorcodeptr != NULL) *errorcodeptr = 99;
05996 return NULL;
05997 }
05998
05999 *errorptr = NULL;
06000 if (errorcodeptr != NULL) *errorcodeptr = ERR0;
06001
06002
06003
06004 if (erroroffset == NULL)
06005 {
06006 errorcode = ERR16;
06007 goto PCRE_EARLY_ERROR_RETURN2;
06008 }
06009
06010 *erroroffset = 0;
06011
06012
06013
06014 #ifdef SUPPORT_UTF8
06015 utf8 = (options & PCRE_UTF8) != 0;
06016 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
06017 (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)
06018 {
06019 errorcode = ERR44;
06020 goto PCRE_EARLY_ERROR_RETURN2;
06021 }
06022 #else
06023 if ((options & PCRE_UTF8) != 0)
06024 {
06025 errorcode = ERR32;
06026 goto PCRE_EARLY_ERROR_RETURN;
06027 }
06028 #endif
06029
06030 if ((options & ~PUBLIC_OPTIONS) != 0)
06031 {
06032 errorcode = ERR17;
06033 goto PCRE_EARLY_ERROR_RETURN;
06034 }
06035
06036
06037
06038 if (tables == NULL) tables = _pcre_default_tables;
06039 cd->lcc = tables + lcc_offset;
06040 cd->fcc = tables + fcc_offset;
06041 cd->cbits = tables + cbits_offset;
06042 cd->ctypes = tables + ctypes_offset;
06043
06044
06045
06046
06047 while (ptr[skipatstart] == '(' && ptr[skipatstart+1] == '*')
06048 {
06049 int newnl = 0;
06050 int newbsr = 0;
06051
06052 if (strncmp((char *)(ptr+skipatstart+2), "CR)", 3) == 0)
06053 { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
06054 else if (strncmp((char *)(ptr+skipatstart+2), "LF)", 3) == 0)
06055 { skipatstart += 5; newnl = PCRE_NEWLINE_LF; }
06056 else if (strncmp((char *)(ptr+skipatstart+2), "CRLF)", 5) == 0)
06057 { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }
06058 else if (strncmp((char *)(ptr+skipatstart+2), "ANY)", 4) == 0)
06059 { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; }
06060 else if (strncmp((char *)(ptr+skipatstart+2), "ANYCRLF)", 8) == 0)
06061 { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; }
06062
06063 else if (strncmp((char *)(ptr+skipatstart+2), "BSR_ANYCRLF)", 12) == 0)
06064 { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; }
06065 else if (strncmp((char *)(ptr+skipatstart+2), "BSR_UNICODE)", 12) == 0)
06066 { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; }
06067
06068 if (newnl != 0)
06069 options = (options & ~PCRE_NEWLINE_BITS) | newnl;
06070 else if (newbsr != 0)
06071 options = (options & ~(PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) | newbsr;
06072 else break;
06073 }
06074
06075
06076
06077 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
06078 {
06079 case 0:
06080 case PCRE_BSR_ANYCRLF:
06081 case PCRE_BSR_UNICODE:
06082 break;
06083 default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;
06084 }
06085
06086
06087
06088
06089
06090 switch (options & PCRE_NEWLINE_BITS)
06091 {
06092 case 0: newline = NEWLINE; break;
06093 case PCRE_NEWLINE_CR: newline = '\r'; break;
06094 case PCRE_NEWLINE_LF: newline = '\n'; break;
06095 case PCRE_NEWLINE_CR+
06096 PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
06097 case PCRE_NEWLINE_ANY: newline = -1; break;
06098 case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
06099 default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;
06100 }
06101
06102 if (newline == -2)
06103 {
06104 cd->nltype = NLTYPE_ANYCRLF;
06105 }
06106 else if (newline < 0)
06107 {
06108 cd->nltype = NLTYPE_ANY;
06109 }
06110 else
06111 {
06112 cd->nltype = NLTYPE_FIXED;
06113 if (newline > 255)
06114 {
06115 cd->nllen = 2;
06116 cd->nl[0] = (newline >> 8) & 255;
06117 cd->nl[1] = newline & 255;
06118 }
06119 else
06120 {
06121 cd->nllen = 1;
06122 cd->nl[0] = newline;
06123 }
06124 }
06125
06126
06127
06128
06129
06130 cd->top_backref = 0;
06131 cd->backref_map = 0;
06132
06133
06134
06135 DPRINTF(("------------------------------------------------------------------\n"));
06136 DPRINTF(("%s\n", pattern));
06137
06138
06139
06140
06141
06142
06143
06144
06145 cd->bracount = cd->final_bracount = 0;
06146 cd->names_found = 0;
06147 cd->name_entry_size = 0;
06148 cd->name_table = NULL;
06149 cd->start_workspace = cworkspace;
06150 cd->start_code = cworkspace;
06151 cd->hwm = cworkspace;
06152 cd->start_pattern = (const uschar *)pattern;
06153 cd->end_pattern = (const uschar *)(pattern + strlen(pattern));
06154 cd->req_varyopt = 0;
06155 cd->external_options = options;
06156 cd->external_flags = 0;
06157
06158
06159
06160
06161
06162
06163
06164 ptr += skipatstart;
06165 code = cworkspace;
06166 *code = OP_BRA;
06167 (void)compile_regex(cd->external_options, cd->external_options & PCRE_IMS,
06168 &code, &ptr, &errorcode, FALSE, FALSE, 0, &firstbyte, &reqbyte, NULL, cd,
06169 &length);
06170 if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;
06171
06172 DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,
06173 cd->hwm - cworkspace));
06174
06175 if (length > MAX_PATTERN_SIZE)
06176 {
06177 errorcode = ERR20;
06178 goto PCRE_EARLY_ERROR_RETURN;
06179 }
06180
06181
06182
06183
06184
06185
06186 size = length + sizeof(real_pcre) + cd->names_found * (cd->name_entry_size + 3);
06187 re = (real_pcre *)(pcre_malloc)(size);
06188
06189 if (re == NULL)
06190 {
06191 errorcode = ERR21;
06192 goto PCRE_EARLY_ERROR_RETURN;
06193 }
06194
06195
06196
06197
06198
06199
06200
06201 re->magic_number = MAGIC_NUMBER;
06202 re->size = size;
06203 re->options = cd->external_options;
06204 re->flags = cd->external_flags;
06205 re->dummy1 = 0;
06206 re->first_byte = 0;
06207 re->req_byte = 0;
06208 re->name_table_offset = sizeof(real_pcre);
06209 re->name_entry_size = cd->name_entry_size;
06210 re->name_count = cd->names_found;
06211 re->ref_count = 0;
06212 re->tables = (tables == _pcre_default_tables)? NULL : tables;
06213 re->nullpad = NULL;
06214
06215
06216
06217
06218
06219
06220
06221
06222 cd->final_bracount = cd->bracount;
06223 cd->bracount = 0;
06224 cd->names_found = 0;
06225 cd->name_table = (uschar *)re + re->name_table_offset;
06226 codestart = cd->name_table + re->name_entry_size * re->name_count;
06227 cd->start_code = codestart;
06228 cd->hwm = cworkspace;
06229 cd->req_varyopt = 0;
06230 cd->had_accept = FALSE;
06231
06232
06233
06234
06235
06236 ptr = (const uschar *)pattern + skipatstart;
06237 code = (uschar *)codestart;
06238 *code = OP_BRA;
06239 (void)compile_regex(re->options, re->options & PCRE_IMS, &code, &ptr,
06240 &errorcode, FALSE, FALSE, 0, &firstbyte, &reqbyte, NULL, cd, NULL);
06241 re->top_bracket = cd->bracount;
06242 re->top_backref = cd->top_backref;
06243 re->flags = cd->external_flags;
06244
06245 if (cd->had_accept) reqbyte = -1;
06246
06247
06248
06249 if (errorcode == 0 && *ptr != 0) errorcode = ERR22;
06250
06251
06252
06253
06254 *code++ = OP_END;
06255
06256 #ifndef DEBUG
06257 if (code - codestart > length) errorcode = ERR23;
06258 #endif
06259
06260
06261
06262 while (errorcode == 0 && cd->hwm > cworkspace)
06263 {
06264 int offset, recno;
06265 const uschar *groupptr;
06266 cd->hwm -= LINK_SIZE;
06267 offset = GET(cd->hwm, 0);
06268 recno = GET(codestart, offset);
06269 groupptr = find_bracket(codestart, (re->options & PCRE_UTF8) != 0, recno);
06270 if (groupptr == NULL) errorcode = ERR53;
06271 else PUT(((uschar *)codestart), offset, groupptr - codestart);
06272 }
06273
06274
06275
06276
06277 if (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15;
06278
06279
06280
06281 if (errorcode != 0)
06282 {
06283 (pcre_free)(re);
06284 PCRE_EARLY_ERROR_RETURN:
06285 *erroroffset = ptr - (const uschar *)pattern;
06286 PCRE_EARLY_ERROR_RETURN2:
06287 *errorptr = find_error_text(errorcode);
06288 if (errorcodeptr != NULL) *errorcodeptr = errorcode;
06289 return NULL;
06290 }
06291
06292
06293
06294
06295
06296
06297
06298
06299
06300
06301
06302 if ((re->options & PCRE_ANCHORED) == 0)
06303 {
06304 int temp_options = re->options;
06305 if (is_anchored(codestart, &temp_options, 0, cd->backref_map))
06306 re->options |= PCRE_ANCHORED;
06307 else
06308 {
06309 if (firstbyte < 0)
06310 firstbyte = find_firstassertedchar(codestart, &temp_options, FALSE);
06311 if (firstbyte >= 0)
06312 {
06313 int ch = firstbyte & 255;
06314 re->first_byte = ((firstbyte & REQ_CASELESS) != 0 &&
06315 cd->fcc[ch] == ch)? ch : firstbyte;
06316 re->flags |= PCRE_FIRSTSET;
06317 }
06318 else if (is_startline(codestart, 0, cd->backref_map))
06319 re->flags |= PCRE_STARTLINE;
06320 }
06321 }
06322
06323
06324
06325
06326
06327 if (reqbyte >= 0 &&
06328 ((re->options & PCRE_ANCHORED) == 0 || (reqbyte & REQ_VARY) != 0))
06329 {
06330 int ch = reqbyte & 255;
06331 re->req_byte = ((reqbyte & REQ_CASELESS) != 0 &&
06332 cd->fcc[ch] == ch)? (reqbyte & ~REQ_CASELESS) : reqbyte;
06333 re->flags |= PCRE_REQCHSET;
06334 }
06335
06336
06337
06338
06339 #ifdef DEBUG
06340
06341 printf("Length = %d top_bracket = %d top_backref = %d\n",
06342 length, re->top_bracket, re->top_backref);
06343
06344 printf("Options=%08x\n", re->options);
06345
06346 if ((re->flags & PCRE_FIRSTSET) != 0)
06347 {
06348 int ch = re->first_byte & 255;
06349 const char *caseless = ((re->first_byte & REQ_CASELESS) == 0)?
06350 "" : " (caseless)";
06351 if (isprint(ch)) printf("First char = %c%s\n", ch, caseless);
06352 else printf("First char = \\x%02x%s\n", ch, caseless);
06353 }
06354
06355 if ((re->flags & PCRE_REQCHSET) != 0)
06356 {
06357 int ch = re->req_byte & 255;
06358 const char *caseless = ((re->req_byte & REQ_CASELESS) == 0)?
06359 "" : " (caseless)";
06360 if (isprint(ch)) printf("Req char = %c%s\n", ch, caseless);
06361 else printf("Req char = \\x%02x%s\n", ch, caseless);
06362 }
06363
06364 pcre_printint(re, stdout, TRUE);
06365
06366
06367
06368
06369 if (code - codestart > length)
06370 {
06371 (pcre_free)(re);
06372 *errorptr = find_error_text(ERR23);
06373 *erroroffset = ptr - (uschar *)pattern;
06374 if (errorcodeptr != NULL) *errorcodeptr = ERR23;
06375 return NULL;
06376 }
06377 #endif
06378
06379 return (pcre *)re;
06380 }
06381
06382