00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045 #ifdef HAVE_CONFIG_H
00046 #include "config.h"
00047 #endif
00048
00049 #include "pcre_internal.h"
00050
00051
00052
00053
/* Result codes returned by set_start_bits():

  SSB_FAIL      an item was met for which no starting-byte set can be given;
                the whole analysis is abandoned
  SSB_DONE      every branch that was scanned ended on a mandatory item, so
                the bitmap is complete for that group
  SSB_CONTINUE  the end of a branch or of the group was reached without a
                mandatory item; the caller has to keep scanning what follows

pcre_study() produces study data only when the top-level call yields
SSB_DONE. */

enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE };
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073 static void
00074 set_bit(uschar *start_bits, unsigned int c, BOOL caseless, compile_data *cd)
00075 {
00076 start_bits[c/8] |= (1 << (c&7));
00077 if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
00078 start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7));
00079 }
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107 static int
00108 set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
00109 BOOL utf8, compile_data *cd)
00110 {
00111 register int c;
00112 int yield = SSB_DONE;
00113
00114 #if 0
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127 volatile int dummy;
00128
00129 #endif
00130
00131 do
00132 {
00133 const uschar *tcode = code + (((int)*code == OP_CBRA)? 3:1) + LINK_SIZE;
00134 BOOL try_next = TRUE;
00135
00136 while (try_next)
00137 {
00138 int rc;
00139 switch(*tcode)
00140 {
00141
00142
00143 default:
00144 return SSB_FAIL;
00145
00146
00147
00148
00149
00150
00151 case OP_BRA:
00152 case OP_SBRA:
00153 case OP_CBRA:
00154 case OP_SCBRA:
00155 case OP_ONCE:
00156 case OP_ASSERT:
00157 rc = set_start_bits(tcode, start_bits, caseless, utf8, cd);
00158 if (rc == SSB_FAIL) return SSB_FAIL;
00159 if (rc == SSB_DONE) try_next = FALSE; else
00160 {
00161 do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
00162 tcode += 1 + LINK_SIZE;
00163 }
00164 break;
00165
00166
00167
00168
00169
00170
00171
00172
00173 case OP_ALT:
00174 yield = SSB_CONTINUE;
00175 try_next = FALSE;
00176 break;
00177
00178 case OP_KET:
00179 case OP_KETRMAX:
00180 case OP_KETRMIN:
00181 return SSB_CONTINUE;
00182
00183
00184
00185 case OP_CALLOUT:
00186 tcode += 2 + 2*LINK_SIZE;
00187 break;
00188
00189
00190
00191 case OP_ASSERT_NOT:
00192 case OP_ASSERTBACK:
00193 case OP_ASSERTBACK_NOT:
00194 do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
00195 tcode += 1 + LINK_SIZE;
00196 break;
00197
00198
00199
00200 case OP_OPT:
00201 caseless = (tcode[1] & PCRE_CASELESS) != 0;
00202 tcode += 2;
00203 break;
00204
00205
00206
00207 case OP_BRAZERO:
00208 case OP_BRAMINZERO:
00209 if (set_start_bits(++tcode, start_bits, caseless, utf8, cd) == SSB_FAIL)
00210 return SSB_FAIL;
00211
00212
00213
00214
00215
00216 do tcode += GET(tcode,1); while (*tcode == OP_ALT);
00217 tcode += 1 + LINK_SIZE;
00218 break;
00219
00220
00221
00222 case OP_SKIPZERO:
00223 tcode++;
00224 do tcode += GET(tcode,1); while (*tcode == OP_ALT);
00225 tcode += 1 + LINK_SIZE;
00226 break;
00227
00228
00229
00230 case OP_STAR:
00231 case OP_MINSTAR:
00232 case OP_POSSTAR:
00233 case OP_QUERY:
00234 case OP_MINQUERY:
00235 case OP_POSQUERY:
00236 set_bit(start_bits, tcode[1], caseless, cd);
00237 tcode += 2;
00238 #ifdef SUPPORT_UTF8
00239 if (utf8 && tcode[-1] >= 0xc0)
00240 tcode += _pcre_utf8_table4[tcode[-1] & 0x3f];
00241 #endif
00242 break;
00243
00244
00245
00246 case OP_UPTO:
00247 case OP_MINUPTO:
00248 case OP_POSUPTO:
00249 set_bit(start_bits, tcode[3], caseless, cd);
00250 tcode += 4;
00251 #ifdef SUPPORT_UTF8
00252 if (utf8 && tcode[-1] >= 0xc0)
00253 tcode += _pcre_utf8_table4[tcode[-1] & 0x3f];
00254 #endif
00255 break;
00256
00257
00258
00259 case OP_EXACT:
00260 tcode += 2;
00261
00262 case OP_CHAR:
00263 case OP_CHARNC:
00264 case OP_PLUS:
00265 case OP_MINPLUS:
00266 case OP_POSPLUS:
00267 set_bit(start_bits, tcode[1], caseless, cd);
00268 try_next = FALSE;
00269 break;
00270
00271
00272
00273 case OP_NOT_DIGIT:
00274 for (c = 0; c < 32; c++)
00275 start_bits[c] |= ~cd->cbits[c+cbit_digit];
00276 try_next = FALSE;
00277 break;
00278
00279 case OP_DIGIT:
00280 for (c = 0; c < 32; c++)
00281 start_bits[c] |= cd->cbits[c+cbit_digit];
00282 try_next = FALSE;
00283 break;
00284
00285
00286
00287
00288 case OP_NOT_WHITESPACE:
00289 for (c = 0; c < 32; c++)
00290 {
00291 int d = cd->cbits[c+cbit_space];
00292 if (c == 1) d &= ~0x08;
00293 start_bits[c] |= ~d;
00294 }
00295 try_next = FALSE;
00296 break;
00297
00298
00299
00300
00301 case OP_WHITESPACE:
00302 for (c = 0; c < 32; c++)
00303 {
00304 int d = cd->cbits[c+cbit_space];
00305 if (c == 1) d &= ~0x08;
00306 start_bits[c] |= d;
00307 }
00308 try_next = FALSE;
00309 break;
00310
00311 case OP_NOT_WORDCHAR:
00312 for (c = 0; c < 32; c++)
00313 start_bits[c] |= ~cd->cbits[c+cbit_word];
00314 try_next = FALSE;
00315 break;
00316
00317 case OP_WORDCHAR:
00318 for (c = 0; c < 32; c++)
00319 start_bits[c] |= cd->cbits[c+cbit_word];
00320 try_next = FALSE;
00321 break;
00322
00323
00324
00325
00326 case OP_TYPEPLUS:
00327 case OP_TYPEMINPLUS:
00328 tcode++;
00329 break;
00330
00331 case OP_TYPEEXACT:
00332 tcode += 3;
00333 break;
00334
00335
00336
00337
00338 case OP_TYPEUPTO:
00339 case OP_TYPEMINUPTO:
00340 case OP_TYPEPOSUPTO:
00341 tcode += 2;
00342
00343 case OP_TYPESTAR:
00344 case OP_TYPEMINSTAR:
00345 case OP_TYPEPOSSTAR:
00346 case OP_TYPEQUERY:
00347 case OP_TYPEMINQUERY:
00348 case OP_TYPEPOSQUERY:
00349 switch(tcode[1])
00350 {
00351 case OP_ANY:
00352 case OP_ALLANY:
00353 return SSB_FAIL;
00354
00355 case OP_NOT_DIGIT:
00356 for (c = 0; c < 32; c++)
00357 start_bits[c] |= ~cd->cbits[c+cbit_digit];
00358 break;
00359
00360 case OP_DIGIT:
00361 for (c = 0; c < 32; c++)
00362 start_bits[c] |= cd->cbits[c+cbit_digit];
00363 break;
00364
00365
00366
00367
00368 case OP_NOT_WHITESPACE:
00369 for (c = 0; c < 32; c++)
00370 {
00371 int d = cd->cbits[c+cbit_space];
00372 if (c == 1) d &= ~0x08;
00373 start_bits[c] |= ~d;
00374 }
00375 break;
00376
00377
00378
00379
00380 case OP_WHITESPACE:
00381 for (c = 0; c < 32; c++)
00382 {
00383 int d = cd->cbits[c+cbit_space];
00384 if (c == 1) d &= ~0x08;
00385 start_bits[c] |= d;
00386 }
00387 break;
00388
00389 case OP_NOT_WORDCHAR:
00390 for (c = 0; c < 32; c++)
00391 start_bits[c] |= ~cd->cbits[c+cbit_word];
00392 break;
00393
00394 case OP_WORDCHAR:
00395 for (c = 0; c < 32; c++)
00396 start_bits[c] |= cd->cbits[c+cbit_word];
00397 break;
00398 }
00399
00400 tcode += 2;
00401 break;
00402
00403
00404
00405
00406
00407
00408
00409 case OP_NCLASS:
00410 #ifdef SUPPORT_UTF8
00411 if (utf8)
00412 {
00413 start_bits[24] |= 0xf0;
00414 memset(start_bits+25, 0xff, 7);
00415 }
00416 #endif
00417
00418
00419 case OP_CLASS:
00420 {
00421 tcode++;
00422
00423
00424
00425
00426
00427
00428
00429 #ifdef SUPPORT_UTF8
00430 if (utf8)
00431 {
00432 for (c = 0; c < 16; c++) start_bits[c] |= tcode[c];
00433 for (c = 128; c < 256; c++)
00434 {
00435 if ((tcode[c/8] && (1 << (c&7))) != 0)
00436 {
00437 int d = (c >> 6) | 0xc0;
00438 start_bits[d/8] |= (1 << (d&7));
00439 c = (c & 0xc0) + 0x40 - 1;
00440 }
00441 }
00442 }
00443
00444
00445
00446 else
00447 #endif
00448 {
00449 for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
00450 }
00451
00452
00453
00454 tcode += 32;
00455 switch (*tcode)
00456 {
00457 case OP_CRSTAR:
00458 case OP_CRMINSTAR:
00459 case OP_CRQUERY:
00460 case OP_CRMINQUERY:
00461 tcode++;
00462 break;
00463
00464 case OP_CRRANGE:
00465 case OP_CRMINRANGE:
00466 if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5;
00467 else try_next = FALSE;
00468 break;
00469
00470 default:
00471 try_next = FALSE;
00472 break;
00473 }
00474 }
00475 break;
00476
00477 }
00478 }
00479
00480 code += GET(code, 1);
00481 }
00482 while (*code == OP_ALT);
00483 return yield;
00484 }
00485
00486
00487
00488
00489
00490
00491
00492
00493
00494
00495
00496
00497
00498
00499
00500
00501
00502
00503
00504
00505
00506
00507 PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION
00508 pcre_study(const pcre *external_re, int options, const char **errorptr)
00509 {
00510 uschar start_bits[32];
00511 pcre_extra *extra;
00512 pcre_study_data *study;
00513 const uschar *tables;
00514 uschar *code;
00515 compile_data compile_block;
00516 const real_pcre *re = (const real_pcre *)external_re;
00517
00518 *errorptr = NULL;
00519
00520 if (re == NULL || re->magic_number != MAGIC_NUMBER)
00521 {
00522 *errorptr = "argument is not a compiled regular expression";
00523 return NULL;
00524 }
00525
00526 if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
00527 {
00528 *errorptr = "unknown or incorrect option bit(s) set";
00529 return NULL;
00530 }
00531
00532 code = (uschar *)re + re->name_table_offset +
00533 (re->name_count * re->name_entry_size);
00534
00535
00536
00537
00538
00539 if ((re->options & PCRE_ANCHORED) != 0 ||
00540 (re->flags & (PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
00541 return NULL;
00542
00543
00544
00545 tables = re->tables;
00546 if (tables == NULL)
00547 (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
00548 (void *)(&tables));
00549
00550 compile_block.lcc = tables + lcc_offset;
00551 compile_block.fcc = tables + fcc_offset;
00552 compile_block.cbits = tables + cbits_offset;
00553 compile_block.ctypes = tables + ctypes_offset;
00554
00555
00556
00557 memset(start_bits, 0, 32 * sizeof(uschar));
00558 if (set_start_bits(code, start_bits, (re->options & PCRE_CASELESS) != 0,
00559 (re->options & PCRE_UTF8) != 0, &compile_block) != SSB_DONE) return NULL;
00560
00561
00562
00563
00564
00565
00566
00567
00568 extra = (pcre_extra *)(pcre_malloc)
00569 (sizeof(pcre_extra) + sizeof(pcre_study_data));
00570
00571 if (extra == NULL)
00572 {
00573 *errorptr = "failed to get memory";
00574 return NULL;
00575 }
00576
00577 study = (pcre_study_data *)((char *)extra + sizeof(pcre_extra));
00578 extra->flags = PCRE_EXTRA_STUDY_DATA;
00579 extra->study_data = study;
00580
00581 study->size = sizeof(pcre_study_data);
00582 study->options = PCRE_STUDY_MAPPED;
00583 memcpy(study->start_bits, start_bits, sizeof(start_bits));
00584
00585 return extra;
00586 }
00587
00588