00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include <stdio.h>
00022 #include <ctype.h>
00023 #include <string.h>
00024
00025
00026 #include "Match.h"
00027
00028
00029
00030 #define BOL '^' // start-of-line anchor
00031 #define EOL '$' // end-of-line anchor
00032 #define ANY '.' // matches any character
00033 #define CCL '[' // start a character class
00034 #define CCLEND ']' // end a character class
00035 #define NCCL '^' // negates character class if 1st character
00036 #define CLOSURE '*' // Kleene closure (matches 0 or more)
00037 #define PCLOSE '+' // Positive closure (1 or more)
00038 #define OPT '?' // Optional closure (0 or 1)
00039
00040 enum Eaction {
00041
00042 kBOL = (0x8000 | '^'),
00043 kEOL = (0x8000 | '$'),
00044 kANY = (0x8000 | '.'),
00045 kCCL = (0x8000 | '['),
00046 kOPT = (0x8000 | '?'),
00047 kCLOSE = (0x8000 | '*'),
00048 kPCLOSE = (0x8000 | '+'),
00049 kEND = (0x8000 | 0)
00050
00051 };
00052
00053 #if 1
00054 #define ISHEXDIGIT(x) isxdigit((unsigned char)x)
00055 #else
00056 #define ISHEXDIGIT(x) (isdigit(x) \
00057 || ('a'<=(x) && (x)<='f') \
00058 || ('A'<=(x) && (x)<='F') )
00059 #endif
00060
00061 #define ISOCTDIGIT(x) ('0'<=(x) && (x)<='7')
00062
00063
00064 #define MAPSIZE 16 // need this many Pattern_t elements for
00065
00066
00067
00068 inline void ADVANCE(const Pattern_t*& pat)
00069 {
00070
00071
00072
00073
00074 if (*pat++ == (Pattern_t) kCCL)
00075 pat += MAPSIZE;
00076 }
00077
00078
00079
00080
00081
00082
00083
00084 #ifdef SETBIT // from Rtypes.h
00085 #undef SETBIT
00086 #endif
00087
00088 static void SETBIT(unsigned char b, Pattern_t* map)
00089 {
00090 map[b >> 4] |= 1 << (b & 0x0f);
00091 }
00092
00093
00094 static int TSTBIT(unsigned char b, const Pattern_t* map)
00095 {
00096 return map[b >> 4] & (1 << (b & 0x0f));
00097 }
00098
00099
00100
00101 #define E_NONE 0 // Possible return values from pat_err
00102 #define E_ILLEGAL 1 // Set in Makepat() to indicate prob-
00103 #define E_NOMEM 2 // lems that came up while making the
00104 #define E_PAT 3 // pattern template.
00105
00106
00107
00108 static const char* doccl(Pattern_t*, const char*);
00109 static int hex2bin(int);
00110 static int oct2bin(int);
00111 static int omatch(const char**, size_t*, const Pattern_t*, const char*);
00112 static const char* patcmp(const char*, size_t, const Pattern_t*, const char*);
00113 static int esc(const char**);
00114
00115
00116
00117
00118 int Makepat(const char* exp,
00119 Pattern_t* pat,
00120 int maxpat)
00121 {
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131 Pattern_t* cur;
00132 Pattern_t* prev;
00133 int theError = E_ILLEGAL;
00134
00135 if (!*exp)
00136 goto exit;
00137
00138 if (*exp == CLOSURE || *exp == PCLOSE || *exp == OPT)
00139 goto exit;
00140
00141 theError = E_NOMEM;
00142 if (!pat) goto exit;
00143
00144 prev = cur = pat;
00145 theError = E_PAT;
00146
00147 while (*exp) {
00148
00149 if (cur >= &pat[maxpat - 1]) goto exit;
00150
00151 switch (*exp) {
00152 case ANY:
00153 *cur = (Pattern_t)kANY;
00154 prev = cur++;
00155 ++exp;
00156 break;
00157
00158 case BOL:
00159 *cur = (cur == pat) ? (Pattern_t)kBOL : (unsigned char)*exp;
00160 prev = cur++;
00161 ++exp;
00162 break;
00163
00164 case EOL:
00165 *cur = (!exp[1]) ? (Pattern_t)kEOL : (unsigned char)*exp;
00166 prev = cur++;
00167 ++exp;
00168 break;
00169
00170 case CCL:
00171 if (((cur - pat) + MAPSIZE) >= maxpat)
00172 goto exit;
00173 prev = cur;
00174 *cur++ = (Pattern_t)kCCL;
00175 exp = doccl(cur, exp);
00176 if (*exp != CCLEND) goto exit;
00177 ++exp;
00178 cur += MAPSIZE;
00179 break;
00180
00181 case OPT:
00182 case CLOSURE:
00183 case PCLOSE:
00184 switch (*prev) {
00185 case kBOL:
00186 case kEOL:
00187 case kOPT:
00188 case kPCLOSE:
00189 case kCLOSE:
00190 goto exit;
00191 }
00192
00193 memmove( prev+1, prev, (cur-prev)*sizeof(*cur));
00194 *prev = (Pattern_t) (*exp == OPT) ? kOPT :
00195 (*exp == PCLOSE) ? kPCLOSE :
00196 kCLOSE;
00197 ++cur;
00198 ++exp;
00199 break;
00200
00201 default:
00202 prev = cur;
00203 *cur++ = esc(&exp);
00204 break;
00205 }
00206 }
00207
00208 *cur = (Pattern_t)kEND;
00209 theError = E_NONE;
00210
00211 exit:
00212 return theError;
00213 }
00214
00215
00216 const char *Matchs(const char* str,
00217 size_t slen,
00218 const Pattern_t* pat,
00219 const char** startpat)
00220 {
00221
00222
00223 if (!pat) return 0;
00224 const char* endp = 0;
00225 if (*pat == (Pattern_t)kBOL) {
00226
00227 endp = patcmp(str, slen, pat+1, str);
00228 } else {
00229
00230 const char* start = str;
00231 while ((endp = patcmp(str, slen, pat, start)) == 0 && slen != 0)
00232 ++str, --slen;
00233 }
00234 *startpat = str;
00235 return endp;
00236 }
00237
00238
00239 static const char *doccl(Pattern_t* map, const char* src)
00240 {
00241
00242
00243
00244 int negative;
00245
00246 ++src;
00247 negative = (*src == NCCL);
00248 if (negative)
00249 ++src;
00250 memset(map, 0, MAPSIZE*sizeof(*map));
00251
00252 while (*src && *src != CCLEND) {
00253 int isCCL = (*src == CCL);
00254 unsigned char first = esc(&src);
00255 SETBIT(first, map);
00256 if (isCCL && *src && *src == CCLEND) {
00257 first = esc(&src);
00258 SETBIT(first, map);
00259 }
00260 if (*src == '-' && src[1] && src[1] != CCLEND) {
00261 ++src;
00262 unsigned char last = esc(&src);
00263 if (first <= last) while (first < last ) SETBIT(++first, map);
00264 else while (last < first) SETBIT(last++, map);
00265 }
00266 }
00267
00268 if (negative) {
00269 for (int i = MAPSIZE; --i >= 0;)
00270 *map++ ^= ~0;
00271 }
00272
00273 return src;
00274 }
00275
00276
00277 static const char *patcmp(const char* str,
00278 size_t slen,
00279 const Pattern_t* pat,
00280 const char* start)
00281 {
00282
00283
00284
00285
00286
00287
00288
00289
00290 if (!pat)
00291 return 0;
00292
00293 while (*pat != (Pattern_t)kEND) {
00294 if (*pat == (Pattern_t)kOPT) {
00295
00296
00297
00298
00299 omatch(&str, &slen, ++pat, start);
00300 ADVANCE(pat);
00301
00302 } else if (*pat != (Pattern_t)kCLOSE &&
00303 *pat != (Pattern_t)kPCLOSE) {
00304
00305
00306
00307
00308 if (!omatch(&str, &slen, pat, start))
00309 return 0;
00310 ADVANCE(pat);
00311
00312 } else {
00313
00314 if (*pat++ == (Pattern_t)kPCLOSE)
00315 if (!omatch(&str, &slen, pat, start))
00316 return 0;
00317
00318
00319
00320 const char* bocl = str;
00321 while (slen && omatch(&str, &slen, pat, start))
00322 ;
00323 ADVANCE(pat);
00324 if (*pat == (Pattern_t)kEND)
00325 break;
00326
00327
00328
00329
00330
00331
00332
00333
00334
00335
00336 const char* end;
00337 while ((end = patcmp(str, slen, pat, start)) == 0) {
00338 ++slen, --str;
00339 if (str < bocl) break;
00340 }
00341 return end;
00342
00343 }
00344 }
00345
00346
00347
00348
00349
00350
00351
00352
00353
00354 return str;
00355 }
00356
00357
00358 static int omatch(const char** strp,
00359 size_t* slenp,
00360 const Pattern_t* pat,
00361 const char* start)
00362 {
00363
00364
00365
00366
00367
00368
00369
00370
00371
00372 switch (*pat) {
00373
00374 case kBOL: return (*strp == start);
00375
00376
00377 case kEOL: return (*slenp == 0);
00378
00379
00380
00381 case kANY: if (**strp == '\n') return 0;
00382 break;
00383
00384
00385 case kCCL: if (*slenp == 0 || !TSTBIT(**strp, pat + 1)) return 0;
00386 break;
00387
00388
00389 default: if (*slenp == 0 || (unsigned char) **strp != *pat) return 0;
00390 break;
00391 }
00392
00393 if (*slenp) {
00394 ++*strp;
00395 --*slenp;
00396 }
00397 return 2;
00398 }
00399
00400
00401 static int hex2bin(int c)
00402 {
00403
00404
00405
00406 return (isdigit(c) ? (c)-'0': ((toupper((unsigned char)c))-'A')+10) & 0xf;
00407 }
00408
00409
00410 static int oct2bin(int c)
00411 {
00412
00413
00414
00415 return ( ((c)-'0') & 0x7 );
00416 }
00417
00418
00419 static int esc(const char** s)
00420 {
00421
00422
00423
00424
00425
00426
00427
00428
00429
00430
00431
00432
00433
00434
00435
00436
00437
00438 int rval;
00439
00440 if (**s != '\\')
00441 rval = *((*s)++);
00442 else {
00443 ++(*s);
00444 switch (toupper((unsigned char)**s)) {
00445 case '\0': rval = '\\'; break;
00446 case 'B': rval = '\b'; break;
00447 case 'F': rval = '\f'; break;
00448 case 'N': rval = '\n'; break;
00449 case 'R': rval = '\r'; break;
00450 case 'S': rval = ' '; break;
00451 case 'T': rval = '\t'; break;
00452 case 'E': rval = '\033'; break;
00453
00454 case '^':
00455 rval = *++(*s) ;
00456 rval = toupper((unsigned char)rval) - '@';
00457 break;
00458
00459 case 'X':
00460 rval = 0;
00461 ++(*s);
00462 if (ISHEXDIGIT(**s)) {
00463 rval = hex2bin((unsigned char) *(*s)++);
00464 }
00465 if (ISHEXDIGIT(**s)) {
00466 rval <<= 4;
00467 rval |= hex2bin((unsigned char) *(*s)++);
00468 }
00469 --(*s);
00470 break;
00471
00472 default:
00473 if (!ISOCTDIGIT(**s))
00474 rval = **s;
00475 else {
00476 rval = oct2bin(*(*s)++);
00477 if (ISOCTDIGIT(**s)) {
00478 rval <<= 3;
00479 rval |= oct2bin(*(*s)++);
00480 }
00481 if (ISOCTDIGIT(**s)) {
00482 rval <<= 3;
00483 rval |= oct2bin(*(*s)++);
00484 }
00485 --(*s);
00486 }
00487 break;
00488 }
00489 ++(*s);
00490 }
00491 return (unsigned char)rval;
00492 }