00001 /************************************************* 00002 * Perl-Compatible Regular Expressions * 00003 *************************************************/ 00004 00005 /* PCRE is a library of functions to support regular expressions whose syntax 00006 and semantics are as close as possible to those of the Perl 5 language. 00007 00008 Written by Philip Hazel 00009 Copyright (c) 1997-2008 University of Cambridge 00010 00011 ----------------------------------------------------------------------------- 00012 Redistribution and use in source and binary forms, with or without 00013 modification, are permitted provided that the following conditions are met: 00014 00015 * Redistributions of source code must retain the above copyright notice, 00016 this list of conditions and the following disclaimer. 00017 00018 * Redistributions in binary form must reproduce the above copyright 00019 notice, this list of conditions and the following disclaimer in the 00020 documentation and/or other materials provided with the distribution. 00021 00022 * Neither the name of the University of Cambridge nor the names of its 00023 contributors may be used to endorse or promote products derived from 00024 this software without specific prior written permission. 00025 00026 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 00027 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00028 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00029 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 00030 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 00031 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 00032 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 00033 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 00034 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 00035 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00036 POSSIBILITY OF SUCH DAMAGE. 00037 ----------------------------------------------------------------------------- 00038 */ 00039 00040 00041 /* This module contains some convenience functions for extracting substrings 00042 from the subject string after a regex match has succeeded. The original idea 00043 for these functions came from Scott Wimer. */ 00044 00045 00046 #ifdef HAVE_CONFIG_H 00047 #include "config.h" 00048 #endif 00049 00050 #include "pcre_internal.h" 00051 00052 00053 /************************************************* 00054 * Find number for named string * 00055 *************************************************/ 00056 00057 /* This function is used by the get_first_set() function below, as well 00058 as being generally available. It assumes that names are unique. 00059 00060 Arguments: 00061 code the compiled regex 00062 stringname the name whose number is required 00063 00064 Returns: the number of the named parentheses, or a negative number 00065 (PCRE_ERROR_NOSUBSTRING) if not found 00066 */ 00067 00068 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 00069 pcre_get_stringnumber(const pcre *code, const char *stringname) 00070 { 00071 int rc; 00072 int entrysize; 00073 int top, bot; 00074 uschar *nametable; 00075 00076 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) 00077 return rc; 00078 if (top <= 0) return PCRE_ERROR_NOSUBSTRING; 00079 00080 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) 00081 return rc; 00082 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) 00083 return rc; 00084 00085 bot = 0; 00086 while (top > bot) 00087 { 00088 int mid = (top + bot) / 2; 00089 uschar *entry = nametable + entrysize*mid; 00090 int c = strcmp(stringname, (char *)(entry + 2)); 00091 if (c == 0) return (entry[0] << 8) + entry[1]; 00092 if (c > 0) bot = mid + 1; else top = mid; 00093 } 00094 00095 return PCRE_ERROR_NOSUBSTRING; 00096 } 00097 00098 00099 00100 /************************************************* 00101 * Find (multiple) entries for named string * 00102 *************************************************/ 00103 00104 /* This is used by the get_first_set() function below, as well as being 00105 generally available. It is used when duplicated names are permitted. 00106 00107 Arguments: 00108 code the compiled regex 00109 stringname the name whose entries required 00110 firstptr where to put the pointer to the first entry 00111 lastptr where to put the pointer to the last entry 00112 00113 Returns: the length of each entry, or a negative number 00114 (PCRE_ERROR_NOSUBSTRING) if not found 00115 */ 00116 00117 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 00118 pcre_get_stringtable_entries(const pcre *code, const char *stringname, 00119 char **firstptr, char **lastptr) 00120 { 00121 int rc; 00122 int entrysize; 00123 int top, bot; 00124 uschar *nametable, *lastentry; 00125 00126 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) 00127 return rc; 00128 if (top <= 0) return PCRE_ERROR_NOSUBSTRING; 00129 00130 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) 00131 return rc; 00132 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) 00133 return rc; 00134 00135 lastentry = nametable + entrysize * (top - 1); 00136 bot = 0; 00137 while (top > bot) 00138 { 00139 int mid = (top + bot) / 2; 00140 uschar *entry = nametable + entrysize*mid; 00141 int c = strcmp(stringname, (char *)(entry + 2)); 00142 if (c == 0) 00143 { 00144 uschar *first = entry; 00145 uschar *last = entry; 00146 while (first > nametable) 00147 { 00148 if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break; 00149 first -= entrysize; 00150 } 00151 while (last < lastentry) 00152 { 00153 if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break; 00154 last += entrysize; 00155 } 00156 *firstptr = (char *)first; 00157 *lastptr = (char *)last; 00158 return entrysize; 00159 } 00160 if (c > 0) bot = mid + 1; else top = mid; 00161 } 00162 00163 return PCRE_ERROR_NOSUBSTRING; 00164 } 00165 00166 00167 00168 /************************************************* 00169 * Find first set of multiple named strings * 00170 *************************************************/ 00171 00172 /* This function allows for duplicate names in the table of named substrings. 00173 It returns the number of the first one that was set in a pattern match. 00174 00175 Arguments: 00176 code the compiled regex 00177 stringname the name of the capturing substring 00178 ovector the vector of matched substrings 00179 00180 Returns: the number of the first that is set, 00181 or the number of the last one if none are set, 00182 or a negative number on error 00183 */ 00184 00185 static int 00186 get_first_set(const pcre *code, const char *stringname, int *ovector) 00187 { 00188 const real_pcre *re = (const real_pcre *)code; 00189 int entrysize; 00190 char *first, *last; 00191 uschar *entry; 00192 if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0) 00193 return pcre_get_stringnumber(code, stringname); 00194 entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last); 00195 if (entrysize <= 0) return entrysize; 00196 for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize) 00197 { 00198 int n = (entry[0] << 8) + entry[1]; 00199 if (ovector[n*2] >= 0) return n; 00200 } 00201 return (first[0] << 8) + first[1]; 00202 } 00203 00204 00205 00206 00207 /************************************************* 00208 * Copy captured string to given buffer * 00209 *************************************************/ 00210 00211 /* This function copies a single captured substring into a given buffer. 00212 Note that we use memcpy() rather than strncpy() in case there are binary zeros 00213 in the string. 00214 00215 Arguments: 00216 subject the subject string that was matched 00217 ovector pointer to the offsets table 00218 stringcount the number of substrings that were captured 00219 (i.e. the yield of the pcre_exec call, unless 00220 that was zero, in which case it should be 1/3 00221 of the offset table size) 00222 stringnumber the number of the required substring 00223 buffer where to put the substring 00224 size the size of the buffer 00225 00226 Returns: if successful: 00227 the length of the copied string, not including the zero 00228 that is put on the end; can be zero 00229 if not successful: 00230 PCRE_ERROR_NOMEMORY (-6) buffer too small 00231 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring 00232 */ 00233 00234 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 00235 pcre_copy_substring(const char *subject, int *ovector, int stringcount, 00236 int stringnumber, char *buffer, int size) 00237 { 00238 int yield; 00239 if (stringnumber < 0 || stringnumber >= stringcount) 00240 return PCRE_ERROR_NOSUBSTRING; 00241 stringnumber *= 2; 00242 yield = ovector[stringnumber+1] - ovector[stringnumber]; 00243 if (size < yield + 1) return PCRE_ERROR_NOMEMORY; 00244 memcpy(buffer, subject + ovector[stringnumber], yield); 00245 buffer[yield] = 0; 00246 return yield; 00247 } 00248 00249 00250 00251 /************************************************* 00252 * Copy named captured string to given buffer * 00253 *************************************************/ 00254 00255 /* This function copies a single captured substring into a given buffer, 00256 identifying it by name. If the regex permits duplicate names, the first 00257 substring that is set is chosen. 00258 00259 Arguments: 00260 code the compiled regex 00261 subject the subject string that was matched 00262 ovector pointer to the offsets table 00263 stringcount the number of substrings that were captured 00264 (i.e. the yield of the pcre_exec call, unless 00265 that was zero, in which case it should be 1/3 00266 of the offset table size) 00267 stringname the name of the required substring 00268 buffer where to put the substring 00269 size the size of the buffer 00270 00271 Returns: if successful: 00272 the length of the copied string, not including the zero 00273 that is put on the end; can be zero 00274 if not successful: 00275 PCRE_ERROR_NOMEMORY (-6) buffer too small 00276 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring 00277 */ 00278 00279 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 00280 pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector, 00281 int stringcount, const char *stringname, char *buffer, int size) 00282 { 00283 int n = get_first_set(code, stringname, ovector); 00284 if (n <= 0) return n; 00285 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size); 00286 } 00287 00288 00289 00290 /************************************************* 00291 * Copy all captured strings to new store * 00292 *************************************************/ 00293 00294 /* This function gets one chunk of store and builds a list of pointers and all 00295 of the captured substrings in it. A NULL pointer is put on the end of the list. 00296 00297 Arguments: 00298 subject the subject string that was matched 00299 ovector pointer to the offsets table 00300 stringcount the number of substrings that were captured 00301 (i.e. the yield of the pcre_exec call, unless 00302 that was zero, in which case it should be 1/3 00303 of the offset table size) 00304 listptr set to point to the list of pointers 00305 00306 Returns: if successful: 0 00307 if not successful: 00308 PCRE_ERROR_NOMEMORY (-6) failed to get store 00309 */ 00310 00311 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 00312 pcre_get_substring_list(const char *subject, int *ovector, int stringcount, 00313 const char ***listptr) 00314 { 00315 int i; 00316 int size = sizeof(char *); 00317 int double_count = stringcount * 2; 00318 char **stringlist; 00319 char *p; 00320 00321 for (i = 0; i < double_count; i += 2) 00322 size += sizeof(char *) + ovector[i+1] - ovector[i] + 1; 00323 00324 stringlist = (char **)(pcre_malloc)(size); 00325 if (stringlist == NULL) return PCRE_ERROR_NOMEMORY; 00326 00327 *listptr = (const char **)stringlist; 00328 p = (char *)(stringlist + stringcount + 1); 00329 00330 for (i = 0; i < double_count; i += 2) 00331 { 00332 int len = ovector[i+1] - ovector[i]; 00333 memcpy(p, subject + ovector[i], len); 00334 *stringlist++ = p; 00335 p += len; 00336 *p++ = 0; 00337 } 00338 00339 *stringlist = NULL; 00340 return 0; 00341 } 00342 00343 00344 00345 /************************************************* 00346 * Free store obtained by get_substring_list * 00347 *************************************************/ 00348 00349 /* This function exists for the benefit of people calling PCRE from non-C 00350 programs that can call its functions, but not free() or (pcre_free)() directly. 00351 00352 Argument: the result of a previous pcre_get_substring_list() 00353 Returns: nothing 00354 */ 00355 00356 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION 00357 pcre_free_substring_list(const char **pointer) 00358 { 00359 (pcre_free)((void *)pointer); 00360 } 00361 00362 00363 00364 /************************************************* 00365 * Copy captured string to new store * 00366 *************************************************/ 00367 00368 /* This function copies a single captured substring into a piece of new 00369 store 00370 00371 Arguments: 00372 subject the subject string that was matched 00373 ovector pointer to the offsets table 00374 stringcount the number of substrings that were captured 00375 (i.e. the yield of the pcre_exec call, unless 00376 that was zero, in which case it should be 1/3 00377 of the offset table size) 00378 stringnumber the number of the required substring 00379 stringptr where to put a pointer to the substring 00380 00381 Returns: if successful: 00382 the length of the string, not including the zero that 00383 is put on the end; can be zero 00384 if not successful: 00385 PCRE_ERROR_NOMEMORY (-6) failed to get store 00386 PCRE_ERROR_NOSUBSTRING (-7) substring not present 00387 */ 00388 00389 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 00390 pcre_get_substring(const char *subject, int *ovector, int stringcount, 00391 int stringnumber, const char **stringptr) 00392 { 00393 int yield; 00394 char *substring; 00395 if (stringnumber < 0 || stringnumber >= stringcount) 00396 return PCRE_ERROR_NOSUBSTRING; 00397 stringnumber *= 2; 00398 yield = ovector[stringnumber+1] - ovector[stringnumber]; 00399 substring = (char *)(pcre_malloc)(yield + 1); 00400 if (substring == NULL) return PCRE_ERROR_NOMEMORY; 00401 memcpy(substring, subject + ovector[stringnumber], yield); 00402 substring[yield] = 0; 00403 *stringptr = substring; 00404 return yield; 00405 } 00406 00407 00408 00409 /************************************************* 00410 * Copy named captured string to new store * 00411 *************************************************/ 00412 00413 /* This function copies a single captured substring, identified by name, into 00414 new store. If the regex permits duplicate names, the first substring that is 00415 set is chosen. 00416 00417 Arguments: 00418 code the compiled regex 00419 subject the subject string that was matched 00420 ovector pointer to the offsets table 00421 stringcount the number of substrings that were captured 00422 (i.e. the yield of the pcre_exec call, unless 00423 that was zero, in which case it should be 1/3 00424 of the offset table size) 00425 stringname the name of the required substring 00426 stringptr where to put the pointer 00427 00428 Returns: if successful: 00429 the length of the copied string, not including the zero 00430 that is put on the end; can be zero 00431 if not successful: 00432 PCRE_ERROR_NOMEMORY (-6) couldn't get memory 00433 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring 00434 */ 00435 00436 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 00437 pcre_get_named_substring(const pcre *code, const char *subject, int *ovector, 00438 int stringcount, const char *stringname, const char **stringptr) 00439 { 00440 int n = get_first_set(code, stringname, ovector); 00441 if (n <= 0) return n; 00442 return pcre_get_substring(subject, ovector, stringcount, n, stringptr); 00443 } 00444 00445 00446 00447 00448 /************************************************* 00449 * Free store obtained by get_substring * 00450 *************************************************/ 00451 00452 /* This function exists for the benefit of people calling PCRE from non-C 00453 programs that can call its functions, but not free() or (pcre_free)() directly. 00454 00455 Argument: the result of a previous pcre_get_substring() 00456 Returns: nothing 00457 */ 00458 00459 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION 00460 pcre_free_substring(const char *pointer) 00461 { 00462 (pcre_free)((void *)pointer); 00463 } 00464 00465 /* End of pcre_get.c */