pcre_get.c

Go to the documentation of this file.
00001 /*************************************************
00002 *      Perl-Compatible Regular Expressions       *
00003 *************************************************/
00004 
00005 /* PCRE is a library of functions to support regular expressions whose syntax
00006 and semantics are as close as possible to those of the Perl 5 language.
00007 
00008                        Written by Philip Hazel
00009            Copyright (c) 1997-2008 University of Cambridge
00010 
00011 -----------------------------------------------------------------------------
00012 Redistribution and use in source and binary forms, with or without
00013 modification, are permitted provided that the following conditions are met:
00014 
00015     * Redistributions of source code must retain the above copyright notice,
00016       this list of conditions and the following disclaimer.
00017 
00018     * Redistributions in binary form must reproduce the above copyright
00019       notice, this list of conditions and the following disclaimer in the
00020       documentation and/or other materials provided with the distribution.
00021 
00022     * Neither the name of the University of Cambridge nor the names of its
00023       contributors may be used to endorse or promote products derived from
00024       this software without specific prior written permission.
00025 
00026 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00027 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00029 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
00030 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00031 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00032 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00033 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00034 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00035 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00036 POSSIBILITY OF SUCH DAMAGE.
00037 -----------------------------------------------------------------------------
00038 */
00039 
00040 
00041 /* This module contains some convenience functions for extracting substrings
00042 from the subject string after a regex match has succeeded. The original idea
00043 for these functions came from Scott Wimer. */
00044 
00045 
00046 #ifdef HAVE_CONFIG_H
00047 #include "config.h"
00048 #endif
00049 
00050 #include "pcre_internal.h"
00051 
00052 
00053 /*************************************************
00054 *           Find number for named string         *
00055 *************************************************/
00056 
00057 /* This function is used by the get_first_set() function below, as well
00058 as being generally available. It assumes that names are unique.
00059 
00060 Arguments:
00061   code        the compiled regex
00062   stringname  the name whose number is required
00063 
00064 Returns:      the number of the named parentheses, or a negative number
00065                 (PCRE_ERROR_NOSUBSTRING) if not found
00066 */
00067 
00068 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
00069 pcre_get_stringnumber(const pcre *code, const char *stringname)
00070 {
00071 int rc;
00072 int entrysize;
00073 int top, bot;
00074 uschar *nametable;
00075 
00076 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
00077   return rc;
00078 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
00079 
00080 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
00081   return rc;
00082 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
00083   return rc;
00084 
00085 bot = 0;
00086 while (top > bot)
00087   {
00088   int mid = (top + bot) / 2;
00089   uschar *entry = nametable + entrysize*mid;
00090   int c = strcmp(stringname, (char *)(entry + 2));
00091   if (c == 0) return (entry[0] << 8) + entry[1];
00092   if (c > 0) bot = mid + 1; else top = mid;
00093   }
00094 
00095 return PCRE_ERROR_NOSUBSTRING;
00096 }
00097 
00098 
00099 
00100 /*************************************************
00101 *     Find (multiple) entries for named string   *
00102 *************************************************/
00103 
00104 /* This is used by the get_first_set() function below, as well as being
00105 generally available. It is used when duplicated names are permitted.
00106 
00107 Arguments:
00108   code        the compiled regex
00109   stringname  the name whose entries required
00110   firstptr    where to put the pointer to the first entry
00111   lastptr     where to put the pointer to the last entry
00112 
00113 Returns:      the length of each entry, or a negative number
00114                 (PCRE_ERROR_NOSUBSTRING) if not found
00115 */
00116 
00117 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
00118 pcre_get_stringtable_entries(const pcre *code, const char *stringname,
00119   char **firstptr, char **lastptr)
00120 {
00121 int rc;
00122 int entrysize;
00123 int top, bot;
00124 uschar *nametable, *lastentry;
00125 
00126 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
00127   return rc;
00128 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
00129 
00130 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
00131   return rc;
00132 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
00133   return rc;
00134 
00135 lastentry = nametable + entrysize * (top - 1);
00136 bot = 0;
00137 while (top > bot)
00138   {
00139   int mid = (top + bot) / 2;
00140   uschar *entry = nametable + entrysize*mid;
00141   int c = strcmp(stringname, (char *)(entry + 2));
00142   if (c == 0)
00143     {
00144     uschar *first = entry;
00145     uschar *last = entry;
00146     while (first > nametable)
00147       {
00148       if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
00149       first -= entrysize;
00150       }
00151     while (last < lastentry)
00152       {
00153       if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
00154       last += entrysize;
00155       }
00156     *firstptr = (char *)first;
00157     *lastptr = (char *)last;
00158     return entrysize;
00159     }
00160   if (c > 0) bot = mid + 1; else top = mid;
00161   }
00162 
00163 return PCRE_ERROR_NOSUBSTRING;
00164 }
00165 
00166 
00167 
00168 /*************************************************
00169 *    Find first set of multiple named strings    *
00170 *************************************************/
00171 
00172 /* This function allows for duplicate names in the table of named substrings.
00173 It returns the number of the first one that was set in a pattern match.
00174 
00175 Arguments:
00176   code         the compiled regex
00177   stringname   the name of the capturing substring
00178   ovector      the vector of matched substrings
00179 
00180 Returns:       the number of the first that is set,
00181                or the number of the last one if none are set,
00182                or a negative number on error
00183 */
00184 
00185 static int
00186 get_first_set(const pcre *code, const char *stringname, int *ovector)
00187 {
00188 const real_pcre *re = (const real_pcre *)code;
00189 int entrysize;
00190 char *first, *last;
00191 uschar *entry;
00192 if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
00193   return pcre_get_stringnumber(code, stringname);
00194 entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
00195 if (entrysize <= 0) return entrysize;
00196 for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
00197   {
00198   int n = (entry[0] << 8) + entry[1];
00199   if (ovector[n*2] >= 0) return n;
00200   }
00201 return (first[0] << 8) + first[1];
00202 }
00203 
00204 
00205 
00206 
00207 /*************************************************
00208 *      Copy captured string to given buffer      *
00209 *************************************************/
00210 
00211 /* This function copies a single captured substring into a given buffer.
00212 Note that we use memcpy() rather than strncpy() in case there are binary zeros
00213 in the string.
00214 
00215 Arguments:
00216   subject        the subject string that was matched
00217   ovector        pointer to the offsets table
00218   stringcount    the number of substrings that were captured
00219                    (i.e. the yield of the pcre_exec call, unless
00220                    that was zero, in which case it should be 1/3
00221                    of the offset table size)
00222   stringnumber   the number of the required substring
00223   buffer         where to put the substring
00224   size           the size of the buffer
00225 
00226 Returns:         if successful:
00227                    the length of the copied string, not including the zero
00228                    that is put on the end; can be zero
00229                  if not successful:
00230                    PCRE_ERROR_NOMEMORY (-6) buffer too small
00231                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
00232 */
00233 
00234 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
00235 pcre_copy_substring(const char *subject, int *ovector, int stringcount,
00236   int stringnumber, char *buffer, int size)
00237 {
00238 int yield;
00239 if (stringnumber < 0 || stringnumber >= stringcount)
00240   return PCRE_ERROR_NOSUBSTRING;
00241 stringnumber *= 2;
00242 yield = ovector[stringnumber+1] - ovector[stringnumber];
00243 if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
00244 memcpy(buffer, subject + ovector[stringnumber], yield);
00245 buffer[yield] = 0;
00246 return yield;
00247 }
00248 
00249 
00250 
00251 /*************************************************
00252 *   Copy named captured string to given buffer   *
00253 *************************************************/
00254 
00255 /* This function copies a single captured substring into a given buffer,
00256 identifying it by name. If the regex permits duplicate names, the first
00257 substring that is set is chosen.
00258 
00259 Arguments:
00260   code           the compiled regex
00261   subject        the subject string that was matched
00262   ovector        pointer to the offsets table
00263   stringcount    the number of substrings that were captured
00264                    (i.e. the yield of the pcre_exec call, unless
00265                    that was zero, in which case it should be 1/3
00266                    of the offset table size)
00267   stringname     the name of the required substring
00268   buffer         where to put the substring
00269   size           the size of the buffer
00270 
00271 Returns:         if successful:
00272                    the length of the copied string, not including the zero
00273                    that is put on the end; can be zero
00274                  if not successful:
00275                    PCRE_ERROR_NOMEMORY (-6) buffer too small
00276                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
00277 */
00278 
00279 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
00280 pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
00281   int stringcount, const char *stringname, char *buffer, int size)
00282 {
00283 int n = get_first_set(code, stringname, ovector);
00284 if (n <= 0) return n;
00285 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
00286 }
00287 
00288 
00289 
00290 /*************************************************
00291 *      Copy all captured strings to new store    *
00292 *************************************************/
00293 
00294 /* This function gets one chunk of store and builds a list of pointers and all
00295 of the captured substrings in it. A NULL pointer is put on the end of the list.
00296 
00297 Arguments:
00298   subject        the subject string that was matched
00299   ovector        pointer to the offsets table
00300   stringcount    the number of substrings that were captured
00301                    (i.e. the yield of the pcre_exec call, unless
00302                    that was zero, in which case it should be 1/3
00303                    of the offset table size)
00304   listptr        set to point to the list of pointers
00305 
00306 Returns:         if successful: 0
00307                  if not successful:
00308                    PCRE_ERROR_NOMEMORY (-6) failed to get store
00309 */
00310 
00311 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
00312 pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
00313   const char ***listptr)
00314 {
00315 int i;
00316 int size = sizeof(char *);
00317 int double_count = stringcount * 2;
00318 char **stringlist;
00319 char *p;
00320 
00321 for (i = 0; i < double_count; i += 2)
00322   size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
00323 
00324 stringlist = (char **)(pcre_malloc)(size);
00325 if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
00326 
00327 *listptr = (const char **)stringlist;
00328 p = (char *)(stringlist + stringcount + 1);
00329 
00330 for (i = 0; i < double_count; i += 2)
00331   {
00332   int len = ovector[i+1] - ovector[i];
00333   memcpy(p, subject + ovector[i], len);
00334   *stringlist++ = p;
00335   p += len;
00336   *p++ = 0;
00337   }
00338 
00339 *stringlist = NULL;
00340 return 0;
00341 }
00342 
00343 
00344 
00345 /*************************************************
00346 *   Free store obtained by get_substring_list    *
00347 *************************************************/
00348 
00349 /* This function exists for the benefit of people calling PCRE from non-C
00350 programs that can call its functions, but not free() or (pcre_free)() directly.
00351 
00352 Argument:   the result of a previous pcre_get_substring_list()
00353 Returns:    nothing
00354 */
00355 
00356 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
00357 pcre_free_substring_list(const char **pointer)
00358 {
00359 (pcre_free)((void *)pointer);
00360 }
00361 
00362 
00363 
00364 /*************************************************
00365 *      Copy captured string to new store         *
00366 *************************************************/
00367 
00368 /* This function copies a single captured substring into a piece of new
00369 store
00370 
00371 Arguments:
00372   subject        the subject string that was matched
00373   ovector        pointer to the offsets table
00374   stringcount    the number of substrings that were captured
00375                    (i.e. the yield of the pcre_exec call, unless
00376                    that was zero, in which case it should be 1/3
00377                    of the offset table size)
00378   stringnumber   the number of the required substring
00379   stringptr      where to put a pointer to the substring
00380 
00381 Returns:         if successful:
00382                    the length of the string, not including the zero that
00383                    is put on the end; can be zero
00384                  if not successful:
00385                    PCRE_ERROR_NOMEMORY (-6) failed to get store
00386                    PCRE_ERROR_NOSUBSTRING (-7) substring not present
00387 */
00388 
00389 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
00390 pcre_get_substring(const char *subject, int *ovector, int stringcount,
00391   int stringnumber, const char **stringptr)
00392 {
00393 int yield;
00394 char *substring;
00395 if (stringnumber < 0 || stringnumber >= stringcount)
00396   return PCRE_ERROR_NOSUBSTRING;
00397 stringnumber *= 2;
00398 yield = ovector[stringnumber+1] - ovector[stringnumber];
00399 substring = (char *)(pcre_malloc)(yield + 1);
00400 if (substring == NULL) return PCRE_ERROR_NOMEMORY;
00401 memcpy(substring, subject + ovector[stringnumber], yield);
00402 substring[yield] = 0;
00403 *stringptr = substring;
00404 return yield;
00405 }
00406 
00407 
00408 
00409 /*************************************************
00410 *   Copy named captured string to new store      *
00411 *************************************************/
00412 
00413 /* This function copies a single captured substring, identified by name, into
00414 new store. If the regex permits duplicate names, the first substring that is
00415 set is chosen.
00416 
00417 Arguments:
00418   code           the compiled regex
00419   subject        the subject string that was matched
00420   ovector        pointer to the offsets table
00421   stringcount    the number of substrings that were captured
00422                    (i.e. the yield of the pcre_exec call, unless
00423                    that was zero, in which case it should be 1/3
00424                    of the offset table size)
00425   stringname     the name of the required substring
00426   stringptr      where to put the pointer
00427 
00428 Returns:         if successful:
00429                    the length of the copied string, not including the zero
00430                    that is put on the end; can be zero
00431                  if not successful:
00432                    PCRE_ERROR_NOMEMORY (-6) couldn't get memory
00433                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
00434 */
00435 
00436 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
00437 pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
00438   int stringcount, const char *stringname, const char **stringptr)
00439 {
00440 int n = get_first_set(code, stringname, ovector);
00441 if (n <= 0) return n;
00442 return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
00443 }
00444 
00445 
00446 
00447 
00448 /*************************************************
00449 *       Free store obtained by get_substring     *
00450 *************************************************/
00451 
00452 /* This function exists for the benefit of people calling PCRE from non-C
00453 programs that can call its functions, but not free() or (pcre_free)() directly.
00454 
00455 Argument:   the result of a previous pcre_get_substring()
00456 Returns:    nothing
00457 */
00458 
00459 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
00460 pcre_free_substring(const char *pointer)
00461 {
00462 (pcre_free)((void *)pointer);
00463 }
00464 
00465 /* End of pcre_get.c */

Generated on Tue Jul 5 14:11:57 2011 for ROOT_528-00b_version by  doxygen 1.5.1