00001 /************************************************* 00002 * Perl-Compatible Regular Expressions * 00003 *************************************************/ 00004 00005 /* PCRE is a library of functions to support regular expressions whose syntax 00006 and semantics are as close as possible to those of the Perl 5 language. 00007 00008 Written by Philip Hazel 00009 Copyright (c) 1997-2008 University of Cambridge 00010 00011 ----------------------------------------------------------------------------- 00012 Redistribution and use in source and binary forms, with or without 00013 modification, are permitted provided that the following conditions are met: 00014 00015 * Redistributions of source code must retain the above copyright notice, 00016 this list of conditions and the following disclaimer. 00017 00018 * Redistributions in binary form must reproduce the above copyright 00019 notice, this list of conditions and the following disclaimer in the 00020 documentation and/or other materials provided with the distribution. 00021 00022 * Neither the name of the University of Cambridge nor the names of its 00023 contributors may be used to endorse or promote products derived from 00024 this software without specific prior written permission. 00025 00026 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 00027 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00028 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00029 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 00030 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 00031 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 00032 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 00033 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 00034 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 00035 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00036 POSSIBILITY OF SUCH DAMAGE. 00037 ----------------------------------------------------------------------------- 00038 */ 00039 00040 00041 /* This module contains the external function pcre_maketables(), which builds 00042 character tables for PCRE in the current locale. The file is compiled on its 00043 own as part of the PCRE library. However, it is also included in the 00044 compilation of dftables.c, in which case the macro DFTABLES is defined. */ 00045 00046 00047 #ifndef DFTABLES 00048 # ifdef HAVE_CONFIG_H 00049 # include "config.h" 00050 # endif 00051 # include "pcre_internal.h" 00052 #endif 00053 00054 00055 /************************************************* 00056 * Create PCRE character tables * 00057 *************************************************/ 00058 00059 /* This function builds a set of character tables for use by PCRE and returns 00060 a pointer to them. They are build using the ctype functions, and consequently 00061 their contents will depend upon the current locale setting. When compiled as 00062 part of the library, the store is obtained via pcre_malloc(), but when compiled 00063 inside dftables, use malloc(). 00064 00065 Arguments: none 00066 Returns: pointer to the contiguous block of data 00067 */ 00068 00069 const unsigned char * 00070 pcre_maketables(void) 00071 { 00072 unsigned char *yield, *p; 00073 int i; 00074 00075 #ifndef DFTABLES 00076 yield = (unsigned char*)(pcre_malloc)(tables_length); 00077 #else 00078 yield = (unsigned char*)malloc(tables_length); 00079 #endif 00080 00081 if (yield == NULL) return NULL; 00082 p = yield; 00083 00084 /* First comes the lower casing table */ 00085 00086 for (i = 0; i < 256; i++) *p++ = tolower(i); 00087 00088 /* Next the case-flipping table */ 00089 00090 for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i); 00091 00092 /* Then the character class tables. Don't try to be clever and save effort on 00093 exclusive ones - in some locales things may be different. Note that the table 00094 for "space" includes everything "isspace" gives, including VT in the default 00095 locale. This makes it work for the POSIX class [:space:]. Note also that it is 00096 possible for a character to be alnum or alpha without being lower or upper, 00097 such as "male and female ordinals" (\xAA and \xBA) in the fr_FR locale (at 00098 least under Debian Linux's locales as of 12/2005). So we must test for alnum 00099 specially. */ 00100 00101 memset(p, 0, cbit_length); 00102 for (i = 0; i < 256; i++) 00103 { 00104 if (isdigit(i)) p[cbit_digit + i/8] |= 1 << (i&7); 00105 if (isupper(i)) p[cbit_upper + i/8] |= 1 << (i&7); 00106 if (islower(i)) p[cbit_lower + i/8] |= 1 << (i&7); 00107 if (isalnum(i)) p[cbit_word + i/8] |= 1 << (i&7); 00108 if (i == '_') p[cbit_word + i/8] |= 1 << (i&7); 00109 if (isspace(i)) p[cbit_space + i/8] |= 1 << (i&7); 00110 if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7); 00111 if (isgraph(i)) p[cbit_graph + i/8] |= 1 << (i&7); 00112 if (isprint(i)) p[cbit_print + i/8] |= 1 << (i&7); 00113 if (ispunct(i)) p[cbit_punct + i/8] |= 1 << (i&7); 00114 if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1 << (i&7); 00115 } 00116 p += cbit_length; 00117 00118 /* Finally, the character type table. In this, we exclude VT from the white 00119 space chars, because Perl doesn't recognize it as such for \s and for comments 00120 within regexes. */ 00121 00122 for (i = 0; i < 256; i++) 00123 { 00124 int x = 0; 00125 if (i != 0x0b && isspace(i)) x += ctype_space; 00126 if (isalpha(i)) x += ctype_letter; 00127 if (isdigit(i)) x += ctype_digit; 00128 if (isxdigit(i)) x += ctype_xdigit; 00129 if (isalnum(i) || i == '_') x += ctype_word; 00130 00131 /* Note: strchr includes the terminating zero in the characters it considers. 00132 In this instance, that is ok because we want binary zero to be flagged as a 00133 meta-character, which in this sense is any character that terminates a run 00134 of data characters. */ 00135 00136 if (strchr("\\*+?{^.$|()[", i) != 0) x += ctype_meta; 00137 *p++ = x; 00138 } 00139 00140 return yield; 00141 } 00142 00143 /* End of pcre_maketables.c */