00001 /************************************************* 00002 * Perl-Compatible Regular Expressions * 00003 *************************************************/ 00004 00005 /* PCRE is a library of functions to support regular expressions whose syntax 00006 and semantics are as close as possible to those of the Perl 5 language. 00007 00008 Written by Philip Hazel 00009 Copyright (c) 1997-2008 University of Cambridge 00010 00011 ----------------------------------------------------------------------------- 00012 Redistribution and use in source and binary forms, with or without 00013 modification, are permitted provided that the following conditions are met: 00014 00015 * Redistributions of source code must retain the above copyright notice, 00016 this list of conditions and the following disclaimer. 00017 00018 * Redistributions in binary form must reproduce the above copyright 00019 notice, this list of conditions and the following disclaimer in the 00020 documentation and/or other materials provided with the distribution. 00021 00022 * Neither the name of the University of Cambridge nor the names of its 00023 contributors may be used to endorse or promote products derived from 00024 this software without specific prior written permission. 00025 00026 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 00027 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00028 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00029 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 00030 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 00031 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 00032 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 00033 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 00034 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 00035 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00036 POSSIBILITY OF SUCH DAMAGE. 00037 ----------------------------------------------------------------------------- 00038 */ 00039 00040 00041 /* This module contains an internal function that is used to match an extended 00042 class (one that contains characters whose values are > 255). It is used by both 00043 pcre_exec() and pcre_def_exec(). */ 00044 00045 00046 #ifdef HAVE_CONFIG_H 00047 #include "config.h" 00048 #endif 00049 00050 #include "pcre_internal.h" 00051 00052 00053 /************************************************* 00054 * Match character against an XCLASS * 00055 *************************************************/ 00056 00057 /* This function is called to match a character against an extended class that 00058 might contain values > 255. 00059 00060 Arguments: 00061 c the character 00062 data points to the flag byte of the XCLASS data 00063 00064 Returns: TRUE if character matches, else FALSE 00065 */ 00066 00067 BOOL 00068 _pcre_xclass(int c, const uschar *data) 00069 { 00070 int t; 00071 BOOL negated = (*data & XCL_NOT) != 0; 00072 00073 /* Character values < 256 are matched against a bitmap, if one is present. If 00074 not, we still carry on, because there may be ranges that start below 256 in the 00075 additional data. */ 00076 00077 if (c < 256) 00078 { 00079 if ((*data & XCL_MAP) != 0 && (data[1 + c/8] & (1 << (c&7))) != 0) 00080 return !negated; /* char found */ 00081 } 00082 00083 /* First skip the bit map if present. Then match against the list of Unicode 00084 properties or large chars or ranges that end with a large char. We won't ever 00085 encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */ 00086 00087 if ((*data++ & XCL_MAP) != 0) data += 32; 00088 00089 while ((t = *data++) != XCL_END) 00090 { 00091 int x, y; 00092 if (t == XCL_SINGLE) 00093 { 00094 GETCHARINC(x, data); 00095 if (c == x) return !negated; 00096 } 00097 else if (t == XCL_RANGE) 00098 { 00099 GETCHARINC(x, data); 00100 GETCHARINC(y, data); 00101 if (c >= x && c <= y) return !negated; 00102 } 00103 00104 #ifdef SUPPORT_UCP 00105 else /* XCL_PROP & XCL_NOTPROP */ 00106 { 00107 const ucd_record * prop = GET_UCD(c); 00108 00109 switch(*data) 00110 { 00111 case PT_ANY: 00112 if (t == XCL_PROP) return !negated; 00113 break; 00114 00115 case PT_LAMP: 00116 if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt) == 00117 (t == XCL_PROP)) return !negated; 00118 break; 00119 00120 case PT_GC: 00121 if ((data[1] == _pcre_ucp_gentype[prop->chartype]) == (t == XCL_PROP)) return !negated; 00122 break; 00123 00124 case PT_PC: 00125 if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated; 00126 break; 00127 00128 case PT_SC: 00129 if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated; 00130 break; 00131 00132 /* This should never occur, but compilers may mutter if there is no 00133 default. */ 00134 00135 default: 00136 return FALSE; 00137 } 00138 00139 data += 2; 00140 } 00141 #endif /* SUPPORT_UCP */ 00142 } 00143 00144 return negated; /* char did not match */ 00145 } 00146 00147 /* End of pcre_xclass.c */