00001 /************************************************* 00002 * PCRE DEMONSTRATION PROGRAM * 00003 *************************************************/ 00004 00005 /* This is a demonstration program to illustrate the most straightforward ways 00006 of calling the PCRE regular expression library from a C program. See the 00007 pcresample documentation for a short discussion ("man pcresample" if you have 00008 the PCRE man pages installed). 00009 00010 In Unix-like environments, compile this program thuswise: 00011 00012 gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \ 00013 -R/usr/local/lib -lpcre 00014 00015 Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and 00016 library files for PCRE are installed on your system. You don't need -I and -L 00017 if PCRE is installed in the standard system libraries. Only some operating 00018 systems (e.g. Solaris) use the -R option. 00019 00020 Building under Windows: 00021 00022 If you want to statically link this program against a non-dll .a file, you must 00023 define PCRE_STATIC before including pcre.h, otherwise the pcre_malloc() and 00024 pcre_free() exported functions will be declared __declspec(dllimport), with 00025 unwanted results. So in this environment, uncomment the following line. */ 00026 00027 /* #define PCRE_STATIC */ 00028 00029 #include <stdio.h> 00030 #include <string.h> 00031 #include <pcre.h> 00032 00033 #define OVECCOUNT 30 /* should be a multiple of 3 */ 00034 00035 00036 int main(int argc, char **argv) 00037 { 00038 pcre *re; 00039 const char *error; 00040 char *pattern; 00041 char *subject; 00042 unsigned char *name_table; 00043 int erroffset; 00044 int find_all; 00045 int namecount; 00046 int name_entry_size; 00047 int ovector[OVECCOUNT]; 00048 int subject_length; 00049 int rc, i; 00050 00051 00052 /************************************************************************** 00053 * First, sort out the command line. There is only one possible option at * 00054 * the moment, "-g" to request repeated matching to find all occurrences, * 00055 * like Perl's /g option. We set the variable find_all to a non-zero value * 00056 * if the -g option is present. Apart from that, there must be exactly two * 00057 * arguments. * 00058 **************************************************************************/ 00059 00060 find_all = 0; 00061 for (i = 1; i < argc; i++) 00062 { 00063 if (strcmp(argv[i], "-g") == 0) find_all = 1; 00064 else break; 00065 } 00066 00067 /* After the options, we require exactly two arguments, which are the pattern, 00068 and the subject string. */ 00069 00070 if (argc - i != 2) 00071 { 00072 printf("Two arguments required: a regex and a subject string\n"); 00073 return 1; 00074 } 00075 00076 pattern = argv[i]; 00077 subject = argv[i+1]; 00078 subject_length = (int)strlen(subject); 00079 00080 00081 /************************************************************************* 00082 * Now we are going to compile the regular expression pattern, and handle * 00083 * and errors that are detected. * 00084 *************************************************************************/ 00085 00086 re = pcre_compile( 00087 pattern, /* the pattern */ 00088 0, /* default options */ 00089 &error, /* for error message */ 00090 &erroffset, /* for error offset */ 00091 NULL); /* use default character tables */ 00092 00093 /* Compilation failed: print the error message and exit */ 00094 00095 if (re == NULL) 00096 { 00097 printf("PCRE compilation failed at offset %d: %s\n", erroffset, error); 00098 return 1; 00099 } 00100 00101 00102 /************************************************************************* 00103 * If the compilation succeeded, we call PCRE again, in order to do a * 00104 * pattern match against the subject string. This does just ONE match. If * 00105 * further matching is needed, it will be done below. * 00106 *************************************************************************/ 00107 00108 rc = pcre_exec( 00109 re, /* the compiled pattern */ 00110 NULL, /* no extra data - we didn't study the pattern */ 00111 subject, /* the subject string */ 00112 subject_length, /* the length of the subject */ 00113 0, /* start at offset 0 in the subject */ 00114 0, /* default options */ 00115 ovector, /* output vector for substring information */ 00116 OVECCOUNT); /* number of elements in the output vector */ 00117 00118 /* Matching failed: handle error cases */ 00119 00120 if (rc < 0) 00121 { 00122 switch(rc) 00123 { 00124 case PCRE_ERROR_NOMATCH: printf("No match\n"); break; 00125 /* 00126 Handle other special cases if you like 00127 */ 00128 default: printf("Matching error %d\n", rc); break; 00129 } 00130 pcre_free(re); /* Release memory used for the compiled pattern */ 00131 return 1; 00132 } 00133 00134 /* Match succeded */ 00135 00136 printf("\nMatch succeeded at offset %d\n", ovector[0]); 00137 00138 00139 /************************************************************************* 00140 * We have found the first match within the subject string. If the output * 00141 * vector wasn't big enough, say so. Then output any substrings that were * 00142 * captured. * 00143 *************************************************************************/ 00144 00145 /* The output vector wasn't big enough */ 00146 00147 if (rc == 0) 00148 { 00149 rc = OVECCOUNT/3; 00150 printf("ovector only has room for %d captured substrings\n", rc - 1); 00151 } 00152 00153 /* Show substrings stored in the output vector by number. Obviously, in a real 00154 application you might want to do things other than print them. */ 00155 00156 for (i = 0; i < rc; i++) 00157 { 00158 char *substring_start = subject + ovector[2*i]; 00159 int substring_length = ovector[2*i+1] - ovector[2*i]; 00160 printf("%2d: %.*s\n", i, substring_length, substring_start); 00161 } 00162 00163 00164 /************************************************************************** 00165 * That concludes the basic part of this demonstration program. We have * 00166 * compiled a pattern, and performed a single match. The code that follows * 00167 * shows first how to access named substrings, and then how to code for * 00168 * repeated matches on the same subject. * 00169 **************************************************************************/ 00170 00171 /* See if there are any named substrings, and if so, show them by name. First 00172 we have to extract the count of named parentheses from the pattern. */ 00173 00174 (void)pcre_fullinfo( 00175 re, /* the compiled pattern */ 00176 NULL, /* no extra data - we didn't study the pattern */ 00177 PCRE_INFO_NAMECOUNT, /* number of named substrings */ 00178 &namecount); /* where to put the answer */ 00179 00180 if (namecount <= 0) printf("No named substrings\n"); else 00181 { 00182 unsigned char *tabptr; 00183 printf("Named substrings\n"); 00184 00185 /* Before we can access the substrings, we must extract the table for 00186 translating names to numbers, and the size of each entry in the table. */ 00187 00188 (void)pcre_fullinfo( 00189 re, /* the compiled pattern */ 00190 NULL, /* no extra data - we didn't study the pattern */ 00191 PCRE_INFO_NAMETABLE, /* address of the table */ 00192 &name_table); /* where to put the answer */ 00193 00194 (void)pcre_fullinfo( 00195 re, /* the compiled pattern */ 00196 NULL, /* no extra data - we didn't study the pattern */ 00197 PCRE_INFO_NAMEENTRYSIZE, /* size of each entry in the table */ 00198 &name_entry_size); /* where to put the answer */ 00199 00200 /* Now we can scan the table and, for each entry, print the number, the name, 00201 and the substring itself. */ 00202 00203 tabptr = name_table; 00204 for (i = 0; i < namecount; i++) 00205 { 00206 int n = (tabptr[0] << 8) | tabptr[1]; 00207 printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2, 00208 ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]); 00209 tabptr += name_entry_size; 00210 } 00211 } 00212 00213 00214 /************************************************************************* 00215 * If the "-g" option was given on the command line, we want to continue * 00216 * to search for additional matches in the subject string, in a similar * 00217 * way to the /g option in Perl. This turns out to be trickier than you * 00218 * might think because of the possibility of matching an empty string. * 00219 * What happens is as follows: * 00220 * * 00221 * If the previous match was NOT for an empty string, we can just start * 00222 * the next match at the end of the previous one. * 00223 * * 00224 * If the previous match WAS for an empty string, we can't do that, as it * 00225 * would lead to an infinite loop. Instead, a special call of pcre_exec() * 00226 * is made with the PCRE_NOTEMPTY and PCRE_ANCHORED flags set. The first * 00227 * of these tells PCRE that an empty string is not a valid match; other * 00228 * possibilities must be tried. The second flag restricts PCRE to one * 00229 * match attempt at the initial string position. If this match succeeds, * 00230 * an alternative to the empty string match has been found, and we can * 00231 * proceed round the loop. * 00232 *************************************************************************/ 00233 00234 if (!find_all) 00235 { 00236 pcre_free(re); /* Release the memory used for the compiled pattern */ 00237 return 0; /* Finish unless -g was given */ 00238 } 00239 00240 /* Loop for second and subsequent matches */ 00241 00242 for (;;) 00243 { 00244 int options = 0; /* Normally no options */ 00245 int start_offset = ovector[1]; /* Start at end of previous match */ 00246 00247 /* If the previous match was for an empty string, we are finished if we are 00248 at the end of the subject. Otherwise, arrange to run another match at the 00249 same point to see if a non-empty match can be found. */ 00250 00251 if (ovector[0] == ovector[1]) 00252 { 00253 if (ovector[0] == subject_length) break; 00254 options = PCRE_NOTEMPTY | PCRE_ANCHORED; 00255 } 00256 00257 /* Run the next matching operation */ 00258 00259 rc = pcre_exec( 00260 re, /* the compiled pattern */ 00261 NULL, /* no extra data - we didn't study the pattern */ 00262 subject, /* the subject string */ 00263 subject_length, /* the length of the subject */ 00264 start_offset, /* starting offset in the subject */ 00265 options, /* options */ 00266 ovector, /* output vector for substring information */ 00267 OVECCOUNT); /* number of elements in the output vector */ 00268 00269 /* This time, a result of NOMATCH isn't an error. If the value in "options" 00270 is zero, it just means we have found all possible matches, so the loop ends. 00271 Otherwise, it means we have failed to find a non-empty-string match at a 00272 point where there was a previous empty-string match. In this case, we do what 00273 Perl does: advance the matching position by one, and continue. We do this by 00274 setting the "end of previous match" offset, because that is picked up at the 00275 top of the loop as the point at which to start again. */ 00276 00277 if (rc == PCRE_ERROR_NOMATCH) 00278 { 00279 if (options == 0) break; 00280 ovector[1] = start_offset + 1; 00281 continue; /* Go round the loop again */ 00282 } 00283 00284 /* Other matching errors are not recoverable. */ 00285 00286 if (rc < 0) 00287 { 00288 printf("Matching error %d\n", rc); 00289 pcre_free(re); /* Release memory used for the compiled pattern */ 00290 return 1; 00291 } 00292 00293 /* Match succeded */ 00294 00295 printf("\nMatch succeeded again at offset %d\n", ovector[0]); 00296 00297 /* The match succeeded, but the output vector wasn't big enough. */ 00298 00299 if (rc == 0) 00300 { 00301 rc = OVECCOUNT/3; 00302 printf("ovector only has room for %d captured substrings\n", rc - 1); 00303 } 00304 00305 /* As before, show substrings stored in the output vector by number, and then 00306 also any named substrings. */ 00307 00308 for (i = 0; i < rc; i++) 00309 { 00310 char *substring_start = subject + ovector[2*i]; 00311 int substring_length = ovector[2*i+1] - ovector[2*i]; 00312 printf("%2d: %.*s\n", i, substring_length, substring_start); 00313 } 00314 00315 if (namecount <= 0) printf("No named substrings\n"); else 00316 { 00317 unsigned char *tabptr = name_table; 00318 printf("Named substrings\n"); 00319 for (i = 0; i < namecount; i++) 00320 { 00321 int n = (tabptr[0] << 8) | tabptr[1]; 00322 printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2, 00323 ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]); 00324 tabptr += name_entry_size; 00325 } 00326 } 00327 } /* End of loop to find second and subsequent matches */ 00328 00329 printf("\n"); 00330 pcre_free(re); /* Release memory used for the compiled pattern */ 00331 return 0; 00332 } 00333 00334 /* End of pcredemo.c */