pcrecpp_unittest.cc

Go to the documentation of this file.
00001 // -*- coding: utf-8 -*-
00002 //
00003 // Copyright (c) 2005 - 2006, Google Inc.
00004 // All rights reserved.
00005 //
00006 // Redistribution and use in source and binary forms, with or without
00007 // modification, are permitted provided that the following conditions are
00008 // met:
00009 //
00010 //     * Redistributions of source code must retain the above copyright
00011 // notice, this list of conditions and the following disclaimer.
00012 //     * Redistributions in binary form must reproduce the above
00013 // copyright notice, this list of conditions and the following disclaimer
00014 // in the documentation and/or other materials provided with the
00015 // distribution.
00016 //     * Neither the name of Google Inc. nor the names of its
00017 // contributors may be used to endorse or promote products derived from
00018 // this software without specific prior written permission.
00019 //
00020 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00021 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00022 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00023 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
00024 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00025 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00026 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00027 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00028 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00029 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
00030 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031 //
00032 // Author: Sanjay Ghemawat
00033 //
00034 // TODO: Test extractions for PartialMatch/Consume
00035 
00036 #ifdef HAVE_CONFIG_H
00037 #include "config.h"
00038 #endif
00039 
00040 #include <stdio.h>
00041 #include <cassert>
00042 #include <vector>
00043 #include "pcrecpp.h"
00044 
00045 using pcrecpp::StringPiece;
00046 using pcrecpp::RE;
00047 using pcrecpp::RE_Options;
00048 using pcrecpp::Hex;
00049 using pcrecpp::Octal;
00050 using pcrecpp::CRadix;
00051 
00052 static bool VERBOSE_TEST  = false;  // when true, the option tests also print what each case is checking
00053 
00054 // CHECK prints the file, line and failed expression to stderr and exits
00055 // with status 1 if condition is not true.  It is *not* controlled by
00056 // NDEBUG, so the condition is evaluated regardless of compilation mode.
00057 // Therefore, it is safe to do things like:  CHECK_EQ(fp->Write(x), 4)
00058 #define CHECK(condition) do {                           \
00059   if (!(condition)) {                                   \
00060     fprintf(stderr, "%s:%d: Check failed: %s\n",        \
00061             __FILE__, __LINE__, #condition);            \
00062     exit(1);                                            \
00063   }                                                     \
00064 } while (0)
00065 // CHECK_EQ parenthesizes each operand so arguments with low-precedence operators (e.g. x & 1) compare as written.
00066 #define CHECK_EQ(a, b)   CHECK((a) == (b))
00067 // Timing test 1: full-match one capture-free pattern against the same subject num_iters times.
00068 static void Timing1(int num_iters) {
00069   // The RE object is built once; only the match is repeated in the loop.
00070   RE matcher("ruby:\\d+");
00071   StringPiece subject("ruby:1234");
00072   for (int remaining = num_iters; remaining > 0; --remaining)
00073     CHECK(matcher.FullMatch(subject));
00074   // A failed match never gets here: CHECK exits the process.
00075 }
00076 // Timing test 2: like Timing1, but every match also extracts the digits into an int.
00077 static void Timing2(int num_iters) {
00078   // One RE object is reused; `parsed` is checked only after FullMatch succeeded.
00079   RE matcher("ruby:(\\d+)");
00080   int parsed;
00081   for (int remaining = num_iters; remaining > 0; --remaining) {
00082     CHECK(matcher.FullMatch("ruby:1234", &parsed));
00083     CHECK_EQ(parsed, 1234);
00084   }
00085 }
00086 // Timing test 3: Consume() a text of num_iters identical lines one match at a time and print the count.
00087 static void Timing3(int num_iters) {
00088   string text_string;
00089   for (int j = num_iters; j > 0; j--) {
00090     text_string += "this is another line\n";
00091   }
00092 
00093   RE line_matcher(".*\n");
00094   StringPiece text(text_string);
00095   int counter = 0;
00096   // Consume() gets &text; presumably it advances `text` past each match (the loop relies on that to end).
00097   while (line_matcher.Consume(&text)) {
00098     counter++;
00099   }
00100   printf("Matched %d lines\n", counter);
00101 }
00102 
00103 #if 0  // change this to "#if 1" if you have a way of defining VirtualProcessSize()
00104 // Leak check: builds 100000 distinct patterns and requires < 2% growth between iteration 50000 and the end.
00105 static void LeakTest() {
00106   // Sizes are subtracted as doubles so a shrinking process gives negative growth, not unsigned wrap-around.
00107   unsigned long long initial_size = 0;
00108   for (int i = 0; i < 100000; i++) {
00109     if (i == 50000) {
00110       initial_size = VirtualProcessSize();
00111       printf("Size after 50000: %llu\n", initial_size);
00112     }
00113     char buf[100];  // definitely big enough
00114     sprintf(buf, "pat%09d", i);
00115     RE newre(buf);
00116   }
00117   unsigned long long final_size = VirtualProcessSize();  // same type as initial_size, matches %llu
00118   printf("Size after 100000: %llu\n", final_size);
00119   const double growth = (double(final_size) - double(initial_size)) / double(final_size);
00120   printf("Growth: %0.2f%%", growth * 100);
00121   CHECK(growth < 0.02);       // Allow < 2% growth
00122 }
00123 
00124 #endif
00125 // Checks Hex(), Octal(), CRadix() and plain decimal extraction into each integer type.
00126 static void RadixTests() {
00127   printf("Testing hex\n");
00128   // CHECK_HEX(type, value): #value is the subject text and 0x##value the expected literal; CRadix gets the text with "0x" in front.
00129 #define CHECK_HEX(type, value) \
00130   do { \
00131     type v; \
00132     CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
00133     CHECK_EQ(v, 0x ## value); \
00134     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
00135     CHECK_EQ(v, 0x ## value); \
00136   } while(0)
00137 
00138   CHECK_HEX(short,              2bad);
00139   CHECK_HEX(unsigned short,     2badU);
00140   CHECK_HEX(int,                dead);
00141   CHECK_HEX(unsigned int,       deadU);
00142   CHECK_HEX(long,               7eadbeefL);
00143   CHECK_HEX(unsigned long,      deadbeefUL);
00144 #ifdef HAVE_LONG_LONG
00145   CHECK_HEX(long long,          12345678deadbeefLL);
00146 #endif
00147 #ifdef HAVE_UNSIGNED_LONG_LONG
00148   CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
00149 #endif
00150 
00151 #undef CHECK_HEX
00152 
00153   printf("Testing octal\n");
00154   // CHECK_OCTAL(type, value): same scheme, with 0##value as the expected literal and "0" in front of the CRadix subject.
00155 #define CHECK_OCTAL(type, value) \
00156   do { \
00157     type v; \
00158     CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
00159     CHECK_EQ(v, 0 ## value); \
00160     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
00161     CHECK_EQ(v, 0 ## value); \
00162   } while(0)
00163 
00164   CHECK_OCTAL(short,              77777);
00165   CHECK_OCTAL(unsigned short,     177777U);
00166   CHECK_OCTAL(int,                17777777777);
00167   CHECK_OCTAL(unsigned int,       37777777777U);
00168   CHECK_OCTAL(long,               17777777777L);
00169   CHECK_OCTAL(unsigned long,      37777777777UL);
00170 #ifdef HAVE_LONG_LONG
00171   CHECK_OCTAL(long long,          777777777777777777777LL);
00172 #endif
00173 #ifdef HAVE_UNSIGNED_LONG_LONG
00174   CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
00175 #endif
00176 
00177 #undef CHECK_OCTAL
00178 
00179   printf("Testing decimal\n");
00180   // CHECK_DECIMAL(type, value): extracts #value once through a plain pointer and once through CRadix; both must equal value.
00181 #define CHECK_DECIMAL(type, value) \
00182   do { \
00183     type v; \
00184     CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
00185     CHECK_EQ(v, value); \
00186     CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
00187     CHECK_EQ(v, value); \
00188   } while(0)
00189 
00190   CHECK_DECIMAL(short,              -1);
00191   CHECK_DECIMAL(unsigned short,     9999);
00192   CHECK_DECIMAL(int,                -1000);
00193   CHECK_DECIMAL(unsigned int,       12345U);
00194   CHECK_DECIMAL(long,               -10000000L);
00195   CHECK_DECIMAL(unsigned long,      3083324652U);
00196 #ifdef HAVE_LONG_LONG
00197   CHECK_DECIMAL(long long,          -100000000000000LL);
00198 #endif
00199 #ifdef HAVE_UNSIGNED_LONG_LONG
00200   CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
00201 #endif
00202 
00203 #undef CHECK_DECIMAL
00204 
00205 }
00206 // Table-driven test of RE::Replace (first match only) and RE::GlobalReplace (all matches and their count).
00207 static void TestReplace() {
00208   printf("Testing Replace\n");
00209   // Each row: pattern, rewrite string, input, expected Replace result, expected GlobalReplace result and count.
00210   struct ReplaceTest {
00211     const char *regexp;
00212     const char *rewrite;
00213     const char *original;
00214     const char *single;
00215     const char *global;
00216     int global_count;         // the expected return value from ReplaceAll
00217   };
00218   static const ReplaceTest tests[] = {
00219     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
00220       "\\2\\1ay",
00221       "the quick brown fox jumps over the lazy dogs.",
00222       "ethay quick brown fox jumps over the lazy dogs.",
00223       "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
00224       9 },
00225     { "\\w+",
00226       "\\0-NOSPAM",
00227       "paul.haahr@google.com",
00228       "paul-NOSPAM.haahr@google.com",
00229       "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
00230       4 },
00231     { "^",
00232       "(START)",
00233       "foo",
00234       "(START)foo",
00235       "(START)foo",
00236       1 },
00237     { "^",
00238       "(START)",
00239       "",
00240       "(START)",
00241       "(START)",
00242       1 },
00243     { "$",
00244       "(END)",
00245       "",
00246       "(END)",
00247       "(END)",
00248       1 },
00249     { "b",
00250       "bb",
00251       "ababababab",
00252       "abbabababab",
00253       "abbabbabbabbabb",
00254        5 },
00255     { "b",
00256       "bb",
00257       "bbbbbb",
00258       "bbbbbbb",
00259       "bbbbbbbbbbbb",
00260       6 },
00261     { "b+",
00262       "bb",
00263       "bbbbbb",
00264       "bb",
00265       "bb",
00266       1 },
00267     { "b*",
00268       "bb",
00269       "bbbbbb",
00270       "bb",
00271       "bb",
00272       1 },
00273     { "b*",
00274       "bb",
00275       "aaaaa",
00276       "bbaaaaa",
00277       "bbabbabbabbabbabb",
00278       6 },
00279     { "b*",
00280       "bb",
00281       "aa\naa\n",
00282       "bbaa\naa\n",
00283       "bbabbabb\nbbabbabb\nbb",
00284       7 },
00285     { "b*",
00286       "bb",
00287       "aa\raa\r",
00288       "bbaa\raa\r",
00289       "bbabbabb\rbbabbabb\rbb",
00290       7 },
00291     { "b*",
00292       "bb",
00293       "aa\r\naa\r\n",
00294       "bbaa\r\naa\r\n",
00295       "bbabbabb\r\nbbabbabb\r\nbb",
00296       7 },
00297 #ifdef SUPPORT_UTF8
00298     { "b*",
00299       "bb",
00300       "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",   // utf8
00301       "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
00302       "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
00303       5 },
00304     { "b*",
00305       "bb",
00306       "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",   // utf8
00307       "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
00308       ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
00309        "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
00310       9 },
00311 #endif
00312     { "", NULL, NULL, NULL, NULL, 0 }
00313   };
00314 
00315 #ifdef SUPPORT_UTF8
00316   const bool support_utf8 = true;
00317 #else
00318   const bool support_utf8 = false;
00319 #endif
00320   // The row whose `original` is NULL ends the table.  Every row is run with PCRE_NEWLINE_CRLF.
00321   for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
00322     RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
00323     CHECK(re.error().empty());  // CHECK rather than assert, so this also runs when NDEBUG is defined
00324     string one(t->original);
00325     CHECK(re.Replace(t->rewrite, &one));
00326     CHECK_EQ(one, t->single);
00327     string all(t->original);
00328     const int replace_count = re.GlobalReplace(t->rewrite, &all);
00329     CHECK_EQ(all, t->global);
00330     CHECK_EQ(replace_count, t->global_count);
00331   }
00332 
00333   // Final tests: \r\n replacement when we're not in CRLF mode (once with CR, once with LF)
00334   {
00335     RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
00336     CHECK(re.error().empty());
00337     string all("aa\r\naa\r\n");
00338     CHECK_EQ(re.GlobalReplace("bb", &all), 9);
00339     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
00340   }
00341   {
00342     RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
00343     CHECK(re.error().empty());
00344     string all("aa\r\naa\r\n");
00345     CHECK_EQ(re.GlobalReplace("bb", &all), 9);
00346     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
00347   }
00348   // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
00349   //       Alas, the answer depends on how pcre was compiled.
00350 }
00351 // Exercises RE::Extract with group references in the rewrite string, plus one non-matching pattern.
00352 static void TestExtract() {
00353   printf("Testing Extract\n");
00354   string out;
00355 
00356   // The rewrite uses \2 and \1 to swap the two captured parts.
00357   RE mail_re("(.*)@([^.]*)");
00358   CHECK(mail_re.Extract("\\2!\\1", "boris@kremvax.ru", &out));
00359   CHECK_EQ(out, "kremvax!boris");
00360   // \0 yields the whole match here; the failing Extract below must leave `out` unchanged.
00361   CHECK(RE(".*").Extract("'\\0'", "foo", &out));
00362   CHECK_EQ(out, "'foo'");
00363   CHECK(!RE("bar").Extract("'\\0'", "baz", &out));
00364   CHECK_EQ(out, "'foo'");
00365 }
00366 // Exercises RE::Consume: two words are taken from the input, then the third call is expected to fail.
00367 static void TestConsume() {
00368   printf("Testing Consume\n");
00369 
00370   string text("   aaa b!@#$@#$cccc");
00371   StringPiece rest(text);   // every Consume() call below is handed &rest
00372   string token;
00373 
00374   // A word, possibly preceded by whitespace.
00375   RE word_re("\\s*(\\w+)");
00376   CHECK(word_re.Consume(&rest, &token));
00377   CHECK_EQ(token, "aaa");
00378   CHECK(word_re.Consume(&rest, &token));
00379   CHECK_EQ(token, "b");
00380   CHECK(!word_re.Consume(&rest, &token));
00381 }
00382 // Exercises RE::FindAndConsume: three words are found in turn, then a fourth call must fail.
00383 static void TestFindAndConsume() {
00384   printf("Testing FindAndConsume\n");
00385 
00386   static const char* const kWords[] = { "aaa", "b", "cccc" };
00387   string text("   aaa b!@#$@#$cccc");
00388   StringPiece rest(text);
00389   string token;
00390   RE word_re("(\\w+)");   // a single word
00391 
00392   for (int k = 0; k < 3; ++k) {
00393     CHECK(word_re.FindAndConsume(&rest, &token));
00394     CHECK_EQ(token, string(kWords[k]));
00395   }
00396 
00397   // The fourth call is expected to find nothing.
00398   CHECK(!word_re.FindAndConsume(&rest, &token));
00399 }
00400 // Capture groups belonging to alternatives that did not match must come
00401 // back as empty strings, whichever alternative did match.
00402 static void TestMatchNumberPeculiarity() {
00403   printf("Testing match-number peculiaraity\n");
00404 
00405   static const char* const kAlternatives[] = { "foo", "bar", "baz" };
00406   string groups[3];
00407   RE any_of("(foo)|(bar)|(baz)");
00408 
00409   // Subject k is alternative k itself, so after the match only groups[k]
00410   // may be non-empty; the other two are expected to be "".
00411   for (int k = 0; k < 3; ++k) {
00412     CHECK(any_of.PartialMatch(kAlternatives[k],
00413                               &groups[0], &groups[1], &groups[2]));
00414     for (int g = 0; g < 3; ++g) {
00415       const string wanted(g == k ? kAlternatives[k] : "");
00416       CHECK_EQ(groups[g], wanted);
00417     }
00418   }
00419   // A subject that matches none of the alternatives.
00420   CHECK(!any_of.PartialMatch("f", &groups[0], &groups[1], &groups[2]));
00421 
00422   // A group that the matching branch skips extracts "".
00423   string skipped;
00424   CHECK(RE("(foo)|hello").FullMatch("hello", &skipped));
00425   CHECK_EQ(skipped, "");
00426 }
00427 // Exercises RE_Options::set_match_limit and set_match_limit_recursion with the pattern (\w+)*b.
00428 static void TestRecursion() {
00429   printf("Testing recursion\n");
00430 
00431   // Get one string that passes (sometimes), one that never does.
00432   string text_good("abcdefghijk");
00433   string text_bad("acdefghijkl");
00434 
00435   // According to pcretest, matching text_good against (\w+)*b
00436   // requires match_limit of at least 8192, and match_recursion_limit
00437   // of at least 37.
00438   // Case 1: match_limit exactly at the quoted minimum, so text_good partially matches.
00439   RE_Options options_ml;
00440   options_ml.set_match_limit(8192);
00441   RE re("(\\w+)*b", options_ml);
00442   CHECK(re.PartialMatch(text_good) == true);
00443   CHECK(re.PartialMatch(text_bad) == false);
00444   CHECK(re.FullMatch(text_good) == false);
00445   CHECK(re.FullMatch(text_bad) == false);
00446   // Case 2: match_limit below that minimum; a new RE is built from the changed options.
00447   options_ml.set_match_limit(1024);
00448   RE re2("(\\w+)*b", options_ml);
00449   CHECK(re2.PartialMatch(text_good) == false);   // because of match_limit
00450   CHECK(re2.PartialMatch(text_bad) == false);
00451   CHECK(re2.FullMatch(text_good) == false);
00452   CHECK(re2.FullMatch(text_bad) == false);
00453   // Case 3: recursion limit of 50, above the quoted minimum of 37.
00454   RE_Options options_mlr;
00455   options_mlr.set_match_limit_recursion(50);
00456   RE re3("(\\w+)*b", options_mlr);
00457   CHECK(re3.PartialMatch(text_good) == true);
00458   CHECK(re3.PartialMatch(text_bad) == false);
00459   CHECK(re3.FullMatch(text_good) == false);
00460   CHECK(re3.FullMatch(text_bad) == false);
00461   // Case 4: recursion limit of 10, below that minimum, so even text_good must not match.
00462   options_mlr.set_match_limit_recursion(10);
00463   RE re4("(\\w+)*b", options_mlr);
00464   CHECK(re4.PartialMatch(text_good) == false);
00465   CHECK(re4.PartialMatch(text_bad) == false);
00466   CHECK(re4.FullMatch(text_good) == false);
00467   CHECK(re4.FullMatch(text_bad) == false);
00468 }
00469 
00470 // Positive QuoteMeta check: the meta-quoted form of `unquoted`, used as a
00471 // pattern with `options`, must fully match `unquoted` itself.
00472 static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
00473   // Quote first, then build the pattern straight from the quoted text.
00474   RE quoted_re(RE::QuoteMeta(unquoted), options);
00475   CHECK(quoted_re.FullMatch(unquoted));
00476 }
00477 
00478 // Negative QuoteMeta check: `unquoted` contains regexp characters; after
00479 // meta-quoting, the resulting pattern must not fully match `should_not_match`.
00480 static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
00481                                   RE_Options options = RE_Options()) {
00482   // Build the pattern directly from the quoted text.
00483   RE quoted_re(RE::QuoteMeta(unquoted), options);
00484   CHECK(!quoted_re.FullMatch(should_not_match));
00485 }
00486 
00487 // Positive cases: every string below is handed to TestQuoteMeta, i.e. its
00488 // meta-quoted form must match the string itself.
00489 static void TestQuotaMetaSimple() {
00490   static const char* const kLiterals[] = {
00491     "foo", "foo.bar", "foo\\.bar", "[1-9]", "1.5-2.0?", "\\d",
00492     "Who doesn't like ice cream?",
00493     "((a|b)c?d*e+[f-h]i)",
00494     "((?!)xxx).*yyy",
00495     "([",
00496   };
00497   const size_t literal_count = sizeof(kLiterals) / sizeof(kLiterals[0]);
00498   for (size_t k = 0; k < literal_count; ++k) TestQuoteMeta(kLiterals[k]);
00499   // Built with an explicit length because the data contains a NUL byte.
00500   TestQuoteMeta(string("foo\0bar", 7));
00501 }
00502 // Negative cases: the quoted form of the first string must not match the second string.
00503 static void TestQuoteMetaSimpleNegative() {
00504   static const struct { const char* unquoted; const char* other; } kCases[] = {
00505     { "foo", "bar" },
00506     { "...", "bar" },
00507     { "\\.", "." },     { "\\.", ".." },
00508     { "(a)", "a" },
00509     { "(a|b)", "a" },   { "(a|b)", "(a)" },   { "(a|b)", "a|b" },
00510     { "[0-9]", "0" },   { "[0-9]", "0-9" },   { "[0-9]", "[9]" },
00511     { "((?!)xxx)", "xxx" },
00512   };
00513   const size_t case_count = sizeof(kCases) / sizeof(kCases[0]);
00514   for (size_t k = 0; k < case_count; ++k)
00515     NegativeTestQuoteMeta(kCases[k].unquoted, kCases[k].other);
00516 }
00517 // Non-ASCII case: the string contains the single byte 0xB2 and is tested with default options.
00518 static void TestQuoteMetaLatin1() {
00519   TestQuoteMeta(string("3\xb2 = 9"));
00520 }
00521 // UTF-8 cases; the body is compiled only when SUPPORT_UTF8 is defined.
00522 static void TestQuoteMetaUtf8() {
00523 #ifdef SUPPORT_UTF8
00524   const RE_Options utf8_opts = pcrecpp::UTF8();
00525   TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8_opts);
00526   TestQuoteMeta("xyz", utf8_opts);                  // No fancy utf8
00527   TestQuoteMeta("\xc2\xb0", utf8_opts);             // 2-byte utf8 (degree symbol)
00528   TestQuoteMeta("27\xc2\xb0 degrees", utf8_opts);   // As a middle character
00529   TestQuoteMeta("\xe2\x80\xb3", utf8_opts);         // 3-byte utf8 (double prime)
00530   TestQuoteMeta("\xf0\x9d\x85\x9f", utf8_opts);     // 4-byte utf8 (music note)
00531   TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, but should still work
00532   // A backslash before each byte of the 2-byte sequence must not match in UTF-8 mode.
00533   NegativeTestQuoteMeta("27\xc2\xb0", "27\\\xc2\\\xb0", utf8_opts);
00534 #endif
00535 }
00536 // Prints the section banner, then runs the four QuoteMeta suites in a fixed order.
00537 static void TestQuoteMetaAll() {
00538   printf("Testing QuoteMeta\n");
00539   typedef void (*Suite)();
00540   static const Suite kSuites[] = { TestQuotaMetaSimple, TestQuoteMetaSimpleNegative,
00541                                    TestQuoteMetaLatin1, TestQuoteMetaUtf8 };
00542   for (size_t k = 0; k < sizeof(kSuites) / sizeof(kSuites[0]); ++k) kSuites[k]();
00543 }
00544 
00545 //
00546 // Options tests contributed by
00547 // Giuseppe Maxia, CTO, Stardata s.r.l.
00548 // July 2005
00549 //
00550 static void GetOneOptionResult(const char *option_name, const char *regex,
00551                                const char *str, RE_Options options,
00552                                bool full, string expected) {
00553   // Matches `str` against `regex` built with `options` (FullMatch when `full`,
00554   // PartialMatch otherwise) and checks that the first capture equals
00555   // `expected`.  `option_name` only labels the progress output.
00556   printf("Testing Option <%s>\n", option_name);
00557   if (VERBOSE_TEST) {
00558     printf("/%s/ finds \"%s\" within \"%s\" \n", regex, expected.c_str(), str);
00559   }
00560   // The match result itself is not checked; only the captured text is
00561   // compared below.
00562   string captured;
00563   RE re(regex, options);
00564   if (full) {
00565     re.FullMatch(str, &captured);
00566   } else {
00567     re.PartialMatch(str, &captured);
00568   }
00569   CHECK_EQ(captured, expected);
00570 }
00571 // Checks that `str` matches (assertive == true) or does not match
00572 // (assertive == false) `regex` built with `options`.  FullMatch is used
00573 // when `full` is set, PartialMatch otherwise.  `option_name` only labels
00574 // the progress output.
00575 static void TestOneOption(const char *option_name, const char *regex,
00576                           const char *str, RE_Options options,
00577                           bool full, bool assertive = true) {
00578   printf("Testing Option <%s>\n", option_name);
00579   if (VERBOSE_TEST) {
00580     // Spell out which outcome is expected for this case.
00581     const char *verb = (assertive? "matches" : "doesn't match");
00582     printf("'%s' %s /%s/ \n",
00583            str, verb, regex);
00584   }
00585 
00586   // One branch per (assertive, full) combination; each builds its own
00587   // temporary RE from the pattern and options.
00588   if (assertive && full) {
00589     CHECK(RE(regex,options).FullMatch(str));
00590   } else if (assertive) {
00591     CHECK(RE(regex,options).PartialMatch(str));
00592   } else if (full) {
00593     CHECK(!RE(regex,options).FullMatch(str));
00594   } else {
00595     CHECK(!RE(regex,options).PartialMatch(str));
00596   }
00597 }
00598 // CASELESS: set through the setter, through a chained setter call, and through pcrecpp::CASELESS().
00599 static void Test_CASELESS() {
00600   RE_Options opts;
00601   RE_Options chained;   // configured inline, inside the argument expression below
00602 
00603   opts.set_caseless(true);
00604   TestOneOption("CASELESS (class)",  "HELLO",    "hello", opts, false);
00605   TestOneOption("CASELESS (class2)", "HELLO",    "hello", chained.set_caseless(true), false);
00606   TestOneOption("CASELESS (class)",  "^[A-Z]+$", "Hello", opts, false);
00607   // The same two patterns again, with the options coming from the helper function.
00608   TestOneOption("CASELESS (function)", "HELLO",    "hello", pcrecpp::CASELESS(), false);
00609   TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
00610   opts.set_caseless(false);   // flag cleared: the match is now expected to fail
00611   TestOneOption("no CASELESS", "HELLO",    "hello", opts, false, false);
00612 }
00613 // MULTILINE: "^cruel$" is expected to match the middle line of a three-line subject only when the flag is set.
00614 static void Test_MULTILINE() {
00615   const char *subject = "HELLO\n" "cruel\n" "world\n";
00616   RE_Options opts;
00617   RE_Options chained;
00618 
00619   opts.set_multiline(true);
00620   TestOneOption("MULTILINE (class)",    "^cruel$", subject, opts, false);
00621   TestOneOption("MULTILINE (class2)",   "^cruel$", subject, chained.set_multiline(true), false);
00622   TestOneOption("MULTILINE (function)", "^cruel$", subject, pcrecpp::MULTILINE(), false);
00623   opts.set_multiline(false);   // flag cleared: the same pattern must no longer match
00624   TestOneOption("no MULTILINE", "^cruel$", subject, opts, false, false);
00625 }
00626 // DOTALL: "HELLO.*world" must span the newlines in the subject for the full match to succeed.
00627 static void Test_DOTALL() {
00628   const char *subject = "HELLO\n" "cruel\n" "world";
00629   RE_Options opts;
00630   RE_Options chained;
00631 
00632   opts.set_dotall(true);
00633   TestOneOption("DOTALL (class)",    "HELLO.*world", subject, opts, true);
00634   TestOneOption("DOTALL (class2)",   "HELLO.*world", subject, chained.set_dotall(true), true);
00635   TestOneOption("DOTALL (function)",    "HELLO.*world", subject, pcrecpp::DOTALL(), true);
00636   opts.set_dotall(false);   // flag cleared: the full match is expected to fail
00637   TestOneOption("no DOTALL", "HELLO.*world", subject, opts, true, false);
00638 }
00639 // DOLLAR_ENDONLY: "world$" matches before the trailing newline by default, and must not once the flag is set.
00640 static void Test_DOLLAR_ENDONLY() {
00641   const char *subject = "HELLO world\n";
00642   RE_Options opts;
00643   RE_Options chained;
00644   // Default options first, then the flag via the setter and via a chained call.
00645   TestOneOption("no DOLLAR_ENDONLY", "world$", subject, opts, false);
00646   opts.set_dollar_endonly(true);
00647   TestOneOption("DOLLAR_ENDONLY 1",    "world$", subject, opts, false, false);
00648   TestOneOption("DOLLAR_ENDONLY 2",    "world$", subject, chained.set_dollar_endonly(true), false, false);
00649 }
00650 // EXTRA: "\HELL\O" is expected to fully match "HELLO" only while the flag is off.
00651 static void Test_EXTRA() {
00652   const char *subject = "HELLO";
00653   const char *escaped = "\\HELL\\O";   // backslashes in front of ordinary letters
00654   RE_Options opts;
00655   opts.set_extra(true);
00656   TestOneOption("EXTRA 1", escaped, subject, opts, true, false);
00657   TestOneOption("EXTRA 2", escaped, subject, RE_Options().set_extra(true), true, false);
00658   opts.set_extra(false);
00659   TestOneOption("no EXTRA", escaped, subject, opts, true);
00660 }
00661 // EXTENDED: with the flag set, the literal pattern "HELLO world" is
00662 // expected to stop matching, while a pattern laid out with extra
00663 // whitespace is expected to match.
00664 static void Test_EXTENDED() {
00665   const char *subject = "HELLO world";
00666   // The spaced-out pattern used by the two positive EXTENDED cases.
00667   const char *spaced =
00668       "^ HE L{2} O "
00669       "\\s+        "
00670       "\\w+ $      ";
00671   RE_Options opts;
00672   RE_Options chained;
00673 
00674   // Flag set through the setter (and through a chained setter call).
00675   opts.set_extended(true);
00676   TestOneOption("EXTENDED (class)",    "HELLO world", subject, opts, false, false);
00677   TestOneOption("EXTENDED (class2)",   "HELLO world", subject, chained.set_extended(true), false, false);
00678   TestOneOption("EXTENDED (class)", spaced, subject, opts, false);
00679 
00680   // Flag supplied by the pcrecpp::EXTENDED() helper.
00681   TestOneOption("EXTENDED (function)",    "HELLO world", subject, pcrecpp::EXTENDED(), false, false);
00682   TestOneOption("EXTENDED (function)", spaced, subject, pcrecpp::EXTENDED(), false);
00683 
00684   // Flag cleared again: the literal pattern is expected to match as
00685   // written.
00686   opts.set_extended(false);
00687   TestOneOption("no EXTENDED", "HELLO world", subject, opts, false);
00688 
00689 }
00690 // NO_AUTO_CAPTURE: checks ordinary capturing first, then repeats the extraction after the flag is set.
00691 static void Test_NO_AUTO_CAPTURE() {
00692   const char *subject = "HELLO world";
00693   RE_Options opts;
00694   string captured;
00695   // NOTE(review): `re` is built before set_no_auto_capture(true); confirm the second Extract really exercises the flag.
00696   printf("Testing Option <no NO_AUTO_CAPTURE>\n");
00697   if (VERBOSE_TEST)
00698     printf("parentheses capture text\n");
00699   RE re("(world|universe)$", opts);
00700   CHECK(re.Extract("\\1", subject, &captured));
00701   CHECK_EQ(captured, "world");
00702   opts.set_no_auto_capture(true);
00703   printf("testing Option <NO_AUTO_CAPTURE>\n");
00704   if (VERBOSE_TEST)
00705     printf("parentheses do not capture text\n");
00706   re.Extract("\\1", subject, &captured);   // result deliberately not checked
00707   CHECK_EQ(captured, "world");
00708 }
00709 // UNGREEDY: with the flag set, ".*" is expected to capture the short quote and ".*?" the long one; without it, the reverse.
00710 static void Test_UNGREEDY() {
00711   const char *subject = "HELLO, 'this' is the 'world'";
00712   const char *shortest = "'this'";
00713   const char *longest = "'this' is the 'world'";
00714   RE_Options opts;
00715   opts.set_ungreedy(true);
00716   GetOneOptionResult("UNGREEDY 1", "('.*')", subject, opts, false, shortest);
00717   GetOneOptionResult("UNGREEDY 2", "('.*')", subject, RE_Options().set_ungreedy(true), false, shortest);
00718   GetOneOptionResult("UNGREEDY", "('.*?')", subject, opts, false, longest);
00719   opts.set_ungreedy(false);
00720   GetOneOptionResult("no UNGREEDY", "('.*')", subject, opts, false, longest);
00721   GetOneOptionResult("no UNGREEDY", "('.*?')", subject, opts, false, shortest);
00722 }
00723 // set_all_options / RE_Options(int): several flags installed at once,
00724 // compared with the same flags set through chained setters.
00725 static void Test_all_options() {
00726   const char *subject = "HELLO\n" "cruel\n" "world";
00727   const char *mixed_case = "^hello.*WORLD";
00728   const char *spaced = " ^ c r u e l $ ";
00729   RE_Options opts;
00730 
00731   // CASELESS|DOTALL: the mixed-case pattern is expected to match across
00732   // the newlines; with all options cleared it must not.
00733   opts.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
00734   TestOneOption("all_options (CASELESS|DOTALL)", mixed_case, subject, opts, false);
00735   opts.set_all_options(0);
00736   TestOneOption("all_options (0)", mixed_case, subject, opts, false, false);
00737 
00738   // MULTILINE|EXTENDED, supplied in three different ways.
00739   opts.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
00740   TestOneOption("all_options (MULTILINE|EXTENDED)", spaced, subject, opts, false);
00741   TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
00742                 spaced, subject,
00743                 RE_Options(PCRE_MULTILINE | PCRE_EXTENDED), false);
00744   TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
00745                 spaced, subject,
00746                 RE_Options().set_multiline(true).set_extended(true), false);
00747 
00748   // All options cleared again; note that this pattern has no leading or
00749   // trailing space, unlike `spaced`.
00750   opts.set_all_options(0);
00751   TestOneOption("all_options (0)", "^ c r u e l $", subject, opts, false, false);
00752 }
00753 // Prints the section banner and runs each RE_Options test in turn.
00754 static void TestOptions() {
00755   printf("Testing Options\n");
00756   typedef void (*OptionTest)();
00757   static const OptionTest kTests[] = {
00758     Test_CASELESS, Test_MULTILINE, Test_DOTALL,
00759     Test_DOLLAR_ENDONLY, Test_EXTENDED, Test_NO_AUTO_CAPTURE,
00760     Test_UNGREEDY, Test_EXTRA, Test_all_options,
00761   };
00762   const size_t test_count = sizeof(kTests) / sizeof(kTests[0]);
00763   for (size_t k = 0; k < test_count; ++k)
00764     kTests[k]();
00765 }
00766 // Copy construction, assignment and self-assignment of RE objects: every copy must match like its source.
00767 static void TestConstructors() {
00768   printf("Testing constructors\n");
00769 
00770   const char *subject = "HELLO\n" "cruel\n" "world";
00771   RE_Options dotall;
00772   dotall.set_dotall(true);
00773 
00774   RE source("HELLO.*world", dotall);
00775   CHECK(source.FullMatch(subject));
00776 
00777   RE cloned(source);                 // copy constructor
00778   CHECK(cloned.FullMatch(subject));
00779 
00780   RE assigned("not a match");        // starts out as a different pattern
00781   CHECK(!assigned.FullMatch(subject));
00782   assigned = cloned;                 // assignment from a copy ...
00783   CHECK(assigned.FullMatch(subject));
00784   assigned = source;                 // ... and from the original
00785   CHECK(assigned.FullMatch(subject));
00786 
00787   // Self-assignment must leave each object usable.
00788   source = source;
00789   cloned = cloned;
00790   assigned = assigned;
00791   CHECK(source.FullMatch(subject));
00792   CHECK(cloned.FullMatch(subject));
00793   CHECK(assigned.FullMatch(subject));
00794 }
00795 
00796 int main(int argc, char** argv) {
00797   // Treat any flag as --help
00798   if (argc > 1 && argv[1][0] == '-') {
00799     printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
00800            "       If 'timingX ###' is specified, run the given timing test\n"
00801            "       with the given number of iterations, rather than running\n"
00802            "       the default corectness test.\n", argv[0]);
00803     return 0;
00804   }
00805 
00806   if (argc > 1) {
00807     if ( argc == 2 || atoi(argv[2]) == 0) {
00808       printf("timing mode needs a num-iters argument\n");
00809       return 1;
00810     }
00811     if (!strcmp(argv[1], "timing1"))
00812       Timing1(atoi(argv[2]));
00813     else if (!strcmp(argv[1], "timing2"))
00814       Timing2(atoi(argv[2]));
00815     else if (!strcmp(argv[1], "timing3"))
00816       Timing3(atoi(argv[2]));
00817     else
00818       printf("Unknown argument '%s'\n", argv[1]);
00819     return 0;
00820   }
00821 
00822   printf("Testing FullMatch\n");
00823 
00824   int i;
00825   string s;
00826 
00827   /***** FullMatch with no args *****/
00828 
00829   CHECK(RE("h.*o").FullMatch("hello"));
00830   CHECK(!RE("h.*o").FullMatch("othello"));     // Must be anchored at front
00831   CHECK(!RE("h.*o").FullMatch("hello!"));      // Must be anchored at end
00832   CHECK(RE("a*").FullMatch("aaaa"));           // Fullmatch with normal op
00833   CHECK(RE("a*?").FullMatch("aaaa"));          // Fullmatch with nongreedy op
00834   CHECK(RE("a*?\\z").FullMatch("aaaa"));       // Two unusual ops
00835 
00836   /***** FullMatch with args *****/
00837 
00838   // Zero-arg
00839   CHECK(RE("\\d+").FullMatch("1001"));
00840 
00841   // Single-arg
00842   CHECK(RE("(\\d+)").FullMatch("1001",   &i));
00843   CHECK_EQ(i, 1001);
00844   CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
00845   CHECK_EQ(i, -123);
00846   CHECK(!RE("()\\d+").FullMatch("10", &i));
00847   CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
00848                                 &i));
00849 
00850   // Digits surrounding integer-arg
00851   CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
00852   CHECK_EQ(i, 23);
00853   CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
00854   CHECK_EQ(i, 1);
00855   CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
00856   CHECK_EQ(i, -1);
00857   CHECK(RE("(\\d)").PartialMatch("1234", &i));
00858   CHECK_EQ(i, 1);
00859   CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
00860   CHECK_EQ(i, -1);
00861 
00862   // String-arg
00863   CHECK(RE("h(.*)o").FullMatch("hello", &s));
00864   CHECK_EQ(s, string("ell"));
00865 
00866   // StringPiece-arg
00867   StringPiece sp;
00868   CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
00869   CHECK_EQ(sp.size(), 4);
00870   CHECK(memcmp(sp.data(), "ruby", 4) == 0);
00871   CHECK_EQ(i, 1234);
00872 
00873   // Multi-arg
00874   CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
00875   CHECK_EQ(s, string("ruby"));
00876   CHECK_EQ(i, 1234);
00877 
00878   // Ignore non-void* NULL arg
00879   CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
00880   CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
00881   CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
00882   CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
00883 #ifdef HAVE_LONG_LONG
00884   CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
00885 #endif
00886   CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
00887   CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
00888 
00889   // Fail on non-void* NULL arg if the match doesn't parse for the given type.
00890   CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
00891   CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
00892   CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
00893   CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
00894   CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
00895 
00896   // Ignored arg
00897   CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
00898   CHECK_EQ(s, string("ruby"));
00899   CHECK_EQ(i, 1234);
00900 
00901   // Type tests
00902   {
00903     char c;
00904     CHECK(RE("(H)ello").FullMatch("Hello", &c));
00905     CHECK_EQ(c, 'H');
00906   }
00907   {
00908     unsigned char c;
00909     CHECK(RE("(H)ello").FullMatch("Hello", &c));
00910     CHECK_EQ(c, static_cast<unsigned char>('H'));
00911   }
00912   {
00913     short v;
00914     CHECK(RE("(-?\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
00915     CHECK(RE("(-?\\d+)").FullMatch("-100",    &v));    CHECK_EQ(v, -100);
00916     CHECK(RE("(-?\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
00917     CHECK(RE("(-?\\d+)").FullMatch("-32768",  &v));    CHECK_EQ(v, -32768);
00918     CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
00919     CHECK(!RE("(-?\\d+)").FullMatch("32768",  &v));
00920   }
00921   {
00922     unsigned short v;
00923     CHECK(RE("(\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
00924     CHECK(RE("(\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
00925     CHECK(RE("(\\d+)").FullMatch("65535",   &v));    CHECK_EQ(v, 65535);
00926     CHECK(!RE("(\\d+)").FullMatch("65536",  &v));
00927   }
00928   {
00929     int v;
00930     static const int max_value = 0x7fffffff;
00931     static const int min_value = -max_value - 1;
00932     CHECK(RE("(-?\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
00933     CHECK(RE("(-?\\d+)").FullMatch("-100",        &v)); CHECK_EQ(v, -100);
00934     CHECK(RE("(-?\\d+)").FullMatch("2147483647",  &v)); CHECK_EQ(v, max_value);
00935     CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
00936     CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
00937     CHECK(!RE("(-?\\d+)").FullMatch("2147483648",  &v));
00938   }
00939   {
00940     unsigned int v;
00941     static const unsigned int max_value = 0xfffffffful;
00942     CHECK(RE("(\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
00943     CHECK(RE("(\\d+)").FullMatch("4294967295",  &v)); CHECK_EQ(v, max_value);
00944     CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
00945   }
00946 #ifdef HAVE_LONG_LONG
00947 # if defined(__MINGW__) || defined(__MINGW32__)
00948 #   define LLD "%I64d"
00949 #   define LLU "%I64u"
00950 # else
00951 #   define LLD "%lld"
00952 #   define LLU "%llu"
00953 # endif
00954   {
00955     long long v;
00956     static const long long max_value = 0x7fffffffffffffffLL;
00957     static const long long min_value = -max_value - 1;
00958     char buf[32];  // definitely big enough for a long long
00959 
00960     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
00961     CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
00962 
00963     sprintf(buf, LLD, max_value);
00964     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
00965 
00966     sprintf(buf, LLD, min_value);
00967     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
00968 
00969     sprintf(buf, LLD, max_value);
00970     assert(buf[strlen(buf)-1] != '9');
00971     buf[strlen(buf)-1]++;
00972     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
00973 
00974     sprintf(buf, LLD, min_value);
00975     assert(buf[strlen(buf)-1] != '9');
00976     buf[strlen(buf)-1]++;
00977     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
00978   }
00979 #endif
00980 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
00981   {
00982     unsigned long long v;
00983     long long v2;
00984     static const unsigned long long max_value = 0xffffffffffffffffULL;
00985     char buf[32];  // definitely big enough for an unsigned long long
00986 
00987     CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
00988     CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
00989 
00990     sprintf(buf, LLU, max_value);
00991     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
00992 
00993     assert(buf[strlen(buf)-1] != '9');
00994     buf[strlen(buf)-1]++;
00995     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
00996   }
00997 #endif
00998   {
00999     float v;
01000     CHECK(RE("(.*)").FullMatch("100", &v));
01001     CHECK(RE("(.*)").FullMatch("-100.", &v));
01002     CHECK(RE("(.*)").FullMatch("1e23", &v));
01003   }
01004   {
01005     double v;
01006     CHECK(RE("(.*)").FullMatch("100", &v));
01007     CHECK(RE("(.*)").FullMatch("-100.", &v));
01008     CHECK(RE("(.*)").FullMatch("1e23", &v));
01009   }
01010 
01011   // Check that matching is fully anchored
01012   CHECK(!RE("(\\d+)").FullMatch("x1001",  &i));
01013   CHECK(!RE("(\\d+)").FullMatch("1001x",  &i));
01014   CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
01015   CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
01016 
01017   // Braces
01018   CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
01019   CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
01020   CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
01021 
01022   // Complicated RE
01023   CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
01024   CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
01025   CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
01026   CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
01027 
01028   // Check full-match handling (needs '$' tacked on internally)
01029   CHECK(RE("fo|foo").FullMatch("fo"));
01030   CHECK(RE("fo|foo").FullMatch("foo"));
01031   CHECK(RE("fo|foo$").FullMatch("fo"));
01032   CHECK(RE("fo|foo$").FullMatch("foo"));
01033   CHECK(RE("foo$").FullMatch("foo"));
01034   CHECK(!RE("foo\\$").FullMatch("foo$bar"));
01035   CHECK(!RE("fo|bar").FullMatch("fox"));
01036 
01037   // Uncomment the following if we change the handling of '$' to
01038   // prevent it from matching a trailing newline
01039   if (false) {
01040     // Check that we don't get bitten by pcre's special handling of a
01041     // '\n' at the end of the string matching '$'
01042     CHECK(!RE("foo$").PartialMatch("foo\n"));
01043   }
01044 
01045   // Number of args
01046   int a[16];
01047   CHECK(RE("").FullMatch(""));
01048 
01049   memset(a, 0, sizeof(0));
01050   CHECK(RE("(\\d){1}").FullMatch("1",
01051                                  &a[0]));
01052   CHECK_EQ(a[0], 1);
01053 
01054   memset(a, 0, sizeof(0));
01055   CHECK(RE("(\\d)(\\d)").FullMatch("12",
01056                                    &a[0],  &a[1]));
01057   CHECK_EQ(a[0], 1);
01058   CHECK_EQ(a[1], 2);
01059 
01060   memset(a, 0, sizeof(0));
01061   CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
01062                                         &a[0],  &a[1],  &a[2]));
01063   CHECK_EQ(a[0], 1);
01064   CHECK_EQ(a[1], 2);
01065   CHECK_EQ(a[2], 3);
01066 
01067   memset(a, 0, sizeof(0));
01068   CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
01069                                              &a[0],  &a[1],  &a[2],  &a[3]));
01070   CHECK_EQ(a[0], 1);
01071   CHECK_EQ(a[1], 2);
01072   CHECK_EQ(a[2], 3);
01073   CHECK_EQ(a[3], 4);
01074 
01075   memset(a, 0, sizeof(0));
01076   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
01077                                                   &a[0],  &a[1],  &a[2],
01078                                                   &a[3],  &a[4]));
01079   CHECK_EQ(a[0], 1);
01080   CHECK_EQ(a[1], 2);
01081   CHECK_EQ(a[2], 3);
01082   CHECK_EQ(a[3], 4);
01083   CHECK_EQ(a[4], 5);
01084 
01085   memset(a, 0, sizeof(0));
01086   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
01087                                                        &a[0],  &a[1],  &a[2],
01088                                                        &a[3],  &a[4],  &a[5]));
01089   CHECK_EQ(a[0], 1);
01090   CHECK_EQ(a[1], 2);
01091   CHECK_EQ(a[2], 3);
01092   CHECK_EQ(a[3], 4);
01093   CHECK_EQ(a[4], 5);
01094   CHECK_EQ(a[5], 6);
01095 
01096   memset(a, 0, sizeof(0));
01097   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
01098                                                             &a[0],  &a[1],  &a[2],  &a[3],
01099                                                             &a[4],  &a[5],  &a[6]));
01100   CHECK_EQ(a[0], 1);
01101   CHECK_EQ(a[1], 2);
01102   CHECK_EQ(a[2], 3);
01103   CHECK_EQ(a[3], 4);
01104   CHECK_EQ(a[4], 5);
01105   CHECK_EQ(a[5], 6);
01106   CHECK_EQ(a[6], 7);
01107 
01108   memset(a, 0, sizeof(0));
01109   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
01110            "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
01111                "1234567890123456",
01112                &a[0],  &a[1],  &a[2],  &a[3],
01113                &a[4],  &a[5],  &a[6],  &a[7],
01114                &a[8],  &a[9],  &a[10], &a[11],
01115                &a[12], &a[13], &a[14], &a[15]));
01116   CHECK_EQ(a[0], 1);
01117   CHECK_EQ(a[1], 2);
01118   CHECK_EQ(a[2], 3);
01119   CHECK_EQ(a[3], 4);
01120   CHECK_EQ(a[4], 5);
01121   CHECK_EQ(a[5], 6);
01122   CHECK_EQ(a[6], 7);
01123   CHECK_EQ(a[7], 8);
01124   CHECK_EQ(a[8], 9);
01125   CHECK_EQ(a[9], 0);
01126   CHECK_EQ(a[10], 1);
01127   CHECK_EQ(a[11], 2);
01128   CHECK_EQ(a[12], 3);
01129   CHECK_EQ(a[13], 4);
01130   CHECK_EQ(a[14], 5);
01131   CHECK_EQ(a[15], 6);
01132 
01133   /***** PartialMatch *****/
01134 
01135   printf("Testing PartialMatch\n");
01136 
01137   CHECK(RE("h.*o").PartialMatch("hello"));
01138   CHECK(RE("h.*o").PartialMatch("othello"));
01139   CHECK(RE("h.*o").PartialMatch("hello!"));
01140   CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
01141 
01142   /***** other tests *****/
01143 
01144   RadixTests();
01145   TestReplace();
01146   TestExtract();
01147   TestConsume();
01148   TestFindAndConsume();
01149   TestQuoteMetaAll();
01150   TestMatchNumberPeculiarity();
01151 
01152   // Check the pattern() accessor
01153   {
01154     const string kPattern = "http://([^/]+)/.*";
01155     const RE re(kPattern);
01156     CHECK_EQ(kPattern, re.pattern());
01157   }
01158 
01159   // Check RE error field.
01160   {
01161     RE re("foo");
01162     CHECK(re.error().empty());  // Must have no error
01163   }
01164 
01165 #ifdef SUPPORT_UTF8
01166   // Check UTF-8 handling
01167   {
01168     printf("Testing UTF-8 handling\n");
01169 
01170     // Three Japanese characters (nihongo)
01171     const unsigned char utf8_string[] = {
01172          0xe6, 0x97, 0xa5, // 65e5
01173          0xe6, 0x9c, 0xac, // 672c
01174          0xe8, 0xaa, 0x9e, // 8a9e
01175          0
01176     };
01177     const unsigned char utf8_pattern[] = {
01178          '.',
01179          0xe6, 0x9c, 0xac, // 672c
01180          '.',
01181          0
01182     };
01183 
01184     // Both should match in either mode, bytes or UTF-8
01185     RE re_test1(".........");
01186     CHECK(re_test1.FullMatch(utf8_string));
01187     RE re_test2("...", pcrecpp::UTF8());
01188     CHECK(re_test2.FullMatch(utf8_string));
01189 
01190     // Check that '.' matches one byte or UTF-8 character
01191     // according to the mode.
01192     string ss;
01193     RE re_test3("(.)");
01194     CHECK(re_test3.PartialMatch(utf8_string, &ss));
01195     CHECK_EQ(ss, string("\xe6"));
01196     RE re_test4("(.)", pcrecpp::UTF8());
01197     CHECK(re_test4.PartialMatch(utf8_string, &ss));
01198     CHECK_EQ(ss, string("\xe6\x97\xa5"));
01199 
01200     // Check that string matches itself in either mode
01201     RE re_test5(utf8_string);
01202     CHECK(re_test5.FullMatch(utf8_string));
01203     RE re_test6(utf8_string, pcrecpp::UTF8());
01204     CHECK(re_test6.FullMatch(utf8_string));
01205 
01206     // Check that pattern matches string only in UTF8 mode
01207     RE re_test7(utf8_pattern);
01208     CHECK(!re_test7.FullMatch(utf8_string));
01209     RE re_test8(utf8_pattern, pcrecpp::UTF8());
01210     CHECK(re_test8.FullMatch(utf8_string));
01211   }
01212 
01213   // Check that ungreedy, UTF8 regular expressions don't match when they
01214   // oughtn't -- see bug 82246.
01215   {
01216     // This code always worked.
01217     const char* pattern = "\\w+X";
01218     const string target = "a aX";
01219     RE match_sentence(pattern);
01220     RE match_sentence_re(pattern, pcrecpp::UTF8());
01221 
01222     CHECK(!match_sentence.FullMatch(target));
01223     CHECK(!match_sentence_re.FullMatch(target));
01224   }
01225 
01226   {
01227     const char* pattern = "(?U)\\w+X";
01228     const string target = "a aX";
01229     RE match_sentence(pattern);
01230     RE match_sentence_re(pattern, pcrecpp::UTF8());
01231 
01232     CHECK(!match_sentence.FullMatch(target));
01233     CHECK(!match_sentence_re.FullMatch(target));
01234   }
01235 #endif  /* def SUPPORT_UTF8 */
01236 
01237   printf("Testing error reporting\n");
01238 
01239   { RE re("a\\1"); CHECK(!re.error().empty()); }
01240   {
01241     RE re("a[x");
01242     CHECK(!re.error().empty());
01243   }
01244   {
01245     RE re("a[z-a]");
01246     CHECK(!re.error().empty());
01247   }
01248   {
01249     RE re("a[[:foobar:]]");
01250     CHECK(!re.error().empty());
01251   }
01252   {
01253     RE re("a(b");
01254     CHECK(!re.error().empty());
01255   }
01256   {
01257     RE re("a\\");
01258     CHECK(!re.error().empty());
01259   }
01260 
01261   // Test that recursion is stopped
01262   TestRecursion();
01263 
01264   // Test Options
01265   if (getenv("VERBOSE_TEST") != NULL)
01266     VERBOSE_TEST  = true;
01267   TestOptions();
01268 
01269   // Test the constructors
01270   TestConstructors();
01271 
01272   // Done
01273   printf("OK\n");
01274 
01275   return 0;
01276 }

Generated on Tue Jul 5 14:11:58 2011 for ROOT_528-00b_version by  doxygen 1.5.1