00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #include "Riostream.h"
00027 #include "TPRegexp.h"
00028 #include "TObjArray.h"
00029 #include "TObjString.h"
00030 #include "TError.h"
00031
00032 #include <pcre.h>
00033
00034 #include <vector>
00035
00036 struct PCREPriv_t {
00037 pcre *fPCRE;
00038 pcre_extra *fPCREExtra;
00039
00040 PCREPriv_t() { fPCRE = 0; fPCREExtra = 0; }
00041 };
00042
00043
00044 ClassImp(TPRegexp)
00045
00046
00047 TPRegexp::TPRegexp()
00048 {
00049
00050
00051 fPriv = new PCREPriv_t;
00052 fPCREOpts = 0;
00053 }
00054
00055
00056 TPRegexp::TPRegexp(const TString &pat)
00057 {
00058
00059
00060 fPattern = pat;
00061 fPriv = new PCREPriv_t;
00062 fPCREOpts = 0;
00063 }
00064
00065
00066 TPRegexp::TPRegexp(const TPRegexp &p)
00067 {
00068
00069
00070 fPattern = p.fPattern;
00071 fPriv = new PCREPriv_t;
00072 fPCREOpts = p.fPCREOpts;
00073 }
00074
00075
00076 TPRegexp::~TPRegexp()
00077 {
00078
00079
00080 if (fPriv->fPCRE)
00081 pcre_free(fPriv->fPCRE);
00082 if (fPriv->fPCREExtra)
00083 pcre_free(fPriv->fPCREExtra);
00084 delete fPriv;
00085 }
00086
00087
00088 TPRegexp &TPRegexp::operator=(const TPRegexp &p)
00089 {
00090
00091
00092 if (this != &p) {
00093 fPattern = p.fPattern;
00094 if (fPriv->fPCRE)
00095 pcre_free(fPriv->fPCRE);
00096 fPriv->fPCRE = 0;
00097 if (fPriv->fPCREExtra)
00098 pcre_free(fPriv->fPCREExtra);
00099 fPriv->fPCREExtra = 0;
00100 fPCREOpts = p.fPCREOpts;
00101 }
00102 return *this;
00103 }
00104
00105
00106 UInt_t TPRegexp::ParseMods(const TString &modStr) const
00107 {
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135 UInt_t opts = 0;
00136
00137 if (modStr.Length() <= 0)
00138 return fPCREOpts;
00139
00140
00141 const char *m = modStr;
00142 while (*m) {
00143 switch (*m) {
00144 case 'g':
00145 opts |= kPCRE_GLOBAL;
00146 break;
00147 case 'i':
00148 opts |= PCRE_CASELESS;
00149 break;
00150 case 'm':
00151 opts |= PCRE_MULTILINE;
00152 break;
00153 case 'o':
00154 opts |= kPCRE_OPTIMIZE;
00155 break;
00156 case 's':
00157 opts |= PCRE_DOTALL;
00158 break;
00159 case 'x':
00160 opts |= PCRE_EXTENDED;
00161 break;
00162 case 'd':
00163 opts |= kPCRE_DEBUG_MSGS;
00164 break;
00165 default:
00166 Error("ParseMods", "illegal pattern modifier: %c", *m);
00167 opts = 0;
00168 }
00169 ++m;
00170 }
00171 return opts;
00172 }
00173
00174
00175 TString TPRegexp::GetModifiers() const
00176 {
00177
00178
00179
00180 TString ret;
00181
00182 if (fPCREOpts & kPCRE_GLOBAL) ret += 'g';
00183 if (fPCREOpts & PCRE_CASELESS) ret += 'i';
00184 if (fPCREOpts & PCRE_MULTILINE) ret += 'm';
00185 if (fPCREOpts & PCRE_DOTALL) ret += 's';
00186 if (fPCREOpts & PCRE_EXTENDED) ret += 'x';
00187 if (fPCREOpts & kPCRE_OPTIMIZE) ret += 'o';
00188 if (fPCREOpts & kPCRE_DEBUG_MSGS) ret += 'd';
00189
00190 return ret;
00191 }
00192
00193
00194 void TPRegexp::Compile()
00195 {
00196
00197
00198 if (fPriv->fPCRE)
00199 pcre_free(fPriv->fPCRE);
00200
00201 if (fPCREOpts & kPCRE_DEBUG_MSGS)
00202 Info("Compile", "PREGEX compiling %s", fPattern.Data());
00203
00204 const char *errstr;
00205 Int_t patIndex;
00206 fPriv->fPCRE = pcre_compile(fPattern.Data(), fPCREOpts & kPCRE_INTMASK,
00207 &errstr, &patIndex, 0);
00208
00209 if (!fPriv->fPCRE) {
00210 Error("Compile", "compilation of TPRegexp(%s) failed at: %d because %s",
00211 fPattern.Data(), patIndex, errstr);
00212 }
00213
00214 if (fPriv->fPCREExtra || (fPCREOpts & kPCRE_OPTIMIZE))
00215 Optimize();
00216 }
00217
00218
00219 void TPRegexp::Optimize()
00220 {
00221
00222
00223 if (fPriv->fPCREExtra)
00224 pcre_free(fPriv->fPCREExtra);
00225
00226 if (fPCREOpts & kPCRE_DEBUG_MSGS)
00227 Info("Optimize", "PREGEX studying %s", fPattern.Data());
00228
00229 const char *errstr;
00230
00231 fPriv->fPCREExtra = pcre_study(fPriv->fPCRE, 0, &errstr);
00232
00233 if (!fPriv->fPCREExtra && errstr) {
00234 Error("Optimize", "Optimization of TPRegexp(%s) failed: %s",
00235 fPattern.Data(), errstr);
00236 }
00237 }
00238
00239
00240 Int_t TPRegexp::ReplaceSubs(const TString &s, TString &final,
00241 const TString &replacePattern,
00242 Int_t *offVec, Int_t nrMatch) const
00243 {
00244
00245
00246 Int_t nrSubs = 0;
00247 const char *p = replacePattern;
00248
00249 Int_t state = 0;
00250 Int_t subnum = 0;
00251 while (state != -1) {
00252 switch (state) {
00253 case 0:
00254 if (!*p) {
00255 state = -1;
00256 break;
00257 }
00258 if (*p == '$') {
00259 state = 1;
00260 subnum = 0;
00261 if (p[1] == '&') {
00262 p++;
00263 if (isdigit(p[1]))
00264 p++;
00265 } else if (!isdigit(p[1])) {
00266 Error("ReplaceSubs", "badly formed replacement pattern: %s",
00267 replacePattern.Data());
00268 }
00269 } else
00270 final += *p;
00271 break;
00272 case 1:
00273 if (isdigit(*p)) {
00274 subnum *= 10;
00275 subnum += (*p)-'0';
00276 } else {
00277 if (fPCREOpts & kPCRE_DEBUG_MSGS)
00278 Info("ReplaceSubs", "PREGEX appending substr #%d", subnum);
00279 if (subnum < 0 || subnum > nrMatch-1) {
00280 Error("ReplaceSubs","bad string number: %d",subnum);
00281 } else {
00282 const TString subStr = s(offVec[2*subnum],offVec[2*subnum+1]-offVec[2*subnum]);
00283 final += subStr;
00284 nrSubs++;
00285 }
00286 state = 0;
00287 continue;
00288 }
00289 }
00290 p++;
00291 }
00292 return nrSubs;
00293 }
00294
00295
00296 Int_t TPRegexp::MatchInternal(const TString &s, Int_t start,
00297 Int_t nMaxMatch, TArrayI *pos)
00298 {
00299
00300
00301 Int_t *offVec = new Int_t[3*nMaxMatch];
00302
00303 Int_t nrMatch = pcre_exec(fPriv->fPCRE, fPriv->fPCREExtra, s.Data(),
00304 s.Length(), start, 0,
00305 offVec, 3*nMaxMatch);
00306
00307 if (nrMatch == PCRE_ERROR_NOMATCH)
00308 nrMatch = 0;
00309 else if (nrMatch <= 0) {
00310 Error("Match","pcre_exec error = %d", nrMatch);
00311 delete [] offVec;
00312 return 0;
00313 }
00314
00315 if (pos)
00316 pos->Set(2*nrMatch, offVec);
00317 delete [] offVec;
00318
00319 return nrMatch;
00320 }
00321
00322
00323 Int_t TPRegexp::Match(const TString &s, const TString &mods, Int_t start,
00324 Int_t nMaxMatch, TArrayI *pos)
00325 {
00326
00327
00328
00329
00330
00331
00332
00333 UInt_t opts = ParseMods(mods);
00334
00335 if (!fPriv->fPCRE || opts != fPCREOpts) {
00336 fPCREOpts = opts;
00337 Compile();
00338 }
00339
00340 return MatchInternal(s, start, nMaxMatch, pos);
00341 }
00342
00343
00344
00345 TObjArray *TPRegexp::MatchS(const TString &s, const TString &mods,
00346 Int_t start, Int_t nMaxMatch)
00347 {
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359
00360
00361
00362
00363 TArrayI pos;
00364 Int_t nrMatch = Match(s, mods, start, nMaxMatch, &pos);
00365
00366 TObjArray *subStrL = new TObjArray();
00367 subStrL->SetOwner();
00368
00369 for (Int_t i = 0; i < nrMatch; i++) {
00370 Int_t startp = pos[2*i];
00371 Int_t stopp = pos[2*i+1];
00372 if (startp >= 0 && stopp >= 0) {
00373 const TString subStr = s(pos[2*i], pos[2*i+1]-pos[2*i]);
00374 subStrL->Add(new TObjString(subStr));
00375 } else
00376 subStrL->Add(new TObjString());
00377 }
00378
00379 return subStrL;
00380 }
00381
00382
00383 Int_t TPRegexp::SubstituteInternal(TString &s, const TString &replacePattern,
00384 Int_t start, Int_t nMaxMatch,
00385 Bool_t doDollarSubst)
00386 {
00387
00388
00389
00390 Int_t *offVec = new Int_t[3*nMaxMatch];
00391
00392 TString final;
00393 Int_t nrSubs = 0;
00394 Int_t offset = start;
00395 Int_t last = 0;
00396
00397 while (kTRUE) {
00398
00399
00400
00401 Int_t nrMatch = pcre_exec(fPriv->fPCRE, fPriv->fPCREExtra, s.Data(),
00402 s.Length(), offset, 0,
00403 offVec, 3*nMaxMatch);
00404
00405 if (nrMatch == PCRE_ERROR_NOMATCH) {
00406 nrMatch = 0;
00407 break;
00408 } else if (nrMatch <= 0) {
00409 Error("Substitute", "pcre_exec error = %d", nrMatch);
00410 break;
00411 }
00412
00413
00414 if (last <= offVec[0]) {
00415 final += s(last,offVec[0]-last);
00416 last = offVec[1];
00417 }
00418
00419
00420 if (doDollarSubst) {
00421 ReplaceSubs(s, final, replacePattern, offVec, nrMatch);
00422 } else {
00423 final += replacePattern;
00424 }
00425 ++nrSubs;
00426
00427
00428 if (!(fPCREOpts & kPCRE_GLOBAL))
00429 break;
00430
00431 if (offVec[0] != offVec[1])
00432 offset = offVec[1];
00433 else {
00434
00435 if (offVec[1] == s.Length())
00436 break;
00437 offset = offVec[1]+1;
00438 }
00439 }
00440
00441 delete [] offVec;
00442
00443 final += s(last,s.Length()-last);
00444 s = final;
00445
00446 return nrSubs;
00447 }
00448
00449
00450 Int_t TPRegexp::Substitute(TString &s, const TString &replacePattern,
00451 const TString &mods, Int_t start, Int_t nMaxMatch)
00452 {
00453
00454
00455
00456
00457
00458
00459
00460
00461
00462
00463
00464 UInt_t opts = ParseMods(mods);
00465
00466 if (!fPriv->fPCRE || opts != fPCREOpts) {
00467 fPCREOpts = opts;
00468 Compile();
00469 }
00470
00471 return SubstituteInternal(s, replacePattern, start, nMaxMatch, kTRUE);
00472 }
00473
00474
00475
00476
00477
00478
00479
00480
00481
00482
00483 Ssiz_t TString::Index(TPRegexp& r, Ssiz_t start) const
00484 {
00485
00486
00487
00488 TArrayI pos;
00489 Int_t nrMatch = r.Match(*this,"",start,10,&pos);
00490 if (nrMatch > 0)
00491 return pos[0];
00492 else
00493 return -1;
00494 }
00495
00496
00497 Ssiz_t TString::Index(TPRegexp& r, Ssiz_t* extent, Ssiz_t start) const
00498 {
00499
00500
00501
00502
00503 TArrayI pos;
00504 const Int_t nrMatch = r.Match(*this,"",start,10,&pos);
00505 if (nrMatch > 0) {
00506 *extent = pos[1]-pos[0];
00507 return pos[0];
00508 } else {
00509 *extent = 0;
00510 return -1;
00511 }
00512 }
00513
00514
00515 TSubString TString::operator()(TPRegexp& r, Ssiz_t start) const
00516 {
00517
00518
00519 Ssiz_t len;
00520 Ssiz_t begin = Index(r, &len, start);
00521 return TSubString(*this, begin, len);
00522 }
00523
00524
00525 TSubString TString::operator()(TPRegexp& r) const
00526 {
00527
00528
00529 return (*this)(r, 0);
00530 }
00531
00532
00533
00534
00535
00536
00537
00538
00539
00540
00541
00542
00543
00544
00545
00546
00547
00548 ClassImp(TPMERegexp);
00549
00550
00551 TPMERegexp::TPMERegexp() :
00552 TPRegexp(),
00553 fNMaxMatches(10),
00554 fNMatches(0),
00555 fAddressOfLastString(0),
00556 fLastGlobalPosition(0)
00557 {
00558
00559
00560 Compile();
00561 }
00562
00563
00564 TPMERegexp::TPMERegexp(const TString& s, const TString& opts, Int_t nMatchMax) :
00565 TPRegexp(s),
00566 fNMaxMatches(nMatchMax),
00567 fNMatches(0),
00568 fAddressOfLastString(0),
00569 fLastGlobalPosition(0)
00570 {
00571
00572
00573
00574
00575 fPCREOpts = ParseMods(opts);
00576 Compile();
00577 }
00578
00579
00580 TPMERegexp::TPMERegexp(const TString& s, UInt_t opts, Int_t nMatchMax) :
00581 TPRegexp(s),
00582 fNMaxMatches(nMatchMax),
00583 fNMatches(0),
00584 fAddressOfLastString(0),
00585 fLastGlobalPosition(0)
00586 {
00587
00588
00589
00590
00591 fPCREOpts = opts;
00592 Compile();
00593 }
00594
00595
00596 TPMERegexp::TPMERegexp(const TPMERegexp& r) :
00597 TPRegexp(r),
00598 fNMaxMatches(r.fNMaxMatches),
00599 fNMatches(0),
00600 fAddressOfLastString(0),
00601 fLastGlobalPosition(0)
00602 {
00603
00604
00605
00606
00607 Compile();
00608 }
00609
00610
00611 void TPMERegexp::Reset(const TString& s, const TString& opts, Int_t nMatchMax)
00612 {
00613
00614
00615
00616 Reset(s, ParseMods(opts), nMatchMax);
00617 }
00618
00619
00620 void TPMERegexp::Reset(const TString& s, UInt_t opts, Int_t nMatchMax)
00621 {
00622
00623
00624
00625 fPattern = s;
00626 fPCREOpts = opts;
00627 Compile();
00628
00629 if (nMatchMax != -1)
00630 fNMatches = nMatchMax;
00631 fNMatches = 0;
00632 fLastGlobalPosition = 0;
00633 }
00634
00635
00636 void TPMERegexp::AssignGlobalState(const TPMERegexp& re)
00637 {
00638
00639
00640
00641
00642
00643
00644
00645
00646
00647 fLastStringMatched = re.fLastStringMatched;
00648 fLastGlobalPosition = re.fLastGlobalPosition;
00649 }
00650
00651
00652 void TPMERegexp::ResetGlobalState()
00653 {
00654
00655
00656
00657
00658
00659 fLastGlobalPosition = 0;
00660 }
00661
00662
00663 Int_t TPMERegexp::Match(const TString& s, UInt_t start)
00664 {
00665
00666
00667
00668
00669
00670
00671
00672
00673 if (fAddressOfLastString != (void*) &s) {
00674 fLastGlobalPosition = 0;
00675 }
00676
00677 if (fPCREOpts & kPCRE_GLOBAL) {
00678 start += fLastGlobalPosition;
00679 }
00680
00681
00682 fNMatches = MatchInternal(s, start, fNMaxMatches, &fMarkers);
00683
00684
00685
00686 fLastStringMatched = s;
00687 fAddressOfLastString = (void*) &s;
00688
00689 if (fPCREOpts & kPCRE_GLOBAL) {
00690 if (fNMatches == PCRE_ERROR_NOMATCH) {
00691
00692 fLastGlobalPosition = 0;
00693 } else if (fNMatches > 0) {
00694
00695 fLastGlobalPosition = fMarkers[1];
00696 } else {
00697
00698 fLastGlobalPosition = 0;
00699 }
00700 }
00701
00702 return fNMatches;
00703 }
00704
00705
00706 Int_t TPMERegexp::Split(const TString& s, Int_t maxfields)
00707 {
00708
00709
00710
00711
00712
00713
00714
00715
00716
00717
00718
00719
00720
00721
00722
00723
00724
00725
00726
00727
00728
00729
00730 typedef std::pair<int, int> Marker_t;
00731 typedef std::vector<Marker_t> MarkerVec_t;
00732
00733
00734 MarkerVec_t oMarks;
00735
00736
00737
00738
00739
00740
00741 MarkerVec_t oCurrentTrailingEmpties;
00742
00743 Int_t nOffset = 0;
00744 Int_t nMatchesFound = 0;
00745
00746
00747
00748
00749 Int_t matchRes;
00750 while ((matchRes = Match(s, nOffset)) &&
00751 ((maxfields < 1) || nMatchesFound < maxfields)) {
00752 ++nMatchesFound;
00753
00754 if (fMarkers[1] - fMarkers[0] == 0) {
00755 oMarks.push_back(Marker_t(nOffset, nOffset + 1));
00756 ++nOffset;
00757 if (nOffset >= s.Length())
00758 break;
00759 else
00760 continue;
00761 }
00762
00763
00764 if (nOffset != fMarkers[0]) {
00765 if (!oCurrentTrailingEmpties.empty()) {
00766 oMarks.insert(oMarks.end(),
00767 oCurrentTrailingEmpties.begin(),
00768 oCurrentTrailingEmpties.end());
00769 oCurrentTrailingEmpties.clear();
00770 }
00771 oMarks.push_back(Marker_t(nOffset, fMarkers[0]));
00772 } else {
00773
00774 if (maxfields == 0) {
00775
00776 oCurrentTrailingEmpties.push_back(Marker_t(nOffset, nOffset));
00777 } else {
00778 oMarks.push_back(Marker_t(nOffset, nOffset));
00779 }
00780 }
00781
00782 nOffset = fMarkers[1];
00783
00784 if (matchRes > 1) {
00785 for (Int_t i = 1; i < matchRes; ++i)
00786 oMarks.push_back(Marker_t(fMarkers[2*i], fMarkers[2*i + 1]));
00787 }
00788 }
00789
00790
00791
00792 if (nMatchesFound == 0) {
00793 oMarks.push_back(Marker_t(0, s.Length()));
00794 }
00795
00796
00797 else if (maxfields > 0 && nMatchesFound >= maxfields) {
00798 oMarks[oMarks.size() - 1].second = s.Length();
00799 }
00800
00801 else {
00802 Bool_t last_empty = (nOffset == s.Length());
00803 if (!last_empty || maxfields < 0) {
00804 if (!oCurrentTrailingEmpties.empty()) {
00805 oMarks.insert(oMarks.end(),
00806 oCurrentTrailingEmpties.begin(),
00807 oCurrentTrailingEmpties.end());
00808 }
00809 oMarks.push_back(Marker_t(nOffset, s.Length()));
00810 }
00811 }
00812
00813 fNMatches = oMarks.size();
00814 fMarkers.Set(2*fNMatches);
00815 for (Int_t i = 0; i < fNMatches; ++i) {
00816 fMarkers[2*i] = oMarks[i].first;
00817 fMarkers[2*i + 1] = oMarks[i].second;
00818 }
00819
00820
00821 return fNMatches;
00822 }
00823
00824
00825 Int_t TPMERegexp::Substitute(TString& s, const TString& r, Bool_t doDollarSubst)
00826 {
00827
00828
00829
00830
00831
00832
00833
00834
00835
00836
00837
00838
00839 Int_t cnt = SubstituteInternal(s, r, 0, fNMaxMatches, doDollarSubst);
00840
00841 TString ret;
00842 Int_t state = 0;
00843 Ssiz_t pos = 0, len = s.Length();
00844 const Char_t *data = s.Data();
00845 while (pos < len) {
00846 Char_t c = data[pos];
00847 if (c == '\\') {
00848 c = data[pos+1];
00849 switch (c) {
00850 case 0 : ret += '\\'; break;
00851 case 'l': state = 1; break;
00852 case 'u': state = 2; break;
00853 case 'L': state = 3; break;
00854 case 'U': state = 4; break;
00855 case 'E': state = 0; break;
00856 default : ret += '\\'; ret += c; break;
00857 }
00858 pos += 2;
00859 } else {
00860 switch (state) {
00861 case 0: ret += c; break;
00862 case 1: ret += (Char_t) tolower(c); state = 0; break;
00863 case 2: ret += (Char_t) toupper(c); state = 0; break;
00864 case 3: ret += (Char_t) tolower(c); break;
00865 case 4: ret += (Char_t) toupper(c); break;
00866 default: Error("TPMERegexp::Substitute", "invalid state.");
00867 }
00868 ++pos;
00869 }
00870 }
00871
00872 s = ret;
00873
00874 return cnt;
00875 }
00876
00877
00878 TString TPMERegexp::operator[](int index)
00879 {
00880
00881
00882
00883 if (index >= fNMatches)
00884 return "";
00885
00886 Int_t begin = fMarkers[2*index];
00887 Int_t end = fMarkers[2*index + 1];
00888 return fLastStringMatched(begin, end-begin);
00889 }
00890
00891
00892 void TPMERegexp::Print(Option_t* option)
00893 {
00894
00895
00896
00897
00898 TString opt = option;
00899 opt.ToLower();
00900
00901 Printf("Regexp='%s', Opts='%s'", fPattern.Data(), GetModifiers().Data());
00902 if (opt.Contains("all")) {
00903 Printf(" last string='%s'", fLastStringMatched.Data());
00904 Printf(" number of matches = %d", fNMatches);
00905 for (Int_t i=0; i<fNMatches; ++i)
00906 Printf(" %d - %s", i, operator[](i).Data());
00907 }
00908 }
00909
00910
00911
00912
00913
00914
00915
00916
00917
00918
00919
00920
00921
00922
00923
00924
00925
00926
00927
00928
00929
00930 ClassImp(TStringToken)
00931
00932
00933 TStringToken::TStringToken(const TString& fullStr, const TString& splitRe, Bool_t retVoid) :
00934 fFullStr (fullStr),
00935 fSplitRe (splitRe),
00936 fReturnVoid (retVoid),
00937 fPos (0)
00938 {
00939
00940 }
00941
00942
00943 Bool_t TStringToken::NextToken()
00944 {
00945
00946
00947
00948 TArrayI x;
00949 while (fPos < fFullStr.Length()) {
00950 if (fSplitRe.Match(fFullStr, "", fPos, 2, &x)) {
00951 TString::operator=(fFullStr(fPos, x[0] - fPos));
00952 fPos = x[1];
00953 } else {
00954 TString::operator=(fFullStr(fPos, fFullStr.Length() - fPos));
00955 fPos = fFullStr.Length() + 1;
00956 }
00957 if (Length() || fReturnVoid)
00958 return kTRUE;
00959 }
00960
00961
00962
00963 if (fPos == fFullStr.Length() && fReturnVoid) {
00964 TString::operator=("");
00965 fPos = fFullStr.Length() + 1;
00966 return kTRUE;
00967 }
00968
00969 return kFALSE;
00970 }