TUri.cxx

Go to the documentation of this file.
00001 // @(#)root/base:$Id: TUri.cxx 35631 2010-09-23 09:01:41Z rdm $
00002 // Author: Gerhard E. Bruckner 15/07/07
00003 
00004 /*************************************************************************
00005  * Copyright (C) 1995-2007, Rene Brun and Fons Rademakers.               *
00006  * All rights reserved.                                                  *
00007  *                                                                       *
00008  * For the licensing terms see $ROOTSYS/LICENSE.                         *
00009  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
00010  *************************************************************************/
00011 
00012 //////////////////////////////////////////////////////////////////////////
00013 //                                                                      //
00014 // TUri                                                                 //
00015 //                                                                      //
00016 // This class represents a RFC 3986 compatible URI.                     //
00017 // See http://rfc.net/rfc3986.html.                                     //
00018 // It provides member functions to set and return the different         //
00019 // parts of a URI. The functionality is that of                         //
00020 // a validating parser.                                                 //
00021 //                                                                      //
00022 //////////////////////////////////////////////////////////////////////////
00023 
00024 #include <ctype.h>    // for tolower()
00025 #include "TUri.h"
00026 #include "TObjArray.h"
00027 #include "TObjString.h"
00028 #include "TPRegexp.h"
00029 
00030 //RFC3986:
00031 // pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
// RFC 3986 character classes, written as PCRE fragments.
// A literal hyphen is always placed last inside a bracket expression so
// that it can never be interpreted as a range operator.

// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
const char* const kURI_pchar        = "(?:[[:alpha:][:digit:]._~!$&'()*+,;=:@-]|%[0-9A-Fa-f][0-9A-Fa-f])";

// unreserved characters, see chapter 2.3
const char* const kURI_unreserved   = "[[:alpha:][:digit:]._~-]";

// reserved characters, see chapter 2.2
// reserved      = gen-delims / sub-delims
// '[' and ']' are escaped: unescaped, the first ']' would terminate the
// bracket expression and the remainder would be matched as literal text.
const char* const kURI_reserved     = "[:/?#\\[\\]@!$&'()*+,;=]";

// gen-delims, see chapter 2.2
// delimiters of the generic URI components ('[' and ']' escaped as above)
const char* const kURI_gendelims    = "[:/?#\\[\\]@]";

// sub-delims, see chapter 2.2
const char* const kURI_subdelims    = "[!$&'()*+,;=]";
00047 
00048 
00049 ClassImp(TUri)
00050 
00051 //______________________________________________________________________________
00052 TUri::TUri(const TString &uri)
00053 {
00054    // Constructor that calls SetUri with a complete URI.
00055 
00056    SetUri(uri);
00057 }
00058 
00059 //______________________________________________________________________________
00060 TUri::TUri(const char *uri)
00061 {
00062    // Constructor that calls SetUri with a complete URI.
00063 
00064    SetUri(uri);
00065 }
00066 
00067 //______________________________________________________________________________
00068 TUri::TUri(const TUri &uri) : TObject(uri)
00069 {
00070    // TUri copy ctor.
00071 
00072    fScheme = uri.fScheme;
00073    fUserinfo = uri.fUserinfo;
00074    fHost = uri.fHost;
00075    fPort = uri.fPort;
00076    fPath = uri.fPath;
00077    fQuery = uri.fQuery;
00078    fFragment = uri.fFragment;
00079    fHasScheme = uri.fHasScheme;
00080    fHasUserinfo = uri.fHasUserinfo;
00081    fHasHost = uri.fHasHost;
00082    fHasPort = uri.fHasPort;
00083    fHasPath = uri.fHasPath;
00084    fHasQuery = uri.fHasQuery;
00085    fHasFragment = uri.fHasFragment;
00086 }
00087 
00088 //______________________________________________________________________________
00089 TUri &TUri::operator= (const TUri & rhs)
00090 {
00091    // TUri assignment operator.
00092 
00093    if (this != &rhs) {
00094       TObject::operator= (rhs);
00095       fScheme = rhs.fScheme;
00096       fUserinfo = rhs.fUserinfo;
00097       fHost = rhs.fHost;
00098       fPort = rhs.fPort;
00099       fPath = rhs.fPath;
00100       fQuery = rhs.fQuery;
00101       fFragment = rhs.fFragment;
00102       fHasScheme = rhs.fHasScheme;
00103       fHasUserinfo = rhs.fHasUserinfo;
00104       fHasHost = rhs.fHasHost;
00105       fHasPort = rhs.fHasPort;
00106       fHasPath = rhs.fHasPath;
00107       fHasQuery = rhs.fHasQuery;
00108       fHasFragment = rhs.fHasFragment;
00109    }
00110    return *this;
00111 }
00112 
00113 //______________________________________________________________________________
00114 Bool_t operator== (const TUri &u1, const TUri &u2)
00115 {
00116    // Implementation of a TUri Equivalence operator
00117    // that uses syntax-based normalisation
00118    // see chapter 6.2.2.
00119 
00120    // make temporary copies of the operands
00121    TUri u11 = u1;
00122    TUri u22 = u2;
00123    // normalise them
00124    u11.Normalise();
00125    u22.Normalise();
00126    // compare them as TStrings
00127    return u11.GetUri() == u22.GetUri();
00128 }
00129 
00130 //______________________________________________________________________________
00131 const TString TUri::GetUri() const
00132 {
00133    // Returns the whole URI -
00134    // an implementation of chapter 5.3 component recomposition.
00135    // The result URI is composed out of the five basic parts.
00136    //
00137    // URI         = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
00138    // hier-part   = "//" authority path-abempty
00139    //             / path-absolute
00140    //             / path-rootless
00141    //             / path-empty
00142 
00143    TString result = "";
00144    if (fHasScheme)
00145       result = fScheme + ":";
00146    result += GetHierPart();
00147    if (fHasQuery)
00148       result += TString("?") + fQuery;
00149    if (fHasFragment)
00150       result += TString("#") + fFragment;
00151    return result;
00152 }
00153 
00154 //______________________________________________________________________________
00155 const TString TUri::RemoveDotSegments(const TString &inp)
00156 {
00157    // This functions implements the "remove_dot_segments" routine
00158    // of chapter 5.2.4 "for interpreting and removing the
00159    // special '.' and '..' complete path segments from a
00160    // referenced path".
00161 
00162    TString source = inp;
00163    TString sink = TString("");  // sink buffer
00164 
00165    // Step 2 "While the source buffer is not empty, loop as follows:"
00166    while (source.Length() > 0) {
00167       // Rule 2.A
00168       if (TPRegexp("^\\.\\.?/(.*)$").Substitute(source, "/$1") > 0)
00169          continue;
00170 
00171       // Rule 2.B
00172       if (TPRegexp("^/\\./(.*)$|^/\\.($)").Substitute(source, "/$1") > 0)
00173          continue;
00174 
00175       // Rule 2.C
00176       if (TPRegexp("^/\\.\\./(.*)$|^/\\.\\.($)").Substitute(source, "/$1") > 0) {
00177          Ssiz_t last = sink.Last('/');
00178          if (last == -1)
00179             last = 0;
00180          sink.Remove(last, sink.Length() - last);
00181          continue;
00182       }
00183 
00184       // Rule 2.D
00185       if (source.CompareTo(".") == 0 || source.CompareTo("..") == 0) {
00186          source.Remove(0, source.Length() - 11);
00187          continue;
00188       }
00189 
00190       // Rule 2.E
00191       TPRegexp regexp = TPRegexp("^(/?[^/]*)(?:/|$)");
00192       TObjArray *tokens = regexp.MatchS(source);
00193       TString segment = ((TObjString*) tokens->At(1))->GetString();
00194       sink += segment;
00195       source.Remove(0, segment.Length());
00196       delete tokens;
00197    }
00198 
00199    // Step 3: return sink buffer
00200    return sink;
00201 }
00202 
00203 //______________________________________________________________________________
00204 Bool_t TUri::IsAbsolute() const
00205 {
00206    // Returns kTRUE if instance qualifies as absolute-URI
00207    // absolute-URI  = scheme ":" hier-part [ "?" query ]
00208    // cf. Appendix A.
00209 
00210    return (HasScheme() && HasHierPart() && !HasFragment());
00211 }
00212 
00213 //______________________________________________________________________________
00214 Bool_t TUri::IsRelative() const
00215 {
00216    // Returns kTRUE if instance qualifies as relative-ref
00217    // relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
00218    // cf. Appendix A.
00219 
00220    return (!HasScheme() && HasRelativePart());
00221 }
00222 
00223 //______________________________________________________________________________
00224 Bool_t TUri::IsUri() const
00225 {
00226    // Returns kTRUE if instance qualifies as URI
00227    // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
00228    // cf. Appendix A.
00229 
00230    return (HasScheme() && HasHierPart());
00231 }
00232 
00233 //______________________________________________________________________________
00234 Bool_t TUri::IsReference() const
00235 {
00236    // Returns kTRUE if instance qualifies as URI-reference
00237    // URI-reference = URI / relative-ref
00238    // cf. Appendix A.
00239 
00240    return (IsUri() || IsRelative());
00241 }
00242 
00243 //______________________________________________________________________________
00244 Bool_t TUri::SetScheme(const TString &scheme)
00245 {
00246    // Set scheme component of URI:
00247    // scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
00248 
00249    if (!scheme) {
00250       fHasScheme = kFALSE;
00251       return kTRUE;
00252    }
00253    if (IsScheme(scheme)) {
00254       fScheme = scheme;
00255       fHasScheme = kTRUE;
00256       return kTRUE;
00257    } else {
00258       Error("SetScheme", "<scheme> component \"%s\" of URI is not compliant with RFC 3986.", scheme.Data());
00259       return kFALSE;
00260    }
00261 }
00262 
00263 //______________________________________________________________________________
00264 Bool_t TUri::IsScheme(const TString &string)
00265 {
00266    // Returns kTRUE if string qualifies as URI scheme:
00267    // scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
00268 
00269    return TPRegexp(
00270              "^[[:alpha:]][[:alpha:][:digit:]+-.]*$"
00271           ).Match(string);
00272 }
00273 
00274 //______________________________________________________________________________
00275 const TString TUri::GetAuthority() const
00276 {
00277    // Returns the authority part of the instance:
00278    // authority   = [ userinfo "@" ] host [ ":" port ]
00279 
00280    TString authority = fHasUserinfo ? fUserinfo + "@" + fHost : fHost;
00281    if (fHasPort && !fPort.IsNull())
00282       // add port only if not empty
00283       authority += TString(":") + TString(fPort);
00284    return (authority);
00285 }
00286 
00287 //______________________________________________________________________________
00288 Bool_t TUri::SetQuery(const TString &query)
00289 {
00290    // Set query component of URI:
00291    // query       = *( pchar / "/" / "?" )
00292 
00293    if (!query) {
00294       fHasQuery = kFALSE;
00295       return kTRUE;
00296    }
00297    if (IsQuery(query)) {
00298       fQuery = query;
00299       fHasQuery = kTRUE;
00300       return kTRUE;
00301    } else {
00302       Error("SetQuery", "<query> component \"%s\" of URI is not compliant with RFC 3986.", query.Data());
00303       return kFALSE;
00304    }
00305 }
00306 
00307 //______________________________________________________________________________
00308 Bool_t TUri::IsQuery(const TString &string)
00309 {
00310    // Returns kTRUE if string qualifies as URI query:
00311    // query       = *( pchar / "/" / "?" )
00312 
00313    return TPRegexp(
00314              TString("^([/?]|") + kURI_pchar + ")*$"
00315           ).Match(string);
00316 }
00317 
00318 //______________________________________________________________________________
00319 Bool_t TUri::SetAuthority(const TString &authority)
00320 {
00321    // Set authority part of URI:
00322    // authority   = [ userinfo "@" ] host [ ":" port ]
00323    //
00324    // Split into components {userinfo@, host, :port},
00325    // remember that according to the RFC, it is necessary to
00326    // distinguish between missing component (no delimiter)
00327    // and empty component (delimiter present).
00328 
00329    if (authority.IsNull()) {
00330       fHasUserinfo = kFALSE;
00331       fHasHost = kFALSE;
00332       fHasPort = kFALSE;
00333       return kTRUE;
00334    }
00335    TPRegexp regexp = TPRegexp("^(?:(.*@))?([^:]*)((?::.*)?)$");
00336    TObjArray *tokens = regexp.MatchS(authority);
00337 
00338    if (tokens->GetEntries() != 4) {
00339       Error("SetAuthority", "<authority> component \"%s\" of URI is not compliant with RFC 3986.", authority.Data());
00340       return kFALSE;
00341    }
00342 
00343    Bool_t valid = kTRUE;
00344 
00345    // handle userinfo
00346    TString userinfo = ((TObjString*) tokens->At(1))->GetString();
00347    if (userinfo.EndsWith("@")) {
00348       userinfo.Remove(TString::kTrailing, '@');
00349       valid &= SetUserInfo(userinfo);
00350    }
00351 
00352    // handle host
00353    TString host = ((TObjString*) tokens->At(2))->GetString();
00354    valid &= SetHost(host);
00355 
00356    // handle port
00357    TString port = ((TObjString*) tokens->At(3))->GetString();
00358    if (port.BeginsWith(":")) {
00359       port.Remove(TString::kLeading, ':');
00360       valid &= SetPort(port);
00361    }
00362 
00363    return valid;
00364 }
00365 
00366 //______________________________________________________________________________
00367 Bool_t TUri::IsAuthority(const TString &string)
00368 {
00369    // Returns kTRUE if string qualifies as valid URI authority:
00370    // authority   = [ userinfo "@" ] host [ ":" port ]
00371 
00372    // split into parts {userinfo, host, port}
00373    TPRegexp regexp = TPRegexp("^(?:(.*)@)?([^:]*)(?::(.*))?$");
00374    TObjArray *tokens = regexp.MatchS(string);
00375    TString userinfo = ((TObjString*) tokens->At(1))->GetString();
00376    TString host = ((TObjString*) tokens->At(2))->GetString();
00377    TString port;
00378    // port is optional
00379    if (tokens->GetEntries() == 4)
00380       port = ((TObjString*) tokens->At(3))->GetString();
00381    else
00382       port = "";
00383    return (IsHost(host) && IsUserInfo(userinfo) && IsPort(port));
00384 }
00385 
00386 //______________________________________________________________________________
00387 Bool_t TUri::SetUserInfo(const TString &userinfo)
00388 {
00389    // Set userinfo component of URI:
00390    // userinfo    = *( unreserved / pct-encoded / sub-delims / ":" )
00391 
00392    if (userinfo.IsNull()) {
00393       fHasUserinfo = kFALSE;
00394       return kTRUE;
00395    }
00396    if (IsUserInfo(userinfo)) {
00397       fUserinfo = userinfo;
00398       fHasUserinfo = kTRUE;
00399       return kTRUE;
00400    } else {
00401       Error("SetUserInfo", "<userinfo> component \"%s\" of URI is not compliant with RFC 3986.", userinfo.Data());
00402       return kFALSE;
00403    }
00404 }
00405 
00406 //______________________________________________________________________________
00407 Bool_t TUri::IsUserInfo(const TString &string)
00408 {
00409    // Return kTRUE is string qualifies as valid URI userinfo:
00410    // userinfo    = *( unreserved / pct-encoded / sub-delims / ":" )
00411    // this equals to pchar without the '@' character
00412 
00413    return (TPRegexp(
00414               "^" + TString(kURI_pchar) + "*$"
00415            ).Match(string) > 0 && !TString(string).Contains("@"));
00416 }
00417 
00418 //______________________________________________________________________________
00419 Bool_t TUri::SetHost(const TString &host)
00420 {
00421    // Set host component of URI:
00422    // RFC 3986:    host = IP-literal / IPv4address / reg-name
00423    // implemented: host =  IPv4address / reg-name
00424 
00425    if (IsHost(host)) {
00426       fHost = host;
00427       fHasHost = kTRUE;
00428       return kTRUE;
00429    } else {
00430       Error("SetHost", "<host> component \"%s\" of URI is not compliant with RFC 3986.", host.Data());
00431       return kFALSE;
00432    }
00433 }
00434 
00435 //______________________________________________________________________________
00436 Bool_t TUri::SetPort(const TString &port)
00437 {
00438    // Set port component of URI:
00439    // port        = *DIGIT
00440 
00441    if (IsPort(port)) {
00442       fPort = port;
00443       fHasPort = kTRUE;
00444       return kTRUE;
00445    }
00446    Error("SetPort", "<port> component \"%s\" of URI is not compliant with RFC 3986.", port.Data());
00447    return kFALSE;
00448 }
00449 
00450 //______________________________________________________________________________
00451 Bool_t TUri::SetPath(const TString &path)
00452 {
00453    // Set path component of URI:
00454    // path          = path-abempty    ; begins with "/" or is empty
00455    //               / path-absolute   ; begins with "/" but not "//"
00456    //               / path-noscheme   ; begins with a non-colon segment
00457    //               / path-rootless   ; begins with a segment
00458    //               / path-empty      ; zero characters
00459 
00460    if (IsPath(path)) {
00461       fPath = path;
00462       fHasPath = kTRUE;
00463       return kTRUE;
00464    }
00465    Error("SetPath", "<path> component \"%s\" of URI is not compliant with RFC 3986.", path.Data());
00466    return kFALSE;
00467 }
00468 
00469 //______________________________________________________________________________
00470 Bool_t TUri::SetFragment(const TString &fragment)
00471 {
00472    // Set fragment component of URI:
00473    // fragment    = *( pchar / "/" / "?" )
00474 
00475    if (IsFragment(fragment)) {
00476       fFragment = fragment;
00477       fHasFragment = kTRUE;
00478       return kTRUE;
00479    } else {
00480       Error("SetFragment", "<fragment> component \"%s\" of URI is not compliant with RFC 3986.", fragment.Data());
00481       return kFALSE;
00482    }
00483 }
00484 
00485 //______________________________________________________________________________
00486 
00487 Bool_t TUri::IsFragment(const TString &string)
00488 {
00489    // Returns kTRUE if string qualifies as valid fragment component
00490    // fragment    = *( pchar / "/" / "?" )
00491 
00492    return (TPRegexp(
00493               "^(" + TString(kURI_pchar) + "|[/?])*$"
00494            ).Match(string) > 0);
00495 }
00496 
00497 //______________________________________________________________________________
00498 void TUri::Print(Option_t *option) const
00499 {
00500    // Display function,
00501    // option "d" .. debug output
00502    // anything else .. simply print URI.
00503 
00504    if (strcmp(option, "d") != 0) {
00505       Printf("%s", GetUri().Data());
00506       return ;
00507    }
00508    // debug output
00509    Printf("URI: <%s>", GetUri().Data());
00510    Printf("(%c) |--scheme---------<%s>", fHasScheme ? 't' : 'f', fScheme.Data());
00511    Printf("    |--hier-----------<%s>", GetHierPart().Data());
00512    Printf("(%c)     |--authority------<%s>", HasAuthority() ? 't' : 'f', GetAuthority().Data());
00513    Printf("(%c)         |--userinfo---<%s>", fHasUserinfo ? 't' : 'f', fUserinfo.Data());
00514    Printf("(%c)         |--host-------<%s>", fHasHost ? 't' : 'f', fHost.Data());
00515    Printf("(%c)         |--port-------<%s>", fHasPort ? 't' : 'f', fPort.Data());
00516    Printf("(%c)     |--path-------<%s>", fHasPath ? 't' : 'f', fPath.Data());
00517    Printf("(%c) |--query------<%s>", fHasQuery ? 't' : 'f', fQuery.Data());
00518    Printf("(%c) |--fragment---<%s>", fHasFragment ? 't' : 'f', fFragment.Data());
00519    printf("path flags: ");
00520    if (IsPathAbempty(fPath))
00521       printf("abempty ");
00522    if (IsPathAbsolute(fPath))
00523       printf("absolute ");
00524    if (IsPathRootless(fPath))
00525       printf("rootless ");
00526    if (IsPathEmpty(fPath))
00527       printf("empty ");
00528    printf("\nURI flags: ");
00529    if (IsAbsolute())
00530       printf("absolute-URI ");
00531    if (IsRelative())
00532       printf("relative-ref ");
00533    if (IsUri())
00534       printf("URI ");
00535    if (IsReference())
00536       printf("URI-reference ");
00537    printf("\n");
00538 }
00539 
00540 //______________________________________________________________________________
00541 void TUri::Reset()
00542 {
00543    // Initialize this URI object.
00544    // Set all TString members to empty string,
00545    // set all Bool_t members to kFALSE.
00546 
00547    fScheme = "";
00548    fUserinfo = "";
00549    fHost = "";
00550    fPort = "";
00551    fPath = "";
00552    fQuery = "";
00553    fFragment = "";
00554 
00555    fHasScheme = kFALSE;
00556    fHasUserinfo = kFALSE;
00557    fHasHost = kFALSE;
00558    fHasPort = kFALSE;
00559    fHasPath = kFALSE;
00560    fHasQuery = kFALSE;
00561    fHasFragment = kFALSE;
00562 }
00563 
00564 //______________________________________________________________________________
00565 Bool_t TUri::SetUri(const TString &uri)
00566 {
00567    // Parse URI and set the member variables accordingly,
00568    // returns kTRUE if URI validates, and kFALSE otherwise:
00569    // URI         = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
00570    // hier-part   = "//" authority path-abempty
00571    //             / path-absolute
00572    //             / path-rootless
00573    //             / path-empty
00574    //
00575 
00576    // Reset member variables
00577    Reset();
00578 
00579    // regular expression taken from appendix B
00580    // reference points          12            3  4          5       6   7        8 9
00581    TPRegexp regexp = TPRegexp("^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)([?]([^#]*))?(#(.*))?");
00582    TObjArray *tokens = regexp.MatchS(uri);
00583 
00584    // collect bool values to see if all setters succeed
00585    Bool_t valid = kTRUE;
00586    //tokens->Print();
00587    switch (tokens->GetEntries()) {
00588       case 10:
00589          // URI contains fragment delimiter '#'
00590          valid &= SetFragment(((TObjString*) tokens->At(9))->GetString());
00591          // fallthrough
00592 
00593       case 8:
00594          // URI does not contain a fragment delimiter
00595          // if there is a query delimiter '?', set query
00596          if (!((TString)((TObjString*) tokens->At(6))->GetString()).IsNull())
00597             valid &= SetQuery(((TObjString*) tokens->At(7))->GetString());
00598          // fallthrough
00599 
00600       case 6:
00601          // URI does not contain fragment or query delimiters
00602          valid &= SetPath(((TObjString*) tokens->At(5))->GetString());
00603          // if there is an authority delimiter '//', set authority
00604          if (!((TString)((TObjString*) tokens->At(3))->GetString()).IsNull())
00605             valid &= SetAuthority(((TObjString*) tokens->At(4))->GetString());
00606          // if there is a scheme delimiter ':', set scheme
00607          if (!((TString)((TObjString*) tokens->At(1))->GetString()).IsNull())
00608             valid &= SetScheme(((TObjString*) tokens->At(2))->GetString());
00609          break;
00610 
00611       default:
00612          // regular expression did not match
00613          Error("SetUri", "URI \"%s\" is not is not compliant with RFC 3986.", uri.Data());
00614          valid = kFALSE;
00615    }
00616 
00617    // reset member variables once again, if one at least setter failed
00618    if (!valid)
00619       Reset();
00620 
00621    delete tokens;
00622    return valid;
00623 }
00624 
00625 //______________________________________________________________________________
00626 const TString TUri::GetHierPart() const
00627 {
00628    // hier-part   = "//" authority path-abempty
00629    //             / path-absolute
00630    //             / path-rootless
00631    //             / path-empty
00632 
00633    if (HasAuthority() && IsPathAbempty(fPath))
00634       return (TString("//") + GetAuthority() + fPath);
00635    else
00636       return fPath;
00637 }
00638 
00639 //______________________________________________________________________________
00640 const TString TUri::GetRelativePart() const
00641 {
00642    // relative-part = "//" authority path-abempty
00643    //               / path-absolute
00644    //               / path-noscheme
00645    //               / path-empty
00646 
00647    if (HasAuthority() && IsPathAbempty(fPath))
00648       return (TString("//") + GetAuthority() + fPath);
00649    else
00650       return fPath;
00651 }
00652 
00653 //______________________________________________________________________________
00654 Bool_t TUri::SetHierPart(const TString &hier)
00655 {
00656    // returns hier-part component of URI
00657    // hier-part   = "//" authority path-abempty
00658    //             / path-absolute
00659    //             / path-rootless
00660    //             / path-empty
00661    //
00662 
00663    /*  if ( IsPathAbsolute(hier) || IsPathRootless(hier) || IsPathEmpty(hier) ) {
00664      SetPath (hier);
00665      return kTRUE;
00666     }
00667     */
00668 
00669    // reference points:         1  2          3
00670    TPRegexp regexp = TPRegexp("^(//([^/?#]*))?([^?#]*)$");
00671    TObjArray *tokens = regexp.MatchS(hier);
00672 
00673    if (tokens->GetEntries() == 0) {
00674       Error("SetHierPart", "<hier-part> component \"%s\" of URI is not compliant with RFC 3986.", hier.Data());
00675       delete tokens;
00676       return false;
00677    }
00678 
00679    TString delm = ((TObjString*) tokens->At(1))->GetString();
00680    TString auth = ((TObjString*) tokens->At(2))->GetString();
00681    TString path = ((TObjString*) tokens->At(3))->GetString();
00682 
00683    Bool_t valid = kTRUE;
00684 
00685    if (!delm.IsNull() && IsPathAbempty(path)) {
00686       // URI contains an authority delimiter '//' ...
00687       valid &= SetAuthority(auth);
00688       valid &= SetPath(path);
00689    } else {
00690       // URI does not contain an authority
00691       if (IsPathAbsolute(path) || IsPathRootless(path) || IsPathEmpty(path))
00692          valid &= SetPath(path);
00693       else {
00694          valid = kFALSE;
00695          Error("SetHierPart", "<hier-part> component \"%s\" of URI is not compliant with RFC 3986.", hier.Data());
00696       }
00697    }
00698    delete tokens;
00699    return valid;
00700 }
00701 
00702 //______________________________________________________________________________
00703 Bool_t TUri::IsHierPart(const TString &string)
00704 {
00705    // Returns kTRUE if string qualifies as hier-part:
00706    //
00707    // hier-part   = "//" authority path-abempty
00708    //             / path-absolute
00709    //             / path-rootless
00710    //             / path-empty
00711 
00712    // use functionality of SetHierPart
00713    // in order to avoid duplicate code
00714    TUri uri;
00715    return (uri.SetHierPart(string));
00716 }
00717 
00718 //______________________________________________________________________________
00719 Bool_t TUri::IsRelativePart(const TString &string)
00720 {
00721    // Returns kTRUE is string qualifies as relative-part:
00722    // relative-part = "//" authority path-abempty
00723    //               / path-absolute
00724    //               / path-noscheme
00725    //               / path-empty
00726 
00727    // use functionality of SetRelativePart
00728    // in order to avoid duplicate code
00729    TUri uri;
00730    return (uri.SetRelativePart(string));
00731 }
00732 
00733 //______________________________________________________________________________
00734 Bool_t TUri::SetRelativePart(const TString &relative)
00735 {
00736    // Returns kTRUE is string qualifies as relative-part:
00737    // relative-part = "//" authority path-abempty
00738    //               / path-absolute
00739    //               / path-noscheme
00740    //               / path-empty
00741 
00742    // reference points:         1  2          3
00743    TPRegexp regexp = TPRegexp("^(//([^/?#]*))?([^?#]*)$");
00744    TObjArray *tokens = regexp.MatchS(relative);
00745 
00746    if (tokens->GetEntries() == 0) {
00747       Error("SetRelativePath", "<relative-part> component \"%s\" of URI is not compliant with RFC 3986.", relative.Data());
00748       delete tokens;
00749       return false;
00750    }
00751    TString delm = ((TObjString*) tokens->At(1))->GetString();
00752    TString auth = ((TObjString*) tokens->At(2))->GetString();
00753    TString path = ((TObjString*) tokens->At(3))->GetString();
00754 
00755    Bool_t valid = kTRUE;
00756 
00757    if (!delm.IsNull() && IsPathAbempty(path)) {
00758       // URI contains an authority delimiter '//' ...
00759       valid &= SetAuthority(auth);
00760       valid &= SetPath(path);
00761    } else {
00762       // URI does not contain an authority
00763       if (IsPathAbsolute(path) || IsPathNoscheme(path) || IsPathEmpty(path))
00764          valid &= SetPath(path);
00765       else {
00766          valid = kFALSE;
00767          Error("SetRelativePath", "<relative-part> component \"%s\" of URI is not compliant with RFC 3986.", relative.Data());
00768       }
00769    }
00770    delete tokens;
00771    return valid;
00772 }
00773 
00774 //______________________________________________________________________________
00775 const TString TUri::PctEncode(const TString &source)
00776 {
00777    // Percent-encode and return the given string according to RFC 3986
00778    // in principle, this function cannot fail or produce an error.
00779 
00780    TString sink = "";
00781    // iterate through source
00782    for (Int_t i = 0; i < source.Length(); i++) {
00783       if (IsUnreserved(TString(source(i)))) {
00784          // unreserved character -> copy
00785          sink = sink + source[i];
00786       } else {
00787          // reserved character -> encode to 2 digit hex
00788          // preceded by '%'
00789          char buffer[4];
00790          sprintf(buffer, "%%%02X", source[i]);
00791          sink = sink + buffer;
00792       }
00793    }
00794    return sink;
00795 }
00796 
00797 //______________________________________________________________________________
00798 Bool_t TUri::IsHost(const TString &string)
00799 {
00800    // Returns kTRUE if string qualifies as valid host component:
00801    // host = IP-literal / IPv4address / reg-name
00802    // implemented: host =  IPv4address / reg-name
00803 
00804    return (IsRegName(string) || IsIpv4(string));
00805 }
00806 
00807 //______________________________________________________________________________
00808 Bool_t TUri::IsPath(const TString &string)
00809 {
00810    // Retruns kTRUE if string qualifies as valid path component:
00811    // path          = path-abempty    ; begins with "/" or is empty
00812    //               / path-absolute   ; begins with "/" but not "//"
00813    //               / path-noscheme   ; begins with a non-colon segment
00814    //               / path-rootless   ; begins with a segment
00815    //               / path-empty      ; zero characters
00816 
00817    return (IsPathAbempty(string) ||
00818            IsPathAbsolute(string) ||
00819            IsPathNoscheme(string) ||
00820            IsPathRootless(string) ||
00821            IsPathEmpty(string));
00822 }
00823 
00824 //______________________________________________________________________________
00825 Bool_t TUri::IsPathAbempty(const TString &string)
00826 {
00827    // Returns kTRUE if string qualifies as valid path-abempty component:
00828    //    path-abempty  = *( "/" segment )
00829    //    segment       = *pchar
00830 
00831    return (TPRegexp(
00832               TString("^(/") + TString(kURI_pchar) + "*)*$"
00833            ).Match(string) > 0);
00834 }
00835 
00836 //______________________________________________________________________________
00837 Bool_t TUri::IsPathAbsolute(const TString &string)
00838 {
00839    // Returns kTRUE if string qualifies as valid path-absolute component
00840    //    path-absolute = "/" [ segment-nz *( "/" segment ) ]
00841    //    segment-nz    = 1*pchar
00842    //    segment       = *pchar
00843 
00844    return (TPRegexp(
00845               TString("^/(") + TString(kURI_pchar) + "+(/" + TString(kURI_pchar) + "*)*)?$"
00846            ).Match(string) > 0);
00847 }
00848 
00849 //______________________________________________________________________________
00850 Bool_t TUri::IsPathNoscheme(const TString &string)
00851 {
00852    // Returns kTRUE if string qualifies as valid path-noscheme component:
00853    // path-noscheme = segment-nz-nc *( "/" segment )
00854    // segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
00855    // segment       = *pchar
00856 
00857    return (TPRegexp(
00858               TString("^(([[:alpha:][:digit:]-._~!$&'()*+,;=@]|%[0-9A-Fa-f][0-9A-Fa-f])+)(/") + TString(kURI_pchar) + "*)*$"
00859            ).Match(string) > 0);
00860 }
00861 
00862 //______________________________________________________________________________
00863 Bool_t TUri::IsPathRootless(const TString &string)
00864 {
00865    // Returns kTRUE if string qualifies as valid path-rootless component:
00866    // path-rootless = segment-nz *( "/" segment )
00867 
00868    return TPRegexp(
00869              TString("^") + TString(kURI_pchar) + "+(/" + TString(kURI_pchar) + "*)*$"
00870           ).Match(string);
00871 }
00872 
00873 //______________________________________________________________________________
00874 Bool_t TUri::IsPathEmpty(const TString &string)
00875 {
00876    // Returns kTRUE if string qualifies as valid path-empty component:
00877    // path-empty    = 0<pchar>
00878    return TString(string).IsNull();
00879 }
00880 
00881 //______________________________________________________________________________
00882 Bool_t TUri::IsPort(const TString &string)
00883 {
00884    // Returns kTRUE if string qualifies as valid port component:
00885    // RFC 3986: port        = *DIGIT
00886 
00887    return (TPRegexp("^[[:digit:]]*$").Match(string) > 0);
00888 }
00889 
00890 //______________________________________________________________________________
00891 Bool_t TUri::IsRegName(const TString &string)
00892 {
00893    // Returns kTRUE if string qualifies as valid reg-name:
00894    //
00895    //  reg-name    = *( unreserved / pct-encoded / sub-delims )
00896    //  sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
00897    //                  / "*" / "+" / "," / ";" / "="
00898    //
00899 
00900    return (TPRegexp(
00901               "^([[:alpha:][:digit:]-._~!$&'()*+,;=]|%[0-9A-Fa-f][0-9A-Fa-f])*$").Match(string) > 0);
00902 }
00903 
00904 //______________________________________________________________________________
00905 Bool_t TUri::IsIpv4(const TString &string)
00906 {
00907    // Returns kTRUE, if string holds a valid IPv4 address
00908    // currently only decimal variant supported.
00909    // Existence of leadig 0s or numeric range remains unchecked
00910    // IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet.
00911    //
00912    return (TPRegexp(
00913               "^([[:digit:]]{1,3}[.]){3}[[:digit:]]{1,3}$").Match(string) > 0);
00914 }
00915 
00916 //______________________________________________________________________________
00917 Bool_t TUri::IsUnreserved(const TString &string)
00918 {
00919    // Returns kTRUE, if the given string does not contain
00920    // RFC 3986 reserved characters
00921    // unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
00922 
00923    return (TPRegexp(
00924               "^" + TString(kURI_unreserved) + "*$").Match(string) > 0);
00925 }
00926 
00927 //______________________________________________________________________________
00928 void TUri::Normalise()
00929 {
00930    // Syntax based normalisation according to
00931    // RFC chapter 6.2.2.
00932 
00933    // case normalisation of host and scheme
00934    // cf. chapter 6.2.2.1
00935    fScheme.ToLower();
00936    if (fHasHost) {
00937       TString host = GetHost();
00938       host.ToLower();
00939       SetHost(host);
00940    }
00941    // percent-encoding normalisation (6.2.2.2) for
00942    // userinfo, host (reg-name), path, query, fragment
00943    fUserinfo = PctNormalise(PctDecodeUnreserved(fUserinfo));
00944    fHost = PctNormalise(PctDecodeUnreserved(fHost));
00945    fPath = PctNormalise(PctDecodeUnreserved(fPath));
00946    fQuery = PctNormalise(PctDecodeUnreserved(fQuery));
00947    fFragment = PctNormalise(PctDecodeUnreserved(fFragment));
00948 
00949    // path segment normalisation (6.2.2.3)
00950    if (fHasPath)
00951       SetPath(RemoveDotSegments(GetPath()));
00952 }
00953 
00954 //______________________________________________________________________________
00955 TString const TUri::PctDecodeUnreserved(const TString &source)
00956 {
00957    // Percent-decode the given string according to chapter 2.1
00958    // we assume a valid pct-encoded string.
00959 
00960    TString sink = "";
00961    Int_t i = 0;
00962    while (i < source.Length()) {
00963       if (source[i] == '%') {
00964          if (source.Length() < i+2) {
00965             // abort if out of bounds
00966             return sink;
00967          }
00968          // two hex digits follow -> decode to ASCII
00969          // upper nibble, bits 4-7
00970          char c1 = tolower(source[i + 1]) - '0';
00971          if (c1 > 9) // a-f
00972             c1 -= 39;
00973          // lower nibble, bits 0-3
00974          char c0 = tolower(source[i + 2]) - '0';
00975          if (c0 > 9) // a-f
00976             c0 -= 39;
00977          char decoded = c1 << 4 | c0;
00978          if (TPRegexp(kURI_unreserved).Match(decoded) > 0) {
00979             // we have an unreserved character -> store decoded version
00980             sink = sink + decoded;
00981          } else {
00982             // this is a reserved character
00983             TString pct = source(i,3);
00984             pct.ToUpper();
00985             sink = sink + pct;
00986          }
00987          // advance 2 characters
00988          i += 2;
00989       } else {
00990          // regular character -> copy
00991          sink = sink + source[i];
00992       }
00993       i++;
00994    }
00995    return sink;
00996 }
00997 
00998 //______________________________________________________________________________
00999 TString const TUri::PctNormalise(const TString &source)
01000 {
01001    // Normalise the percent-encoded parts of the string
01002    // i.e. uppercase the hexadecimal digits
01003    // %[:alpha:][:alpha:] -> %[:ALPHA:][:ALPHA:]
01004 
01005    TString sink = "";
01006    Int_t i = 0;
01007    while (i < source.Length()) {
01008       if (source[i] == '%') {
01009          if (source.Length() < i+2) {
01010             // abort if out of bounds
01011             return sink;
01012          }
01013          TString pct = source(i,3);
01014          // uppercase the pct part
01015          pct.ToUpper();
01016          sink = sink + pct;
01017          // advance 2 characters
01018          i += 2;
01019       } else {
01020          // regular character -> copy
01021          sink = sink + source[i];
01022       }
01023       i++;
01024    }
01025    return sink;
01026 }
01027 
01028 //______________________________________________________________________________
01029 TString const TUri::PctDecode(const TString &source)
01030 {
01031    // Percent-decode the given string according to chapter 2.1
01032    // we assume a valid pct-encoded string.
01033 
01034    TString sink = "";
01035    Int_t i = 0;
01036    while (i < source.Length()) {
01037       if (source[i] == '%') {
01038          if (source.Length() < i+2) {
01039             // abort if out of bounds
01040             return sink;
01041          }
01042          // two hex digits follow -> decode to ASCII
01043          // upper nibble, bits 4-7
01044          char c1 = tolower(source[i + 1]) - '0';
01045          if (c1 > 9) // a-f
01046             c1 -= 39;
01047          // lower nibble, bits 0-3
01048          char c0 = tolower(source[i + 2]) - '0';
01049          if (c0 > 9) // a-f
01050             c0 -= 39;
01051          sink = sink + (char)(c1 << 4 | c0);
01052          // advance 2 characters
01053          i += 2;
01054       } else {
01055          // regular character -> copy
01056          sink = sink + source[i];
01057       }
01058       i++;
01059    }
01060    return sink;
01061 }
01062 
01063 //______________________________________________________________________________
01064 TUri TUri::Transform(const TUri &reference, const TUri &base)
01065 {
01066    // Transform a URI reference into its target URI using
01067    // given a base URI.
01068    // This is an implementation of the pseudocode in chapter 5.2.2.
01069 
01070    TUri target;
01071    if (reference.HasScheme()) {
01072       target.SetScheme(reference.GetScheme());
01073       if (reference.HasAuthority())
01074          target.SetAuthority(reference.GetAuthority());
01075       if (reference.HasPath())
01076          target.SetPath(RemoveDotSegments(reference.GetPath()));
01077       if (reference.HasQuery())
01078          target.SetQuery(reference.GetQuery());
01079    } else {
01080       if (reference.HasAuthority()) {
01081          target.SetAuthority(reference.GetAuthority());
01082          if (reference.HasPath())
01083             target.SetPath(RemoveDotSegments(reference.GetPath()));
01084          if (reference.HasQuery())
01085             target.SetQuery(reference.GetQuery());
01086       } else {
01087          if (reference.GetPath().IsNull()) {
01088             target.SetPath(base.GetPath());
01089             if (reference.HasQuery()) {
01090                target.SetQuery(reference.GetQuery());
01091             } else {
01092                if (base.HasQuery())
01093                   target.SetQuery(base.GetQuery());
01094             }
01095          } else {
01096             if (reference.GetPath().BeginsWith("/")) {
01097                target.SetPath(RemoveDotSegments(reference.GetPath()));
01098             } else {
01099                target.SetPath(RemoveDotSegments(MergePaths(reference, base)));
01100             }
01101             if (reference.HasQuery())
01102                target.SetQuery(reference.GetQuery());
01103          }
01104          if (base.HasAuthority())
01105             target.SetAuthority(base.GetAuthority());
01106       }
01107       if (base.HasScheme())
01108          target.SetScheme(base.GetScheme());
01109    }
01110    if (reference.HasFragment())
01111       target.SetFragment(reference.GetFragment());
01112    return target;
01113 }
01114 
01115 //______________________________________________________________________________
01116 const TString TUri::MergePaths(const TUri &reference, const TUri &base)
01117 {
01118    // RFC 3986, 5.3.2.
01119    // If the base URI has a defined authority component and an empty
01120    // path, then return a string consisting of "/" concatenated with the
01121    // reference's path; otherwise,
01122    // return a string consisting of the reference's path component
01123    // appended to all but the last segment of the base URI's path (i.e.,
01124    // excluding any characters after the right-most "/" in the base URI
01125    // path, or excluding the entire base URI path if it does not contain
01126    // any "/" characters).
01127 
01128    TString result = "";
01129    if (base.HasAuthority() && base.GetPath().IsNull()) {
01130       result = TString("/") + reference.GetPath();
01131    } else {
01132       TString basepath = base.GetPath();
01133       Ssiz_t last = basepath.Last('/');
01134       if (last == -1)
01135          result = reference.GetPath();
01136       else
01137          result = basepath(0, last + 1) + reference.GetPath();
01138    }
01139    return result;
01140 }

Generated on Tue Jul 5 14:11:24 2011 for ROOT_528-00b_version by  doxygen 1.5.1