TWebFile.cxx

Go to the documentation of this file.
00001 // @(#)root/net:$Id: TWebFile.cxx 35726 2010-09-24 20:24:16Z rdm $
00002 // Author: Fons Rademakers   17/01/97
00003 
00004 /*************************************************************************
00005  * Copyright (C) 1995-2000, Rene Brun and Fons Rademakers.               *
00006  * All rights reserved.                                                  *
00007  *                                                                       *
00008  * For the licensing terms see $ROOTSYS/LICENSE.                         *
00009  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
00010  *************************************************************************/
00011 
00012 //////////////////////////////////////////////////////////////////////////
00013 //                                                                      //
00014 // TWebFile                                                             //
00015 //                                                                      //
00016 // A TWebFile is like a normal TFile except that it reads its data      //
00017 // via a standard apache web server. A TWebFile is a read-only file.    //
00018 //                                                                      //
00019 //////////////////////////////////////////////////////////////////////////
00020 
00021 #include "TWebFile.h"
00022 #include "TROOT.h"
00023 #include "TSocket.h"
00024 #include "Bytes.h"
00025 #include "TError.h"
00026 #include "TSystem.h"
00027 #include "TBase64.h"
00028 #include "TVirtualPerfStats.h"
00029 
00030 #include <errno.h>
00031 #include <stdlib.h>
00032 
00033 #ifdef WIN32
00034 #define EADDRINUSE  10048
00035 #define EISCONN     10056
00036 #endif
00037 
// HTTP "User-Agent" header line, without the terminating CRLF. It is appended
// to the GET request built in SetMsgReadBuffer10() and to the HEAD request
// built in GetHead(), each of which adds the "\r\n" itself.
00038 static const char *gUserAgent = "User-Agent: ROOT-TWebFile/1.1";
00039 
// Definition of the static proxy URL. CheckProxy() copies it into fProxy when
// it is valid, in which case the "http_proxy" shell variable is not consulted.
00040 TUrl TWebFile::fgProxy;
00041 
00042 
00043 // Internal class used to manage the socket that may stay open between
00044 // calls when HTTP/1.1 protocol is used
00045 class TWebSocket {
00046 private:
00047    TWebFile *fWebFile;           // associated web file
00048 public:
00049    TWebSocket(TWebFile *f);
00050    ~TWebSocket();
00051    void ReOpen();
00052 };
00053 
00054 //______________________________________________________________________________
00055 TWebSocket::TWebSocket(TWebFile *f)
00056 {
00057    // Open web file socket.
00058 
00059    fWebFile = f;
00060    if (!f->fSocket)
00061       ReOpen();
00062 }
00063 
00064 //______________________________________________________________________________
00065 TWebSocket::~TWebSocket()
00066 {
00067    // Close socket in case not HTTP/1.1 protocol or when explicitly requested.
00068 
00069    if (!fWebFile->fHTTP11) {
00070       delete fWebFile->fSocket;
00071       fWebFile->fSocket = 0;
00072    }
00073 }
00074 
00075 //______________________________________________________________________________
00076 void TWebSocket::ReOpen()
00077 {
00078    // Re-open web file socket.
00079 
00080    if (fWebFile->fSocket)
00081       delete fWebFile->fSocket;
00082 
00083    TUrl connurl;
00084    if (fWebFile->fProxy.IsValid())
00085       connurl = fWebFile->fProxy;
00086    else
00087       connurl = fWebFile->fUrl;
00088 
00089    for (Int_t i = 0; i < 5; i++) {
00090       fWebFile->fSocket = new TSocket(connurl.GetHost(), connurl.GetPort());
00091       if (!fWebFile->fSocket->IsValid()) {
00092          delete fWebFile->fSocket;
00093          fWebFile->fSocket = 0;
00094          if (gSystem->GetErrno() == EADDRINUSE || gSystem->GetErrno() == EISCONN) {
00095             gSystem->Sleep(i*10);
00096          } else {
00097             ::Error("TWebSocket::ReOpen", "cannot connect to host %s (errno=%d)",
00098                     fWebFile->fUrl.GetHost(), gSystem->GetErrno());
00099             return;
00100          }
00101       } else
00102          return;
00103    }
00104 }
00105 
00106 
// NOTE(review): presumably the ROOT macro that provides the class
// implementation/dictionary hooks for TWebFile -- confirm against Rtypes.h.
00107 ClassImp(TWebFile)
00108 
00109 //______________________________________________________________________________
00110 TWebFile::TWebFile(const char *url, Option_t *opt) : TFile(url, "WEB")
00111 {
00112    // Create a Web file object. A web file is the same as a read-only
00113    // TFile except that it is being read via a HTTP server. The url
00114    // argument must be of the form: http://host.dom.ain/file.root.
00115    // The opt can be "NOPROXY", to bypass any set "http_proxy" shell
00116    // variable. The proxy can be specified as (in sh, or equivalent csh):
00117    //   export http_proxy=http://pcsalo.cern.ch:3128
00118    // The proxy can also be specified via the static method TWebFile::SetProxy().
00119    // Basic authentication (AuthType Basic) is supported. The user name and
00120    // passwd can be specified in the url like this:
00121    //   http://username:mypasswd@pcsalo.cern.ch/files/aap.root
00122    // If the file specified in the URL does not exist or is not accessible
00123    // the kZombie bit will be set in the TWebFile object. Use IsZombie()
00124    // to see if the file is accessible. The preferred interface to this
00125    // constructor is via TFile::Open().
00126 
00127    TString option = opt;
00128    fNoProxy = kFALSE;
00129    if (option.Contains("NOPROXY", TString::kIgnoreCase))
00130       fNoProxy = kTRUE;
00131    CheckProxy();
00132 
00133    Bool_t headOnly = kFALSE;
00134    if (option.Contains("HEADONLY", TString::kIgnoreCase))
00135       headOnly = kTRUE;
00136 
00137    Init(headOnly);
00138 }
00139 
00140 //______________________________________________________________________________
00141 TWebFile::TWebFile(TUrl url, Option_t *opt) : TFile(url.GetUrl(), "WEB")
00142 {
00143    // Create a Web file object. A web file is the same as a read-only
00144    // TFile except that it is being read via a HTTP server. Make sure url
00145    // is a valid TUrl object.
00146    // The opt can be "NOPROXY", to bypass any set "http_proxy" shell
00147    // variable. The proxy can be specified as (in sh, or equivalent csh):
00148    //   export http_proxy=http://pcsalo.cern.ch:3128
00149    // The proxy can also be specified via the static method TWebFile::SetProxy().
00150    // Basic authentication (AuthType Basic) is supported. The user name and
00151    // passwd can be specified in the url like this:
00152    //   http://username:mypasswd@pcsalo.cern.ch/files/aap.root
00153    // If the file specified in the URL does not exist or is not accessible
00154    // the kZombie bit will be set in the TWebFile object. Use IsZombie()
00155    // to see if the file is accessible.
00156 
00157    TString option = opt;
00158    fNoProxy = kFALSE;
00159    if (option.Contains("NOPROXY", TString::kIgnoreCase))
00160       fNoProxy = kTRUE;
00161    CheckProxy();
00162 
00163    Bool_t headOnly = kFALSE;
00164    if (option.Contains("HEADONLY", TString::kIgnoreCase))
00165       headOnly = kTRUE;
00166 
00167    Init(headOnly);
00168 }
00169 
00170 //______________________________________________________________________________
00171 TWebFile::~TWebFile()
00172 {
00173    // Cleanup.
00174 
00175    delete fSocket;
00176 }
00177 
00178 //______________________________________________________________________________
00179 void TWebFile::Init(Bool_t readHeadOnly)
00180 {
00181    // Initialize a TWebFile object.
00182 
00183    char buf[4];
00184    int  err;
00185 
00186    fSocket     = 0;
00187    fSize       = -1;
00188    fHasModRoot = kFALSE;
00189    fHTTP11     = kFALSE;
00190 
00191    SetMsgReadBuffer10();
00192 
00193    if ((err = GetHead()) < 0) {
00194       if (readHeadOnly) {
00195          fD = -1;
00196          fWritten = err;
00197          return;
00198       }
00199       if (err == -2) {
00200          Error("TWebFile", "%s does not exist", fBasicUrl.Data());
00201          MakeZombie();
00202          gDirectory = gROOT;
00203          return;
00204       }
00205       // err == -3 HEAD not supported, fall through and try ReadBuffer()
00206    }
00207    if (readHeadOnly) {
00208       fD = -1;
00209       return;
00210    }
00211 
00212    if (fIsRootFile) {
00213       Seek(0);
00214       if (ReadBuffer(buf, 4)) {
00215          MakeZombie();
00216          gDirectory = gROOT;
00217          return;
00218       }
00219 
00220       if (strncmp(buf, "root", 4) && strncmp(buf, "PK", 2)) {  // PK is zip file
00221          Error("TWebFile", "%s is not a ROOT file", fBasicUrl.Data());
00222          MakeZombie();
00223          gDirectory = gROOT;
00224          return;
00225       }
00226    }
00227 
00228    TFile::Init(kFALSE);
00229    fD = -2;   // so TFile::IsOpen() will return true when in TFile::~TFile
00230 }
00231 
00232 //______________________________________________________________________________
00233 void TWebFile::SetMsgReadBuffer10(const char *redirectLocation, Bool_t tempRedirect)
00234 {
00235    // Set GET command for use by ReadBuffer(s)10(), handle redirection if
00236    // needed. Give full URL so Apache's virtual hosts solution works.
00237 
00238    TUrl oldUrl;
00239    TString oldBasicUrl;
00240 
00241    if (redirectLocation) {
00242       if (tempRedirect) { // temp redirect
00243          fUrlOrg      = fUrl;
00244          fBasicUrlOrg = fBasicUrl;
00245       } else {             // permanent redirect
00246          fUrlOrg      = "";
00247          fBasicUrlOrg = "";
00248       }
00249 
00250       oldUrl = fUrl;
00251       oldBasicUrl = fBasicUrl;
00252 
00253       fUrl.SetUrl(redirectLocation);
00254       fBasicUrl = fUrl.GetProtocol();
00255       fBasicUrl += "://";
00256       fBasicUrl += fUrl.GetHost();
00257       fBasicUrl += ":";
00258       fBasicUrl += fUrl.GetPort();
00259       fBasicUrl += "/";
00260       fBasicUrl += fUrl.GetFile();
00261    }
00262 
00263    if (fMsgReadBuffer10 != "") {
00264       // patch up existing command
00265       if (oldBasicUrl != "") {
00266          // change to redirection location
00267          fMsgReadBuffer10.ReplaceAll(oldBasicUrl, fBasicUrl);
00268          fMsgReadBuffer10.ReplaceAll(TString("Host: ")+oldUrl.GetHost(), TString("Host: ")+fUrl.GetHost());
00269       } else if (fBasicUrlOrg != "") {
00270          // change back from temp redirection location
00271          fMsgReadBuffer10.ReplaceAll(fBasicUrl, fBasicUrlOrg);
00272          fMsgReadBuffer10.ReplaceAll(TString("Host: ")+fUrl.GetHost(), TString("Host: ")+fUrlOrg.GetHost());
00273          fUrl         = fUrlOrg;
00274          fBasicUrl    = fBasicUrlOrg;
00275          fUrlOrg      = "";
00276          fBasicUrlOrg = "";
00277       }
00278    }
00279 
00280    if (fBasicUrl == "") {
00281       fBasicUrl += fUrl.GetProtocol();
00282       fBasicUrl += "://";
00283       fBasicUrl += fUrl.GetHost();
00284       fBasicUrl += ":";
00285       fBasicUrl += fUrl.GetPort();
00286       fBasicUrl += "/";
00287       fBasicUrl += fUrl.GetFile();
00288    }
00289 
00290    if (fMsgReadBuffer10 == "") {
00291       fMsgReadBuffer10 = "GET ";
00292       fMsgReadBuffer10 += fBasicUrl;
00293       if (fHTTP11)
00294          fMsgReadBuffer10 += " HTTP/1.1";
00295       else
00296          fMsgReadBuffer10 += " HTTP/1.0";
00297       fMsgReadBuffer10 += "\r\n";
00298       if (fHTTP11) {
00299          fMsgReadBuffer10 += "Host: ";
00300          fMsgReadBuffer10 += fUrl.GetHost();
00301          fMsgReadBuffer10 += "\r\n";
00302       }
00303       fMsgReadBuffer10 += BasicAuthentication();
00304       fMsgReadBuffer10 += gUserAgent;
00305       fMsgReadBuffer10 += "\r\n";
00306       fMsgReadBuffer10 += "Range: bytes=";
00307    }
00308 }
00309 
00310 //______________________________________________________________________________
00311 void TWebFile::CheckProxy()
00312 {
00313    // Check if shell var "http_proxy" has been set and should be used.
00314 
00315    if (fNoProxy)
00316       return;
00317 
00318    if (fgProxy.IsValid()) {
00319       fProxy = fgProxy;
00320       return;
00321    }
00322 
00323    TString proxy = gSystem->Getenv("http_proxy");
00324    if (proxy != "") {
00325       TUrl p(proxy);
00326       if (strcmp(p.GetProtocol(), "http")) {
00327          Error("CheckProxy", "protocol must be HTTP in proxy URL %s",
00328                proxy.Data());
00329          return;
00330       }
00331       fProxy = p;
00332    }
00333 }
00334 
00335 //______________________________________________________________________________
00336 Bool_t TWebFile::IsOpen() const
00337 {
00338    // A TWebFile that has been correctly constructed is always considered open.
00339 
00340    return IsZombie() ? kFALSE : kTRUE;
00341 }
00342 
00343 //______________________________________________________________________________
00344 Int_t TWebFile::ReOpen(Option_t *mode)
00345 {
00346    // Reopen a file with a different access mode, like from READ to
00347    // UPDATE or from NEW, CREATE, RECREATE, UPDATE to READ. Thus the
00348    // mode argument can be either "READ" or "UPDATE". The method returns
00349    // 0 in case the mode was successfully modified, 1 in case the mode
00350    // did not change (was already as requested or wrong input arguments)
00351    // and -1 in case of failure, in which case the file cannot be used
00352    // anymore. A TWebFile cannot be reopened in update mode.
00353 
00354    TString opt = mode;
00355    opt.ToUpper();
00356 
00357    if (opt != "READ" && opt != "UPDATE")
00358       Error("ReOpen", "mode must be either READ or UPDATE, not %s", opt.Data());
00359 
00360    if (opt == "UPDATE")
00361       Error("ReOpen", "update mode not allowed for a TWebFile");
00362 
00363    return 1;
00364 }
00365 
00366 //______________________________________________________________________________
00367 Bool_t TWebFile::ReadBuffer(char *buf, Int_t len)
00368 {
00369    // Read specified byte range from remote file via HTTP daemon. This
00370    // routine connects to the remote host, sends the request and returns
00371    // the buffer. Returns kTRUE in case of error.
00372 
00373    Int_t st;
00374    if ((st = ReadBufferViaCache(buf, len))) {
00375       if (st == 2)
00376          return kTRUE;
00377       return kFALSE;
00378    }
00379 
00380    if (!fHasModRoot)
00381       return ReadBuffer10(buf, len);
00382 
00383    // Give full URL so Apache's virtual hosts solution works.
00384    // Use protocol 0.9 for efficiency, we are not interested in the 1.0 headers.
00385    if (fMsgReadBuffer == "") {
00386       fMsgReadBuffer = "GET ";
00387       fMsgReadBuffer += fBasicUrl;
00388       fMsgReadBuffer += "?";
00389    }
00390    TString msg = fMsgReadBuffer;
00391    msg += fOffset;
00392    msg += ":";
00393    msg += len;
00394    msg += "\r\n";
00395 
00396    if (GetFromWeb(buf, len, msg) == -1)
00397       return kTRUE;
00398 
00399    fOffset += len;
00400 
00401    return kFALSE;
00402 }
00403 
00404 //______________________________________________________________________________
00405 Bool_t TWebFile::ReadBuffer(char *buf, Long64_t pos, Int_t len)
00406 {
00407    // Read specified byte range from remote file via HTTP daemon. This
00408    // routine connects to the remote host, sends the request and returns
00409    // the buffer. Returns kTRUE in case of error.
00410 
00411    SetOffset(pos);
00412    return ReadBuffer(buf, len);
00413 }
00414 
00415 //______________________________________________________________________________
00416 Bool_t TWebFile::ReadBuffer10(char *buf, Int_t len)
00417 {
00418    // Read specified byte range from remote file via HTTP 1.0 daemon (without
00419    // mod-root installed). This routine connects to the remote host, sends the
00420    // request and returns the buffer. Returns kTRUE in case of error.
00421 
00422    SetMsgReadBuffer10();
00423 
00424    TString msg = fMsgReadBuffer10;
00425    msg += fOffset;
00426    msg += "-";
00427    msg += fOffset+len-1;
00428    msg += "\r\n\r\n";
00429 
00430    Int_t n = GetFromWeb10(buf, len, msg);
00431    if (n == -1)
00432       return kTRUE;
00433    // The -2 error condition typically only happens when
00434    // GetHead() failed because not implemented, in the first call to
00435    // ReadBuffer() in Init(), it is not checked in ReadBuffers10().
00436    if (n == -2) {
00437       Error("ReadBuffer10", "%s does not exist", fBasicUrl.Data());
00438       MakeZombie();
00439       gDirectory = gROOT;
00440       return kTRUE;
00441    }
00442 
00443    fOffset += len;
00444 
00445    return kFALSE;
00446 }
00447 
00448 //______________________________________________________________________________
00449 Bool_t TWebFile::ReadBuffers(char *buf, Long64_t *pos, Int_t *len, Int_t nbuf)
00450 {
00451    // Read specified byte ranges from remote file via HTTP daemon.
00452    // Reads the nbuf blocks described in arrays pos and len,
00453    // where pos[i] is the seek position of block i of length len[i].
00454    // Note that for nbuf=1, this call is equivalent to TFile::ReafBuffer
00455    // This function is overloaded by TNetFile, TWebFile, etc.
00456    // Returns kTRUE in case of failure.
00457 
00458    if (!fHasModRoot)
00459       return ReadBuffers10(buf, pos, len, nbuf);
00460 
00461    // Give full URL so Apache's virtual hosts solution works.
00462    // Use protocol 0.9 for efficiency, we are not interested in the 1.0 headers.
00463    if (fMsgReadBuffer == "") {
00464       fMsgReadBuffer = "GET ";
00465       fMsgReadBuffer += fBasicUrl;
00466       fMsgReadBuffer += "?";
00467    }
00468    TString msg = fMsgReadBuffer;
00469 
00470    Int_t k = 0, n = 0;
00471    for (Int_t i = 0; i < nbuf; i++) {
00472       if (n) msg += ",";
00473       msg += pos[i] + fArchiveOffset;
00474       msg += ":";
00475       msg += len[i];
00476       n   += len[i];
00477       if (msg.Length() > 8000) {
00478          msg += "\r\n";
00479          if (GetFromWeb(&buf[k], n, msg) == -1)
00480             return kTRUE;
00481          msg = fMsgReadBuffer;
00482          k += n;
00483          n = 0;
00484       }
00485    }
00486 
00487    msg += "\r\n";
00488 
00489    if (GetFromWeb(&buf[k], n, msg) == -1)
00490       return kTRUE;
00491 
00492    return kFALSE;
00493 }
00494 
00495 //______________________________________________________________________________
00496 Bool_t TWebFile::ReadBuffers10(char *buf,  Long64_t *pos, Int_t *len, Int_t nbuf)
00497 {
00498    // Read specified byte ranges from remote file via HTTP 1.0 daemon (without
00499    // mod-root installed). Read the nbuf blocks described in arrays pos and len,
00500    // where pos[i] is the seek position of block i of length len[i].
00501    // Note that for nbuf=1, this call is equivalent to TFile::ReafBuffer
00502    // This function is overloaded by TNetFile, TWebFile, etc.
00503    // Returns kTRUE in case of failure.
00504 
00505    SetMsgReadBuffer10();
00506 
00507    TString msg = fMsgReadBuffer10;
00508 
00509    Int_t k = 0, n = 0, r;
00510    for (Int_t i = 0; i < nbuf; i++) {
00511       if (n) msg += ",";
00512       msg += pos[i] + fArchiveOffset;
00513       msg += "-";
00514       msg += pos[i] + fArchiveOffset + len[i] - 1;
00515       n   += len[i];
00516       if (msg.Length() > 8000) {
00517          msg += "\r\n\r\n";
00518          r = GetFromWeb10(&buf[k], n, msg);
00519          if (r == -1)
00520             return kTRUE;
00521          msg = fMsgReadBuffer10;
00522          k += n;
00523          n = 0;
00524       }
00525    }
00526 
00527    msg += "\r\n\r\n";
00528 
00529    r = GetFromWeb10(&buf[k], n, msg);
00530    if (r == -1)
00531       return kTRUE;
00532 
00533    return kFALSE;
00534 }
00535 
00536 //______________________________________________________________________________
00537 Int_t TWebFile::GetFromWeb(char *buf, Int_t len, const TString &msg)
00538 {
00539    // Read request from web server. Returns -1 in case of error,
00540    // 0 in case of success.
00541 
00542    if (!len) return 0;
00543 
00544    Double_t start = 0;
00545    if (gPerfStats) start = TTimeStamp();
00546 
00547    TUrl connurl;
00548    if (fProxy.IsValid())
00549       connurl = fProxy;
00550    else
00551       connurl = fUrl;
00552 
00553    TSocket s(connurl.GetHost(), connurl.GetPort());
00554    if (!s.IsValid()) {
00555       Error("GetFromWeb", "cannot connect to host %s", fUrl.GetHost());
00556       return -1;
00557    }
00558 
00559    if (s.SendRaw(msg.Data(), msg.Length()) == -1) {
00560       Error("GetFromWeb", "error sending command to host %s", fUrl.GetHost());
00561       return -1;
00562    }
00563 
00564    if (s.RecvRaw(buf, len) == -1) {
00565       Error("GetFromWeb", "error receiving data from host %s", fUrl.GetHost());
00566       return -1;
00567    }
00568 
00569    // collect statistics
00570    fBytesRead += len;
00571    fReadCalls++;
00572 #ifdef R__WIN32
00573    SetFileBytesRead(GetFileBytesRead() + len);
00574    SetFileReadCalls(GetFileReadCalls() + 1);
00575 #else
00576    fgBytesRead += len;
00577    fgReadCalls++;
00578 #endif
00579 
00580    if (gPerfStats)
00581       gPerfStats->FileReadEvent(this, len, start);
00582 
00583    return 0;
00584 }
00585 
00586 //______________________________________________________________________________
00587 Int_t TWebFile::GetFromWeb10(char *buf, Int_t len, const TString &msg)
00588 {
00589    // Read multiple byte range request from web server.
00590    // Uses HTTP 1.0 daemon wihtout mod-root.
00591    // Returns -2 in case file does not exist, -1 in case
00592    // of error and 0 in case of success.
00593 
00594    if (!len) return 0;
00595 
00596    Double_t start = 0;
00597    if (gPerfStats) start = TTimeStamp();
00598 
00599    // open fSocket and close it when going out of scope
00600    TWebSocket ws(this);
00601 
00602    if (!fSocket || !fSocket->IsValid()) {
00603       Error("GetFromWeb10", "cannot connect to host %s", fUrl.GetHost());
00604       return -1;
00605    }
00606 
00607    if (fSocket->SendRaw(msg.Data(), msg.Length()) == -1) {
00608       Error("GetFromWeb10", "error sending command to host %s", fUrl.GetHost());
00609       return -1;
00610    }
00611 
00612    char line[8192];
00613    Int_t n, ret = 0, nranges = 0, ltot = 0, redirect = 0;
00614    TString boundary, boundaryEnd;
00615    Long64_t first = -1, last = -1, tot;
00616 
00617    while ((n = GetLine(fSocket, line, sizeof(line))) >= 0) {
00618       if (n == 0) {
00619          if (ret < 0)
00620             return ret;
00621          if (redirect) {
00622             ws.ReOpen();
00623             return GetFromWeb10(buf, len, msg);
00624          }
00625 
00626          if (first >= 0) {
00627             Int_t ll = Int_t(last - first) + 1;
00628             if (fSocket->RecvRaw(&buf[ltot], ll) == -1) {
00629                Error("GetFromWeb10", "error receiving data from host %s", fUrl.GetHost());
00630                return -1;
00631             }
00632             ltot += ll;
00633 
00634             first = -1;
00635 
00636             if (boundary == "")
00637                break;  // not a multipart response
00638          }
00639 
00640          continue;
00641       }
00642 
00643       if (gDebug > 0)
00644          Info("GetFromWeb10", "header: %s", line);
00645 
00646       if (boundaryEnd == line) {
00647          if (gDebug > 0)
00648             Info("GetFromWeb10", "got all headers");
00649          break;
00650       }
00651       if (boundary == line) {
00652          nranges++;
00653          if (gDebug > 0)
00654             Info("GetFromWeb10", "get new multipart byte range (%d)", nranges);
00655       }
00656 
00657       TString res = line;
00658 
00659       if (res.BeginsWith("HTTP/1.")) {
00660          if (res.BeginsWith("HTTP/1.1")) {
00661             if (!fHTTP11)
00662                fMsgReadBuffer10  = "";
00663             fHTTP11 = kTRUE;
00664          }
00665          TString scode = res(9, 3);
00666          Int_t code = scode.Atoi();
00667          if (code >= 500) {
00668             ret = -1;
00669             TString mess = res(13, 1000);
00670             Error("GetFromWeb10", "%s: %s (%d)", fBasicUrl.Data(), mess.Data(), code);
00671          } else if (code >= 400) {
00672             if (code == 404)
00673                ret = -2;   // file does not exist
00674             else {
00675                ret = -1;
00676                TString mess = res(13, 1000);
00677                Error("GetFromWeb10", "%s: %s (%d)", fBasicUrl.Data(), mess.Data(), code);
00678             }
00679          } else if (code >= 300) {
00680             if (code == 301 || code == 303)
00681                redirect = 1;   // permanent redirect
00682             else if (code == 302 || code == 307)
00683                redirect = 2;   // temp redirect
00684             else {
00685                ret = -1;
00686                TString mess = res(13, 1000);
00687                Error("GetFromWeb10", "%s: %s (%d)", fBasicUrl.Data(), mess.Data(), code);
00688             }
00689          } else if (code > 200) {
00690             if (code != 206) {
00691                ret = -1;
00692                TString mess = res(13, 1000);
00693                Error("GetFromWeb10", "%s: %s (%d)", fBasicUrl.Data(), mess.Data(), code);
00694             }
00695          }
00696       } else if (res.BeginsWith("Content-Type: multipart")) {
00697          boundary = "--" + res(res.Index("boundary=")+9, 1000);
00698          boundaryEnd = boundary + "--";
00699       } else if (res.BeginsWith("Content-range:")) {
00700 #ifdef R__WIN32
00701          sscanf(res.Data(), "Content-range: bytes %I64d-%I64d/%I64d", &first, &last, &tot);
00702 #else
00703          sscanf(res.Data(), "Content-range: bytes %lld-%lld/%lld", &first, &last, &tot);
00704 #endif
00705          if (fSize == -1) fSize = tot;
00706       } else if (res.BeginsWith("Content-Range:")) {
00707 #ifdef R__WIN32
00708          sscanf(res.Data(), "Content-Range: bytes %I64d-%I64d/%I64d", &first, &last, &tot);
00709 #else
00710          sscanf(res.Data(), "Content-Range: bytes %lld-%lld/%lld", &first, &last, &tot);
00711 #endif
00712          if (fSize == -1) fSize = tot;
00713       } else if (res.BeginsWith("Location:") && redirect) {
00714          TString redir = res(10, 1000);
00715          if (redirect == 2)   // temp redirect
00716             SetMsgReadBuffer10(redir, kTRUE);
00717          else               // permanent redirect
00718             SetMsgReadBuffer10(redir, kFALSE);
00719       }
00720    }
00721 
00722    if (n == -1 && fHTTP11) {
00723       if (gDebug > 0)
00724          Info("GetFromWeb10", "HTTP/1.1 socket closed, reopen");
00725       if (fBasicUrlOrg != "") {
00726          // if we have to close temp redirection, set back to original url
00727          SetMsgReadBuffer10();
00728       }
00729       ws.ReOpen();
00730       return GetFromWeb10(buf, len, msg);
00731    }
00732 
00733    if (ltot != len) {
00734       Error("GetFromWeb10", "error receiving expected amount of data (got %d, expected %d) from host %s",
00735             ltot, len, fUrl.GetHost());
00736       return -1;
00737    }
00738 
00739    // collect statistics
00740    fBytesRead += len;
00741    fReadCalls++;
00742 #ifdef R__WIN32
00743    SetFileBytesRead(GetFileBytesRead() + len);
00744    SetFileReadCalls(GetFileReadCalls() + 1);
00745 #else
00746    fgBytesRead += len;
00747    fgReadCalls++;
00748 #endif
00749 
00750    if (gPerfStats)
00751       gPerfStats->FileReadEvent(this, len, start);
00752 
00753    return 0;
00754 }
00755 
00756 //______________________________________________________________________________
00757 void TWebFile::Seek(Long64_t offset, ERelativeTo pos)
00758 {
00759    // Set position from where to start reading.
00760 
00761    switch (pos) {
00762    case kBeg:
00763       fOffset = offset + fArchiveOffset;
00764       break;
00765    case kCur:
00766       fOffset += offset;
00767       break;
00768    case kEnd:
00769       // this option is not used currently in the ROOT code
00770       if (fArchiveOffset)
00771          Error("Seek", "seeking from end in archive is not (yet) supported");
00772       fOffset = fEND - offset;  // is fEND really EOF or logical EOF?
00773       break;
00774    }
00775 }
00776 
00777 //______________________________________________________________________________
00778 Long64_t TWebFile::GetSize() const
00779 {
00780    // Return maximum file size.
00781 
00782    if (!fHasModRoot || fSize >= 0)
00783       return fSize;
00784 
00785    Long64_t size;
00786    char     asize[64];
00787 
00788    TString msg = "GET ";
00789    msg += fBasicUrl;
00790    msg += "?";
00791    msg += -1;
00792    msg += "\r\n";
00793 
00794    if (const_cast<TWebFile*>(this)->GetFromWeb(asize, 64, msg) == -1)
00795       return kMaxInt;
00796 
00797 #ifndef R__WIN32
00798    size = atoll(asize);
00799 #else
00800    size = _atoi64(asize);
00801 #endif
00802 
00803    fSize = size;
00804 
00805    return size;
00806 }
00807 
00808 //______________________________________________________________________________
00809 Int_t TWebFile::GetHead()
00810 {
00811    // Get the HTTP header. Depending on the return code we can see if
00812    // the file exists and if the server uses mod_root.
00813    // Returns -1 in case of an error, -2 in case the file does not exists,
00814    // -3 in case HEAD is not supported (dCache HTTP door) and
00815    // 0 in case of success.
00816 
00817    // Give full URL so Apache's virtual hosts solution works.
00818    if (fMsgGetHead == "") {
00819       fMsgGetHead = "HEAD ";
00820       fMsgGetHead += fBasicUrl;
00821       if (fHTTP11)
00822          fMsgGetHead += " HTTP/1.1";
00823       else
00824          fMsgGetHead += " HTTP/1.0";
00825       fMsgGetHead += "\r\n";
00826       if (fHTTP11) {
00827          fMsgGetHead += "Host: ";
00828          fMsgGetHead += fUrl.GetHost();
00829          fMsgGetHead += "\r\n";
00830       }
00831       fMsgGetHead += BasicAuthentication();
00832       fMsgGetHead += gUserAgent;
00833       fMsgGetHead += "\r\n\r\n";
00834    }
00835    TString msg = fMsgGetHead;
00836 
00837    TUrl connurl;
00838    if (fProxy.IsValid())
00839       connurl = fProxy;
00840    else
00841       connurl = fUrl;
00842 
00843    TSocket *s = 0;
00844    for (Int_t i = 0; i < 5; i++) {
00845       s = new TSocket(connurl.GetHost(), connurl.GetPort());
00846       if (!s->IsValid()) {
00847          delete s;
00848          if (gSystem->GetErrno() == EADDRINUSE || gSystem->GetErrno() == EISCONN) {
00849             s = 0;
00850             gSystem->Sleep(i*10);
00851          } else {
00852             Error("GetHead", "cannot connect to host %s (errno=%d)", fUrl.GetHost(),
00853                   gSystem->GetErrno());
00854             return -1;
00855          }
00856       } else
00857          break;
00858    }
00859    if (!s)
00860       return -1;
00861 
00862    if (s->SendRaw(msg.Data(), msg.Length()) == -1) {
00863       Error("GetHead", "error sending command to host %s", fUrl.GetHost());
00864       delete s;
00865       return -1;
00866    }
00867 
00868    char line[8192];
00869    Int_t n, ret = 0, redirect = 0;
00870 
00871    while ((n = GetLine(s, line, sizeof(line))) >= 0) {
00872       if (n == 0) {
00873          if (gDebug > 0)
00874             Info("GetHead", "got all headers");
00875          delete s;
00876          if (fBasicUrlOrg != "" && !redirect) {
00877             // set back to original url in case of temp redirect
00878             SetMsgReadBuffer10();
00879             fMsgGetHead = "";
00880          }
00881          if (ret < 0)
00882             return ret;
00883          if (redirect)
00884             return GetHead();
00885          return 0;
00886       }
00887 
00888       if (gDebug > 0)
00889          Info("GetHead", "header: %s", line);
00890 
00891       TString res = line;
00892       if (res.BeginsWith("HTTP/1.")) {
00893          if (res.BeginsWith("HTTP/1.1")) {
00894             if (!fHTTP11) {
00895                fMsgGetHead = "";
00896                fMsgReadBuffer10 = "";
00897             }
00898             fHTTP11 = kTRUE;
00899          }
00900          TString scode = res(9, 3);
00901          Int_t code = scode.Atoi();
00902          if (code >= 500) {
00903             if (code == 500)
00904                fHasModRoot = kTRUE;
00905             else {
00906                ret = -1;
00907                TString mess = res(13, 1000);
00908                Error("GetHead", "%s: %s (%d)", fBasicUrl.Data(), mess.Data(), code);
00909             }
00910          } else if (code >= 400) {
00911             if (code == 400)
00912                ret = -3;   // command not supported
00913             else if (code == 404)
00914                ret = -2;   // file does not exist
00915             else {
00916                ret = -1;
00917                TString mess = res(13, 1000);
00918                Error("GetHead", "%s: %s (%d)", fBasicUrl.Data(), mess.Data(), code);
00919             }
00920          } else if (code >= 300) {
00921             if (code == 301 || code == 303)
00922                redirect = 1;   // permanent redirect
00923             else if (code == 302 || code == 307)
00924                redirect = 2;   // temp redirect
00925             else {
00926                ret = -1;
00927                TString mess = res(13, 1000);
00928                Error("GetHead", "%s: %s (%d)", fBasicUrl.Data(), mess.Data(), code);
00929             }
00930          } else if (code > 200) {
00931             ret = -1;
00932             TString mess = res(13, 1000);
00933             Error("GetHead", "%s: %s (%d)", fBasicUrl.Data(), mess.Data(), code);
00934          }
00935       } else if (res.BeginsWith("Content-Length:")) {
00936          TString slen = res(16, 1000);
00937          fSize = slen.Atoll();
00938       } else if (res.BeginsWith("Location:") && redirect) {
00939          TString redir = res(10, 1000);
00940          if (redirect == 2)   // temp redirect
00941             SetMsgReadBuffer10(redir, kTRUE);
00942          else               // permanent redirect
00943             SetMsgReadBuffer10(redir, kFALSE);
00944          fMsgGetHead = "";
00945       }
00946    }
00947 
00948    delete s;
00949 
00950    return ret;
00951 }
00952 
00953 //______________________________________________________________________________
00954 Int_t TWebFile::GetLine(TSocket *s, char *line, Int_t maxsize)
00955 {
00956    // Read a line from the socket. Reads at most one less than the number of
00957    // characters specified by maxsize. Reading stops when a newline character
00958    // is found, The newline (\n) and cr (\r), if any, are removed.
00959    // Returns -1 in case of error, or the number of characters read (>= 0)
00960    // otherwise.
00961 
00962    Int_t n = GetHunk(s, line, maxsize);
00963    if (n < 0) {
00964       if (!fHTTP11 || gDebug > 0)
00965          Error("GetLine", "error receiving data from host %s", fUrl.GetHost());
00966       return -1;
00967    }
00968 
00969    if (n > 0 && line[n-1] == '\n') {
00970       n--;
00971       if (n > 0 && line[n-1] == '\r')
00972          n--;
00973       line[n] = '\0';
00974    }
00975    return n;
00976 }
00977 
00978 //______________________________________________________________________________
00979 Int_t TWebFile::GetHunk(TSocket *s, char *hunk, Int_t maxsize)
00980 {
00981    // Read a hunk of data from the socket, up until a terminator. The hunk is
00982    // limited by whatever the TERMINATOR callback chooses as its
00983    // terminator. For example, if terminator stops at newline, the hunk
00984    // will consist of a line of data; if terminator stops at two
00985    // newlines, it can be used to read the head of an HTTP response.
00986    // Upon determining the boundary, the function returns the data (up to
00987    // the terminator) in hunk.
00988    //
00989    // In case of read error, -1 is returned. In case of having read some
00990    // data, but encountering EOF before seeing the terminator, the data
00991    // that has been read is returned, but it will (obviously) not contain the
00992    // terminator.
00993    //
00994    // The TERMINATOR function is called with three arguments: the
00995    // beginning of the data read so far, the beginning of the current
00996    // block of peeked-at data, and the length of the current block.
00997    // Depending on its needs, the function is free to choose whether to
00998    // analyze all data or just the newly arrived data. If TERMINATOR
00999    // returns 0, it means that the terminator has not been seen.
01000    // Otherwise it should return a pointer to the character immediately
01001    // following the terminator.
01002    //
01003    // The idea is to be able to read a line of input, or otherwise a hunk
01004    // of text, such as the head of an HTTP request, without crossing the
01005    // boundary, so that the next call to RecvRaw() etc. reads the data
01006    // after the hunk. To achieve that, this function does the following:
01007    //
01008    // 1. Peek at incoming data.
01009    //
01010    // 2. Determine whether the peeked data, along with the previously
01011    //    read data, includes the terminator.
01012    //
01013    // 3a. If yes, read the data until the end of the terminator, and
01014    //     exit.
01015    //
01016    // 3b. If no, read the peeked data and goto 1.
01017    //
01018    // The function is careful to assume as little as possible about the
01019    // implementation of peeking.  For example, every peek is followed by
01020    // a read. If the read returns a different amount of data, the
01021    // process is retried until all data arrives safely.
01022    //
01023    // Reads at most one less than the number of characters specified by maxsize.
01024 
01025    if (maxsize <= 0) return 0;
01026 
01027    Int_t bufsize = maxsize;
01028    Int_t tail = 0;                 // tail position in HUNK
01029 
01030    while (1) {
01031       const char *end;
01032       Int_t pklen, rdlen, remain;
01033 
01034       // First, peek at the available data.
01035       pklen = s->RecvRaw(hunk+tail, bufsize-1-tail, kPeek);
01036       if (pklen < 0) {
01037          return -1;
01038       }
01039       end = HttpTerminator(hunk, hunk+tail, pklen);
01040       if (end) {
01041          // The data contains the terminator: we'll drain the data up
01042          // to the end of the terminator.
01043          remain = end - (hunk + tail);
01044          if (remain == 0) {
01045             // No more data needs to be read.
01046             hunk[tail] = '\0';
01047             return tail;
01048          }
01049          if (bufsize - 1 < tail + remain) {
01050             Error("GetHunk", "hunk buffer too small for data from host %s (%d bytes needed)",
01051                   fUrl.GetHost(), tail + remain + 1);
01052             hunk[tail] = '\0';
01053             return -1;
01054          }
01055       } else {
01056          // No terminator: simply read the data we know is (or should
01057          // be) available.
01058          remain = pklen;
01059       }
01060 
01061       // Now, read the data. Note that we make no assumptions about
01062       // how much data we'll get. (Some TCP stacks are notorious for
01063       // read returning less data than the previous MSG_PEEK.)
01064       rdlen = s->RecvRaw(hunk+tail, remain, kDontBlock);
01065       if (rdlen < 0) {
01066          return -1;
01067       }
01068       tail += rdlen;
01069       hunk[tail] = '\0';
01070 
01071       if (rdlen == 0) {
01072          if (tail == 0) {
01073             // EOF without anything having been read
01074             return tail;
01075          } else {
01076             // EOF seen: return the data we've read.
01077             return tail;
01078          }
01079       }
01080       if (end && rdlen == remain) {
01081          // The terminator was seen and the remaining data drained --
01082          // we got what we came for.
01083          return tail;
01084       }
01085 
01086       // Keep looping until all the data arrives.
01087 
01088       if (tail == bufsize - 1) {
01089          Error("GetHunk", "hunk buffer too small for data from host %s",
01090                fUrl.GetHost());
01091          return -1;
01092       }
01093    }
01094 }
01095 
01096 //______________________________________________________________________________
01097 const char *TWebFile::HttpTerminator(const char *start, const char *peeked,
01098                                      Int_t peeklen)
01099 {
01100    // Determine whether [START, PEEKED + PEEKLEN) contains an HTTP new
01101    // line [\r]\n. If so, return the pointer to the position after the line,
01102    // otherwise return 0. This is used as callback to GetHunk(). The data
01103    // between START and PEEKED has been read and cannot be "unread"; the
01104    // data after PEEKED has only been peeked.
01105 #if 0
01106    const char *p, *end;
01107 
01108    // Look for "[\r]\n", and return the following position if found.
01109    // Start one char before the current to cover the possibility that
01110    // part of the terminator (e.g. "\r") arrived in the previous batch.
01111    p = peeked - start < 1 ? start : peeked - 1;
01112    end = peeked + peeklen;
01113 
01114    // Check for \r\n anywhere in [p, end-2).
01115    for (; p < end - 1; p++)
01116       if (p[0] == '\r' && p[1] == '\n')
01117          return p + 2;
01118 
01119    // p==end-1: check for \r\n directly preceding END.
01120    if (p[0] == '\r' && p[1] == '\n')
01121       return p + 2;
01122 #else
01123    if (start) { }   // start unused, silence compiler
01124    const char *p = (const char*) memchr(peeked, '\n', peeklen);
01125    if (p)
01126       // p+1 because the line must include '\n'
01127       return p + 1;
01128 #endif
01129    return 0;
01130 }
01131 
01132 //______________________________________________________________________________
01133 TString TWebFile::BasicAuthentication()
01134 {
01135    // Return basic authentication scheme, to be added to the request.
01136 
01137    TString msg;
01138    if (strlen(fUrl.GetUser())) {
01139       TString auth = fUrl.GetUser();
01140       if (strlen(fUrl.GetPasswd())) {
01141          auth += ":";
01142          auth += fUrl.GetPasswd();
01143       }
01144       msg += "Authorization: Basic ";
01145       msg += TBase64::Encode(auth);
01146       msg += "\r\n";
01147    }
01148    return msg;
01149 }
01150 
01151 //______________________________________________________________________________
01152 void TWebFile::SetProxy(const char *proxy)
01153 {
01154    // Static method setting global proxy URL.
01155 
01156    if (proxy && *proxy) {
01157       TUrl p(proxy);
01158       if (strcmp(p.GetProtocol(), "http")) {
01159          :: Error("TWebFile::SetProxy", "protocol must be HTTP in proxy URL %s",
01160                   proxy);
01161          return;
01162       }
01163       fgProxy = p;
01164    }
01165 }
01166 
01167 //______________________________________________________________________________
01168 const char *TWebFile::GetProxy()
01169 {
01170    // Static method returning the global proxy URL.
01171 
01172    if (fgProxy.IsValid())
01173       return fgProxy.GetUrl();
01174    return "";
01175 }
01176 
01177 
01178 //______________________________________________________________________________
01179 TWebSystem::TWebSystem() : TSystem("-http", "HTTP Helper System")
01180 {
01181    // Create helper class that allows directory access via httpd.
01182    // The name must start with '-' to bypass the TSystem singleton check.
01183 
01184    SetName("http");
01185 
01186    fDirp = 0;
01187 }
01188 
01189 //______________________________________________________________________________
01190 Int_t TWebSystem::MakeDirectory(const char *)
01191 {
01192    // Make a directory via httpd. Not supported.
01193 
01194    return -1;
01195 }
01196 
01197 //______________________________________________________________________________
01198 void *TWebSystem::OpenDirectory(const char *)
01199 {
01200    // Open a directory via httpd. Returns an opaque pointer to a dir
01201    // structure. Returns 0 in case of error.
01202 
01203    if (fDirp) {
01204       Error("OpenDirectory", "invalid directory pointer (should never happen)");
01205       fDirp = 0;
01206    }
01207 
01208    fDirp = 0;   // not implemented for the time being
01209 
01210    return fDirp;
01211 }
01212 
01213 //______________________________________________________________________________
01214 void TWebSystem::FreeDirectory(void *dirp)
01215 {
01216    // Free directory via httpd.
01217 
01218    if (dirp != fDirp) {
01219       Error("FreeDirectory", "invalid directory pointer (should never happen)");
01220       return;
01221    }
01222 
01223    fDirp = 0;
01224 }
01225 
01226 //______________________________________________________________________________
01227 const char *TWebSystem::GetDirEntry(void *dirp)
01228 {
01229    // Get directory entry via httpd. Returns 0 in case no more entries.
01230 
01231    if (dirp != fDirp) {
01232       Error("GetDirEntry", "invalid directory pointer (should never happen)");
01233       return 0;
01234    }
01235 
01236    return 0;
01237 }
01238 
01239 //______________________________________________________________________________
01240 Int_t TWebSystem::GetPathInfo(const char *path, FileStat_t &buf)
01241 {
01242    // Get info about a file. Info is returned in the form of a FileStat_t
01243    // structure (see TSystem.h).
01244    // The function returns 0 in case of success and 1 if the file could
01245    // not be stat'ed.
01246 
01247    TWebFile *f = new TWebFile(path, "HEADONLY");
01248 
01249    if (f->fWritten == 0) {
01250 
01251       buf.fDev    = 0;
01252       buf.fIno    = 0;
01253       buf.fMode   = 0;
01254       buf.fUid    = 0;
01255       buf.fGid    = 0;
01256       buf.fSize   = f->GetSize();
01257       buf.fMtime  = 0;
01258       buf.fIsLink = kFALSE;
01259 
01260       delete f;
01261       return 0;
01262    }
01263 
01264    delete f;
01265    return 1;
01266 }
01267 
01268 //______________________________________________________________________________
01269 Bool_t TWebSystem::AccessPathName(const char *path, EAccessMode)
01270 {
01271    // Returns FALSE if one can access a file using the specified access mode.
01272    // Mode is the same as for the Unix access(2) function.
01273    // Attention, bizarre convention of return value!!
01274 
01275    TWebFile *f = new TWebFile(path, "HEADONLY");
01276    if (f->fWritten == 0) {
01277       delete f;
01278       return kFALSE;
01279    }
01280    delete f;
01281    return kTRUE;
01282 }
01283 
01284 //______________________________________________________________________________
01285 Int_t TWebSystem::Unlink(const char *)
01286 {
01287    // Unlink, i.e. remove, a file or directory. Returns 0 when succesfull,
01288    // -1 in case of failure. Not supported for httpd.
01289 
01290    return -1;
01291 }

Generated on Tue Jul 5 14:46:12 2011 for ROOT_528-00b_version by  doxygen 1.5.1