ROOT_528-00b_version: roofit/roofitcore/src/RooStreamParser.cxx Source File

00001 /*****************************************************************************
00002  * Project: RooFit                                                           *
00003  * Package: RooFitCore                                                       *
00004  * @(#)root/roofitcore:$Id: RooStreamParser.cxx 36209 2010-10-08 21:37:36Z wouter $
00005  * Authors:                                                                  *
00006  *   WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu       *
00007  *   DK, David Kirkby,    UC Irvine,         dkirkby@uci.edu                 *
00008  *                                                                           *
00009  * Copyright (c) 2000-2005, Regents of the University of California          *
00010  *                          and Stanford University. All rights reserved.    *
00011  *                                                                           *
00012  * Redistribution and use in source and binary forms,                        *
00013  * with or without modification, are permitted according to the terms        *
00014  * listed in LICENSE (http://roofit.sourceforge.net/license.txt)             *
00015  *****************************************************************************/
00016 
00017 //////////////////////////////////////////////////////////////////////////////
00018 //
00019 // RooStreamParser is a utility class to parse istreams into tokens and optionally
00020 // convert them into basic types (double,int,string)
00021 // 
00022 // The general tokenizing philosophy is that there are two kinds of tokens: value
00023 // and punctuation. The former are variable length, the latter always
00024 // one character. A token is terminated if one of the following conditions
00025 // occurs:
00026 //         - space character found (' ',tab,newline)
00027 //         - change of token type (value -> punctuation or vv)
00028 //         - end of fixed-length token (punctuation only)
00029 //         - start or end of quoted string
00030 //
00031 // The parser is aware of floating point notation and will assign leading
00032 // minus signs, decimal points etc to a value token when this is obvious
00033 // from the context. The definition of what is punctuation can be redefined.
00034 //
00035 
00036 
00037 #include "RooFit.h"
00038 
00039 #include "Riostream.h"
00040 #include "Riostream.h"
00041 #include <stdlib.h>
00042 #include <ctype.h>
00043 
00044 #ifndef _WIN32
00045 #include <strings.h>
00046 #endif
00047 
00048 #include "RooStreamParser.h"
00049 #include "RooMsgService.h"
00050 #include "RooNumber.h"
00051 
00052 
// ROOT ClassImp macro invocation for RooStreamParser.
// NOTE(review): presumably supplies the ROOT class-implementation/dictionary
// hooks for this class -- confirm against the macro definition in ROOT.
ClassImp(RooStreamParser)
00054 
00055 
00056 //_____________________________________________________________________________
00057 RooStreamParser::RooStreamParser(istream& is) : 
00058   _is(&is), _atEOL(kFALSE), _atEOF(kFALSE), _prefix(""), _punct("()[]<>|/\\:?.,=+-&^%$#@!`~")
00059 {
00060   // Construct parser on given input stream
00061 }
00062 
00063 
00064 //_____________________________________________________________________________
00065 RooStreamParser::RooStreamParser(istream& is, const TString& errorPrefix) : 
00066   _is(&is), _atEOL(kFALSE), _atEOF(kFALSE), _prefix(errorPrefix), _punct("()[]<>|/\\:?.,=+-&^%$#@!`~")
00067 {
00068   // Construct parser on given input stream. Use given errorPrefix to 
00069   // prefix any parsing error messages
00070 }
00071 
00072 
00073 
00074 //_____________________________________________________________________________
00075 RooStreamParser::~RooStreamParser()
00076 {
00077   // Destructor
00078 }
00079 
00080 
00081 
00082 //_____________________________________________________________________________
00083 Bool_t RooStreamParser::atEOL() 
00084 { 
00085   // If true, parser is at end of line in stream
00086 
00087   Int_t nc(_is->peek()) ; 
00088   return (nc=='\n'||nc==-1) ; 
00089 }
00090 
00091 
00092 
00093 //_____________________________________________________________________________
00094 void RooStreamParser::setPunctuation(const TString& punct) 
00095 {
00096   // Change list of characters interpreted as punctuation
00097 
00098   _punct = punct ;
00099 }
00100 
00101 
00102 
00103 //_____________________________________________________________________________
00104 Bool_t RooStreamParser::isPunctChar(char c) const 
00105 {
00106   // Check if given char is considered punctuation
00107 
00108   const char* punct = _punct.Data() ;
00109   for (int i=0 ; i<_punct.Length() ; i++)
00110     if (punct[i] == c) {
00111       return kTRUE ;
00112     }
00113   return kFALSE ;
00114 }
00115 
00116 
00117 
00118 //_____________________________________________________________________________
00119 TString RooStreamParser::readToken() 
00120 {
00121   // Read one token separated by any of the know punctuation characters
00122   // This function recognizes and handles comment lines in the istream (those
00123   // starting with '#', quoted strings ("") the content of which is not tokenized
00124   // and '+-.' characters that are part of a floating point numbers and are exempt
00125   // from being interpreted as a token separator in case '+-.' are defined as
00126   // token separators.
00127 
00128   // Smart tokenizer. Absorb white space and token must be either punctuation or alphanum
00129   Bool_t first(kTRUE), quotedString(kFALSE), lineCont(kFALSE) ;
00130   char buffer[10240], c(0), cnext, cprev=' ' ;
00131   Int_t bufptr(0) ;
00132 
00133   // Check for end of file 
00134    if (_is->eof() || _is->fail()) {
00135      _atEOF = kTRUE ;
00136      return TString("") ;
00137    }
00138 
00139   //Ignore leading newline
00140   if (_is->peek()=='\n') {
00141     _is->get(c) ;
00142 
00143     // If new line starts with #, zap it    
00144     while (_is->peek()=='#') {
00145       zapToEnd(kFALSE) ;
00146       _is->get(c) ; // absorb newline
00147     }
00148   }
00149 
00150   while(1) {
00151     // Buffer overflow protection
00152     if (bufptr>=10239) {
00153       oocoutW((TObject*)0,InputArguments) << "RooStreamParser::readToken: token length exceeds buffer capacity, terminating token early" << endl ;
00154       break ;
00155     }
00156 
00157     // Read next char
00158     _is->get(c) ;
00159     
00160 
00161     // Terminate at EOF, EOL or trouble
00162     if (_is->eof() || _is->fail() || c=='\n') break ;
00163 
00164     // Terminate as SPACE, unless we haven't seen any non-SPACE yet
00165     if (isspace(c)) {
00166       if (first) 
00167         continue ; 
00168       else 
00169         if (!quotedString) {
00170           break ;
00171         }
00172     }
00173 
00174     // If '-' or '/' see what the next character is
00175     if (c == '.' || c=='-' || c=='+' || c=='/' || c=='\\') {
00176       _is->get(cnext) ;
00177       _is->putback(cnext) ;
00178     }
00179 
00180     // Check for line continuation marker
00181     if (c=='\\' && cnext=='\\') {
00182       // Kill rest of line including endline marker
00183       zapToEnd(kFALSE) ;
00184       _is->get(c) ;
00185       lineCont=kTRUE ;
00186       break ;
00187     }
00188 
00189     // Stop if begin of comments is encountered
00190     if (c=='/' && cnext=='/') {
00191       zapToEnd(kFALSE) ;
00192       break ;
00193     }
00194 
00195     // Special handling of quoted strings
00196     if (c=='"') {
00197       if (first) {
00198         quotedString=kTRUE ;            
00199       } else if (!quotedString) {
00200         // Terminate current token. Next token will be quoted string
00201         _is->putback('"') ;
00202         break ;
00203       }
00204     }
00205 
00206     if (!quotedString) {
00207       // Decide if next char is punctuation (exempt - and . that are part of floating point numbers, or +/- preceeding INF)
00208       if (isPunctChar(c) && !(c=='.' && (isdigit(cnext)||isdigit(cprev))) 
00209           && !((c=='-'||c=='+') && (isdigit(cnext)||cnext=='.'||cnext=='i'||cnext=='I'))) {
00210         if (first) {
00211           // Make this a one-char punctuation token
00212           buffer[bufptr++]=c ;
00213           break ;
00214         } else {
00215           // Put back punct. char and terminate current alphanum token
00216           _is->putback(c) ;
00217           break ;
00218         } 
00219       }       
00220     } else {
00221       // Inside quoted string conventional tokenizing rules do not apply
00222 
00223       // Terminate token on closing quote
00224       if (c=='"' && !first) {
00225         buffer[bufptr++]=c ;    
00226         quotedString=kFALSE ;
00227         break ;
00228       }
00229     }
00230 
00231     // Store in buffer
00232     buffer[bufptr++]=c ;
00233     first=kFALSE ;
00234     cprev=c ;
00235   }
00236 
00237   if (_is->eof() || _is->bad()) {
00238     _atEOF = kTRUE ;
00239   }
00240 
00241   // Check if closing quote was encountered
00242   if (quotedString) {
00243     oocoutW((TObject*)0,InputArguments) << "RooStreamParser::readToken: closing quote (\") missing" << endl ;
00244   }
00245 
00246   // Absorb trailing white space or absorb rest of line if // is encountered
00247   if (c=='\n') {
00248     if (!lineCont) {
00249       _is->putback(c) ;
00250     }
00251   } else {
00252     c = _is->peek() ;
00253 
00254     while ((isspace(c) || c=='/') && c != '\n') {
00255       if (c=='/') {
00256         _is->get(c) ;
00257         if (_is->peek()=='/') {
00258           zapToEnd(kFALSE) ;    
00259         } else {
00260           _is->putback('/') ;
00261         }
00262         break ;
00263       } else {
00264         _is->get(c) ;
00265         c = _is->peek() ;
00266       }
00267     }
00268   }
00269 
00270   // If no token was read line is continued, return first token on next line
00271   if (bufptr==0 && lineCont) {
00272     return readToken() ;
00273   }
00274   
00275   // Zero terminate buffer and convert to TString
00276   buffer[bufptr]=0 ;
00277   return TString(buffer) ;
00278 }
00279 
00280 
00281 
00282 //_____________________________________________________________________________
00283 TString RooStreamParser::readLine() 
00284 {
00285   // Read an entire line from the stream and return as TString
00286   // This method recognizes the use of '\\' in the istream 
00287   // as line continuation token.
00288 
00289   char c,buffer[10240] ;
00290   Int_t nfree(10239) ; 
00291   
00292   if (_is->peek()=='\n') _is->get(c) ;
00293 
00294   // Read till end of line
00295   _is->getline(buffer,nfree,'\n') ;
00296 
00297   // Look for eventual continuation line sequence  
00298   char *pcontseq = strstr(buffer,"\\\\") ;
00299   if (pcontseq) nfree -= (pcontseq-buffer) ;
00300   while(pcontseq) {
00301     _is->getline(pcontseq,nfree,'\n') ;
00302 
00303     char* nextpcontseq = strstr(pcontseq,"\\\\") ;
00304     if (nextpcontseq) nfree -= (nextpcontseq-pcontseq) ;
00305     pcontseq = nextpcontseq ;
00306   }    
00307 
00308   // Chop eventual comments
00309   char *pcomment = strstr(buffer,"//") ;
00310   if (pcomment) *pcomment=0 ;
00311 
00312   // Chop leading and trailing space
00313   char *pstart=buffer ;
00314   while (isspace(*pstart)) {
00315     pstart++ ;
00316   }
00317   char *pend=buffer+strlen(buffer)-1 ;
00318   if (pend>pstart)
00319     while (isspace(*pend)) { *pend--=0 ; }
00320 
00321   if (_is->eof() || _is->fail()) {
00322     _atEOF = kTRUE ;
00323   }
00324 
00325   // Convert to TString
00326   return TString(pstart) ;
00327 }
00328 
00329 
00330 
00331 //_____________________________________________________________________________
00332 void RooStreamParser::zapToEnd(Bool_t inclContLines) 
00333 {
00334   // Eat all characters up to and including then end of the
00335   // current line. If inclContLines is kTRUE, all continuation lines
00336   // marked by the '\\' token are zapped as well
00337 
00338   // Skip over everything until the end of the current line
00339   if (_is->peek()!='\n') {
00340 
00341     char buffer[10240] ;
00342     Int_t nfree(10239) ; 
00343 
00344     // Read till end of line
00345     _is->getline(buffer,nfree,'\n') ;
00346 
00347     if (inclContLines) {
00348       // Look for eventual continuation line sequence  
00349       char *pcontseq = strstr(buffer,"\\\\") ;
00350       if (pcontseq) nfree -= (pcontseq-buffer) ;
00351       while(pcontseq) {
00352         _is->getline(pcontseq,nfree,'\n') ;
00353         
00354         char* nextpcontseq = strstr(pcontseq,"\\\\") ;
00355         if (nextpcontseq) nfree -= (nextpcontseq-pcontseq) ;
00356         pcontseq = nextpcontseq ;
00357       }    
00358     }
00359 
00360     // Put back newline character in stream buffer
00361    _is->putback('\n') ;
00362   }
00363 }
00364 
00365 
00366 
00367 //_____________________________________________________________________________
00368 Bool_t RooStreamParser::expectToken(const TString& expected, Bool_t zapOnError) 
00369 {
00370   // Read the next token and return kTRUE if it is identical to the given 'expected' token.
00371 
00372   TString token(readToken()) ;
00373 
00374   Bool_t error=token.CompareTo(expected) ;
00375   if (error && !_prefix.IsNull()) {
00376     oocoutW((TObject*)0,InputArguments) << _prefix << ": parse error, expected '" 
00377                                         << expected << "'" << ", got '" << token << "'" << endl ;
00378     if (zapOnError) zapToEnd(kTRUE) ;
00379   }
00380   return error ;
00381 }
00382 
00383 
00384 
00385 //_____________________________________________________________________________
00386 Bool_t RooStreamParser::readDouble(Double_t& value, Bool_t /*zapOnError*/) 
00387 {
00388   // Read the next token and convert it to a Double_t. Returns true
00389   // if an error occurred in reading or conversion
00390 
00391   TString token(readToken()) ;
00392   if (token.IsNull()) return kTRUE ;
00393   return convertToDouble(token,value) ;
00394   
00395 }
00396 
00397 
00398 
00399 //_____________________________________________________________________________
00400 Bool_t RooStreamParser::convertToDouble(const TString& token, Double_t& value) 
00401 {
00402   // Convert given string to a double. Return true if the conversion fails.
00403 
00404   char* endptr = 0;
00405   const char* data=token.Data() ;
00406 
00407   // Handle +/- infinity cases, (token is guaranteed to be >1 char long)
00408   if (!strcasecmp(data,"inf") || !strcasecmp(data+1,"inf")) {
00409     value = (data[0]=='-') ? -RooNumber::infinity() : RooNumber::infinity() ;
00410     return kFALSE ;
00411   }
00412 
00413   value = strtod(data,&endptr) ;
00414   Bool_t error = (endptr-data!=token.Length()) ;
00415 
00416   if (error && !_prefix.IsNull()) {
00417     oocoutE((TObject*)0,InputArguments) << _prefix << ": parse error, cannot convert '" 
00418                                         << token << "'" << " to double precision" <<  endl ;
00419   }
00420   return error ;
00421 }
00422 
00423 
00424 
00425 //_____________________________________________________________________________
00426 Bool_t RooStreamParser::readInteger(Int_t& value, Bool_t /*zapOnError*/) 
00427 {
00428   // Read a token and convert it to an Int_t. Returns true
00429   // if an error occurred in reading or conversion
00430 
00431   TString token(readToken()) ;
00432   if (token.IsNull()) return kTRUE ;
00433   return convertToInteger(token,value) ;
00434 }
00435 
00436 
00437 
00438 //_____________________________________________________________________________
00439 Bool_t RooStreamParser::convertToInteger(const TString& token, Int_t& value) 
00440 {
00441   // Convert given string to an Int_t. Returns true if an error
00442   // occurred in conversion
00443 
00444   char* endptr = 0;
00445   const char* data=token.Data() ;
00446   value = strtol(data,&endptr,10) ;
00447   Bool_t error = (endptr-data!=token.Length()) ;
00448 
00449   if (error && !_prefix.IsNull()) {
00450     oocoutE((TObject*)0,InputArguments)<< _prefix << ": parse error, cannot convert '" 
00451                                        << token << "'" << " to integer" <<  endl ;
00452   }
00453   return error ;
00454 }
00455 
00456 
00457 
00458 //_____________________________________________________________________________
00459 Bool_t RooStreamParser::readString(TString& value, Bool_t /*zapOnError*/) 
00460 {
00461   // Read a string token. Returns true if an error occurred in reading
00462   // or conversion.  If a the read token is enclosed in quotation
00463   // marks those are stripped in the returned value
00464 
00465   TString token(readToken()) ;
00466   if (token.IsNull()) return kTRUE ;
00467   return convertToString(token,value) ;
00468 }
00469 
00470 
00471 
00472 //_____________________________________________________________________________
00473 Bool_t RooStreamParser::convertToString(const TString& token, TString& string) 
00474 {
00475   // Convert given token to a string (i.e. remove eventual quotation marks)
00476 
00477   // Transport to buffer 
00478   char buffer[10240],*ptr ;
00479   strncpy(buffer,token.Data(),10239) ;
00480   if (token.Length()>=10239) {
00481     oocoutW((TObject*)0,InputArguments) << "RooStreamParser::convertToString: token length exceeds 1023, truncated" << endl ;
00482     buffer[10239]=0 ;
00483   }
00484   int len = strlen(buffer) ;
00485 
00486   // Remove trailing quote if any
00487   if ((len) && (buffer[len-1]=='"'))
00488     buffer[len-1]=0 ;
00489 
00490   // Skip leading quote, if present
00491   ptr=(buffer[0]=='"') ? buffer+1 : buffer ;
00492 
00493   string = ptr ;
00494   return kFALSE ;
00495 }