tokenizer.cxx

Go to the documentation of this file.
00001 // @(#)root/editline:$Id: tokenizer.cxx 35219 2010-09-10 09:59:12Z axel $
00002 // Author: Mary-Louise Gill, 2009
00003 
00004 /*************************************************************************
00005  * Copyright (C) 1995-2009, Rene Brun and Fons Rademakers.               *
00006  * All rights reserved.                                                  *
00007  *                                                                       *
00008  * For the licensing terms see $ROOTSYS/LICENSE.                         *
00009  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
00010  *************************************************************************/
00011 
00012 /*      $NetBSD: ElTokenizer_t.c,v 1.7 2001/01/04 15:56:32 christos Exp $       */
00013 
00014 /*-
00015  * Copyright (c) 1992, 1993
00016  *      The Regents of the University of California.  All rights reserved.
00017  *
00018  * This code is derived from software contributed to Berkeley by
00019  * Christos Zoulas of Cornell University.
00020  *
00021  * Redistribution and use in source and binary forms, with or without
00022  * modification, are permitted provided that the following conditions
00023  * are met:
00024  * 1. Redistributions of source code must retain the above copyright
00025  *    notice, this list of conditions and the following disclaimer.
00026  * 2. Redistributions in binary form must reproduce the above copyright
00027  *    notice, this list of conditions and the following disclaimer in the
00028  *    documentation and/or other materials provided with the distribution.
00029  * 3. Neither the name of the University nor the names of its contributors
00030  *    may be used to endorse or promote products derived from this software
00031  *    without specific prior written permission.
00032  *
00033  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
00034  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00035  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00036  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
00037  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00038  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00039  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00040  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00041  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00042  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00043  * SUCH DAMAGE.
00044  */
00045 
00046 #include "compat.h"
00047 
00048 /*
00049  * tokenize.c: Bourne shell like ElTokenizer_t
00050  */
00051 #include "sys.h"
00052 #include <string.h>
00053 #include <stdlib.h>
00054 #include "tokenizer.h"
00055 
/* Quoting state of the scanner while it walks an input line. */
typedef enum {
   kQuoteNone,       /* not inside any quoting construct              */
   kQuoteSingle,     /* between a pair of single quotes               */
   kQuoteDouble,     /* between a pair of double quotes               */
   kQuoteOne,        /* a backslash was seen outside of quotes        */
   kQuoteDoubleone   /* a backslash was seen inside double quotes     */
} Quote_t;

/* Separator set used when tok_init() is called with a NULL ifs. */
#define IFS "\t \n"

/* Bits stored in ElTokenizer_t::fFlags. */
#define TOK_KEEP 1   /* keep the current word even when it is empty      */
#define TOK_EAT 2    /* an escaped newline has just been swallowed       */

/* Growth steps: bytes for the word buffer, slots for the argument list. */
#define WINCR 20
#define AINCR 10

/* Allocator hooks; by default they map straight onto the C library. */
#define tok_malloc(a) malloc(a)
#define tok_free(a) free(a)
#define tok_realloc(a, b) realloc(a, b)
00071 
00072 
00073 struct ElTokenizer_t {
00074    char* fIfs;                   /* In field separator                   */
00075    int fArgC;                    /* Current number of args       */
00076    int fAMax;                    /* Maximum number of args       */
00077    char** fArgV;                 /* Argument list                        */
00078    char* fWPtr;                  /* Space on the word buffer     */
00079    char* fWMax;                  /* Limit on the word buffer     */
00080    char* fWStart;                /* Beginning of next word               */
00081    char* fWSpace;                /* Space of word buffer                         */
00082    Quote_t fQuote;               /* Quoting state                        */
00083    int fFlags;                   /* flags;                               */
00084 };
00085 
00086 
00087 el_private void tok_finish_word(Tokenizer_t*);
00088 
00089 
00090 /* tok_finish_word():
00091  *      Finish a word in the ElTokenizer_t.
00092  */
00093 el_private void
00094 tok_finish_word(Tokenizer_t* tok) {
00095    *tok->fWPtr = '\0';
00096 
00097    if ((tok->fFlags & TOK_KEEP) || tok->fWPtr != tok->fWStart) {
00098       tok->fArgV[tok->fArgC++] = tok->fWStart;
00099       tok->fArgV[tok->fArgC] = NULL;
00100       tok->fWStart = ++tok->fWPtr;
00101    }
00102    tok->fFlags &= ~TOK_KEEP;
00103 }
00104 
00105 
00106 /* tok_init():
00107  *      Initialize the ElTokenizer_t
00108  */
00109 el_public Tokenizer_t*
00110 tok_init(const char* ifs) {
00111    Tokenizer_t* tok = (Tokenizer_t*) tok_malloc(sizeof(Tokenizer_t));
00112 
00113    tok->fIfs = strdup(ifs ? ifs : IFS);
00114    tok->fArgC = 0;
00115    tok->fAMax = AINCR;
00116    tok->fArgV = (char**) tok_malloc(sizeof(char*) * tok->fAMax);
00117 
00118    if (tok->fArgV == NULL) {
00119       tok_free((ptr_t) tok);
00120       return NULL;
00121    }
00122    tok->fArgV[0] = NULL;
00123    tok->fWSpace = (char*) tok_malloc(WINCR);
00124 
00125    if (tok->fWSpace == NULL) {
00126       tok_free((ptr_t) tok);
00127       return NULL;
00128    }
00129    tok->fWMax = tok->fWSpace + WINCR;
00130    tok->fWStart = tok->fWSpace;
00131    tok->fWPtr = tok->fWSpace;
00132    tok->fFlags = 0;
00133    tok->fQuote = kQuoteNone;
00134 
00135    return tok;
00136 } // tok_init
00137 
00138 
00139 /* tok_reset():
00140  *      Reset the ElTokenizer_t
00141  */
00142 el_public void
00143 tok_reset(Tokenizer_t* tok) {
00144    tok->fArgC = 0;
00145    tok->fWStart = tok->fWSpace;
00146    tok->fWPtr = tok->fWSpace;
00147    tok->fFlags = 0;
00148    tok->fQuote = kQuoteNone;
00149 }
00150 
00151 
00152 /* tok_end():
00153  *      Clean up
00154  */
00155 el_public void
00156 tok_end(Tokenizer_t* tok) {
00157    tok_free((ptr_t) tok->fIfs);
00158    tok_free((ptr_t) tok->fWSpace);
00159    tok_free((ptr_t) tok->fArgV);
00160    tok_free((ptr_t) tok);
00161 }
00162 
00163 
00164 /* tok_line():
00165  *      Bourne shell like tokenizing
00166  *      Return:
00167  *              -1: Internal error
00168  *               3: Quoted return
00169  *               2: Unmatched double quote
00170  *               1: Unmatched single quote
00171  *               0: Ok
00172  */
00173 el_public int
00174 tok_line(Tokenizer_t* tok, const char* line, int* argc, char*** argv) {
00175    const char* ptr;
00176 
00177    for ( ; ;) {
00178       switch (*(ptr = line++)) {
00179       case '\'':
00180          tok->fFlags |= TOK_KEEP;
00181          tok->fFlags &= ~TOK_EAT;
00182 
00183          switch (tok->fQuote) {
00184          case kQuoteNone:
00185             tok->fQuote = kQuoteSingle;                      /* Enter single quote
00186                                                          * mode */
00187             break;
00188 
00189          case kQuoteSingle:                 /* Exit single quote mode */
00190             tok->fQuote = kQuoteNone;
00191             break;
00192 
00193          case kQuoteOne:                    /* Quote this ' */
00194             tok->fQuote = kQuoteNone;
00195             *tok->fWPtr++ = *ptr;
00196             break;
00197 
00198          case kQuoteDouble:                 /* Stay in double quote mode */
00199             *tok->fWPtr++ = *ptr;
00200             break;
00201 
00202          case kQuoteDoubleone:                      /* Quote this ' */
00203             tok->fQuote = kQuoteDouble;
00204             *tok->fWPtr++ = *ptr;
00205             break;
00206 
00207          default:
00208             return -1;
00209          } // switch
00210          break;
00211 
00212       case '"':
00213          tok->fFlags &= ~TOK_EAT;
00214          tok->fFlags |= TOK_KEEP;
00215 
00216          switch (tok->fQuote) {
00217          case kQuoteNone:                   /* Enter double quote mode */
00218             tok->fQuote = kQuoteDouble;
00219             break;
00220 
00221          case kQuoteDouble:                 /* Exit double quote mode */
00222             tok->fQuote = kQuoteNone;
00223             break;
00224 
00225          case kQuoteOne:                    /* Quote this " */
00226             tok->fQuote = kQuoteNone;
00227             *tok->fWPtr++ = *ptr;
00228             break;
00229 
00230          case kQuoteSingle:                 /* Stay in single quote mode */
00231             *tok->fWPtr++ = *ptr;
00232             break;
00233 
00234          case kQuoteDoubleone:                      /* Quote this " */
00235             tok->fQuote = kQuoteDouble;
00236             *tok->fWPtr++ = *ptr;
00237             break;
00238 
00239          default:
00240             return -1;
00241          } // switch
00242          break;
00243 
00244       case '\\':
00245          tok->fFlags |= TOK_KEEP;
00246          tok->fFlags &= ~TOK_EAT;
00247 
00248          switch (tok->fQuote) {
00249          case kQuoteNone:                   /* Quote next character */
00250             tok->fQuote = kQuoteOne;
00251             break;
00252 
00253          case kQuoteDouble:                 /* Quote next character */
00254             tok->fQuote = kQuoteDoubleone;
00255             break;
00256 
00257          case kQuoteOne:                    /* Quote this, restore state */
00258             *tok->fWPtr++ = *ptr;
00259             tok->fQuote = kQuoteNone;
00260             break;
00261 
00262          case kQuoteSingle:                 /* Stay in single quote mode */
00263             *tok->fWPtr++ = *ptr;
00264             break;
00265 
00266          case kQuoteDoubleone:                      /* Quote this \ */
00267             tok->fQuote = kQuoteDouble;
00268             *tok->fWPtr++ = *ptr;
00269             break;
00270 
00271          default:
00272             return -1;
00273          } // switch
00274          break;
00275 
00276       case '\n':
00277          tok->fFlags &= ~TOK_EAT;
00278 
00279          switch (tok->fQuote) {
00280          case kQuoteNone:
00281             tok_finish_word(tok);
00282             *argv = tok->fArgV;
00283             *argc = tok->fArgC;
00284             return 0;
00285 
00286          case kQuoteSingle:
00287          case kQuoteDouble:
00288             *tok->fWPtr++ = *ptr;                        /* Add the return */
00289             break;
00290 
00291          case kQuoteDoubleone:                  /* Back to double, eat the '\n' */
00292             tok->fFlags |= TOK_EAT;
00293             tok->fQuote = kQuoteDouble;
00294             break;
00295 
00296          case kQuoteOne:                    /* No quote, more eat the '\n' */
00297             tok->fFlags |= TOK_EAT;
00298             tok->fQuote = kQuoteNone;
00299             break;
00300 
00301          default:
00302             return 0;
00303          } // switch
00304          break;
00305 
00306       case '\0':
00307 
00308          switch (tok->fQuote) {
00309          case kQuoteNone:
00310 
00311             /* Finish word and return */
00312             if (tok->fFlags & TOK_EAT) {
00313                tok->fFlags &= ~TOK_EAT;
00314                return 3;
00315             }
00316             tok_finish_word(tok);
00317             *argv = tok->fArgV;
00318             *argc = tok->fArgC;
00319             return 0;
00320 
00321          case kQuoteSingle:
00322             return 1;
00323 
00324          case kQuoteDouble:
00325             return 2;
00326 
00327          case kQuoteDoubleone:
00328             tok->fQuote = kQuoteDouble;
00329             *tok->fWPtr++ = *ptr;
00330             break;
00331 
00332          case kQuoteOne:
00333             tok->fQuote = kQuoteNone;
00334             *tok->fWPtr++ = *ptr;
00335             break;
00336 
00337          default:
00338             return -1;
00339          } // switch
00340          break;
00341 
00342       default:
00343          tok->fFlags &= ~TOK_EAT;
00344 
00345          switch (tok->fQuote) {
00346          case kQuoteNone:
00347 
00348             if (strchr(tok->fIfs, *ptr) != NULL) {
00349                tok_finish_word(tok);
00350             } else {
00351                *tok->fWPtr++ = *ptr;
00352             }
00353             break;
00354 
00355          case kQuoteSingle:
00356          case kQuoteDouble:
00357             *tok->fWPtr++ = *ptr;
00358             break;
00359 
00360 
00361          case kQuoteDoubleone:
00362             *tok->fWPtr++ = '\\';
00363             tok->fQuote = kQuoteDouble;
00364             *tok->fWPtr++ = *ptr;
00365             break;
00366 
00367          case kQuoteOne:
00368             tok->fQuote = kQuoteNone;
00369             *tok->fWPtr++ = *ptr;
00370             break;
00371 
00372          default:
00373             return -1;
00374 
00375          } // switch
00376          break;
00377       } // switch
00378 
00379       if (tok->fWPtr >= tok->fWMax - 4) {
00380          size_t size = tok->fWMax - tok->fWSpace + WINCR;
00381          char* s = (char*) tok_realloc(tok->fWSpace, size);
00382          /* SUPPRESS 22 */
00383          int offs = s - tok->fWSpace;
00384 
00385          if (s == NULL) {
00386             return -1;
00387          }
00388 
00389          if (offs != 0) {
00390             int i;
00391 
00392             for (i = 0; i < tok->fArgC; i++) {
00393                tok->fArgV[i] = tok->fArgV[i] + offs;
00394             }
00395             tok->fWPtr = tok->fWPtr + offs;
00396             tok->fWStart = tok->fWStart + offs;
00397             tok->fWMax = s + size;
00398             tok->fWSpace = s;
00399          } else {
00400             tok_free((ptr_t) s);
00401          }
00402       }
00403 
00404       if (tok->fArgC >= tok->fAMax - 4) {
00405          char** p;
00406          tok->fAMax += AINCR;
00407          p = (char**) tok_realloc(tok->fArgV,
00408                                   tok->fAMax * sizeof(char*));
00409 
00410          if (p == NULL) {
00411             return -1;
00412          }
00413          tok->fArgV = p;
00414       }
00415    }
00416    return 0;      /* ??? added by stephan */
00417 } // tok_line

Generated on Tue Jul 5 14:11:39 2011 for ROOT_528-00b_version by  doxygen 1.5.1