00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046 #include "compat.h"
00047
00048
00049
00050
00051 #include "sys.h"
00052 #include <string.h>
00053 #include <stdlib.h>
00054 #include "tokenizer.h"
00055
/*
 * Quoting state of the tokenizer while it scans a line.
 */
typedef enum {
    kQuoteNone,         /* not inside any quote */
    kQuoteSingle,       /* inside '...' */
    kQuoteDouble,       /* inside "..." */
    kQuoteOne,          /* a backslash was just seen outside of quotes */
    kQuoteDoubleone     /* a backslash was just seen inside "..." */
} Quote_t;
00059
/* Word separators used when tok_init() is called with a NULL ifs. */
#define IFS                     "\t \n"

/* Bits kept in ElTokenizer_t::fFlags. */
#define TOK_KEEP                1   /* emit the current word even if it is empty */
#define TOK_EAT                 2   /* an escaped newline was just swallowed */

/* Growth steps: bytes for the word buffer, slots for the argument vector. */
#define WINCR                   20
#define AINCR                   10

/* Allocator hooks; all tokenizer memory goes through these. */
#define tok_malloc(a)           malloc(a)
#define tok_free(a)             free(a)
#define tok_realloc(a, b)       realloc(a, b)
00071
00072
00073 struct ElTokenizer_t {
00074 char* fIfs;
00075 int fArgC;
00076 int fAMax;
00077 char** fArgV;
00078 char* fWPtr;
00079 char* fWMax;
00080 char* fWStart;
00081 char* fWSpace;
00082 Quote_t fQuote;
00083 int fFlags;
00084 };
00085
00086
00087 el_private void tok_finish_word(Tokenizer_t*);
00088
00089
00090
00091
00092
00093 el_private void
00094 tok_finish_word(Tokenizer_t* tok) {
00095 *tok->fWPtr = '\0';
00096
00097 if ((tok->fFlags & TOK_KEEP) || tok->fWPtr != tok->fWStart) {
00098 tok->fArgV[tok->fArgC++] = tok->fWStart;
00099 tok->fArgV[tok->fArgC] = NULL;
00100 tok->fWStart = ++tok->fWPtr;
00101 }
00102 tok->fFlags &= ~TOK_KEEP;
00103 }
00104
00105
00106
00107
00108
00109 el_public Tokenizer_t*
00110 tok_init(const char* ifs) {
00111 Tokenizer_t* tok = (Tokenizer_t*) tok_malloc(sizeof(Tokenizer_t));
00112
00113 tok->fIfs = strdup(ifs ? ifs : IFS);
00114 tok->fArgC = 0;
00115 tok->fAMax = AINCR;
00116 tok->fArgV = (char**) tok_malloc(sizeof(char*) * tok->fAMax);
00117
00118 if (tok->fArgV == NULL) {
00119 tok_free((ptr_t) tok);
00120 return NULL;
00121 }
00122 tok->fArgV[0] = NULL;
00123 tok->fWSpace = (char*) tok_malloc(WINCR);
00124
00125 if (tok->fWSpace == NULL) {
00126 tok_free((ptr_t) tok);
00127 return NULL;
00128 }
00129 tok->fWMax = tok->fWSpace + WINCR;
00130 tok->fWStart = tok->fWSpace;
00131 tok->fWPtr = tok->fWSpace;
00132 tok->fFlags = 0;
00133 tok->fQuote = kQuoteNone;
00134
00135 return tok;
00136 }
00137
00138
00139
00140
00141
00142 el_public void
00143 tok_reset(Tokenizer_t* tok) {
00144 tok->fArgC = 0;
00145 tok->fWStart = tok->fWSpace;
00146 tok->fWPtr = tok->fWSpace;
00147 tok->fFlags = 0;
00148 tok->fQuote = kQuoteNone;
00149 }
00150
00151
00152
00153
00154
00155 el_public void
00156 tok_end(Tokenizer_t* tok) {
00157 tok_free((ptr_t) tok->fIfs);
00158 tok_free((ptr_t) tok->fWSpace);
00159 tok_free((ptr_t) tok->fArgV);
00160 tok_free((ptr_t) tok);
00161 }
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173 el_public int
00174 tok_line(Tokenizer_t* tok, const char* line, int* argc, char*** argv) {
00175 const char* ptr;
00176
00177 for ( ; ;) {
00178 switch (*(ptr = line++)) {
00179 case '\'':
00180 tok->fFlags |= TOK_KEEP;
00181 tok->fFlags &= ~TOK_EAT;
00182
00183 switch (tok->fQuote) {
00184 case kQuoteNone:
00185 tok->fQuote = kQuoteSingle;
00186
00187 break;
00188
00189 case kQuoteSingle:
00190 tok->fQuote = kQuoteNone;
00191 break;
00192
00193 case kQuoteOne:
00194 tok->fQuote = kQuoteNone;
00195 *tok->fWPtr++ = *ptr;
00196 break;
00197
00198 case kQuoteDouble:
00199 *tok->fWPtr++ = *ptr;
00200 break;
00201
00202 case kQuoteDoubleone:
00203 tok->fQuote = kQuoteDouble;
00204 *tok->fWPtr++ = *ptr;
00205 break;
00206
00207 default:
00208 return -1;
00209 }
00210 break;
00211
00212 case '"':
00213 tok->fFlags &= ~TOK_EAT;
00214 tok->fFlags |= TOK_KEEP;
00215
00216 switch (tok->fQuote) {
00217 case kQuoteNone:
00218 tok->fQuote = kQuoteDouble;
00219 break;
00220
00221 case kQuoteDouble:
00222 tok->fQuote = kQuoteNone;
00223 break;
00224
00225 case kQuoteOne:
00226 tok->fQuote = kQuoteNone;
00227 *tok->fWPtr++ = *ptr;
00228 break;
00229
00230 case kQuoteSingle:
00231 *tok->fWPtr++ = *ptr;
00232 break;
00233
00234 case kQuoteDoubleone:
00235 tok->fQuote = kQuoteDouble;
00236 *tok->fWPtr++ = *ptr;
00237 break;
00238
00239 default:
00240 return -1;
00241 }
00242 break;
00243
00244 case '\\':
00245 tok->fFlags |= TOK_KEEP;
00246 tok->fFlags &= ~TOK_EAT;
00247
00248 switch (tok->fQuote) {
00249 case kQuoteNone:
00250 tok->fQuote = kQuoteOne;
00251 break;
00252
00253 case kQuoteDouble:
00254 tok->fQuote = kQuoteDoubleone;
00255 break;
00256
00257 case kQuoteOne:
00258 *tok->fWPtr++ = *ptr;
00259 tok->fQuote = kQuoteNone;
00260 break;
00261
00262 case kQuoteSingle:
00263 *tok->fWPtr++ = *ptr;
00264 break;
00265
00266 case kQuoteDoubleone:
00267 tok->fQuote = kQuoteDouble;
00268 *tok->fWPtr++ = *ptr;
00269 break;
00270
00271 default:
00272 return -1;
00273 }
00274 break;
00275
00276 case '\n':
00277 tok->fFlags &= ~TOK_EAT;
00278
00279 switch (tok->fQuote) {
00280 case kQuoteNone:
00281 tok_finish_word(tok);
00282 *argv = tok->fArgV;
00283 *argc = tok->fArgC;
00284 return 0;
00285
00286 case kQuoteSingle:
00287 case kQuoteDouble:
00288 *tok->fWPtr++ = *ptr;
00289 break;
00290
00291 case kQuoteDoubleone:
00292 tok->fFlags |= TOK_EAT;
00293 tok->fQuote = kQuoteDouble;
00294 break;
00295
00296 case kQuoteOne:
00297 tok->fFlags |= TOK_EAT;
00298 tok->fQuote = kQuoteNone;
00299 break;
00300
00301 default:
00302 return 0;
00303 }
00304 break;
00305
00306 case '\0':
00307
00308 switch (tok->fQuote) {
00309 case kQuoteNone:
00310
00311
00312 if (tok->fFlags & TOK_EAT) {
00313 tok->fFlags &= ~TOK_EAT;
00314 return 3;
00315 }
00316 tok_finish_word(tok);
00317 *argv = tok->fArgV;
00318 *argc = tok->fArgC;
00319 return 0;
00320
00321 case kQuoteSingle:
00322 return 1;
00323
00324 case kQuoteDouble:
00325 return 2;
00326
00327 case kQuoteDoubleone:
00328 tok->fQuote = kQuoteDouble;
00329 *tok->fWPtr++ = *ptr;
00330 break;
00331
00332 case kQuoteOne:
00333 tok->fQuote = kQuoteNone;
00334 *tok->fWPtr++ = *ptr;
00335 break;
00336
00337 default:
00338 return -1;
00339 }
00340 break;
00341
00342 default:
00343 tok->fFlags &= ~TOK_EAT;
00344
00345 switch (tok->fQuote) {
00346 case kQuoteNone:
00347
00348 if (strchr(tok->fIfs, *ptr) != NULL) {
00349 tok_finish_word(tok);
00350 } else {
00351 *tok->fWPtr++ = *ptr;
00352 }
00353 break;
00354
00355 case kQuoteSingle:
00356 case kQuoteDouble:
00357 *tok->fWPtr++ = *ptr;
00358 break;
00359
00360
00361 case kQuoteDoubleone:
00362 *tok->fWPtr++ = '\\';
00363 tok->fQuote = kQuoteDouble;
00364 *tok->fWPtr++ = *ptr;
00365 break;
00366
00367 case kQuoteOne:
00368 tok->fQuote = kQuoteNone;
00369 *tok->fWPtr++ = *ptr;
00370 break;
00371
00372 default:
00373 return -1;
00374
00375 }
00376 break;
00377 }
00378
00379 if (tok->fWPtr >= tok->fWMax - 4) {
00380 size_t size = tok->fWMax - tok->fWSpace + WINCR;
00381 char* s = (char*) tok_realloc(tok->fWSpace, size);
00382
00383 int offs = s - tok->fWSpace;
00384
00385 if (s == NULL) {
00386 return -1;
00387 }
00388
00389 if (offs != 0) {
00390 int i;
00391
00392 for (i = 0; i < tok->fArgC; i++) {
00393 tok->fArgV[i] = tok->fArgV[i] + offs;
00394 }
00395 tok->fWPtr = tok->fWPtr + offs;
00396 tok->fWStart = tok->fWStart + offs;
00397 tok->fWMax = s + size;
00398 tok->fWSpace = s;
00399 } else {
00400 tok_free((ptr_t) s);
00401 }
00402 }
00403
00404 if (tok->fArgC >= tok->fAMax - 4) {
00405 char** p;
00406 tok->fAMax += AINCR;
00407 p = (char**) tok_realloc(tok->fArgV,
00408 tok->fAMax * sizeof(char*));
00409
00410 if (p == NULL) {
00411 return -1;
00412 }
00413 tok->fArgV = p;
00414 }
00415 }
00416 return 0;
00417 }