TGHtmlParse.cxx

Go to the documentation of this file.
00001 // $Id: TGHtmlParse.cxx,v 1.1 2007/05/04 17:07:01 brun Exp $
00002 // Author:  Valeriy Onuchin   03/05/2007
00003 
00004 /*************************************************************************
00005  * Copyright (C) 1995-2001, Rene Brun, Fons Rademakers and Reiner Rohlfs *
00006  * All rights reserved.                                                  *
00007  *                                                                       *
00008  * For the licensing terms see $ROOTSYS/LICENSE.                         *
00009  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
00010  *************************************************************************/
00011 
00012 /**************************************************************************
00013 
00014     HTML widget for xclass. Based on tkhtml 1.28
00015     Copyright (C) 1997-2000 D. Richard Hipp <drh@acm.org>
00016     Copyright (C) 2002-2003 Hector Peraza.
00017 
00018     This library is free software; you can redistribute it and/or
00019     modify it under the terms of the GNU Library General Public
00020     License as published by the Free Software Foundation; either
00021     version 2 of the License, or (at your option) any later version.
00022 
00023     This library is distributed in the hope that it will be useful,
00024     but WITHOUT ANY WARRANTY; without even the implied warranty of
00025     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00026     Library General Public License for more details.
00027 
00028     You should have received a copy of the GNU Library General Public
00029     License along with this library; if not, write to the Free
00030     Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00031 
00032 **************************************************************************/
00033 
00034 // A tokenizer that converts raw HTML into a linked list of HTML elements.
00035 
00036 #include <string.h>
00037 #include <stdlib.h>
00038 #include <stdio.h>
00039 #include <ctype.h>
00040 
00041 #include "TGHtml.h"
00042 #include "TGHtmlTokens.h"
00043 
00044 
00045 //----------------------------------------------------------------------
00046 
00047 extern SHtmlTokenMap_t HtmlMarkupMap[];
00048 
00049 
00050 /****************** Begin Escape Sequence Translator *************/
00051 
00052 // The next section of code implements routines used to translate
00053 // the '&' escape sequences of SGML to individual characters.
00054 // Examples:
00055 //
00056 //         &amp;          &
00057 //         &lt;           <
00058 //         &gt;           >
00059 //         &nbsp;         nonbreakable space
00060 //
00061 
00062 // Each escape sequence is recorded as an instance of the following
00063 // structure
00064 
// Descriptor for one '&' escape sequence. Descriptors whose names hash to
// the same bucket are chained together through fPNext.
struct SgEsc_t {
   const char      *fZName;     // Sequence name, without '&' or ';'.  ex:  "amp"
   char             fValue[8];  // NUL-terminated replacement text.    ex:  "&"
   struct SgEsc_t  *fPNext;     // Next descriptor in the same hash bucket, or 0
};
00070 
00071 // The following is a table of all escape sequences.  Add new sequences
00072 // by adding entries to this table.
00073 
00074 static struct SgEsc_t gEscSequences[] = {
00075    { "quot",      "\"",    0 },
00076    { "amp",       "&",     0 },
00077    { "lt",        "<",     0 },
00078    { "gt",        ">",     0 },
00079    { "nbsp",      " ",     0 },
00080    { "iexcl",     "\241",  0 },
00081    { "cent",      "\242",  0 },
00082    { "pound",     "\243",  0 },
00083    { "curren",    "\244",  0 },
00084    { "yen",       "\245",  0 },
00085    { "brvbar",    "\246",  0 },
00086    { "sect",      "\247",  0 },
00087    { "uml",       "\250",  0 },
00088    { "copy",      "\251",  0 },
00089    { "ordf",      "\252",  0 },
00090    { "laquo",     "\253",  0 },
00091    { "not",       "\254",  0 },
00092    { "shy",       "\255",  0 },
00093    { "reg",       "\256",  0 },
00094    { "macr",      "\257",  0 },
00095    { "deg",       "\260",  0 },
00096    { "plusmn",    "\261",  0 },
00097    { "sup2",      "\262",  0 },
00098    { "sup3",      "\263",  0 },
00099    { "acute",     "\264",  0 },
00100    { "micro",     "\265",  0 },
00101    { "para",      "\266",  0 },
00102    { "middot",    "\267",  0 },
00103    { "cedil",     "\270",  0 },
00104    { "sup1",      "\271",  0 },
00105    { "ordm",      "\272",  0 },
00106    { "raquo",     "\273",  0 },
00107    { "frac14",    "\274",  0 },
00108    { "frac12",    "\275",  0 },
00109    { "frac34",    "\276",  0 },
00110    { "iquest",    "\277",  0 },
00111    { "Agrave",    "\300",  0 },
00112    { "Aacute",    "\301",  0 },
00113    { "Acirc",     "\302",  0 },
00114    { "Atilde",    "\303",  0 },
00115    { "Auml",      "\304",  0 },
00116    { "Aring",     "\305",  0 },
00117    { "AElig",     "\306",  0 },
00118    { "Ccedil",    "\307",  0 },
00119    { "Egrave",    "\310",  0 },
00120    { "Eacute",    "\311",  0 },
00121    { "Ecirc",     "\312",  0 },
00122    { "Euml",      "\313",  0 },
00123    { "Igrave",    "\314",  0 },
00124    { "Iacute",    "\315",  0 },
00125    { "Icirc",     "\316",  0 },
00126    { "Iuml",      "\317",  0 },
00127    { "ETH",       "\320",  0 },
00128    { "Ntilde",    "\321",  0 },
00129    { "Ograve",    "\322",  0 },
00130    { "Oacute",    "\323",  0 },
00131    { "Ocirc",     "\324",  0 },
00132    { "Otilde",    "\325",  0 },
00133    { "Ouml",      "\326",  0 },
00134    { "times",     "\327",  0 },
00135    { "Oslash",    "\330",  0 },
00136    { "Ugrave",    "\331",  0 },
00137    { "Uacute",    "\332",  0 },
00138    { "Ucirc",     "\333",  0 },
00139    { "Uuml",      "\334",  0 },
00140    { "Yacute",    "\335",  0 },
00141    { "THORN",     "\336",  0 },
00142    { "szlig",     "\337",  0 },
00143    { "agrave",    "\340",  0 },
00144    { "aacute",    "\341",  0 },
00145    { "acirc",     "\342",  0 },
00146    { "atilde",    "\343",  0 },
00147    { "auml",      "\344",  0 },
00148    { "aring",     "\345",  0 },
00149    { "aelig",     "\346",  0 },
00150    { "ccedil",    "\347",  0 },
00151    { "egrave",    "\350",  0 },
00152    { "eacute",    "\351",  0 },
00153    { "ecirc",     "\352",  0 },
00154    { "euml",      "\353",  0 },
00155    { "igrave",    "\354",  0 },
00156    { "iacute",    "\355",  0 },
00157    { "icirc",     "\356",  0 },
00158    { "iuml",      "\357",  0 },
00159    { "eth",       "\360",  0 },
00160    { "ntilde",    "\361",  0 },
00161    { "ograve",    "\362",  0 },
00162    { "oacute",    "\363",  0 },
00163    { "ocirc",     "\364",  0 },
00164    { "otilde",    "\365",  0 },
00165    { "ouml",      "\366",  0 },
00166    { "divide",    "\367",  0 },
00167    { "oslash",    "\370",  0 },
00168    { "ugrave",    "\371",  0 },
00169    { "uacute",    "\372",  0 },
00170    { "ucirc",     "\373",  0 },
00171    { "uuml",      "\374",  0 },
00172    { "yacute",    "\375",  0 },
00173    { "thorn",     "\376",  0 },
00174    { "yuml",      "\377",  0 },
00175 };
00176 
00177 
00178 // The size of the handler hash table.  For best results this should
00179 // be a prime number which is about the same size as the number of
00180 // escape sequences known to the system.
00181 
00182 #define ESC_HASH_SIZE (sizeof(gEscSequences)/sizeof(gEscSequences[0])+7)
00183 
00184 
00185 // The hash table
00186 //
00187 // If the name of an escape sequence hashes to the value H, then
00188 // gApEscHash[H] will point to a linked list of Esc structures, one of
00189 // which will be the Esc structure for that escape sequence.
00190 
00191 static struct SgEsc_t *gApEscHash[ESC_HASH_SIZE];
00192 
00193 
00194 // Hash an escape sequence name. The value returned is an integer
00195 // between 0 and ESC_HASH_SIZE-1, inclusive.
00196 
00197 static int EscHash(const char *zName) {
00198    int h = 0;      // The hash value to be returned
00199    char c;         // The next character in the name being hashed
00200 
00201    while ((c = *zName) != 0) {
00202       h = h<<5 ^ h ^ c;
00203       zName++;
00204    }
00205    if (h < 0) h = -h;
00206 
00207    return h % ESC_HASH_SIZE;
00208 }
00209 
#ifdef TEST
// Compute the longest and average collision chain length for the
// escape sequence hash table

static void EscHashStats()
{
   // Walk every bucket of gApEscHash[] and print the longest chain, the
   // average length of the non-empty chains, the number of buckets and
   // how many of them are empty.
   //
   // The table has ESC_HASH_SIZE buckets, which is more than the number of
   // entries in gEscSequences[]; the loop must cover all of them or the
   // trailing buckets are silently left out of the statistics.

   const int slots = (int) ESC_HASH_SIZE;
   int i;
   int sum = 0;        // Total number of chained entries
   int max = 0;        // Longest chain seen so far
   int notempty = 0;   // Number of buckets holding at least one entry

   for (i = 0; i < slots; i++) {
      int cnt = 0;
      struct SgEsc_t *p;
      for (p = gApEscHash[i]; p; p = p->fPNext) ++cnt;
      if (cnt > 0) notempty++;
      sum += cnt;
      if (cnt > max) max = cnt;
   }

   // Report an average of 0 for a completely empty table instead of 0/0.
   printf("Longest chain=%d  avg=%g  slots=%d  empty=%d (%g%%)\n",
          max, notempty ? (double)sum/(double)notempty : 0.0,
          slots, slots-notempty,
          100.0*(slots-notempty)/(double)slots);
}
#endif
00239 
00240 // Initialize the escape sequence hash table
00241 
00242 static void EscInit() {
00243    int i;  /* For looping thru the list of escape sequences */
00244    int h;  /* The hash on a sequence */
00245 
00246    for (i = 0; i < int(sizeof(gEscSequences) / sizeof(gEscSequences[i])); i++) {
00247 /* #ifdef XCLASS_UTF_MAX */
00248 #if 0
00249       int c = gEscSequences[i].value[0];
00250       xclass::UniCharToUtf(c, gEscSequences[i].value);
00251    }
00252 #endif
00253       h = EscHash(gEscSequences[i].fZName);
00254       gEscSequences[i].fPNext = gApEscHash[h];
00255       gApEscHash[h] = &gEscSequences[i];
00256    }
00257 #ifdef TEST
00258    EscHashStats();
00259 #endif
00260 }
00261 
00262 
00263 // This table translates the non-standard microsoft characters between 0x80
00264 // and 0x9f into plain ASCII so that the characters will be visible on Unix
00265 // systems. Care is taken to translate the characters into values less than
00266 // 0x80, to avoid UTF-8 problems.
00267 
// Read-only lookup table, indexed by (code & 0x1f) for codes 0x80 .. 0x9f.
static const char gAcMsChar[] = {
   /* 0x80 */ 'C',
   /* 0x81 */ ' ',
   /* 0x82 */ ',',
   /* 0x83 */ 'f',
   /* 0x84 */ '"',
   /* 0x85 */ '.',
   /* 0x86 */ '*',
   /* 0x87 */ '*',
   /* 0x88 */ '^',
   /* 0x89 */ '%',
   /* 0x8a */ 'S',
   /* 0x8b */ '<',
   /* 0x8c */ 'O',
   /* 0x8d */ ' ',
   /* 0x8e */ 'Z',
   /* 0x8f */ ' ',
   /* 0x90 */ ' ',
   /* 0x91 */ '\'',
   /* 0x92 */ '\'',
   /* 0x93 */ '"',
   /* 0x94 */ '"',
   /* 0x95 */ '*',
   /* 0x96 */ '-',
   /* 0x97 */ '-',
   /* 0x98 */ '~',
   /* 0x99 */ '@',
   /* 0x9a */ 's',
   /* 0x9b */ '>',
   /* 0x9c */ 'o',
   /* 0x9d */ ' ',
   /* 0x9e */ 'z',
   /* 0x9f */ 'Y',
};
00302 
00303 
00304 //______________________________________________________________________________
00305 void HtmlTranslateEscapes(char *z)
00306 {
00307    // Translate escape sequences in the string "z".  "z" is overwritten
00308    // with the translated sequence.
00309    //
00310    // Unrecognized escape sequences are unaltered.
00311    //
00312    // Example:
00313    //
00314    //      input  = "AT&amp;T &gt MCI"
00315    //      output = "AT&T > MCI"
00316 
00317    int from;   // Read characters from this position in z[]
00318    int to;     // Write characters into this position in z[]
00319    int h;      // A hash on the escape sequence
00320    struct SgEsc_t *p;  // For looping down the escape sequence collision chain
00321    static int isInit = 0;   // True after initialization
00322 
00323    from = to = 0;
00324    if (!isInit) {
00325       EscInit();
00326       isInit = 1;
00327    }
00328    while (z[from]) {
00329       if (z[from] == '&') {
00330          if (z[from+1] == '#') {
00331             int i = from + 2;
00332             int v = 0;
00333             while (isdigit(z[i])) {
00334                v = v*10 + z[i] - '0';
00335                i++;
00336             }
00337             if (z[i] == ';') { i++; }
00338 
00339             // Translate the non-standard microsoft characters in the range of
00340             // 0x80 to 0x9f into something we can see.
00341 
00342             if (v >= 0x80 && v < 0xa0) {
00343                v = gAcMsChar[v & 0x1f];
00344             }
00345 
00346             // Put the character in the output stream in place of the "&#000;".
00347             // How we do this depends on whether or not we are using UTF-8.
00348 
00349             z[to++] = v;
00350             from = i;
00351          } else {
00352             int i = from+1;
00353             int c;
00354             while (z[i] && isalnum(z[i])) ++i;
00355             c = z[i];
00356             z[i] = 0;
00357             h = EscHash(&z[from+1]);
00358             p = gApEscHash[h];
00359             while (p && strcmp(p->fZName, &z[from+1]) != 0) p = p->fPNext;
00360             z[i] = c;
00361             if (p) {
00362                int j;
00363                for (j = 0; p->fValue[j]; ++j) z[to++] = p->fValue[j];
00364                from = i;
00365                if (c == ';') from++;
00366             } else {
00367                z[to++] = z[from++];
00368             }
00369          }
00370 
00371          // Look for the non-standard microsoft characters between 0x80 and 0x9f
00372          // and translate them into printable ASCII codes. Separate algorithms
00373          // are required to do this for plain ascii and for utf-8.
00374 
00375       } else if (((unsigned char) z[from]) >= 0x80 &&
00376                  ((unsigned char) z[from]) < 0xa0) {
00377          z[to++] = gAcMsChar[z[from++] & 0x1f];
00378       } else {
00379          z[to++] = z[from++];
00380       }
00381    }
00382    z[to] = 0;
00383 }
00384 
00385 /******************* End Escape Sequence Translator ***************/
00386 
00387 /******************* Begin HTML tokenizer code *******************/
00388 
00389 // The following variable becomes TRUE when the markup hash table
00390 // (stored in HtmlMarkupMap[]) is initialized.
00391 
00392 static int gIsInit = 0;
00393 
00394 // The hash table for HTML markup names.
00395 //
00396 // If an HTML markup name hashes to H, then gApMap[H] will point to
00397 // a linked list of SHtmlTokenMap_t structures, one of which will
00398 // describe the particular markup (if it exists.)
00399 
00400 static SHtmlTokenMap_t *gApMap[HTML_MARKUP_HASH_SIZE];
00401 
00402 // Hash a markup name
00403 //
00404 // HTML markup is case insensitive, so this function will give the
00405 // same hash regardless of the case of the markup name.
00406 //
00407 // The value returned is an integer between 0 and HTML_MARKUP_HASH_SIZE-1,
00408 // inclusive.
00409 
00410 static int HtmlHash(const char *zName) {
00411   int h = 0;
00412   char c;
00413 
00414   while ((c = *zName) != 0) {
00415     if (isupper(c)) {   // do we have to check for this??????
00416       c = tolower(c);
00417     }
00418     h = h<<5 ^ h ^ c;
00419     zName++;
00420   }
00421   if (h < 0) {
00422     h = -h;
00423   }
00424 
00425   return h % HTML_MARKUP_HASH_SIZE;
00426 }
00427 
00428 
#ifdef TEST
// Compute the longest and average collision chain length for the
// markup hash table

static void HtmlHashStats() {
  // Walk every bucket of gApMap[] and print the longest chain, the average
  // length of the non-empty chains, the number of buckets and how many of
  // them are empty.
  //
  // gApMap[] has HTML_MARKUP_HASH_SIZE buckets, so that is the loop bound;
  // the chain nodes are SHtmlTokenMap_t entries linked through fPCollide.

  int i;
  int sum = 0;        // Total number of chained entries
  int max = 0;        // Longest chain seen so far
  int notempty = 0;   // Number of buckets holding at least one entry

  for (i = 0; i < HTML_MARKUP_HASH_SIZE; i++) {
    int cnt = 0;
    SHtmlTokenMap_t *p;
    for (p = gApMap[i]; p; p = p->fPCollide) cnt++;
    if (cnt > 0) notempty++;
    sum += cnt;
    if (cnt > max) max = cnt;
  }

  // Report an average of 0 for a completely empty table instead of 0/0.
  printf("longest chain=%d  avg=%g  slots=%d  empty=%d (%g%%)\n",
         max, notempty ? (double)sum/(double)notempty : 0.0,
         i, i-notempty,
         100.0*(i-notempty)/(double)i);
}
#endif
00458 
00459 
00460 // Initialize the markup name hash table
00461 
00462 static void HtmlHashInit(void){
00463   int i;
00464   int h;  // The hash on a markup name
00465 
00466   for (i = 0; i < HTML_MARKUP_COUNT; i++) {
00467     h = HtmlHash(HtmlMarkupMap[i].fZName);
00468     HtmlMarkupMap[i].fPCollide = gApMap[h];
00469     gApMap[h] = &HtmlMarkupMap[i];
00470   }
00471 #ifdef TEST
00472   HtmlHashStats();
00473 #endif
00474 }
00475 
00476 //______________________________________________________________________________
00477 void TGHtml::AppendElement(TGHtmlElement *pElem)
00478 {
00479    // Append the given TGHtmlElement to the tokenizers list of elements
00480 
00481    pElem->fPNext = 0;
00482    pElem->fPPrev = fPLast;
00483    if (fPFirst == 0) {
00484       fPFirst = pElem;
00485    } else {
00486       fPLast->fPNext = pElem;
00487    }
00488    fPLast = pElem;
00489    fNToken++;
00490 }
00491 
00492 //______________________________________________________________________________
00493 void TGHtml::AppToken(TGHtmlElement *pNew, TGHtmlElement *p, int offs)
00494 {
00495    // Insert token pNew before token p
00496 
00497    if (offs < 0) {
00498       if (p) {
00499          offs = p->fOffs;
00500       } else {
00501          offs = fNText;
00502       }
00503    }
00504 
00505 ////if (p) { pNew->fStyle = p->fStyle; pNew->fFlags = p->fFlags; }
00506 
00507 //  pNew->fCount = 0;
00508    pNew->fOffs = offs;
00509    pNew->fPNext = p;
00510    if (p) {
00511       pNew->fElId = p->fElId;
00512       p->fElId = ++fIdind;
00513       pNew->fPPrev = p->fPPrev;
00514       if (p->fPPrev) p->fPPrev->fPNext = pNew;
00515       if (fPFirst == p) fPFirst = pNew;
00516       p->fPPrev = pNew;
00517    } else {
00518       pNew->fElId = ++fIdind;
00519       AppendElement(pNew);
00520    }
00521    fNToken++;
00522 }
00523 
00524 //______________________________________________________________________________
static int NextColumn(int iCol, char c)
{
   // Return the column index that follows character c when c is placed
   // at column iCol: a newline goes back to column 0, a tab advances to
   // the next multiple of 8 and anything else moves one column on.

   if (c == '\n') return 0;
   if (c == '\t') return (iCol | 7) + 1;
   return iCol + 1;
}
00536 
00537 //______________________________________________________________________________
void ToLower(char *z)
{
   // Convert the NUL-terminated string z to all lower-case letters, in
   // place. Characters that are not upper-case letters are left alone.

   for (; *z; ++z) {
      // <ctype.h> functions require an unsigned char value; passing a
      // negative plain char (any byte >= 0x80 where char is signed) is
      // undefined behavior.
      const unsigned char uc = (unsigned char) *z;
      if (isupper(uc)) *z = (char) tolower(uc);
   }
}
00547 
00548 //______________________________________________________________________________
00549 int TGHtml::Tokenize()
00550 {
00551    // Process as much of the input HTML as possible. Construct new
00552    // TGHtmlElement objects and appended them to the list. Return
00553    // the number of characters actually processed.
00554    //
00555    // This routine may invoke a callback procedure which could delete
00556    // the HTML widget.
00557    //
00558    // This routine is not reentrant for the same HTML widget.  To
00559    // prevent reentrancy (during a callback), the p->fICol field is
00560    // set to a negative number. This is a flag to future invocations
00561    // not to reentry this routine. The p->fICol field is restored
00562    // before exiting, of course.
00563 
00564    char *z;             // The input HTML text
00565    int c;               // The next character of input
00566    int n;               // Number of characters processed so far
00567    int inpCol;          // Column of input
00568    int i, j;            // Loop counters
00569    int h;               // Result from HtmlHash()
00570    TGHtmlElement *pElem;// A new HTML element
00571    int selfClose;       // True for content free elements. Ex: <br/>
00572    int argc;            // The number of arguments on a markup
00573    SHtmlTokenMap_t *pMap; // For searching the markup name hash table
00574 # define mxARG 200      // Maximum number of parameters in a single markup
00575    char *argv[mxARG];   // Pointers to each markup argument.
00576    int arglen[mxARG];   // Length of each markup argument
00577    //int rl, ol;
00578    int pIsInScript = 0;
00579    int pIsInNoScript = 0;
00580    int pIsInNoFrames = 0;
00581    int sawdot = 0;
00582    int inLi = 0;
00583 
00584    static char null[1] = { "" };
00585 
00586    inpCol = fICol;
00587    n = fNComplete;
00588    z = fZText;
00589    if (inpCol < 0) return n;   // Prevents recursion
00590    fICol = -1;
00591    pElem = 0;
00592 
00593    while ((c = z[n]) != 0) {
00594 
00595       sawdot--;
00596       if (c == -64 && z[n+1] == -128) {
00597          n += 2;
00598          continue;
00599       }
00600 
00601       if (fPScript) {
00602 
00603          // We are in the middle of <SCRIPT>...</SCRIPT>.  Just look for
00604          // the </SCRIPT> markup.  (later:)  Treat <STYLE>...</STYLE> the
00605          // same way.
00606 
00607          TGHtmlScript *pScr = fPScript;
00608          const char *zEnd;
00609          int nEnd;
00610          //int curline, curch, curlast = n;
00611          int sqcnt;
00612          if (pScr->fType == Html_SCRIPT) {
00613             zEnd = "</script>";
00614             nEnd = 9;
00615          } else if (pScr->fType == Html_NOSCRIPT) {
00616             zEnd = "</noscript>";
00617             nEnd = 11;
00618          } else if (pScr->fType == Html_NOFRAMES) {
00619             zEnd = "</noframes>";
00620             nEnd = 11;
00621          } else {
00622             zEnd = "</style>";
00623             nEnd = 8;
00624          }
00625          if (pScr->fNStart < 0) {
00626             pScr->fNStart = n;
00627             pScr->fNScript = 0;
00628          }
00629          sqcnt = 0;
00630          for (i = n /*pScr->fNStart + pScr->fNScript*/; z[i]; i++) {
00631             if (z[i] == '\'' || z[i] == '"') {
00632                sqcnt++; // Skip if odd # quotes
00633             } else if (z[i] == '\n') {
00634                sqcnt = 0;
00635             }
00636             if (z[i] == '<' && z[i+1] == '/' &&
00637                strncasecmp(&z[i], zEnd, nEnd) == 0) {
00638                if (zEnd[3] == 'c' && ((sqcnt % 2) == 1)) continue;
00639                pScr->fNScript = i - n;
00640                fPScript = 0;
00641                n = i + nEnd;
00642                break;
00643             }
00644          }
00645          if (z[i] == 0) goto incomplete;
00646          if (fPScript) {
00647             pScr->fNScript = i - n;
00648             n = i;
00649          }
00650          else {
00651 #if 0
00652             // If there is a script, execute it now and insert any output
00653             // to the html stream for parsing as html. (ie. client side scripting)
00654 
00655             if (pIsInScript && !pIsInNoScript && !pIsInNoFrames) {
00656 
00657                //for (curch = 0, curline = 1; curch <= curlast; curch++)
00658                //  if (z[curch] == '\n') curline++;
00659 
00660                // arglist in pElem and text pointers in pScr?
00661                // Inline scripts can contain unmatched brackets :-)
00662                //char varind[50];
00663                //sprintf(varind, "HtmlScrVar%d", p->varind++);
00664                //char savech = fZText[pScr->fNStart + pScr->fNScript];
00665                //fZText[pScr->fNStart + pScr->fNScript] = 0;
00666                //char *scriptBody = StrDup(fZText[pScr->fNStart]);
00667                //fZText[pScr->fNStart + pScr->fNScript] = savech;
00668                AdvanceLayout(p);
00669                inParse++;
00670                char *result = ProcessScript((TGHtmlScript *) pElem);  // pElem or pScr??
00671                inParse--;
00672                if (result) {
00673                   ol = fNAlloc;
00674                   rl = strlen(result);
00675                   fNAlloc += rl;
00676                   z = fZText = HtmlRealloc(z, ol+rl);
00677                   memmove(z + n + rl, z+n, ol - n);
00678                   memmove(z + n, result, rl);
00679                }
00680             }
00681 #endif
00682             pIsInScript = 0;
00683             pIsInNoScript = 0;
00684             pIsInNoFrames = 0;
00685          }
00686          //continue;
00687 
00688       }
00689       else if (isspace((unsigned char)c)) {
00690 
00691          // White space
00692          for (i = 0;
00693              (c = z[n+i]) != 0 && isspace((unsigned char)c) && c != '\n' && c != '\r';
00694               i++) { }
00695          if (c == '\r' && z[n+i+1] == '\n') ++i;
00696 #if 0  // this is certainly NOT OK, since it alters pre-formatted text
00697          if (sawdot == 1) {
00698             pElem = new TGHtmlTextElement(2);
00699             strcpy(((TGHtmlTextElement *)pElem)->fZText, " ");
00700             pElem->fElId = ++fIdind;
00701             pElem->fOffs = n;
00702             pElem->fCount = 1;
00703             AppendElement(pElem);
00704          }
00705 #endif
00706          pElem = new TGHtmlSpaceElement;
00707          if (pElem == 0) goto incomplete;
00708          ((TGHtmlSpaceElement *)pElem)->fW = 0;
00709          pElem->fOffs = n;
00710          pElem->fElId = ++fIdind;
00711          if (c == '\n' || c == '\r') {
00712             pElem->fFlags = HTML_NewLine;
00713             pElem->fCount = 1;
00714             i++;
00715             inpCol = 0;
00716          } else {
00717             int iColStart = inpCol;
00718             pElem->fFlags = 0;
00719             for (j = 0; j < i; j++) {
00720                inpCol = NextColumn(inpCol, z[n+j]);
00721             }
00722             pElem->fCount = inpCol - iColStart;
00723          }
00724          AppendElement(pElem);
00725          n += i;
00726 
00727       }
00728       else if (c != '<' || fIPlaintext != 0 ||
00729               (!isalpha(z[n+1]) && z[n+1] != '/' && z[n+1] != '!' && z[n+1] != '?')) {
00730 
00731          // Ordinary text
00732          for (i = 1; (c = z[n+i]) != 0 && !isspace((unsigned char)c) && c != '<'; i++) {}
00733          if (z[n+i-1] == '.' || z[n+i-1] == '!' || z[n+i-1] == '?') sawdot = 2;
00734          if (c == 0) goto incomplete;
00735          if (fIPlaintext != 0 && z[n] == '<') {
00736             switch (fIPlaintext) {
00737                case Html_LISTING:
00738                   if (i >= 10 && strncasecmp(&z[n], "</listing>", 10) == 0) {
00739                      fIPlaintext = 0;
00740                      goto doMarkup;
00741                   }
00742                   break;
00743 
00744                case Html_XMP:
00745                   if (i >= 6 && strncasecmp(&z[n], "</xmp>", 6) == 0) {
00746                      fIPlaintext = 0;
00747                      goto doMarkup;
00748                   }
00749                   break;
00750 
00751                case Html_TEXTAREA:
00752                   if (i >= 11 && strncasecmp(&z[n], "</textarea>", 11) == 0) {
00753                      fIPlaintext = 0;
00754                      goto doMarkup;
00755                   }
00756                   break;
00757 
00758                default:
00759                   break;
00760             }
00761          }
00762          pElem = new TGHtmlTextElement(i);
00763          if (pElem == 0) goto incomplete;
00764          TGHtmlTextElement *tpElem = (TGHtmlTextElement *) pElem;
00765          tpElem->fElId = ++fIdind;
00766          tpElem->fOffs = n;
00767          strncpy(tpElem->fZText, &z[n], i);
00768          tpElem->fZText[i] = 0;
00769          AppendElement(pElem);
00770          if (fIPlaintext == 0 || fIPlaintext == Html_TEXTAREA) {
00771             HtmlTranslateEscapes(tpElem->fZText);
00772          }
00773          pElem->fCount = strlen(tpElem->fZText);
00774          n += i;
00775          inpCol += i;
00776 
00777       } else if (strncmp(&z[n], "<!--", 4) == 0) {
00778 
00779          // An HTML comment. Just skip it.
00780          for (i = 4; z[n+i]; i++) {
00781             if (z[n+i] == '-' && strncmp(&z[n+i], "-->", 3) == 0) break;
00782          }
00783          if (z[n+i] == 0) goto incomplete;
00784 
00785          pElem = new TGHtmlTextElement(i);
00786          if (pElem == 0) goto incomplete;
00787          TGHtmlTextElement *tpElem = (TGHtmlTextElement *) pElem;
00788          tpElem->fType = Html_COMMENT;
00789          tpElem->fElId = ++fIdind;
00790          tpElem->fOffs = n;
00791          strncpy(tpElem->fZText, &z[n+4], i-4);
00792          tpElem->fZText[i-4] = 0;
00793          tpElem->fCount = 0;
00794          AppendElement(pElem);
00795 
00796          pElem = new TGHtmlElement(Html_EndCOMMENT);
00797          AppToken(pElem, 0, n+4);
00798 
00799          for (j = 0; j < i+3; j++) {
00800            inpCol = NextColumn(inpCol, z[n+j]);
00801          }
00802          n += i + 3;
00803 
00804       }
00805       else {
00806 
00807          // Markup.
00808          //
00809          // First get the name of the markup
00810 doMarkup:
00811          argc = 1;
00812          argv[0] = &z[n+1];
00813          for (i = 1;
00814              (c = z[n+i]) != 0 && !isspace((unsigned char)c) && c != '>' && (i < 2 || c != '/');
00815              i++) {}
00816          arglen[0] = i - 1;
00817          if (c == 0) goto incomplete;
00818 
00819          // Now parse up the arguments
00820 
00821          while (isspace((unsigned char)z[n+i])) ++i;
00822          while ((c = z[n+i]) != 0 && c != '>' && (c != '/' || z[n+i+1] != '>')) {
00823             if (argc > mxARG - 3) argc = mxARG - 3;
00824             argv[argc] = &z[n+i];
00825             j = 0;
00826             while ((c = z[n+i+j]) != 0 && !isspace((unsigned char)c) && c != '>' &&
00827                     c != '=' && (c != '/' || z[n+i+j+1] != '>')) ++j;
00828             arglen[argc] = j;
00829             if (c == 0) goto incomplete;
00830             i += j;
00831             while (isspace((unsigned char)c)) {
00832                i++;
00833                c = z[n+i];
00834             }
00835             if (c == 0) goto incomplete;
00836             argc++;
00837             if (c != '=') {
00838                argv[argc] = null;
00839                arglen[argc] = 0;
00840                argc++;
00841                continue;
00842             }
00843             i++;
00844             c = z[n+i];
00845             while (isspace((unsigned char)c)) {
00846                i++;
00847                c = z[n+i];
00848             }
00849             if (c == 0) goto incomplete;
00850             if (c == '\'' || c == '"') {
00851                int cQuote = c;
00852                i++;
00853                argv[argc] = &z[n+i];
00854                for (j = 0; (c = z[n+i+j]) != 0 && c != cQuote; j++) {}
00855                if (c == 0) goto incomplete;
00856                arglen[argc] = j;
00857                i += j+1;
00858             } else {
00859                argv[argc] = &z[n+i];
00860                for (j = 0; (c = z[n+i+j]) != 0 && !isspace((unsigned char)c) && c != '>'; j++) {}
00861                if (c == 0) goto incomplete;
00862                arglen[argc] = j;
00863                i += j;
00864             }
00865             argc++;
00866             while (isspace(z[n+i])) ++i;
00867          }
00868          if (c == '/') {
00869             i++;
00870             c = z[n+i];
00871             selfClose = 1;
00872          } else {
00873             selfClose = 0;
00874          }
00875          if (c == 0) goto incomplete;
00876          for (j = 0; j < i+1; j++) {
00877             inpCol = NextColumn(inpCol, z[n+j]);
00878          }
00879          n += i + 1;
00880 
00881          // Lookup the markup name in the hash table
00882 
00883          if (!gIsInit) {
00884             HtmlHashInit();
00885             gIsInit = 1;
00886          }
00887          c = argv[0][arglen[0]];
00888          argv[0][arglen[0]] = 0;
00889          h = HtmlHash(argv[0]);
00890          for (pMap = gApMap[h]; pMap; pMap = pMap->fPCollide) {
00891             if (strcasecmp(pMap->fZName, argv[0]) == 0) break;
00892          }
00893          argv[0][arglen[0]] = c;
00894          if (pMap == 0) continue;  // Ignore unknown markup
00895 
00896 makeMarkupEntry:
00897          // Construct a TGHtmlMarkupElement object for this markup.
00898 
00899          pElem = MakeMarkupEntry(pMap->fObjType, pMap->fType, argc, arglen, argv);
00900          if (pElem == 0) goto incomplete;
00901 
00902          pElem->fElId = ++fIdind;
00903          pElem->fOffs = n;
00904 
00905          AddFormInfo(pElem);
00906 
00907          // The new markup has now been constructed in pElem. But before
00908          // appending it to the list, check to see if there is a special
00909          // handler for this markup type.
00910 
00911          if (ProcessToken(pElem, pMap->fZName, pMap->fType)) {
00912             // delete pElem;
00913 
00914             // Tricky, tricky. The user function might have caused the p->fZText
00915             // pointer to change, so renew our copy of that pointer.
00916 
00917             z = fZText;
00918             if (z == 0) {
00919                n = 0;
00920                inpCol = 0;
00921                goto incomplete;
00922             }
00923             continue;
00924          }
00925 
00926          // No special handler for this markup. Just append it to the
00927          // list of all tokens.
00928 
00929          AppendElement(pElem);
00930          switch (pMap->fType) {
00931             case Html_TABLE:
00932                break;
00933 
00934             case Html_PLAINTEXT:
00935             case Html_LISTING:
00936             case Html_XMP:
00937             case Html_TEXTAREA:
00938                fIPlaintext = pMap->fType;
00939                break;
00940 
00941             case Html_NOFRAMES:
00942                if (!fHasFrames) break;
00943                pIsInNoFrames = 1;
00944             case Html_NOSCRIPT:
00945                break;
00946                if (!fHasScript) break;
00947                pIsInNoScript = 1;
00948             case Html_SCRIPT:
00949                pIsInScript = 1;
00950                // fallthrough
00951             case Html_STYLE:
00952                fPScript = (TGHtmlScript *) pElem;
00953                break;
00954 
00955             case Html_LI:
00956                if (!fAddEndTags) break;
00957                if (inLi) {
00958                   TGHtmlElement *e = new TGHtmlMarkupElement(Html_EndLI, 1, 0, 0);
00959                   AppToken(e, pElem, n);
00960                } else {
00961                   inLi = 1;
00962                }
00963                break;
00964 
00965             case Html_EndLI:
00966                inLi=0;
00967                break;
00968 
00969             case Html_EndOL:
00970             case Html_EndUL:
00971                if (!fAddEndTags) break;
00972                if (inLi) {
00973                   TGHtmlElement *e = new TGHtmlMarkupElement(Html_EndLI, 1, 0, 0);
00974                   AppToken(e, pElem, n);
00975                } else {
00976                   inLi = 0;
00977                }
00978                break;
00979 
00980             default:
00981                break;
00982          }
00983 
00984          // If this is self-closing markup (ex: <br/> or <img/>) then
00985          // synthesize a closing token.
00986 
00987          if (selfClose && argv[0][0] != '/' &&
00988              strcmp(&pMap[1].fZName[1], pMap->fZName) == 0) {
00989             selfClose = 0;
00990             pMap++;
00991             argc = 1;
00992             goto makeMarkupEntry;
00993          }
00994       }
00995    }
00996 
00997 incomplete:
00998    fICol = inpCol;
00999    ////fPScript = 0;
01000 
01001    return n;
01002 }
01003 
01004 /************************** End HTML Tokenizer Code ***************************/
01005 
01006 //______________________________________________________________________________
01007 TGHtmlMarkupElement *TGHtml::MakeMarkupEntry(int objType, int type, int argc,
01008                                              int arglen[], char *argv[])
01009 {
01010    // Make one markup entry.
01011 
01012    TGHtmlMarkupElement *e;
01013 
01014    switch (objType) {
01015     case O_HtmlCell:
01016       e = new TGHtmlCell(type, argc, arglen, argv);
01017       break;
01018 
01019     case O_HtmlTable:
01020       e = new TGHtmlTable(type, argc, arglen, argv);
01021       break;
01022 
01023     case O_HtmlRef:
01024       e = new TGHtmlRef(type, argc, arglen, argv);
01025       break;
01026 
01027     case O_HtmlLi:
01028       e = new TGHtmlLi(type, argc, arglen, argv);
01029       break;
01030 
01031     case O_HtmlListStart:
01032       e = new TGHtmlListStart(type, argc, arglen, argv);
01033       break;
01034 
01035     case O_HtmlImageMarkup:
01036       e = new TGHtmlImageMarkup(type, argc, arglen, argv);
01037       break;
01038 
01039     case O_HtmlInput:
01040       e = new TGHtmlInput(type, argc, arglen, argv);
01041       break;
01042 
01043     case O_HtmlForm:
01044       e = new TGHtmlForm(type, argc, arglen, argv);
01045       break;
01046 
01047     case O_HtmlHr:
01048       e = new TGHtmlHr(type, argc, arglen, argv);
01049       break;
01050 
01051     case O_HtmlAnchor:
01052       e = new TGHtmlAnchor(type, argc, arglen, argv);
01053       break;
01054 
01055     case O_HtmlScript:
01056       e = new TGHtmlScript(type, argc, arglen, argv);
01057       break;
01058 
01059     case O_HtmlMapArea:
01060       e = new TGHtmlMapArea(type, argc, arglen, argv);
01061       break;
01062 
01063     default:
01064       e = new TGHtmlMarkupElement(type, argc, arglen, argv);
01065       break;
01066    }
01067 
01068    return e;
01069 }
01070 
01071 //______________________________________________________________________________
01072 void TGHtml::TokenizerAppend(const char *text)
01073 {
01074    // Append text to the tokenizer engine.
01075 
01076    int len = strlen(text);
01077 
01078    if (fNText == 0) {
01079       fNAlloc = len + 100;
01080       fZText = new char [fNAlloc];
01081    } else if (fNText + len >= fNAlloc) {
01082       fNAlloc += len + 100;
01083       char *tmp = new char[fNAlloc];
01084       strcpy(tmp, fZText);
01085       delete[] fZText;
01086       fZText = tmp;
01087    }
01088 
01089    if (fZText == 0) {
01090       fNText = 0;
01091       UNTESTED;
01092       return;
01093    }
01094 
01095    strcpy(&fZText[fNText], text);
01096    fNText += len;
01097    fNComplete = Tokenize();
01098 }
01099 
01100 //______________________________________________________________________________
01101 TGHtmlElement *TGHtml::InsertToken(TGHtmlElement *pToken,
01102                                   char *zType, char *zArgs, int offs)
01103 {
01104    // This routine takes a text representation of a token, converts it into an
01105    // TGHtmlElement object and inserts it immediately prior to pToken. If pToken
01106    // is 0, then the newly created TGHtmlElement is appended.
01107    //
01108    // This routine does nothing to resize, restyle, relayout or redisplay
01109    // the HTML. That is the calling routines responsibility.
01110    //
01111    // Return the new TGHtmlElement object if successful. Return zero if
01112    // zType is not a known markup name.
01113    //
01114    //  pToken  - Insert before this. Append if pToken == 0
01115    //  zType   - Type of markup. Ex: "/a" or "table"
01116    //  zArgs   - List of arguments
01117    //  offs    - Calculate offset, and insert changed text into fZText!
01118 
01119    SHtmlTokenMap_t *pMap;     // For searching the markup name hash table
01120    int h;                   // The hash on zType
01121    TGHtmlElement *pElem;     // The new element
01122    //int nByte;               // How many bytes to allocate
01123    //int i;                   // Loop counter
01124 
01125    if (!gIsInit) {
01126       HtmlHashInit();
01127       gIsInit = 1;
01128    }
01129 
01130    if (strcmp(zType, "Text") == 0) {
01131       pElem = new TGHtmlTextElement(zArgs ? strlen(zArgs) : 0);
01132       if (pElem == 0) return 0;
01133       if (zArgs) {
01134          strcpy (((TGHtmlTextElement *)pElem)->fZText, zArgs);
01135          pElem->fCount = strlen(zArgs);
01136       }
01137    } else if (!strcmp(zType, "Space")) {
01138       pElem = new TGHtmlSpaceElement();
01139       if (pElem == 0) return 0;
01140    } else {
01141       h = HtmlHash(zType);
01142       for (pMap = gApMap[h]; pMap; pMap = pMap->fPCollide) {
01143          if (strcasecmp(pMap->fZName, zType) == 0) break;
01144       }
01145       if (pMap == 0) return 0;
01146       if (zArgs == 0 || *zArgs == 0) {
01147          // Special case of no arguments. This is a lot easier...
01148          // well... now its the same thing!
01149          pElem = MakeMarkupEntry(pMap->fObjType, pMap->fType, 1, 0, 0);
01150          if (pElem == 0) return 0;
01151       } else {
01152          // The general case. There are arguments that need to be parsed
01153          // up. This is slower, but we gotta do it.
01154          //int  argc;
01155          //char **argv;
01156          //char *zBuf;
01157 
01158 #if 0
01159       if (!SplitList(zArgs, &argc, &argv)) return 0;
01160 
01161       // shall we insert a dummy argv[0]?
01162 
01163       pElem = MakeMarkupEntry(pMap->fObjType, pMap->fType, argc/*+1??*/, 0, argv);
01164       if (pElem == 0) return 1;
01165 
01166       while (--argc >= 0) if (argv[argc]) delete[] argv[argc];
01167       delete[] argv;
01168 #else
01169          return 0;
01170 #endif
01171       }
01172    }
01173 
01174    pElem->fElId = ++fIdind;
01175 
01176    AppToken(pElem, pToken, offs);
01177 
01178    return pElem;
01179 }
01180 
01181 //______________________________________________________________________________
int TGHtml::TextInsertCmd(int /*argc*/, char ** /*argv*/)
{
   // Insert text into text token, or break token into two text tokens.
   // Also, handle backspace char by deleting text.
   // Should also handle newline char by splitting text.
   //
   // NOTE: the whole implementation below is compiled out with "#if 0".
   // As built, this function ignores both arguments, performs no work and
   // always returns 1. The disabled code is kept only as a reference for
   // the intended behaviour described above.

#if 0
  TGHtmlElement *p, *pElem;
  int i, l, n = 0;
  int idx = 0;
  int ptyp = Html_Unknown;
  int istxt = 0;
  char *cp = 0, c, *cp2;

  if (GetIndex(argv[3], &p, &i) != 0) {
    // sprintf(tmp, "malformed index: \"%s\"", argv[3]);
    return 0;
  }
  if (p) {
    ptyp = p->fType;
    if ((istxt = (ptyp == Html_Text))) {
      l = p->fCount;
      cp = ((TGHtmlTextElement *)p)->fZText;
    }
  }
  if (argv[2][0] == 'b') {  // Break text token into two.
    if (!istxt) return 1;
    if (i == 0 || i == l) return 1;
    pElem = InsertToken(p->fPNext, "Text", cp + i, -1);
    cp[i] = 0;
    p->fCount = i;
    return 1;
  }
  c = argv[4][0];
  if (!c) return 1;
  if (c == '\b') {
    if ((!istxt) || (!l) || (!i)) {
      if (!p) return 1;
      if (p->fType == Html_BR)
        RemoveElements(p, p);
      return 1;
    }
    if (p && l == 1) {
      RemoveElements(p, p);
      return 1;
    }
    if (i == l)
      cp[p->fCount] = 0;
    else
      memcpy(cp+i-1, cp+i, l-i+1);

    cp[--p->fCount] = 0;
    if (ins.i-- <= 0) ins.i = 0;
    ins.p = p;
    return 1;
  }
  if (c == '\n' || c == '\r') {
  }
  if (istxt) {
    char *cp;
    int t, j, alen = strlen(argv[4]);
    n = alen + l;

    TGHtmlTextElement *text = (TGHtmlTextElement *) p;

    if (text->fZText == (char*) ((&text->fZText)+1)) {
      cp = new char[n+1];
      strcpy(cp, text->fZText);
    } else {
      cp = new char[n+1];
      strcpy(cp, text->fZText);
    }
    cp2 = new char[alen+1];
    memcpy(cp2, argv[4], alen+1);
    HtmlTranslateEscapes(cp2);
    alen = strlen(cp2);
    memmove(cp+alen+i, cp+i, l-i+1);
    for (j = 0; j < alen; j++) cp[i+j] = cp2[j];
    delete[] cp2;
    delete[] text->fZText;
    text->fZText = cp;
    p->fCount = strlen(cp);
    ins.p = p;
    ins.i = i+alen;
  } else {
    p = InsertToken(p ? p->fPNext : 0, "Text", argv[4], -1);
    AddStyle(p);
    i = 0;
    ins.p = p;
    ins.i = 1;
  }
  if (p) {
    idx = p->base.id;
    AddStrOffset(p, argv[4], i);
  }
#endif
  // Stub result: reached unconditionally because the code above is disabled.
  return 1;
}
01280 
01281 //______________________________________________________________________________
01282 SHtmlTokenMap_t *TGHtml::NameToPmap(char *zType)
01283 {
01284    // Returns token map matching zType name.
01285 
01286    SHtmlTokenMap_t *pMap;     // For searching the markup name hash table
01287    int h;                   // The hash on zType
01288 
01289    if (!gIsInit) {
01290       HtmlHashInit();
01291       gIsInit = 1;
01292    }
01293    h = HtmlHash(zType);
01294    for (pMap = gApMap[h]; pMap; pMap = pMap->fPCollide) {
01295       if (strcasecmp(pMap->fZName, zType) == 0) break;
01296    }
01297 
01298    return pMap;
01299 }
01300 
01301 //______________________________________________________________________________
01302 int TGHtml::NameToType(char *zType)
01303 {
01304    // Convert a markup name into a type integer
01305 
01306    SHtmlTokenMap_t *pMap = NameToPmap(zType);
01307    return pMap ? pMap->fType : (int)Html_Unknown;
01308 }
01309 
01310 //______________________________________________________________________________
01311 const char *TGHtml::TypeToName(int type)
01312 {
01313    // Convert a type into a symbolic name
01314 
01315    if (type >= Html_A && type <= Html_EndXMP) {
01316       SHtmlTokenMap_t *pMap = gApMap[type - Html_A];
01317       return pMap->fZName;
01318    } else {
01319       return "???";
01320    }
01321 }
01322 
01323 //______________________________________________________________________________
01324 char *TGHtml::DumpToken(TGHtmlElement *p)
01325 {
01326    // For debugging purposes, print information about a token
01327 
01328 //#ifdef DEBUG
01329    static char zBuf[200];
01330    int j;
01331    const char *zName;
01332 
01333    if (p == 0) {
01334       sprintf(zBuf, "NULL");
01335       return zBuf;
01336    }
01337    switch (p->fType) {
01338       case Html_Text:
01339          sprintf(zBuf, "text: \"%.*s\"", p->fCount, ((TGHtmlTextElement *)p)->fZText);
01340          break;
01341 
01342       case Html_Space:
01343          if (p->fFlags & HTML_NewLine) {
01344             sprintf(zBuf, "space: \"\\n\"");
01345          } else {
01346             sprintf(zBuf, "space: \" \"");
01347          }
01348          break;
01349 
01350       case Html_Block: {
01351          TGHtmlBlock *block = (TGHtmlBlock *) p;
01352          if (block->fN > 0) {
01353             int n = block->fN;
01354             if (n > 150) n = 150;
01355                sprintf(zBuf, "<Block z=\"%.*s\">", n, block->fZ);
01356             } else {
01357                sprintf(zBuf, "<Block>");
01358             }
01359             break;
01360       }
01361 
01362       default:
01363          if (p->fType >= HtmlMarkupMap[0].fType
01364              && p->fType <= HtmlMarkupMap[HTML_MARKUP_COUNT-1].fType) {
01365             zName = HtmlMarkupMap[p->fType - HtmlMarkupMap[0].fType].fZName;
01366          } else {
01367             zName = "Unknown";
01368          }
01369          sprintf(zBuf, "markup (%d) <%s", p->fType, zName);
01370          for (j = 1 ; j < p->fCount; j += 2) {
01371             sprintf(&zBuf[strlen(zBuf)], " %s=\"%s\"",
01372                     ((TGHtmlMarkupElement *)p)->fArgv[j-1],
01373                     ((TGHtmlMarkupElement *)p)->fArgv[j]);
01374          }
01375          strcat(zBuf, ">");
01376          break;
01377    }
01378    return zBuf;
01379 //#else
01380 //  return 0;
01381 //#endif
01382 }
01383 
01384 //______________________________________________________________________________
01385 void TGHtml::AppendArglist(TGString *str, TGHtmlMarkupElement *pElem)
01386 {
01387    // Append all the arguments of the given markup to the given TGString.
01388    //
01389    // Example:  If the markup is <IMG SRC=image.gif ALT="hello!">
01390    // then the following text is appended to the TGString:
01391    //
01392    //       "src image.gif alt hello!"
01393    //
01394    // Notice how all attribute names are converted to lower case.
01395    // This conversion happens in the parser.
01396 
01397    int i;
01398 
01399    for (i = 0; i + 1 < pElem->fCount; i += 2) {
01400       str->Append(pElem->fArgv[i]);
01401       str->Append("=");
01402       str->Append(pElem->fArgv[i+1]);
01403       str->Append(" ");
01404    }
01405 }
01406 
01407 //______________________________________________________________________________
01408 char *TGHtml::GetTokenName(TGHtmlElement *p)
01409 {
01410    // Returns token name of html element p.
01411 
01412    static char zBuf[200];
01413    //int j;
01414    const char *zName;
01415 
01416    zBuf[0] = 0;
01417    if (p == 0) {
01418       strcpy(zBuf, "NULL");
01419       return zBuf;
01420    }
01421    switch (p->fType) {
01422       case Html_Text:
01423       case Html_Space:
01424          break;
01425 
01426       case Html_Block:
01427          break;
01428 
01429       default:
01430          if (p->fType >= HtmlMarkupMap[0].fType &&
01431              p->fType <= HtmlMarkupMap[HTML_MARKUP_COUNT-1].fType) {
01432             zName = HtmlMarkupMap[p->fType - HtmlMarkupMap[0].fType].fZName;
01433          } else {
01434             zName = "Unknown";
01435          }
01436          strlcpy(zBuf, zName, sizeof(zBuf));
01437          break;
01438    }
01439 
01440    return zBuf;
01441 }
01442 
01443 //______________________________________________________________________________
01444 SHtmlTokenMap_t* TGHtml::GetMarkupMap(int n)
01445 {
01446    // Returns token map at location n.
01447 
01448    return HtmlMarkupMap+n;
01449 }
01450 
01451 //______________________________________________________________________________
01452 TGString *TGHtml::ListTokens(TGHtmlElement *p, TGHtmlElement *pEnd)
01453 {
01454    // Return all tokens between the two elements as a string list.
01455 
01456    TGString *str;
01457    int i;
01458    const char *zName;
01459    char zLine[100];
01460 
01461    str = new TGString("");
01462    while (p && p != pEnd) {
01463       switch (p->fType) {
01464          case Html_Block:
01465             break;
01466 
01467          case Html_Text:
01468             str->Append("{ Text \"");
01469             str->Append(((TGHtmlTextElement *)p)->fZText);
01470             str->Append("\" } ");
01471             break;
01472 
01473          case Html_Space:
01474             snprintf(zLine, 100, "Space %d %d ",
01475                      p->fCount, (p->fFlags & HTML_NewLine) != 0);
01476             str->Append(zLine);
01477             break;
01478 
01479          case Html_Unknown:
01480             str->Append("Unknown ");
01481             break;
01482 
01483          default:
01484             str->Append("{ Markup ");
01485             if (p->fType >= HtmlMarkupMap[0].fType &&
01486                 p->fType <= HtmlMarkupMap[HTML_MARKUP_COUNT-1].fType) {
01487                zName = HtmlMarkupMap[p->fType - HtmlMarkupMap[0].fType].fZName;
01488             } else {
01489                zName = "Unknown";
01490             }
01491             str->Append(zName);
01492             str->Append(" ");
01493             for (i = 0; i < p->fCount; ++i) {
01494                str->Append(((TGHtmlMarkupElement *)p)->fArgv[i]);
01495                str->Append(" ");
01496             }
01497             str->Append("} ");
01498             break;
01499       }
01500       p = p->fPNext;
01501    }
01502 
01503    return str;
01504 }
01505 
01506 //______________________________________________________________________________
01507 void TGHtml::PrintList(TGHtmlElement *first, TGHtmlElement *last)
01508 {
01509    // Print a list of tokens
01510 
01511    TGHtmlElement *p;
01512 
01513    for (p = first; p != last; p = p->fPNext) {
01514       if (p->fType == Html_Block) {
01515          TGHtmlBlock *block = (TGHtmlBlock *) p;
01516          const char *z = block->fZ;
01517          int n = block->fN;
01518          if (n == 0 || z == 0) {
01519             n = 1;
01520             z = "";
01521          }
01522          printf("Block flags=%02x cnt=%d x=%d..%d y=%d..%d z=\"%.*s\"\n",
01523                 p->fFlags, p->fCount, block->fLeft, block->fRight,
01524                 block->fTop, block->fBottom, n, z);
01525       } else {
01526          printf("Token font=%2d color=%2d align=%d flags=0x%04x name=%s\n",
01527                 p->fStyle.fFont, p->fStyle.fColor,
01528                 p->fStyle.fAlign, p->fStyle.fFlags, DumpToken(p));
01529       }
01530    }
01531 }

Generated on Tue Jul 5 14:22:19 2011 for ROOT_528-00b_version by  doxygen 1.5.1