00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036 #include <string.h>
00037 #include <stdlib.h>
00038 #include <stdio.h>
00039 #include <ctype.h>
00040
00041 #include "TGHtml.h"
00042 #include "TGHtmlTokens.h"
00043
00044
00045
00046
00047 extern SHtmlTokenMap_t HtmlMarkupMap[];
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
// One entry of the named character-entity table.
//
//   fZName - entity name as written between '&' and ';' (e.g. "amp")
//   fValue - NUL-terminated bytes that replace the entity in the text
//   fPNext - next entry chained in the same bucket of gApEscHash
struct SgEsc_t {
   const char     *fZName;
   char            fValue[8];
   struct SgEsc_t *fPNext;
};
00070
00071
00072
00073
00074 static struct SgEsc_t gEscSequences[] = {
00075 { "quot", "\"", 0 },
00076 { "amp", "&", 0 },
00077 { "lt", "<", 0 },
00078 { "gt", ">", 0 },
00079 { "nbsp", " ", 0 },
00080 { "iexcl", "\241", 0 },
00081 { "cent", "\242", 0 },
00082 { "pound", "\243", 0 },
00083 { "curren", "\244", 0 },
00084 { "yen", "\245", 0 },
00085 { "brvbar", "\246", 0 },
00086 { "sect", "\247", 0 },
00087 { "uml", "\250", 0 },
00088 { "copy", "\251", 0 },
00089 { "ordf", "\252", 0 },
00090 { "laquo", "\253", 0 },
00091 { "not", "\254", 0 },
00092 { "shy", "\255", 0 },
00093 { "reg", "\256", 0 },
00094 { "macr", "\257", 0 },
00095 { "deg", "\260", 0 },
00096 { "plusmn", "\261", 0 },
00097 { "sup2", "\262", 0 },
00098 { "sup3", "\263", 0 },
00099 { "acute", "\264", 0 },
00100 { "micro", "\265", 0 },
00101 { "para", "\266", 0 },
00102 { "middot", "\267", 0 },
00103 { "cedil", "\270", 0 },
00104 { "sup1", "\271", 0 },
00105 { "ordm", "\272", 0 },
00106 { "raquo", "\273", 0 },
00107 { "frac14", "\274", 0 },
00108 { "frac12", "\275", 0 },
00109 { "frac34", "\276", 0 },
00110 { "iquest", "\277", 0 },
00111 { "Agrave", "\300", 0 },
00112 { "Aacute", "\301", 0 },
00113 { "Acirc", "\302", 0 },
00114 { "Atilde", "\303", 0 },
00115 { "Auml", "\304", 0 },
00116 { "Aring", "\305", 0 },
00117 { "AElig", "\306", 0 },
00118 { "Ccedil", "\307", 0 },
00119 { "Egrave", "\310", 0 },
00120 { "Eacute", "\311", 0 },
00121 { "Ecirc", "\312", 0 },
00122 { "Euml", "\313", 0 },
00123 { "Igrave", "\314", 0 },
00124 { "Iacute", "\315", 0 },
00125 { "Icirc", "\316", 0 },
00126 { "Iuml", "\317", 0 },
00127 { "ETH", "\320", 0 },
00128 { "Ntilde", "\321", 0 },
00129 { "Ograve", "\322", 0 },
00130 { "Oacute", "\323", 0 },
00131 { "Ocirc", "\324", 0 },
00132 { "Otilde", "\325", 0 },
00133 { "Ouml", "\326", 0 },
00134 { "times", "\327", 0 },
00135 { "Oslash", "\330", 0 },
00136 { "Ugrave", "\331", 0 },
00137 { "Uacute", "\332", 0 },
00138 { "Ucirc", "\333", 0 },
00139 { "Uuml", "\334", 0 },
00140 { "Yacute", "\335", 0 },
00141 { "THORN", "\336", 0 },
00142 { "szlig", "\337", 0 },
00143 { "agrave", "\340", 0 },
00144 { "aacute", "\341", 0 },
00145 { "acirc", "\342", 0 },
00146 { "atilde", "\343", 0 },
00147 { "auml", "\344", 0 },
00148 { "aring", "\345", 0 },
00149 { "aelig", "\346", 0 },
00150 { "ccedil", "\347", 0 },
00151 { "egrave", "\350", 0 },
00152 { "eacute", "\351", 0 },
00153 { "ecirc", "\352", 0 },
00154 { "euml", "\353", 0 },
00155 { "igrave", "\354", 0 },
00156 { "iacute", "\355", 0 },
00157 { "icirc", "\356", 0 },
00158 { "iuml", "\357", 0 },
00159 { "eth", "\360", 0 },
00160 { "ntilde", "\361", 0 },
00161 { "ograve", "\362", 0 },
00162 { "oacute", "\363", 0 },
00163 { "ocirc", "\364", 0 },
00164 { "otilde", "\365", 0 },
00165 { "ouml", "\366", 0 },
00166 { "divide", "\367", 0 },
00167 { "oslash", "\370", 0 },
00168 { "ugrave", "\371", 0 },
00169 { "uacute", "\372", 0 },
00170 { "ucirc", "\373", 0 },
00171 { "uuml", "\374", 0 },
00172 { "yacute", "\375", 0 },
00173 { "thorn", "\376", 0 },
00174 { "yuml", "\377", 0 },
00175 };
00176
00177
00178
00179
00180
00181
00182 #define ESC_HASH_SIZE (sizeof(gEscSequences)/sizeof(gEscSequences[0])+7)
00183
00184
00185
00186
00187
00188
00189
00190
00191 static struct SgEsc_t *gApEscHash[ESC_HASH_SIZE];
00192
00193
00194
00195
00196
00197 static int EscHash(const char *zName) {
00198 int h = 0;
00199 char c;
00200
00201 while ((c = *zName) != 0) {
00202 h = h<<5 ^ h ^ c;
00203 zName++;
00204 }
00205 if (h < 0) h = -h;
00206
00207 return h % ESC_HASH_SIZE;
00208 }
00209
#ifdef TEST
// Print statistics on how well EscHash() spreads the entity names over
// gApEscHash.  Only compiled when TEST is defined.
//
// Every bucket of the table (ESC_HASH_SIZE of them) is visited; walking
// only as many buckets as there are entities would skip the last seven.
static void EscHashStats()
{
   const int slots = (int) ESC_HASH_SIZE;
   int sum = 0;        // total number of chained entries
   int max = 0;        // length of the longest chain
   int notempty = 0;   // number of buckets holding at least one entry

   for (int i = 0; i < slots; i++) {
      int cnt = 0;
      struct SgEsc_t *p = gApEscHash[i];
      if (p) notempty++;
      for (; p; p = p->fPNext) ++cnt;
      sum += cnt;
      if (cnt > max) max = cnt;
   }
   printf("Longest chain=%d avg=%g slots=%d empty=%d (%g%%)\n",
          max, notempty ? (double)sum/(double)notempty : 0.0,
          slots, slots-notempty,
          100.0*(slots-notempty)/(double)slots);
}
#endif
00239
00240
00241
00242 static void EscInit() {
00243 int i;
00244 int h;
00245
00246 for (i = 0; i < int(sizeof(gEscSequences) / sizeof(gEscSequences[i])); i++) {
00247
00248 #if 0
00249 int c = gEscSequences[i].value[0];
00250 xclass::UniCharToUtf(c, gEscSequences[i].value);
00251 }
00252 #endif
00253 h = EscHash(gEscSequences[i].fZName);
00254 gEscSequences[i].fPNext = gApEscHash[h];
00255 gApEscHash[h] = &gEscSequences[i];
00256 }
00257 #ifdef TEST
00258 EscHashStats();
00259 #endif
00260 }
00261
00262
00263
00264
00265
00266
00267
// Substitutes for byte values 0x80 .. 0x9f.  HtmlTranslateEscapes() indexes
// this table with (value & 0x1f) whenever it meets a byte or numeric
// character reference in that range, so entry k stands in for 0x80 + k.
static char gAcMsChar[] = {
   /* 0x80 */ 'C',  ' ',  ',',  'f',  '"',  '.',  '*',  '*',
   /* 0x88 */ '^',  '%',  'S',  '<',  'O',  ' ',  'Z',  ' ',
   /* 0x90 */ ' ',  '\'', '\'', '"',  '"',  '*',  '-',  '-',
   /* 0x98 */ '~',  '@',  's',  '>',  'o',  ' ',  'z',  'Y',
};
00302
00303
00304
00305 void HtmlTranslateEscapes(char *z)
00306 {
00307
00308
00309
00310
00311
00312
00313
00314
00315
00316
00317 int from;
00318 int to;
00319 int h;
00320 struct SgEsc_t *p;
00321 static int isInit = 0;
00322
00323 from = to = 0;
00324 if (!isInit) {
00325 EscInit();
00326 isInit = 1;
00327 }
00328 while (z[from]) {
00329 if (z[from] == '&') {
00330 if (z[from+1] == '#') {
00331 int i = from + 2;
00332 int v = 0;
00333 while (isdigit(z[i])) {
00334 v = v*10 + z[i] - '0';
00335 i++;
00336 }
00337 if (z[i] == ';') { i++; }
00338
00339
00340
00341
00342 if (v >= 0x80 && v < 0xa0) {
00343 v = gAcMsChar[v & 0x1f];
00344 }
00345
00346
00347
00348
00349 z[to++] = v;
00350 from = i;
00351 } else {
00352 int i = from+1;
00353 int c;
00354 while (z[i] && isalnum(z[i])) ++i;
00355 c = z[i];
00356 z[i] = 0;
00357 h = EscHash(&z[from+1]);
00358 p = gApEscHash[h];
00359 while (p && strcmp(p->fZName, &z[from+1]) != 0) p = p->fPNext;
00360 z[i] = c;
00361 if (p) {
00362 int j;
00363 for (j = 0; p->fValue[j]; ++j) z[to++] = p->fValue[j];
00364 from = i;
00365 if (c == ';') from++;
00366 } else {
00367 z[to++] = z[from++];
00368 }
00369 }
00370
00371
00372
00373
00374
00375 } else if (((unsigned char) z[from]) >= 0x80 &&
00376 ((unsigned char) z[from]) < 0xa0) {
00377 z[to++] = gAcMsChar[z[from++] & 0x1f];
00378 } else {
00379 z[to++] = z[from++];
00380 }
00381 }
00382 z[to] = 0;
00383 }
00384
00385
00386
00387
00388
00389
00390
00391
00392 static int gIsInit = 0;
00393
00394
00395
00396
00397
00398
00399
00400 static SHtmlTokenMap_t *gApMap[HTML_MARKUP_HASH_SIZE];
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410 static int HtmlHash(const char *zName) {
00411 int h = 0;
00412 char c;
00413
00414 while ((c = *zName) != 0) {
00415 if (isupper(c)) {
00416 c = tolower(c);
00417 }
00418 h = h<<5 ^ h ^ c;
00419 zName++;
00420 }
00421 if (h < 0) {
00422 h = -h;
00423 }
00424
00425 return h % HTML_MARKUP_HASH_SIZE;
00426 }
00427
00428
#ifdef TEST
// Print statistics on how well HtmlHash() spreads the markup names over
// gApMap.  Only compiled when TEST is defined.
//
// The chain pointer has the element type of gApMap, and the walk covers
// all HTML_MARKUP_HASH_SIZE buckets, which is the declared size of the
// table.
static void HtmlHashStats()
{
   const int slots = HTML_MARKUP_HASH_SIZE;
   int sum = 0;        // total number of chained entries
   int max = 0;        // length of the longest chain
   int notempty = 0;   // number of buckets holding at least one entry

   for (int i = 0; i < slots; i++) {
      int cnt = 0;
      SHtmlTokenMap_t *p = gApMap[i];
      if (p) notempty++;
      for (; p; p = p->fPCollide) cnt++;
      sum += cnt;
      if (cnt > max) max = cnt;
   }

   printf("longest chain=%d avg=%g slots=%d empty=%d (%g%%)\n",
          max, notempty ? (double)sum/(double)notempty : 0.0,
          slots, slots-notempty,
          100.0*(slots-notempty)/(double)slots);
}
#endif
00458
00459
00460
00461
00462 static void HtmlHashInit(void){
00463 int i;
00464 int h;
00465
00466 for (i = 0; i < HTML_MARKUP_COUNT; i++) {
00467 h = HtmlHash(HtmlMarkupMap[i].fZName);
00468 HtmlMarkupMap[i].fPCollide = gApMap[h];
00469 gApMap[h] = &HtmlMarkupMap[i];
00470 }
00471 #ifdef TEST
00472 HtmlHashStats();
00473 #endif
00474 }
00475
00476
00477 void TGHtml::AppendElement(TGHtmlElement *pElem)
00478 {
00479
00480
00481 pElem->fPNext = 0;
00482 pElem->fPPrev = fPLast;
00483 if (fPFirst == 0) {
00484 fPFirst = pElem;
00485 } else {
00486 fPLast->fPNext = pElem;
00487 }
00488 fPLast = pElem;
00489 fNToken++;
00490 }
00491
00492
00493 void TGHtml::AppToken(TGHtmlElement *pNew, TGHtmlElement *p, int offs)
00494 {
00495
00496
00497 if (offs < 0) {
00498 if (p) {
00499 offs = p->fOffs;
00500 } else {
00501 offs = fNText;
00502 }
00503 }
00504
00505
00506
00507
00508 pNew->fOffs = offs;
00509 pNew->fPNext = p;
00510 if (p) {
00511 pNew->fElId = p->fElId;
00512 p->fElId = ++fIdind;
00513 pNew->fPPrev = p->fPPrev;
00514 if (p->fPPrev) p->fPPrev->fPNext = pNew;
00515 if (fPFirst == p) fPFirst = pNew;
00516 p->fPPrev = pNew;
00517 } else {
00518 pNew->fElId = ++fIdind;
00519 AppendElement(pNew);
00520 }
00521 fNToken++;
00522 }
00523
00524
// Return the column that follows column iCol once character c has been
// consumed: a newline goes back to column 0, a tab advances to the next
// multiple of 8, and any other character advances by one.
static int NextColumn(int iCol, char c)
{
   if (c == '\n') {
      return 0;
   }
   if (c == '\t') {
      return (iCol | 7) + 1;
   }
   return iCol + 1;
}
00536
00537
// Convert every upper-case letter of the NUL-terminated string z to lower
// case, in place.
//
// Each byte is handed to isupper()/tolower() as an unsigned char; passing
// a plain char that happens to be negative (any byte >= 0x80 where char is
// signed) is undefined behaviour.
void ToLower(char *z)
{
   for (; *z; z++) {
      const unsigned char uc = (unsigned char) *z;
      if (isupper(uc)) *z = (char) tolower(uc);
   }
}
00547
00548
00549 int TGHtml::Tokenize()
00550 {
00551
00552
00553
00554
00555
00556
00557
00558
00559
00560
00561
00562
00563
00564 char *z;
00565 int c;
00566 int n;
00567 int inpCol;
00568 int i, j;
00569 int h;
00570 TGHtmlElement *pElem;
00571 int selfClose;
00572 int argc;
00573 SHtmlTokenMap_t *pMap;
00574 # define mxARG 200 // Maximum number of parameters in a single markup
00575 char *argv[mxARG];
00576 int arglen[mxARG];
00577
00578 int pIsInScript = 0;
00579 int pIsInNoScript = 0;
00580 int pIsInNoFrames = 0;
00581 int sawdot = 0;
00582 int inLi = 0;
00583
00584 static char null[1] = { "" };
00585
00586 inpCol = fICol;
00587 n = fNComplete;
00588 z = fZText;
00589 if (inpCol < 0) return n;
00590 fICol = -1;
00591 pElem = 0;
00592
00593 while ((c = z[n]) != 0) {
00594
00595 sawdot--;
00596 if (c == -64 && z[n+1] == -128) {
00597 n += 2;
00598 continue;
00599 }
00600
00601 if (fPScript) {
00602
00603
00604
00605
00606
00607 TGHtmlScript *pScr = fPScript;
00608 const char *zEnd;
00609 int nEnd;
00610
00611 int sqcnt;
00612 if (pScr->fType == Html_SCRIPT) {
00613 zEnd = "</script>";
00614 nEnd = 9;
00615 } else if (pScr->fType == Html_NOSCRIPT) {
00616 zEnd = "</noscript>";
00617 nEnd = 11;
00618 } else if (pScr->fType == Html_NOFRAMES) {
00619 zEnd = "</noframes>";
00620 nEnd = 11;
00621 } else {
00622 zEnd = "</style>";
00623 nEnd = 8;
00624 }
00625 if (pScr->fNStart < 0) {
00626 pScr->fNStart = n;
00627 pScr->fNScript = 0;
00628 }
00629 sqcnt = 0;
00630 for (i = n ; z[i]; i++) {
00631 if (z[i] == '\'' || z[i] == '"') {
00632 sqcnt++;
00633 } else if (z[i] == '\n') {
00634 sqcnt = 0;
00635 }
00636 if (z[i] == '<' && z[i+1] == '/' &&
00637 strncasecmp(&z[i], zEnd, nEnd) == 0) {
00638 if (zEnd[3] == 'c' && ((sqcnt % 2) == 1)) continue;
00639 pScr->fNScript = i - n;
00640 fPScript = 0;
00641 n = i + nEnd;
00642 break;
00643 }
00644 }
00645 if (z[i] == 0) goto incomplete;
00646 if (fPScript) {
00647 pScr->fNScript = i - n;
00648 n = i;
00649 }
00650 else {
00651 #if 0
00652
00653
00654
00655 if (pIsInScript && !pIsInNoScript && !pIsInNoFrames) {
00656
00657
00658
00659
00660
00661
00662
00663
00664
00665
00666
00667
00668 AdvanceLayout(p);
00669 inParse++;
00670 char *result = ProcessScript((TGHtmlScript *) pElem);
00671 inParse--;
00672 if (result) {
00673 ol = fNAlloc;
00674 rl = strlen(result);
00675 fNAlloc += rl;
00676 z = fZText = HtmlRealloc(z, ol+rl);
00677 memmove(z + n + rl, z+n, ol - n);
00678 memmove(z + n, result, rl);
00679 }
00680 }
00681 #endif
00682 pIsInScript = 0;
00683 pIsInNoScript = 0;
00684 pIsInNoFrames = 0;
00685 }
00686
00687
00688 }
00689 else if (isspace((unsigned char)c)) {
00690
00691
00692 for (i = 0;
00693 (c = z[n+i]) != 0 && isspace((unsigned char)c) && c != '\n' && c != '\r';
00694 i++) { }
00695 if (c == '\r' && z[n+i+1] == '\n') ++i;
00696 #if 0 // this is certainly NOT OK, since it alters pre-formatted text
00697 if (sawdot == 1) {
00698 pElem = new TGHtmlTextElement(2);
00699 strcpy(((TGHtmlTextElement *)pElem)->fZText, " ");
00700 pElem->fElId = ++fIdind;
00701 pElem->fOffs = n;
00702 pElem->fCount = 1;
00703 AppendElement(pElem);
00704 }
00705 #endif
00706 pElem = new TGHtmlSpaceElement;
00707 if (pElem == 0) goto incomplete;
00708 ((TGHtmlSpaceElement *)pElem)->fW = 0;
00709 pElem->fOffs = n;
00710 pElem->fElId = ++fIdind;
00711 if (c == '\n' || c == '\r') {
00712 pElem->fFlags = HTML_NewLine;
00713 pElem->fCount = 1;
00714 i++;
00715 inpCol = 0;
00716 } else {
00717 int iColStart = inpCol;
00718 pElem->fFlags = 0;
00719 for (j = 0; j < i; j++) {
00720 inpCol = NextColumn(inpCol, z[n+j]);
00721 }
00722 pElem->fCount = inpCol - iColStart;
00723 }
00724 AppendElement(pElem);
00725 n += i;
00726
00727 }
00728 else if (c != '<' || fIPlaintext != 0 ||
00729 (!isalpha(z[n+1]) && z[n+1] != '/' && z[n+1] != '!' && z[n+1] != '?')) {
00730
00731
00732 for (i = 1; (c = z[n+i]) != 0 && !isspace((unsigned char)c) && c != '<'; i++) {}
00733 if (z[n+i-1] == '.' || z[n+i-1] == '!' || z[n+i-1] == '?') sawdot = 2;
00734 if (c == 0) goto incomplete;
00735 if (fIPlaintext != 0 && z[n] == '<') {
00736 switch (fIPlaintext) {
00737 case Html_LISTING:
00738 if (i >= 10 && strncasecmp(&z[n], "</listing>", 10) == 0) {
00739 fIPlaintext = 0;
00740 goto doMarkup;
00741 }
00742 break;
00743
00744 case Html_XMP:
00745 if (i >= 6 && strncasecmp(&z[n], "</xmp>", 6) == 0) {
00746 fIPlaintext = 0;
00747 goto doMarkup;
00748 }
00749 break;
00750
00751 case Html_TEXTAREA:
00752 if (i >= 11 && strncasecmp(&z[n], "</textarea>", 11) == 0) {
00753 fIPlaintext = 0;
00754 goto doMarkup;
00755 }
00756 break;
00757
00758 default:
00759 break;
00760 }
00761 }
00762 pElem = new TGHtmlTextElement(i);
00763 if (pElem == 0) goto incomplete;
00764 TGHtmlTextElement *tpElem = (TGHtmlTextElement *) pElem;
00765 tpElem->fElId = ++fIdind;
00766 tpElem->fOffs = n;
00767 strncpy(tpElem->fZText, &z[n], i);
00768 tpElem->fZText[i] = 0;
00769 AppendElement(pElem);
00770 if (fIPlaintext == 0 || fIPlaintext == Html_TEXTAREA) {
00771 HtmlTranslateEscapes(tpElem->fZText);
00772 }
00773 pElem->fCount = strlen(tpElem->fZText);
00774 n += i;
00775 inpCol += i;
00776
00777 } else if (strncmp(&z[n], "<!--", 4) == 0) {
00778
00779
00780 for (i = 4; z[n+i]; i++) {
00781 if (z[n+i] == '-' && strncmp(&z[n+i], "-->", 3) == 0) break;
00782 }
00783 if (z[n+i] == 0) goto incomplete;
00784
00785 pElem = new TGHtmlTextElement(i);
00786 if (pElem == 0) goto incomplete;
00787 TGHtmlTextElement *tpElem = (TGHtmlTextElement *) pElem;
00788 tpElem->fType = Html_COMMENT;
00789 tpElem->fElId = ++fIdind;
00790 tpElem->fOffs = n;
00791 strncpy(tpElem->fZText, &z[n+4], i-4);
00792 tpElem->fZText[i-4] = 0;
00793 tpElem->fCount = 0;
00794 AppendElement(pElem);
00795
00796 pElem = new TGHtmlElement(Html_EndCOMMENT);
00797 AppToken(pElem, 0, n+4);
00798
00799 for (j = 0; j < i+3; j++) {
00800 inpCol = NextColumn(inpCol, z[n+j]);
00801 }
00802 n += i + 3;
00803
00804 }
00805 else {
00806
00807
00808
00809
00810 doMarkup:
00811 argc = 1;
00812 argv[0] = &z[n+1];
00813 for (i = 1;
00814 (c = z[n+i]) != 0 && !isspace((unsigned char)c) && c != '>' && (i < 2 || c != '/');
00815 i++) {}
00816 arglen[0] = i - 1;
00817 if (c == 0) goto incomplete;
00818
00819
00820
00821 while (isspace((unsigned char)z[n+i])) ++i;
00822 while ((c = z[n+i]) != 0 && c != '>' && (c != '/' || z[n+i+1] != '>')) {
00823 if (argc > mxARG - 3) argc = mxARG - 3;
00824 argv[argc] = &z[n+i];
00825 j = 0;
00826 while ((c = z[n+i+j]) != 0 && !isspace((unsigned char)c) && c != '>' &&
00827 c != '=' && (c != '/' || z[n+i+j+1] != '>')) ++j;
00828 arglen[argc] = j;
00829 if (c == 0) goto incomplete;
00830 i += j;
00831 while (isspace((unsigned char)c)) {
00832 i++;
00833 c = z[n+i];
00834 }
00835 if (c == 0) goto incomplete;
00836 argc++;
00837 if (c != '=') {
00838 argv[argc] = null;
00839 arglen[argc] = 0;
00840 argc++;
00841 continue;
00842 }
00843 i++;
00844 c = z[n+i];
00845 while (isspace((unsigned char)c)) {
00846 i++;
00847 c = z[n+i];
00848 }
00849 if (c == 0) goto incomplete;
00850 if (c == '\'' || c == '"') {
00851 int cQuote = c;
00852 i++;
00853 argv[argc] = &z[n+i];
00854 for (j = 0; (c = z[n+i+j]) != 0 && c != cQuote; j++) {}
00855 if (c == 0) goto incomplete;
00856 arglen[argc] = j;
00857 i += j+1;
00858 } else {
00859 argv[argc] = &z[n+i];
00860 for (j = 0; (c = z[n+i+j]) != 0 && !isspace((unsigned char)c) && c != '>'; j++) {}
00861 if (c == 0) goto incomplete;
00862 arglen[argc] = j;
00863 i += j;
00864 }
00865 argc++;
00866 while (isspace(z[n+i])) ++i;
00867 }
00868 if (c == '/') {
00869 i++;
00870 c = z[n+i];
00871 selfClose = 1;
00872 } else {
00873 selfClose = 0;
00874 }
00875 if (c == 0) goto incomplete;
00876 for (j = 0; j < i+1; j++) {
00877 inpCol = NextColumn(inpCol, z[n+j]);
00878 }
00879 n += i + 1;
00880
00881
00882
00883 if (!gIsInit) {
00884 HtmlHashInit();
00885 gIsInit = 1;
00886 }
00887 c = argv[0][arglen[0]];
00888 argv[0][arglen[0]] = 0;
00889 h = HtmlHash(argv[0]);
00890 for (pMap = gApMap[h]; pMap; pMap = pMap->fPCollide) {
00891 if (strcasecmp(pMap->fZName, argv[0]) == 0) break;
00892 }
00893 argv[0][arglen[0]] = c;
00894 if (pMap == 0) continue;
00895
00896 makeMarkupEntry:
00897
00898
00899 pElem = MakeMarkupEntry(pMap->fObjType, pMap->fType, argc, arglen, argv);
00900 if (pElem == 0) goto incomplete;
00901
00902 pElem->fElId = ++fIdind;
00903 pElem->fOffs = n;
00904
00905 AddFormInfo(pElem);
00906
00907
00908
00909
00910
00911 if (ProcessToken(pElem, pMap->fZName, pMap->fType)) {
00912
00913
00914
00915
00916
00917 z = fZText;
00918 if (z == 0) {
00919 n = 0;
00920 inpCol = 0;
00921 goto incomplete;
00922 }
00923 continue;
00924 }
00925
00926
00927
00928
00929 AppendElement(pElem);
00930 switch (pMap->fType) {
00931 case Html_TABLE:
00932 break;
00933
00934 case Html_PLAINTEXT:
00935 case Html_LISTING:
00936 case Html_XMP:
00937 case Html_TEXTAREA:
00938 fIPlaintext = pMap->fType;
00939 break;
00940
00941 case Html_NOFRAMES:
00942 if (!fHasFrames) break;
00943 pIsInNoFrames = 1;
00944 case Html_NOSCRIPT:
00945 break;
00946 if (!fHasScript) break;
00947 pIsInNoScript = 1;
00948 case Html_SCRIPT:
00949 pIsInScript = 1;
00950
00951 case Html_STYLE:
00952 fPScript = (TGHtmlScript *) pElem;
00953 break;
00954
00955 case Html_LI:
00956 if (!fAddEndTags) break;
00957 if (inLi) {
00958 TGHtmlElement *e = new TGHtmlMarkupElement(Html_EndLI, 1, 0, 0);
00959 AppToken(e, pElem, n);
00960 } else {
00961 inLi = 1;
00962 }
00963 break;
00964
00965 case Html_EndLI:
00966 inLi=0;
00967 break;
00968
00969 case Html_EndOL:
00970 case Html_EndUL:
00971 if (!fAddEndTags) break;
00972 if (inLi) {
00973 TGHtmlElement *e = new TGHtmlMarkupElement(Html_EndLI, 1, 0, 0);
00974 AppToken(e, pElem, n);
00975 } else {
00976 inLi = 0;
00977 }
00978 break;
00979
00980 default:
00981 break;
00982 }
00983
00984
00985
00986
00987 if (selfClose && argv[0][0] != '/' &&
00988 strcmp(&pMap[1].fZName[1], pMap->fZName) == 0) {
00989 selfClose = 0;
00990 pMap++;
00991 argc = 1;
00992 goto makeMarkupEntry;
00993 }
00994 }
00995 }
00996
00997 incomplete:
00998 fICol = inpCol;
00999
01000
01001 return n;
01002 }
01003
01004
01005
01006
01007 TGHtmlMarkupElement *TGHtml::MakeMarkupEntry(int objType, int type, int argc,
01008 int arglen[], char *argv[])
01009 {
01010
01011
01012 TGHtmlMarkupElement *e;
01013
01014 switch (objType) {
01015 case O_HtmlCell:
01016 e = new TGHtmlCell(type, argc, arglen, argv);
01017 break;
01018
01019 case O_HtmlTable:
01020 e = new TGHtmlTable(type, argc, arglen, argv);
01021 break;
01022
01023 case O_HtmlRef:
01024 e = new TGHtmlRef(type, argc, arglen, argv);
01025 break;
01026
01027 case O_HtmlLi:
01028 e = new TGHtmlLi(type, argc, arglen, argv);
01029 break;
01030
01031 case O_HtmlListStart:
01032 e = new TGHtmlListStart(type, argc, arglen, argv);
01033 break;
01034
01035 case O_HtmlImageMarkup:
01036 e = new TGHtmlImageMarkup(type, argc, arglen, argv);
01037 break;
01038
01039 case O_HtmlInput:
01040 e = new TGHtmlInput(type, argc, arglen, argv);
01041 break;
01042
01043 case O_HtmlForm:
01044 e = new TGHtmlForm(type, argc, arglen, argv);
01045 break;
01046
01047 case O_HtmlHr:
01048 e = new TGHtmlHr(type, argc, arglen, argv);
01049 break;
01050
01051 case O_HtmlAnchor:
01052 e = new TGHtmlAnchor(type, argc, arglen, argv);
01053 break;
01054
01055 case O_HtmlScript:
01056 e = new TGHtmlScript(type, argc, arglen, argv);
01057 break;
01058
01059 case O_HtmlMapArea:
01060 e = new TGHtmlMapArea(type, argc, arglen, argv);
01061 break;
01062
01063 default:
01064 e = new TGHtmlMarkupElement(type, argc, arglen, argv);
01065 break;
01066 }
01067
01068 return e;
01069 }
01070
01071
01072 void TGHtml::TokenizerAppend(const char *text)
01073 {
01074
01075
01076 int len = strlen(text);
01077
01078 if (fNText == 0) {
01079 fNAlloc = len + 100;
01080 fZText = new char [fNAlloc];
01081 } else if (fNText + len >= fNAlloc) {
01082 fNAlloc += len + 100;
01083 char *tmp = new char[fNAlloc];
01084 strcpy(tmp, fZText);
01085 delete[] fZText;
01086 fZText = tmp;
01087 }
01088
01089 if (fZText == 0) {
01090 fNText = 0;
01091 UNTESTED;
01092 return;
01093 }
01094
01095 strcpy(&fZText[fNText], text);
01096 fNText += len;
01097 fNComplete = Tokenize();
01098 }
01099
01100
01101 TGHtmlElement *TGHtml::InsertToken(TGHtmlElement *pToken,
01102 char *zType, char *zArgs, int offs)
01103 {
01104
01105
01106
01107
01108
01109
01110
01111
01112
01113
01114
01115
01116
01117
01118
01119 SHtmlTokenMap_t *pMap;
01120 int h;
01121 TGHtmlElement *pElem;
01122
01123
01124
01125 if (!gIsInit) {
01126 HtmlHashInit();
01127 gIsInit = 1;
01128 }
01129
01130 if (strcmp(zType, "Text") == 0) {
01131 pElem = new TGHtmlTextElement(zArgs ? strlen(zArgs) : 0);
01132 if (pElem == 0) return 0;
01133 if (zArgs) {
01134 strcpy (((TGHtmlTextElement *)pElem)->fZText, zArgs);
01135 pElem->fCount = strlen(zArgs);
01136 }
01137 } else if (!strcmp(zType, "Space")) {
01138 pElem = new TGHtmlSpaceElement();
01139 if (pElem == 0) return 0;
01140 } else {
01141 h = HtmlHash(zType);
01142 for (pMap = gApMap[h]; pMap; pMap = pMap->fPCollide) {
01143 if (strcasecmp(pMap->fZName, zType) == 0) break;
01144 }
01145 if (pMap == 0) return 0;
01146 if (zArgs == 0 || *zArgs == 0) {
01147
01148
01149 pElem = MakeMarkupEntry(pMap->fObjType, pMap->fType, 1, 0, 0);
01150 if (pElem == 0) return 0;
01151 } else {
01152
01153
01154
01155
01156
01157
01158 #if 0
01159 if (!SplitList(zArgs, &argc, &argv)) return 0;
01160
01161
01162
01163 pElem = MakeMarkupEntry(pMap->fObjType, pMap->fType, argc, 0, argv);
01164 if (pElem == 0) return 1;
01165
01166 while (--argc >= 0) if (argv[argc]) delete[] argv[argc];
01167 delete[] argv;
01168 #else
01169 return 0;
01170 #endif
01171 }
01172 }
01173
01174 pElem->fElId = ++fIdind;
01175
01176 AppToken(pElem, pToken, offs);
01177
01178 return pElem;
01179 }
01180
01181
01182 int TGHtml::TextInsertCmd(int , char ** )
01183 {
01184
01185
01186
01187
01188 #if 0
01189 TGHtmlElement *p, *pElem;
01190 int i, l, n = 0;
01191 int idx = 0;
01192 int ptyp = Html_Unknown;
01193 int istxt = 0;
01194 char *cp = 0, c, *cp2;
01195
01196 if (GetIndex(argv[3], &p, &i) != 0) {
01197
01198 return 0;
01199 }
01200 if (p) {
01201 ptyp = p->fType;
01202 if ((istxt = (ptyp == Html_Text))) {
01203 l = p->fCount;
01204 cp = ((TGHtmlTextElement *)p)->fZText;
01205 }
01206 }
01207 if (argv[2][0] == 'b') {
01208 if (!istxt) return 1;
01209 if (i == 0 || i == l) return 1;
01210 pElem = InsertToken(p->fPNext, "Text", cp + i, -1);
01211 cp[i] = 0;
01212 p->fCount = i;
01213 return 1;
01214 }
01215 c = argv[4][0];
01216 if (!c) return 1;
01217 if (c == '\b') {
01218 if ((!istxt) || (!l) || (!i)) {
01219 if (!p) return 1;
01220 if (p->fType == Html_BR)
01221 RemoveElements(p, p);
01222 return 1;
01223 }
01224 if (p && l == 1) {
01225 RemoveElements(p, p);
01226 return 1;
01227 }
01228 if (i == l)
01229 cp[p->fCount] = 0;
01230 else
01231 memcpy(cp+i-1, cp+i, l-i+1);
01232
01233 cp[--p->fCount] = 0;
01234 if (ins.i-- <= 0) ins.i = 0;
01235 ins.p = p;
01236 return 1;
01237 }
01238 if (c == '\n' || c == '\r') {
01239 }
01240 if (istxt) {
01241 char *cp;
01242 int t, j, alen = strlen(argv[4]);
01243 n = alen + l;
01244
01245 TGHtmlTextElement *text = (TGHtmlTextElement *) p;
01246
01247 if (text->fZText == (char*) ((&text->fZText)+1)) {
01248 cp = new char[n+1];
01249 strcpy(cp, text->fZText);
01250 } else {
01251 cp = new char[n+1];
01252 strcpy(cp, text->fZText);
01253 }
01254 cp2 = new char[alen+1];
01255 memcpy(cp2, argv[4], alen+1);
01256 HtmlTranslateEscapes(cp2);
01257 alen = strlen(cp2);
01258 memmove(cp+alen+i, cp+i, l-i+1);
01259 for (j = 0; j < alen; j++) cp[i+j] = cp2[j];
01260 delete[] cp2;
01261 delete[] text->fZText;
01262 text->fZText = cp;
01263 p->fCount = strlen(cp);
01264 ins.p = p;
01265 ins.i = i+alen;
01266 } else {
01267 p = InsertToken(p ? p->fPNext : 0, "Text", argv[4], -1);
01268 AddStyle(p);
01269 i = 0;
01270 ins.p = p;
01271 ins.i = 1;
01272 }
01273 if (p) {
01274 idx = p->base.id;
01275 AddStrOffset(p, argv[4], i);
01276 }
01277 #endif
01278 return 1;
01279 }
01280
01281
01282 SHtmlTokenMap_t *TGHtml::NameToPmap(char *zType)
01283 {
01284
01285
01286 SHtmlTokenMap_t *pMap;
01287 int h;
01288
01289 if (!gIsInit) {
01290 HtmlHashInit();
01291 gIsInit = 1;
01292 }
01293 h = HtmlHash(zType);
01294 for (pMap = gApMap[h]; pMap; pMap = pMap->fPCollide) {
01295 if (strcasecmp(pMap->fZName, zType) == 0) break;
01296 }
01297
01298 return pMap;
01299 }
01300
01301
01302 int TGHtml::NameToType(char *zType)
01303 {
01304
01305
01306 SHtmlTokenMap_t *pMap = NameToPmap(zType);
01307 return pMap ? pMap->fType : (int)Html_Unknown;
01308 }
01309
01310
01311 const char *TGHtml::TypeToName(int type)
01312 {
01313
01314
01315 if (type >= Html_A && type <= Html_EndXMP) {
01316 SHtmlTokenMap_t *pMap = gApMap[type - Html_A];
01317 return pMap->fZName;
01318 } else {
01319 return "???";
01320 }
01321 }
01322
01323
01324 char *TGHtml::DumpToken(TGHtmlElement *p)
01325 {
01326
01327
01328
01329 static char zBuf[200];
01330 int j;
01331 const char *zName;
01332
01333 if (p == 0) {
01334 sprintf(zBuf, "NULL");
01335 return zBuf;
01336 }
01337 switch (p->fType) {
01338 case Html_Text:
01339 sprintf(zBuf, "text: \"%.*s\"", p->fCount, ((TGHtmlTextElement *)p)->fZText);
01340 break;
01341
01342 case Html_Space:
01343 if (p->fFlags & HTML_NewLine) {
01344 sprintf(zBuf, "space: \"\\n\"");
01345 } else {
01346 sprintf(zBuf, "space: \" \"");
01347 }
01348 break;
01349
01350 case Html_Block: {
01351 TGHtmlBlock *block = (TGHtmlBlock *) p;
01352 if (block->fN > 0) {
01353 int n = block->fN;
01354 if (n > 150) n = 150;
01355 sprintf(zBuf, "<Block z=\"%.*s\">", n, block->fZ);
01356 } else {
01357 sprintf(zBuf, "<Block>");
01358 }
01359 break;
01360 }
01361
01362 default:
01363 if (p->fType >= HtmlMarkupMap[0].fType
01364 && p->fType <= HtmlMarkupMap[HTML_MARKUP_COUNT-1].fType) {
01365 zName = HtmlMarkupMap[p->fType - HtmlMarkupMap[0].fType].fZName;
01366 } else {
01367 zName = "Unknown";
01368 }
01369 sprintf(zBuf, "markup (%d) <%s", p->fType, zName);
01370 for (j = 1 ; j < p->fCount; j += 2) {
01371 sprintf(&zBuf[strlen(zBuf)], " %s=\"%s\"",
01372 ((TGHtmlMarkupElement *)p)->fArgv[j-1],
01373 ((TGHtmlMarkupElement *)p)->fArgv[j]);
01374 }
01375 strcat(zBuf, ">");
01376 break;
01377 }
01378 return zBuf;
01379
01380
01381
01382 }
01383
01384
01385 void TGHtml::AppendArglist(TGString *str, TGHtmlMarkupElement *pElem)
01386 {
01387
01388
01389
01390
01391
01392
01393
01394
01395
01396
01397 int i;
01398
01399 for (i = 0; i + 1 < pElem->fCount; i += 2) {
01400 str->Append(pElem->fArgv[i]);
01401 str->Append("=");
01402 str->Append(pElem->fArgv[i+1]);
01403 str->Append(" ");
01404 }
01405 }
01406
01407
01408 char *TGHtml::GetTokenName(TGHtmlElement *p)
01409 {
01410
01411
01412 static char zBuf[200];
01413
01414 const char *zName;
01415
01416 zBuf[0] = 0;
01417 if (p == 0) {
01418 strcpy(zBuf, "NULL");
01419 return zBuf;
01420 }
01421 switch (p->fType) {
01422 case Html_Text:
01423 case Html_Space:
01424 break;
01425
01426 case Html_Block:
01427 break;
01428
01429 default:
01430 if (p->fType >= HtmlMarkupMap[0].fType &&
01431 p->fType <= HtmlMarkupMap[HTML_MARKUP_COUNT-1].fType) {
01432 zName = HtmlMarkupMap[p->fType - HtmlMarkupMap[0].fType].fZName;
01433 } else {
01434 zName = "Unknown";
01435 }
01436 strlcpy(zBuf, zName, sizeof(zBuf));
01437 break;
01438 }
01439
01440 return zBuf;
01441 }
01442
01443
01444 SHtmlTokenMap_t* TGHtml::GetMarkupMap(int n)
01445 {
01446
01447
01448 return HtmlMarkupMap+n;
01449 }
01450
01451
01452 TGString *TGHtml::ListTokens(TGHtmlElement *p, TGHtmlElement *pEnd)
01453 {
01454
01455
01456 TGString *str;
01457 int i;
01458 const char *zName;
01459 char zLine[100];
01460
01461 str = new TGString("");
01462 while (p && p != pEnd) {
01463 switch (p->fType) {
01464 case Html_Block:
01465 break;
01466
01467 case Html_Text:
01468 str->Append("{ Text \"");
01469 str->Append(((TGHtmlTextElement *)p)->fZText);
01470 str->Append("\" } ");
01471 break;
01472
01473 case Html_Space:
01474 snprintf(zLine, 100, "Space %d %d ",
01475 p->fCount, (p->fFlags & HTML_NewLine) != 0);
01476 str->Append(zLine);
01477 break;
01478
01479 case Html_Unknown:
01480 str->Append("Unknown ");
01481 break;
01482
01483 default:
01484 str->Append("{ Markup ");
01485 if (p->fType >= HtmlMarkupMap[0].fType &&
01486 p->fType <= HtmlMarkupMap[HTML_MARKUP_COUNT-1].fType) {
01487 zName = HtmlMarkupMap[p->fType - HtmlMarkupMap[0].fType].fZName;
01488 } else {
01489 zName = "Unknown";
01490 }
01491 str->Append(zName);
01492 str->Append(" ");
01493 for (i = 0; i < p->fCount; ++i) {
01494 str->Append(((TGHtmlMarkupElement *)p)->fArgv[i]);
01495 str->Append(" ");
01496 }
01497 str->Append("} ");
01498 break;
01499 }
01500 p = p->fPNext;
01501 }
01502
01503 return str;
01504 }
01505
01506
01507 void TGHtml::PrintList(TGHtmlElement *first, TGHtmlElement *last)
01508 {
01509
01510
01511 TGHtmlElement *p;
01512
01513 for (p = first; p != last; p = p->fPNext) {
01514 if (p->fType == Html_Block) {
01515 TGHtmlBlock *block = (TGHtmlBlock *) p;
01516 const char *z = block->fZ;
01517 int n = block->fN;
01518 if (n == 0 || z == 0) {
01519 n = 1;
01520 z = "";
01521 }
01522 printf("Block flags=%02x cnt=%d x=%d..%d y=%d..%d z=\"%.*s\"\n",
01523 p->fFlags, p->fCount, block->fLeft, block->fRight,
01524 block->fTop, block->fBottom, n, z);
01525 } else {
01526 printf("Token font=%2d color=%2d align=%d flags=0x%04x name=%s\n",
01527 p->fStyle.fFont, p->fStyle.fColor,
01528 p->fStyle.fAlign, p->fStyle.fFlags, DumpToken(p));
01529 }
01530 }
01531 }