/*
 * $LynxId: HTMLDTD.c,v 1.57 2010/09/25 00:30:56 tom Exp $
 *
 *		Our Static DTD for HTML
 *		-----------------------
 */

/* Implements:
 */

/*
 * NOTE(review): the header names appear to have been stripped from the
 * #include directives below (probably by a tool that removed <...> text).
 * They must be restored from the upstream file before this will compile.
 */
#include
#include
#include
#include

/*
 * Character entities like &nbsp; are now excluded from our DTD tables; they
 * are mapped to Unicode and handled by the chartrans code directly, in the
 * same way that numeric entities like &#123; are.  See
 * src/chrtrans/entities.h for the real mapping.
 */

/*	Entity Names
 *	------------
 *
 *	This table must be matched exactly with ALL the translation tables
 *	(this is an obsolete translation mechanism, probably unused,
 *	currently replaced with Unicode chartrans in most cases...)
 *
 *	Each string is an entity name without the leading '&' or trailing
 *	';'.  The table and its element count are published through the
 *	HTML_dtd structure defined later in this file.
 *
 *	NOTE(review): the entries are listed in ASCII order (upper case
 *	before lower case); presumably lookups rely on that ordering, so
 *	keep new entries sorted -- confirm against the code that searches it.
 */
static const char *entities[] = {
    "AElig",			/* capital AE diphthong (ligature) */
    "Aacute",			/* capital A, acute accent */
    "Acirc",			/* capital A, circumflex accent */
    "Agrave",			/* capital A, grave accent */
    "Aring",			/* capital A, ring */
    "Atilde",			/* capital A, tilde */
    "Auml",			/* capital A, dieresis or umlaut mark */
    "Ccedil",			/* capital C, cedilla */
    "Dstrok",			/* capital Eth, Icelandic */
    "ETH",			/* capital Eth, Icelandic */
    "Eacute",			/* capital E, acute accent */
    "Ecirc",			/* capital E, circumflex accent */
    "Egrave",			/* capital E, grave accent */
    "Euml",			/* capital E, dieresis or umlaut mark */
    "Iacute",			/* capital I, acute accent */
    "Icirc",			/* capital I, circumflex accent */
    "Igrave",			/* capital I, grave accent */
    "Iuml",			/* capital I, dieresis or umlaut mark */
    "Ntilde",			/* capital N, tilde */
    "Oacute",			/* capital O, acute accent */
    "Ocirc",			/* capital O, circumflex accent */
    "Ograve",			/* capital O, grave accent */
    "Oslash",			/* capital O, slash */
    "Otilde",			/* capital O, tilde */
    "Ouml",			/* capital O, dieresis or umlaut mark */
    "THORN",			/* capital THORN, Icelandic */
    "Uacute",			/* capital U, acute accent */
    "Ucirc",			/* capital U, circumflex accent */
    "Ugrave",			/* capital U, grave accent */
    "Uuml",			/* capital U, dieresis or umlaut mark */
    "Yacute",			/* capital Y, acute accent */
    "aacute",			/* small a, acute accent */
    "acirc",			/* small a, circumflex accent */
    "acute",			/* spacing acute */
    "aelig",			/* small ae diphthong (ligature) */
    "agrave",			/* small a, grave accent */
    "amp",			/* ampersand */
    "aring",			/* small a, ring */
    "atilde",			/* small a, tilde */
    "auml",			/* small a, dieresis or umlaut mark */
    "brkbar",			/* broken vertical bar */
    "brvbar",			/* broken vertical bar */
    "ccedil",			/* small c, cedilla */
    "cedil",			/* spacing cedilla */
    "cent",			/* cent sign */
    "copy",			/* copyright sign */
    "curren",			/* currency sign */
    "deg",			/* degree sign */
    "die",			/* spacing dieresis */
    "divide",			/* division sign */
    "eacute",			/* small e, acute accent */
    "ecirc",			/* small e, circumflex accent */
    "egrave",			/* small e, grave accent */
    "emdash",			/* dash the width of emsp */
    "emsp",			/* em space - not collapsed */
    "endash",			/* dash the width of ensp */
    "ensp",			/* en space - not collapsed */
    "eth",			/* small eth, Icelandic */
    "euml",			/* small e, dieresis or umlaut mark */
    "frac12",			/* fraction 1/2 */
    "frac14",			/* fraction 1/4 */
    "frac34",			/* fraction 3/4 */
    "gt",			/* greater than */
    "hibar",			/* spacing macron */
    "iacute",			/* small i, acute accent */
    "icirc",			/* small i, circumflex accent */
    "iexcl",			/* inverted exclamation mark */
    "igrave",			/* small i, grave accent */
    "iquest",			/* inverted question mark */
    "iuml",			/* small i, dieresis or umlaut mark */
    "laquo",			/* angle quotation mark, left */
    "lt",			/* less than */
    "macr",			/* spacing macron */
    "mdash",			/* dash the width of emsp */
    "micro",			/* micro sign */
    "middot",			/* middle dot */
    "nbsp",			/* non breaking space */
    "ndash",			/* dash the width of ensp */
    "not",			/* negation sign */
    "ntilde",			/* small n, tilde */
    "oacute",			/* small o, acute accent */
    "ocirc",			/* small o, circumflex accent */
    "ograve",			/* small o, grave accent */
    "ordf",			/* feminine ordinal indicator */
    "ordm",			/* masculine ordinal indicator */
    "oslash",			/* small o, slash */
    "otilde",			/* small o, tilde */
    "ouml",			/* small o, dieresis or umlaut mark */
    "para",			/* paragraph sign */
    "plusmn",			/* plus-or-minus sign */
    "pound",			/* pound sign */
    "quot",			/* quote '"' */
    "raquo",			/* angle quotation mark, right */
    "reg",			/* circled R registered sign */
    "sect",			/* section sign */
    "shy",			/* soft hyphen */
    "sup1",			/* superscript 1 */
    "sup2",			/* superscript 2 */
    "sup3",			/* superscript 3 */
    "szlig",			/* small sharp s, German (sz ligature) */
    "thinsp",			/* thin space (not collapsed) */
    "thorn",			/* small thorn, Icelandic */
    "times",			/* multiplication sign */
    "trade",			/* trade mark sign (U+2122) */
    "uacute",			/* small u, acute accent */
    "ucirc",			/* small u, circumflex accent */
    "ugrave",			/* small u, grave accent */
    "uml",			/* spacing dieresis */
    "uuml",			/* small u, dieresis or umlaut mark */
    "yacute",			/* small y, acute accent */
    "yen",			/* yen sign */
    "yuml",			/* small y, dieresis or umlaut mark */
};

/*		Attribute Lists
 *		---------------
 *
 *	Lists must be in alphabetical order by attribute name
 *	The tag elements contain the number of attributes
 */

/*
 * NOTE(review): in the comment that follows, the rows of digits/letters were
 * presumably column-aligned annotations under the element names; that
 * alignment has been lost, so the rows cannot be matched to names reliably.
 */

/* From Peter Flynn's intro to the HTML Pro DTD:

 %structure;

 DIV, CENTER, H1 to H6, P, UL, OL, DL, DIR, MENU, PRE, XMP, LISTING, BLOCKQUOTE, BQ,
 2 1 2 2 1 8 8 8 8 8 8 8 8 4 4
 MULTICOL,?NOBR, FORM, TABLE, ADDRESS, FIG, BDO, NOTE, and FN; plus?WBR, LI, and LH
 8 n ?1 n 8 8 2 2 2 2 2 ?1 nE 4 4

 %insertions;

 Elements which usually contain special-purpose material, or no text material at all.

 BASEFONT, APPLET, OBJECT, EMBED, SCRIPT, MAP, MARQUEE, HR, ISINDEX, BGSOUND, TAB,?IMG,
 1 e? 2 2 l 1 e 2 l 8 4 4 E 1? E 1 E ! E ?1 E
 IMAGE, BR, plus NOEMBED, SERVER, SPACER, AUDIOSCOPE, and SIDEBAR; ?area
 1 n 1 E n n n n n 8 E

 %text;

 Elements within the %structure; which directly contain running text.

 Descriptive or analytic markup: EM, STRONG, DFN, CODE, SAMP, KBD, VAR, CITE, Q, LANG, AU,
 2 2 2 2 2 2 2 2 2 2 n 2
 AUTHOR, PERSON, ACRONYM, ABBR, INS, DEL, and SPAN
 2 2 n 2 2 2 2 2
 Visual markup:S, STRIKE, I, B, TT, U,?NOBR,?WBR, BR, BIG, SMALL, FONT, STYLE, BLINK, TAB,
 1 1 1 1 1 1 ?1 n ?1nE? 1 E 1 1 1 1 l 1 1 E?
BLACKFACE, LIMITTEXT, NOSMARTQUOTES, and SHADOW 1 n 1 n 1 n 1 n Hypertext and graphics: A and?IMG 8 ?8 E Mathematical: SUB, SUP, and MATH 4 4 4 l Documentary: COMMENT, ENTITY, ELEMENT, and ATTRIB 4 4 n 4 n 4 n %formula; */ /* Elements * -------- * * Must match definitions in HTMLDTD.html! * Must be in alphabetical order. * * The T_* extra info is listed here, even though most fields are not used * in SGML.c if Old_DTD is set (with the exception of some Tgf_* flags). * This simplifies comparison of the tags_table0[] table (otherwise unchanged * from original Lynx treatment) with the tags_table1[] table below. - kw * * Name*, Attributes, No. of attributes, content, extra info... */ #include #include /* Dummy space, will be filled with the contents of either tags_table1 or tags_table0 on calling HTSwitchDTD - kw */ static HTTag tags[HTML_ALL_ELEMENTS]; const SGML_dtd HTML_dtd = { tags, HTML_ELEMENTS, entities, /* probably unused */ TABLESIZE(entities), }; /* This function fills the "tags" part of the HTML_dtd structure with what we want to use, either tags_table0 or tags_table1. Note that it has to be called at least once before HTML_dtd is used, otherwise the HTML_dtd contents will be invalid! This could be coded in a way that would make an initialisation call unnecessary, but my C knowledge is limited and I didn't want to list the whole tags_table1 table twice... - kw */ void HTSwitchDTD(int new_flag) { if (TRACE) CTRACE((tfp, "HTMLDTD: Copying %s DTD element info of size %d, %d * %d\n", new_flag ? "strict" : "tagsoup", (int) (new_flag ? sizeof(tags_table1) : sizeof(tags_table0)), HTML_ALL_ELEMENTS, (int) sizeof(HTTag))); if (new_flag) MemCpy(tags, tags_table1, HTML_ALL_ELEMENTS * sizeof(HTTag)); else MemCpy(tags, tags_table0, HTML_ALL_ELEMENTS * sizeof(HTTag)); } HTTag HTTag_unrecognized = {NULL_HTTag, NULL, 0, 0, SGML_EMPTY, T__UNREC_}; /* * Utility Routine: Useful for people building HTML objects. 
*/

/*	Start anchor element
 *	--------------------
 *
 *	It is kinda convenient to have a particular routine for
 *	starting an anchor element, as everything else for HTML is
 *	simple anyway.
 */
struct _HTStructured {
    HTStructuredClass *isa;
    /* ... */
};

/*
 * Start an <A> element on the structured stream "obj".
 *
 *   name  - value for the NAME attribute; the attribute is only marked
 *	     present when name is non-NULL and non-empty.
 *   href  - value for the HREF attribute; marked present whenever href is
 *	     non-NULL (an empty string is passed through).
 *
 * The element is started through obj->isa->start_element with -1 in the
 * position where HTStartAnchor5() passes its tag_charset, and 0 as the
 * final argument.
 */
void HTStartAnchor(HTStructured * obj, const char *name,
		   const char *href)
{
    BOOL present[HTML_A_ATTRIBUTES];
    const char *value[HTML_A_ATTRIBUTES];
    int i;

    /*
     * Clear both arrays: slots for absent attributes must not carry
     * indeterminate pointers into the callback.
     */
    for (i = 0; i < HTML_A_ATTRIBUTES; i++) {
	present[i] = NO;
	value[i] = NULL;
    }

    if (name && *name) {
	present[HTML_A_NAME] = YES;
	value[HTML_A_NAME] = name;
    }
    if (href) {
	present[HTML_A_HREF] = YES;
	value[HTML_A_HREF] = href;
    }

    (*obj->isa->start_element) (obj, HTML_A, present, value, -1, 0);
}

/*
 * Start an <A> element, additionally supplying a link type and charset.
 *
 *   name	  - NAME attribute; used only when non-NULL and non-empty.
 *   href	  - HREF attribute; used only when non-NULL and non-empty
 *		    (note: stricter than HTStartAnchor(), which accepts "").
 *   linktype	  - TYPE attribute; used only when non-NULL and non-empty.
 *   tag_charset  - forwarded unchanged to the start_element callback.
 */
void HTStartAnchor5(HTStructured * obj, const char *name,
		    const char *href,
		    const char *linktype,
		    int tag_charset)
{
    BOOL present[HTML_A_ATTRIBUTES];
    const char *value[HTML_A_ATTRIBUTES];
    int i;

    /* Absent attributes get present == NO and a NULL value. */
    for (i = 0; i < HTML_A_ATTRIBUTES; i++) {
	present[i] = NO;
	value[i] = NULL;
    }

    if (name && *name) {
	present[HTML_A_NAME] = YES;
	value[HTML_A_NAME] = name;
    }
    if (href && *href) {
	present[HTML_A_HREF] = YES;
	value[HTML_A_HREF] = href;
    }
    if (linktype && *linktype) {
	present[HTML_A_TYPE] = YES;
	value[HTML_A_TYPE] = linktype;
    }

    (*obj->isa->start_element) (obj, HTML_A, present, value, tag_charset, 0);
}

/*
 * Start an <ISINDEX> element on the structured stream "obj".
 *
 *   prompt  - PROMPT attribute; used only when non-NULL and non-empty.
 *   href    - HREF attribute; used whenever href is non-NULL (an empty
 *	       string is passed through).
 *
 * As in HTStartAnchor(), -1 and 0 are passed as the last two arguments of
 * the start_element callback.
 */
void HTStartIsIndex(HTStructured * obj, const char *prompt,
		    const char *href)
{
    BOOL present[HTML_ISINDEX_ATTRIBUTES];
    const char *value[HTML_ISINDEX_ATTRIBUTES];
    int i;

    /* Absent attributes get present == NO and a NULL value. */
    for (i = 0; i < HTML_ISINDEX_ATTRIBUTES; i++) {
	present[i] = NO;
	value[i] = NULL;
    }

    if (prompt && *prompt) {
	present[HTML_ISINDEX_PROMPT] = YES;
	value[HTML_ISINDEX_PROMPT] = prompt;
    }
    if (href) {
	present[HTML_ISINDEX_HREF] = YES;
	value[HTML_ISINDEX_HREF] = href;
    }

    (*obj->isa->start_element) (obj, HTML_ISINDEX, present, value, -1, 0);
}