/* HTML source syntax highlighting by Vlad Harchev March 1999 */ #include #include #include #include /* This file creates too many "leak detected" entries in Lynx.leaks. */ #define NO_MEMORY_TRACKING #include #ifdef USE_PRETTYSRC BOOL psrc_convert_string = FALSE; PUBLIC BOOL psrc_view = FALSE;/* this is read by SGML_put_character - TRUE when viewing pretty source */ PUBLIC BOOL LYpsrc = FALSE; /* this tells what will be shown on '\': if TRUE, then pretty source, normal source view otherwise. Toggled by -prettysrc commandline option. */ PUBLIC BOOL sgml_in_psrc_was_initialized; PUBLIC BOOL psrc_nested_call; PUBLIC BOOL psrc_first_tag; PUBLIC BOOL mark_htext_as_source=FALSE; /* tagspecs from lynx.cfg are read here. After .lss file is read (is with lss support), the style cache and markup are created before entering the mainloop. */ PUBLIC BOOL psrcview_no_anchor_numbering = FALSE; PRIVATE char* HTL_tagspecs_defaults[HTL_num_lexemes] = { /* these values are defaults. They are also listed in comments of distibution's lynx.cfg.*/ #ifdef USE_COLOR_STYLE "span.htmlsrc_comment:!span", "span.htmlsrc_tag:!span", "span.htmlsrc_attrib:!span", "span.htmlsrc_attrval:!span", "span.htmlsrc_abracket:!span", "span.htmlsrc_entity:!span", "span.htmlsrc_href:!span", "span.htmlsrc_entire:!span", "span.htmlsrc_badseq:!span", "span.htmlsrc_badtag:!span", "span.htmlsrc_badattr:!span", "span.htmlsrc_sgmlspecial:!span" #else "b:!b", /* comment */ "b:!b", /* tag */ "b:!b", /* attrib */ ":", /* attrval */ "b:!b", /* abracket*/ "b:!b", /* entity */ ":", /* href */ ":", /* entire */ "b:!b", /* badseq */ ":", /* badtag */ ":", /* badattr */ "b:!b" /* sgmlspec*/ #endif }; PUBLIC char* HTL_tagspecs[HTL_num_lexemes]; /* these are pointers since tagspec can be empty (the pointer will be NULL in that case) */ PUBLIC HT_tagspec* lexeme_start[HTL_num_lexemes]; PUBLIC HT_tagspec* lexeme_end[HTL_num_lexemes]; PUBLIC int tagname_transform = 2; PUBLIC int attrname_transform = 2; PRIVATE int html_src_tag_index ARGS1( char*, tagname) { HTTag* tag = SGMLFindTag(&HTML_dtd, tagname); return (tag && tag != &HTTag_unrecognized ) ? tag - HTML_dtd.tags : -1; } typedef enum _html_src_check_state { HTSRC_CK_normal, HTSRC_CK_seen_excl, HTSRC_CK_after_tagname, HTSRC_CK_seen_dot } html_src_check_state; PRIVATE void append_close_tag ARGS3( char*, tagname, HT_tagspec**, head, HT_tagspec**, tail) { int idx, nattr; HTTag* tag; HT_tagspec* subj; idx = html_src_tag_index(tagname); tag = HTML_dtd.tags+idx; nattr = tag->number_of_attributes; if (idx == -1) { fprintf(stderr, "internal error: previous check didn't find bad HTML tag %s", tagname); exit_immediately(-1); } subj = (HT_tagspec*) calloc( sizeof(*subj), 1); subj->element = idx; subj->present = (BOOL*)calloc( nattr*sizeof (BOOL), 1); subj->value = (char**)calloc( nattr*sizeof (char*), 1); subj->start = FALSE; #ifdef USE_COLOR_STYLE subj->class_name = NULL; #endif if (!*head) { *head = subj; *tail = subj; } else { (*tail)->next = subj; *tail = subj; } } /* this will allocate node, initialize all members, and node append to the list, possibly modifying head and modifying tail */ PRIVATE void append_open_tag ARGS4( char*, tagname, char*, classname GCC_UNUSED, HT_tagspec**, head, HT_tagspec**, tail) { HT_tagspec* subj; HTTag* tag; #ifdef USE_COLOR_STYLE int hcode; #endif append_close_tag(tagname, head, tail); /* initialize common members*/ subj = *tail; subj->start = TRUE; tag = HTML_dtd.tags+subj->element; #ifdef USE_COLOR_STYLE hcode = hash_code_lowercase_on_fly(tagname); if (classname && *classname) { # if 0 /* * we don't provide a classname as attribute of that tag, since for plain * formatting tags they are not used directly for anything except style - * and we provide style value directly. */ int class_attr_idx = 0; int n = tag->number_of_attributes; attr* attrs = tag->attributes; /*.... */ /* this is not implemented though it's easy */ # endif hcode = hash_code_aggregate_char('.', hcode); hcode = hash_code_aggregate_lower_str(classname, hcode); StrAllocCopy(subj->class_name, classname); } else { StrAllocCopy(subj->class_name,""); } subj->style = hcode; #endif } /* returns 1 if incorrect */ PUBLIC int html_src_parse_tagspec ARGS4( char*, ts, HTlexeme, lexeme, BOOL, checkonly, BOOL, isstart) { char *p = ts; char *tagstart = 0; char *tagend = 0; char *classstart; char *classend; char stop = FALSE, after_excl = FALSE; html_src_check_state state = HTSRC_CK_normal; HT_tagspec* head = NULL, *tail = NULL; HT_tagspec** slot = ( isstart ? lexeme_start : lexeme_end ) +lexeme; while (!stop) { switch (state) { case HTSRC_CK_normal: case HTSRC_CK_seen_excl: switch (*p) { case '\0': stop = TRUE; break; case ' ': case '\t': break; case '!': if (state == HTSRC_CK_seen_excl) return 1; /*second '!'*/ state = HTSRC_CK_seen_excl; after_excl = TRUE; break; default: if (isalpha(*p) || *p == '_') { tagstart = p; while (*p && ( isalnum(*p) || *p == '_') ) ++p; tagend = p; state = HTSRC_CK_after_tagname; } else return 1; continue; } break; case HTSRC_CK_after_tagname: switch (*p) { case '\0': stop = TRUE; /* FALLTHRU */ case ' ': /* FALLTHRU */ case '\t': { char save = *tagend; *tagend = '\0'; classstart = 0; if (checkonly) { int idx = html_src_tag_index(tagstart); *tagend = save; if (idx == -1) return 1; } else { if (after_excl) append_close_tag(tagstart, &head, &tail); else append_open_tag(tagstart, NULL, &head, &tail); } state = HTSRC_CK_normal; after_excl = FALSE; } break; case '.': if (after_excl) return 1; state = HTSRC_CK_seen_dot; break; default: return 1; } break; case HTSRC_CK_seen_dot: { switch (*p) { case ' ': case '\t': break; case '\0': return 1; default: { char save, save1; if ( isalpha(*p) || *p == '_' ) { classstart = p; while (*p && ( isalnum(*p) || *p == '_') ) ++p; classend = p; save = *classend; *classend = '\0'; save1 = *tagend; *tagend = '\0'; if (checkonly) { int idx = html_src_tag_index(tagstart); *tagend = save1; *classend = save; if (idx == -1) return 1; } else { append_open_tag(tagstart, classstart, &head, &tail); } state = HTSRC_CK_normal;after_excl = FALSE; continue; } else return 1; } }/*of switch(*p)*/ break; } /* of case HTSRC_CK_seen_dot: */ }/* of switch */ ++p; } if (!checkonly) *slot = head; return 0; } /*this will clean the data associated with lexeme 'l' */ PUBLIC void html_src_clean_item ARGS1( HTlexeme, l) { int i; if (HTL_tagspecs[l]) FREE(HTL_tagspecs[l]); for(i = 0; i < 2; ++i) { HT_tagspec* cur,** pts = ( i ? lexeme_start : lexeme_end)+l,*ts = *pts; *pts = NULL; while (ts) { FREE(ts->present); FREE(ts->value); #ifdef USE_COLOR_STYLE if (ts->start) { FREE(ts->class_name); } #endif cur = ts; ts = ts->next; FREE(cur); } } } /*this will be registered with atexit*/ PUBLIC void html_src_clean_data NOARGS { int i; for (i = 0; i < HTL_num_lexemes; ++i) html_src_clean_item(i); } PUBLIC void html_src_on_lynxcfg_reload NOARGS { html_src_clean_data(); HTMLSRC_init_caches(TRUE); } PUBLIC void HTMLSRC_init_caches ARGS1( BOOL, dont_exit) { int i; char* p; char buf[1000]; for (i = 0; i < HTL_num_lexemes; ++i) { /*we assume that HT_tagspecs was NULLs at when program started*/ LYstrncpy(buf, HTL_tagspecs[i] ? HTL_tagspecs[i] : HTL_tagspecs_defaults[i], sizeof(buf) - 1); StrAllocCopy(HTL_tagspecs[i], buf); if ((p = strchr(buf, ':')) != 0) *p = '\0'; if (html_src_parse_tagspec(buf, i, FALSE, TRUE) && !dont_exit ) { fprintf(stderr, "internal error while caching 1st tagspec of %d lexeme", i); exit_immediately(-1); } if (html_src_parse_tagspec( p ? p+1 : NULL , i, FALSE, FALSE) && !dont_exit) { fprintf(stderr, "internal error while caching 2nd tagspec of %d lexeme", i); exit_immediately(-1); } } } #endif /* ifdef USE_PRETTYSRC */