/* ** Functions associated with LYCharSets.c and the Lynx version of HTML.c - FM ** ========================================================================== */ #include #include #define Lynx_HTML_Handler #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern BOOL HTPassEightBitRaw; extern BOOL HTPassEightBitNum; extern BOOL HTPassHighCtrlRaw; extern BOOL HTPassHighCtrlNum; /* * Used for nested lists. - FM */ PUBLIC int OL_CONTINUE = -29999; /* flag for whether CONTINUE is set */ PUBLIC int OL_VOID = -29998; /* flag for whether a count is set */ /* ** This function converts any ampersands in allocated ** strings to "&". If isTITLE is TRUE, it also ** converts any angle-brackets to "<" or ">". - FM */ PUBLIC void LYEntify ARGS2( char **, str, BOOLEAN, isTITLE) { char *p = *str; char *q = NULL, *cp = NULL; int amps = 0, lts = 0, gts = 0; #ifdef CJK_EX enum _state { S_text, S_esc, S_dollar, S_paren, S_nonascii_text, S_dollar_paren } state = S_text; int in_sjis = 0; #endif if (p == NULL || *p == '\0') return; /* * Count the ampersands. - FM */ while ((*p != '\0') && (q = strchr(p, '&')) != NULL) { amps++; p = (q + 1); } /* * Count the left-angle-brackets, if needed. - FM */ if (isTITLE == TRUE) { p = *str; while ((*p != '\0') && (q = strchr(p, '<')) != NULL) { lts++; p = (q + 1); } } /* * Count the right-angle-brackets, if needed. - FM */ if (isTITLE == TRUE) { p = *str; while ((*p != '\0') && (q = strchr(p, '>')) != NULL) { gts++; p = (q + 1); } } /* * Check whether we need to convert anything. - FM */ if (amps == 0 && lts == 0 && gts == 0) return; /* * Allocate space and convert. - FM */ q = (char *)calloc(1, (strlen(*str) + (4 * amps) + (3 * lts) + (3 * gts) + 1)); if ((cp = q) == NULL) outofmem(__FILE__, "LYEntify"); for (p = *str; *p; p++) { #ifdef CJK_EX if (HTCJK != NOCJK) { switch(state) { case S_text: if (*p == '\033') { state = S_esc; *q++ = *p; continue; } break; case S_esc: if (*p == '$') { state = S_dollar; *q++ = *p; continue; } else if (*p == '(') { state = S_paren; *q++ = *p; continue; } else { state = S_text; *q++ = *p; continue; } case S_dollar: if (*p == '@' || *p == 'B' || *p == 'A') { state = S_nonascii_text; *q++ = *p; continue; } else if (*p == '(') { state = S_dollar_paren; *q++ = *p; continue; } else { state = S_text; *q++ = *p; continue; } case S_dollar_paren: if (*p == 'C') { state = S_nonascii_text; *q++ = *p; continue; } else { state = S_text; *q++ = *p; continue; } case S_paren: if (*p == 'B' || *p == 'J' || *p =='T') { state = S_text; *q++ = *p; continue; } else if (*p == 'I') { state = S_nonascii_text; *q++ = *p; continue; } /* FALLTHRU */ case S_nonascii_text: if (*p == '\033') state = S_esc; *q++ = *p; continue; default: break; } if (*(p+1) != '\0' && (IS_EUC((unsigned char)*p, (unsigned char)*(p+1)) || IS_SJIS((unsigned char)*p, (unsigned char)*(p+1), in_sjis) || IS_BIG5((unsigned char)*p, (unsigned char)*(p+1)))) { *q++ = *p++; *q++ = *p; continue; } } #endif if (*p == '&') { *q++ = '&'; *q++ = 'a'; *q++ = 'm'; *q++ = 'p'; *q++ = ';'; } else if (isTITLE && *p == '<') { *q++ = '&'; *q++ = 'l'; *q++ = 't'; *q++ = ';'; } else if (isTITLE && *p == '>') { *q++ = '&'; *q++ = 'g'; *q++ = 't'; *q++ = ';'; } else { *q++ = *p; } } *q = '\0'; FREE(*str); *str = cp; } /* ** This function trims characters <= that of a space (32), ** including HT_NON_BREAK_SPACE (1) and HT_EN_SPACE (2), ** but not ESC, from the heads of strings. - FM */ PUBLIC void LYTrimHead ARGS1( char *, str) { int i = 0, j; if (!str || *str == '\0') return; while (str[i] != '\0' && WHITE(str[i]) && (unsigned char)str[i] != (unsigned char)CH_ESC) /* S/390 -- gil -- 1669 */ i++; if (i > 0) { for (j = 0; str[i] != '\0'; i++) { str[j++] = str[i]; } str[j] = '\0'; } } /* ** This function trims characters <= that of a space (32), ** including HT_NON_BREAK_SPACE (1), HT_EN_SPACE (2), and ** ESC from the tails of strings. - FM */ PUBLIC void LYTrimTail ARGS1( char *, str) { int i; if (!str || *str == '\0') return; i = (strlen(str) - 1); while (i >= 0) { if (WHITE(str[i])) str[i] = '\0'; else break; i--; } } /* ** This function should receive a pointer to the start ** of a comment. It returns a pointer to the end ('>') ** character of comment, or it's best guess if the comment ** is invalid. - FM */ PUBLIC char *LYFindEndOfComment ARGS1( char *, str) { char *cp, *cp1; enum comment_state { start1, start2, end1, end2 } state; if (str == NULL) /* * We got NULL, so return NULL. - FM */ return NULL; if (strncmp(str, "