/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: html
* Author name: Zach Smith
* Create date: 18 Sep 01
* Purpose: HTML-specific output module
*----------------------------------------------------------------------
* Changes:
* 01 Aug 01, tuorfa@yahoo.com: code moved over from convert.c
* 03 Aug 01, tuorfa@yahoo.com: removed null entries to save space
* 08 Aug 01, tuorfa@yahoo.com, gommer@gmx.net: fixed/added some ANSI chars
* 18 Sep 01, tuorfa@yahoo.com: moved character sets into html.c etc
* 22 Sep 01, tuorfa@yahoo.com: added function-level comment blocks
*--------------------------------------------------------------------*/
#include <stdio.h>
#include <string.h>
#include "malloc.h"
#include "defs.h"
#include "error.h"
#include "main.h"
#include "output.h"
/*
 * Translation table for 7-bit ASCII, one entry per character code from
 * 0x20 through 0x7f; entry i is the HTML text for character 0x20 + i.
 *
 * The three characters that are markup-significant in HTML text
 * ('&', '<' and '>') are written as entity references so that document
 * text can never be mistaken for (or inject) tags in the output.
 * 0x7f (DEL) has no printable form and translates to the empty string.
 */
static char* ascii [96] = {
/* 0x20 */ " ", "!", "\"", "#", "$", "%", "&amp;", "'",
/* 0x28 */ "(", ")", "*", "+", ",", "-", ".", "/",
/* 0x30 */ "0", "1", "2", "3", "4", "5", "6", "7",
/* 0x38 */ "8", "9", ":", ";", "&lt;", "=", "&gt;", "?",
/* 0x40 */ "@", "A", "B", "C", "D", "E", "F", "G",
/* 0x48 */ "H", "I", "J", "K", "L", "M", "N", "O",
/* 0x50 */ "P", "Q", "R", "S", "T", "U", "V", "W",
/* 0x58 */ "X", "Y", "Z", "[", "\\", "]", "^", "_",
/* 0x60 */ "`", "a", "b", "c", "d", "e", "f", "g",
/* 0x68 */ "h", "i", "j", "k", "l", "m", "n", "o",
/* 0x70 */ "p", "q", "r", "s", "t", "u", "v", "w",
/* 0x78 */ "x", "y", "z", "{", "|", "}", "~", "",
};
/*
 * Translation table for the ANSI (Windows-1252) character set.
 * Entry i is the HTML text for character 0x82 + i; html_init registers
 * the table for the range 0x82..0xff, so it must hold exactly 126 entries.
 * NULL marks a code for which no translation is provided (0x8d, 0x8f,
 * 0x90 and 0x9d are unassigned in Windows-1252).
 *
 * Every entry is plain ASCII: characters are written as HTML entity
 * references instead of raw bytes, so the generated page does not depend
 * on the encoding this source file was saved in, and otherwise invisible
 * characters (no-break space at 0xa0, soft hyphen at 0xad) are explicit.
 * The curly quotes 0x91..0x94 are deliberately rendered with the ASCII
 * approximations ` ' `` '' rather than entities.
 */
static char* ansi [] = {
/* 0x82 */ "&sbquo;", "&fnof;",
/* 0x84 */ "&bdquo;", "&hellip;", "&dagger;", "&Dagger;",
/* 0x88 */ "&circ;", "&permil;", "&Scaron;", "&lsaquo;",
/* 0x8c */ "&OElig;", NULL, "&#x17D;" /* Z caron */, NULL,
/* 0x90 */ NULL, "`", "'", "``", "''", "&bull;", "&ndash;", "&mdash;",
/* 0x98 */ "&tilde;", "&trade;", "&scaron;", "&rsaquo;",
/* 0x9c */ "&oelig;", NULL, "&#x17E;" /* z caron */, "&Yuml;",
/* 0xa0 */ "&nbsp;", "&iexcl;", "&cent;", "&pound;", "&curren;", "&yen;", "&brvbar;", "&sect;",
/* 0xa8 */ "&uml;", "&copy;", "&ordf;", "&laquo;", "&not;", "&shy;", "&reg;", "&macr;",
/* 0xb0 */ "&deg;", "&plusmn;", "&sup2;", "&sup3;", "&acute;", "&micro;", "&para;", "&middot;",
/* 0xb8 */ "&cedil;", "&sup1;", "&ordm;", "&raquo;", "&frac14;", "&frac12;", "&frac34;", "&iquest;",
/* 0xc0 */ "&Agrave;", "&Aacute;", "&Acirc;", "&Atilde;", "&Auml;", "&Aring;", "&AElig;", "&Ccedil;",
/* 0xc8 */ "&Egrave;", "&Eacute;", "&Ecirc;", "&Euml;", "&Igrave;", "&Iacute;", "&Icirc;", "&Iuml;",
/* 0xd0 */ "&ETH;", "&Ntilde;", "&Ograve;", "&Oacute;", "&Ocirc;", "&Otilde;", "&Ouml;", "&times;",
/* 0xd8 */ "&Oslash;", "&Ugrave;", "&Uacute;", "&Ucirc;", "&Uuml;", "&Yacute;", "&THORN;", "&szlig;",
/* 0xe0 */ "&agrave;", "&aacute;", "&acirc;", "&atilde;", "&auml;", "&aring;", "&aelig;", "&ccedil;",
/* 0xe8 */ "&egrave;", "&eacute;", "&ecirc;", "&euml;", "&igrave;", "&iacute;", "&icirc;", "&iuml;",
/* 0xf0 */ "&eth;", "&ntilde;", "&ograve;", "&oacute;", "&ocirc;", "&otilde;", "&ouml;", "&divide;",
/* 0xf8 */ "&oslash;", "&ugrave;", "&uacute;", "&ucirc;", "&uuml;", "&yacute;", "&thorn;", "&yuml;",
};
/*
 * Translation table for the Macintosh (Mac OS Roman) character set.
 * Entry i is the HTML text for character 0xa4 + i; html_init registers
 * the table for the range 0xa4..0xd5, so it is sized to exactly that
 * range (50 entries). NULL marks a code with no translation provided.
 *
 * Earlier revisions listed only 8 entries per row although the rows are
 * labelled 0xa4/0xb0/0xc0/0xd0, which left the table 20 entries short of
 * the registered range and shifted the dash/quote entries onto the wrong
 * character codes. Each row below now holds as many entries as its label
 * implies, and the entries follow the Mac OS Roman assignments:
 * 0xa4 section sign, 0xa5 bullet, 0xd0 en dash, 0xd1 em dash,
 * 0xd2/0xd3 double quotes, 0xd4/0xd5 single quotes.
 */
static char* mac [0xd5 - 0xa4 + 1] = {
/* 0xa4 */ "&sect;", "&bull;", NULL, NULL,
/* 0xa8 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* 0xb0 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* 0xb8 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* 0xc0 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* 0xc8 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* 0xd0 */ "&ndash;", "&mdash;", "&ldquo;", "&rdquo;", "&lsquo;", "&rsquo;",
};
/*
 * Translation table for IBM code page 437.
 * Entry i is the HTML text for character 0x80 + i; html_init registers
 * the table for the range 0x80..0xff, so it must hold exactly 128 entries.
 *
 * Every entry is plain ASCII: Latin-1, Greek and common math characters
 * use HTML named entities, everything else uses a hexadecimal numeric
 * character reference (&#xHHHH; where HHHH is the Unicode code point).
 * Upper- and lower-case letters are distinct entries (e.g. 0x80 is
 * C-cedilla, 0x87 is c-cedilla).
 */
static char* cp437 [] = {
/* 0x80 */ "&Ccedil;",
/* 0x81 */ "&uuml;",
/* 0x82 */ "&eacute;",
/* 0x83 */ "&acirc;",
/* 0x84 */ "&auml;",
/* 0x85 */ "&agrave;",
/* 0x86 */ "&aring;",
/* 0x87 */ "&ccedil;",
/* 0x88 */ "&ecirc;",
/* 0x89 */ "&euml;",
/* 0x8a */ "&egrave;",
/* 0x8b */ "&iuml;",
/* 0x8c */ "&icirc;",
/* 0x8d */ "&igrave;",
/* 0x8e */ "&Auml;",
/* 0x8f */ "&Aring;",
/* 0x90 */ "&Eacute;",
/* 0x91 */ "&aelig;",
/* 0x92 */ "&AElig;",
/* 0x93 */ "&ocirc;",
/* 0x94 */ "&ouml;",
/* 0x95 */ "&ograve;",
/* 0x96 */ "&ucirc;",
/* 0x97 */ "&ugrave;",
/* 0x98 */ "&yuml;",
/* 0x99 */ "&Ouml;",
/* 0x9a */ "&Uuml;",
/* 0x9b */ "&cent;",
/* 0x9c */ "&pound;",
/* 0x9d */ "&yen;",
/* 0x9e */ "&#x20A7;", /* peseta */
/* 0x9f */ "&fnof;", /* small f with hook */
/* 0xa0 */ "&aacute;",
/* 0xa1 */ "&iacute;",
/* 0xa2 */ "&oacute;",
/* 0xa3 */ "&uacute;",
/* 0xa4 */ "&ntilde;",
/* 0xa5 */ "&Ntilde;",
/* 0xa6 */ "&ordf;",
/* 0xa7 */ "&ordm;",
/* 0xa8 */ "&iquest;",
/* 0xa9 */ "&#x2310;", /* reversed not */
/* 0xaa */ "&not;",
/* 0xab */ "&frac12;",
/* 0xac */ "&frac14;",
/* 0xad */ "&iexcl;",
/* 0xae */ "&laquo;",
/* 0xaf */ "&raquo;",
/* 0xb0 */ "&#x2591;", /* light shade */
/* 0xb1 */ "&#x2592;", /* med. shade */
/* 0xb2 */ "&#x2593;", /* dark shade */
/* 0xb3 */ "&#x2502;", /* box-draw light vert. */
/* 0xb4 */ "&#x2524;", /* box-draw light vert. + lt. */
/* 0xb5 */ "&#x2561;", /* box-draw vert. sgl. + lt. dbl. */
/* 0xb6 */ "&#x2562;", /* box-draw vert. dbl. + lt. sgl. */
/* 0xb7 */ "&#x2556;", /* box-draw dn. dbl. + lt. sgl. */
/* 0xb8 */ "&#x2555;", /* box-draw dn. sgl. + lt. dbl. */
/* 0xb9 */ "&#x2563;", /* box-draw dbl. vert. + lt. */
/* 0xba */ "&#x2551;", /* box-draw dbl. vert. */
/* 0xbb */ "&#x2557;", /* box-draw dbl. dn. + lt. */
/* 0xbc */ "&#x255D;", /* box-draw dbl. up + lt. */
/* 0xbd */ "&#x255C;", /* box-draw up dbl. + lt. sgl. */
/* 0xbe */ "&#x255B;", /* box-draw up sgl. + lt. dbl. */
/* 0xbf */ "&#x2510;", /* box-draw light dn. + lt. */
/* 0xc0 */ "&#x2514;", /* box-draw light up + rt. */
/* 0xc1 */ "&#x2534;", /* box-draw light up + horiz. */
/* 0xc2 */ "&#x252C;", /* box-draw light dn. + horiz. */
/* 0xc3 */ "&#x251C;", /* box-draw light vert. + rt. */
/* 0xc4 */ "&#x2500;", /* box-draw light horiz. */
/* 0xc5 */ "&#x253C;", /* box-draw light vert. + horiz. */
/* 0xc6 */ "&#x255E;", /* box-draw vert. sgl. + rt. dbl. */
/* 0xc7 */ "&#x255F;", /* box-draw vert. dbl. + rt. sgl. */
/* 0xc8 */ "&#x255A;", /* box-draw dbl. up + rt. */
/* 0xc9 */ "&#x2554;", /* box-draw dbl. dn. + rt. */
/* 0xca */ "&#x2569;", /* box-draw dbl. up + horiz. */
/* 0xcb */ "&#x2566;", /* box-draw dbl. dn. + horiz. */
/* 0xcc */ "&#x2560;", /* box-draw dbl. vert. + rt. */
/* 0xcd */ "&#x2550;", /* box-draw dbl. horiz. */
/* 0xce */ "&#x256C;", /* box-draw dbl. vert. + horiz. */
/* 0xcf */ "&#x2567;", /* box-draw up sgl. + horiz. dbl. */
/* 0xd0 */ "&#x2568;", /* box-draw up dbl. + horiz. sgl. */
/* 0xd1 */ "&#x2564;", /* box-draw dn. sgl. + horiz. dbl. */
/* 0xd2 */ "&#x2565;", /* box-draw dn. dbl. + horiz. sgl. */
/* 0xd3 */ "&#x2559;", /* box-draw up dbl. + rt. sgl. */
/* 0xd4 */ "&#x2558;", /* box-draw up sgl. + rt. dbl. */
/* 0xd5 */ "&#x2552;", /* box-draw dn. sgl. + rt. dbl. */
/* 0xd6 */ "&#x2553;", /* box-draw dn. dbl. + rt. sgl. */
/* 0xd7 */ "&#x256B;", /* box-draw vert. dbl. + horiz. sgl. */
/* 0xd8 */ "&#x256A;", /* box-draw vert. sgl. + horiz. dbl. */
/* 0xd9 */ "&#x2518;", /* box-draw light up + lt. */
/* 0xda */ "&#x250C;", /* box-draw light dn. + rt. */
/* 0xdb */ "&#x2588;", /* full block */
/* 0xdc */ "&#x2584;", /* lower 1/2 block */
/* 0xdd */ "&#x258C;", /* lt. 1/2 block */
/* 0xde */ "&#x2590;", /* rt. 1/2 block */
/* 0xdf */ "&#x2580;", /* upper 1/2 block */
/* 0xe0 */ "&alpha;", /* greek small alpha */
/* 0xe1 */ "&szlig;",
/* 0xe2 */ "&Gamma;", /* greek cap gamma */
/* 0xe3 */ "&pi;", /* greek small pi */
/* 0xe4 */ "&Sigma;", /* greek cap sigma */
/* 0xe5 */ "&sigma;", /* greek small sigma */
/* 0xe6 */ "&micro;",
/* 0xe7 */ "&tau;", /* greek small tau */
/* 0xe8 */ "&Phi;", /* greek cap phi */
/* 0xe9 */ "&Theta;", /* greek cap theta */
/* 0xea */ "&Omega;", /* greek cap omega */
/* 0xeb */ "&delta;", /* greek small delta */
/* 0xec */ "&infin;", /* inf. */
/* 0xed */ "&phi;", /* greek small phi */
/* 0xee */ "&epsilon;", /* greek small epsilon */
/* 0xef */ "&cap;", /* intersect */
/* 0xf0 */ "&equiv;", /* identical */
/* 0xf1 */ "&plusmn;",
/* 0xf2 */ "&ge;", /* greater-than or equal to */
/* 0xf3 */ "&le;", /* less-than or equal to */
/* 0xf4 */ "&#x2320;", /* top 1/2 integral */
/* 0xf5 */ "&#x2321;", /* bottom 1/2 integral */
/* 0xf6 */ "&divide;",
/* 0xf7 */ "&asymp;", /* almost = */
/* 0xf8 */ "&deg;",
/* 0xf9 */ "&#x2219;", /* bullet op */
/* 0xfa */ "&middot;",
/* 0xfb */ "&radic;", /* sqrt */
/* 0xfc */ "&#x207F;", /* super-script small n */
/* 0xfd */ "&sup2;",
/* 0xfe */ "&#x25A0;", /* black square */
/* 0xff */ "&nbsp;",
};
/*
 * Translation table for IBM code page 850.
 * Entry i is the HTML text for character 0x80 + i; html_init registers
 * the table for the range 0x80..0xff, so it must hold exactly 128 entries.
 *
 * Every entry is plain ASCII: Latin-1 characters use HTML named
 * entities, everything else uses a hexadecimal numeric character
 * reference (&#xHHHH; where HHHH is the Unicode code point). Upper- and
 * lower-case letters are distinct entries (e.g. 0x9b is o-slash,
 * 0x9d is O-slash), and the otherwise invisible soft hyphen (0xf0) and
 * no-break space (0xff) are spelled out as &shy; and &nbsp;.
 */
static char* cp850 [] = {
/* 0x80 */ "&Ccedil;",
/* 0x81 */ "&uuml;",
/* 0x82 */ "&eacute;",
/* 0x83 */ "&acirc;",
/* 0x84 */ "&auml;",
/* 0x85 */ "&agrave;",
/* 0x86 */ "&aring;",
/* 0x87 */ "&ccedil;",
/* 0x88 */ "&ecirc;",
/* 0x89 */ "&euml;",
/* 0x8a */ "&egrave;",
/* 0x8b */ "&iuml;",
/* 0x8c */ "&icirc;",
/* 0x8d */ "&igrave;",
/* 0x8e */ "&Auml;",
/* 0x8f */ "&Aring;",
/* 0x90 */ "&Eacute;",
/* 0x91 */ "&aelig;",
/* 0x92 */ "&AElig;",
/* 0x93 */ "&ocirc;",
/* 0x94 */ "&ouml;",
/* 0x95 */ "&ograve;",
/* 0x96 */ "&ucirc;",
/* 0x97 */ "&ugrave;",
/* 0x98 */ "&yuml;",
/* 0x99 */ "&Ouml;",
/* 0x9a */ "&Uuml;",
/* 0x9b */ "&oslash;",
/* 0x9c */ "&pound;",
/* 0x9d */ "&Oslash;",
/* 0x9e */ "&times;",
/* 0x9f */ "&fnof;", /* small f with hook */
/* 0xa0 */ "&aacute;",
/* 0xa1 */ "&iacute;",
/* 0xa2 */ "&oacute;",
/* 0xa3 */ "&uacute;",
/* 0xa4 */ "&ntilde;",
/* 0xa5 */ "&Ntilde;",
/* 0xa6 */ "&ordf;",
/* 0xa7 */ "&ordm;",
/* 0xa8 */ "&iquest;",
/* 0xa9 */ "&reg;",
/* 0xaa */ "&not;",
/* 0xab */ "&frac12;",
/* 0xac */ "&frac14;",
/* 0xad */ "&iexcl;",
/* 0xae */ "&laquo;",
/* 0xaf */ "&raquo;",
/* 0xb0 */ "&#x2591;", /* light shade */
/* 0xb1 */ "&#x2592;", /* med. shade */
/* 0xb2 */ "&#x2593;", /* dark shade */
/* 0xb3 */ "&#x2502;", /* box-draw light vert. */
/* 0xb4 */ "&#x2524;", /* box-draw light vert. + lt. */
/* 0xb5 */ "&Aacute;",
/* 0xb6 */ "&Acirc;",
/* 0xb7 */ "&Agrave;",
/* 0xb8 */ "&copy;",
/* 0xb9 */ "&#x2563;", /* box-draw dbl. vert. + lt. */
/* 0xba */ "&#x2551;", /* box-draw dbl. vert. */
/* 0xbb */ "&#x2557;", /* box-draw dbl. dn. + lt. */
/* 0xbc */ "&#x255D;", /* box-draw dbl. up + lt. */
/* 0xbd */ "&cent;",
/* 0xbe */ "&yen;",
/* 0xbf */ "&#x2510;", /* box-draw light dn. + lt. */
/* 0xc0 */ "&#x2514;", /* box-draw light up + rt. */
/* 0xc1 */ "&#x2534;", /* box-draw light up + horiz. */
/* 0xc2 */ "&#x252C;", /* box-draw light dn. + horiz. */
/* 0xc3 */ "&#x251C;", /* box-draw light vert. + rt. */
/* 0xc4 */ "&#x2500;", /* box-draw light horiz. */
/* 0xc5 */ "&#x253C;", /* box-draw light vert. + horiz. */
/* 0xc6 */ "&atilde;",
/* 0xc7 */ "&Atilde;",
/* 0xc8 */ "&#x255A;", /* box-draw dbl. up + rt. */
/* 0xc9 */ "&#x2554;", /* box-draw dbl. dn. + rt. */
/* 0xca */ "&#x2569;", /* box-draw dbl. up + horiz. */
/* 0xcb */ "&#x2566;", /* box-draw dbl. dn. + horiz. */
/* 0xcc */ "&#x2560;", /* box-draw dbl. vert. + rt. */
/* 0xcd */ "&#x2550;", /* box-draw dbl. horiz. */
/* 0xce */ "&#x256C;", /* box-draw dbl. vert. + horiz. */
/* 0xcf */ "&curren;",
/* 0xd0 */ "&eth;",
/* 0xd1 */ "&ETH;",
/* 0xd2 */ "&Ecirc;",
/* 0xd3 */ "&Euml;",
/* 0xd4 */ "&Egrave;",
/* 0xd5 */ "&#x131;", /* small dotless i */
/* 0xd6 */ "&Iacute;",
/* 0xd7 */ "&Icirc;",
/* 0xd8 */ "&Iuml;",
/* 0xd9 */ "&#x2518;", /* box-draw light up + lt. */
/* 0xda */ "&#x250C;", /* box-draw light dn. + rt. */
/* 0xdb */ "&#x2588;", /* full-block */
/* 0xdc */ "&#x2584;", /* lower 1/2 block */
/* 0xdd */ "&brvbar;",
/* 0xde */ "&Igrave;",
/* 0xdf */ "&#x2580;", /* upper 1/2 block */
/* 0xe0 */ "&Oacute;",
/* 0xe1 */ "&szlig;",
/* 0xe2 */ "&Ocirc;",
/* 0xe3 */ "&Ograve;",
/* 0xe4 */ "&otilde;",
/* 0xe5 */ "&Otilde;",
/* 0xe6 */ "&micro;",
/* 0xe7 */ "&thorn;",
/* 0xe8 */ "&THORN;",
/* 0xe9 */ "&Uacute;",
/* 0xea */ "&Ucirc;",
/* 0xeb */ "&Ugrave;",
/* 0xec */ "&yacute;",
/* 0xed */ "&Yacute;",
/* 0xee */ "&macr;",
/* 0xef */ "&acute;",
/* 0xf0 */ "&shy;",
/* 0xf1 */ "&plusmn;",
/* 0xf2 */ "&#x2017;", /* dbl. lowline */
/* 0xf3 */ "&frac34;",
/* 0xf4 */ "&para;",
/* 0xf5 */ "&sect;",
/* 0xf6 */ "&divide;",
/* 0xf7 */ "&cedil;",
/* 0xf8 */ "&deg;",
/* 0xf9 */ "&uml;",
/* 0xfa */ "&middot;",
/* 0xfb */ "&sup1;",
/* 0xfc */ "&sup3;",
/* 0xfd */ "&sup2;",
/* 0xfe */ "&#x25A0;", /* black square */
/* 0xff */ "&nbsp;",
};
/*
 * Code of the last character covered by a translation table whose first
 * entry stands for character code `first'. Deriving the upper bound from
 * the table's real element count keeps the registered range from running
 * past the end of the table if the two ever get out of step.
 * `table' must be an array (not a pointer).
 */
#define HTML_TABLE_LAST_CHAR(first, table) \
	((first) + (int) (sizeof (table) / sizeof ((table)[0])) - 1)

/*========================================================================
 * Name: html_init
 * Purpose: Generates the HTML output personality: the tag strings used
 *          for each document/paragraph/character construct, the
 *          simulate_* flags, and the per-charset translation tables
 *          together with the character range each table covers.
 * Args: None.
 * Returns: OutputPersonality, or NULL if op_create() yields NULL.
 *=======================================================================*/
OutputPersonality *
html_init (void)
{
	OutputPersonality* op;

	op = op_create();
	/* op_create() is defined elsewhere; guard in case it can fail
	 * rather than dereferencing a null pointer below.
	 */
	if (!op)
		return NULL;

	/* Document structure */
	op->comment_begin = "<!--- ";
	op->comment_end = " --->\n";
	op->document_begin = "<html>\n";
	op->document_end = "</html>\n";
	op->header_begin = "<head>\n";
	op->header_end = "</head>\n";
	op->document_title_begin = "<title>";
	op->document_title_end = "</title>\n";
	op->document_author_begin = "<!--author: ";
	op->document_author_end = "--->\n";
	op->document_changedate_begin = "<!--changed: ";
	op->document_changedate_end = "--->\n";
	op->body_begin = "<body>";
	op->body_end = "</body>\n";

	/* Paragraphs and alignment */
	op->paragraph_begin = "<p>";
	op->paragraph_end = "</p>\n";
	op->center_begin = "<center>";
	op->center_end = "</center>\n";
	op->justify_begin = "<div align=justify>\n";
	op->justify_end = "</div>\n";
	op->align_left_begin = "<div align=left>\n";
	op->align_left_end = "</div>\n";
	op->align_right_begin = "<div align=right>\n";
	op->align_right_end = "</div>\n";

	/* An ordinary space would be collapsed by the browser, so a forced
	 * space has to be a no-break space entity.
	 */
	op->forced_space = "&nbsp;";
	op->line_break = "<br>\n";
	op->page_break = "<p><hr><p>\n";

	/* Links and images */
	op->hyperlink_begin = "<a href=\"";
	op->hyperlink_end = "\">hyperlink</a>";
	op->imagelink_begin = "<img src=\"";
	op->imagelink_end = "\">";

	/* Tables */
	op->table_begin = "<table border=2>\n";
	op->table_end = "</table>\n";
	op->table_row_begin = "<tr>";
	op->table_row_end = "</tr>\n";
	op->table_cell_begin = "<td>";
	op->table_cell_end = "</td>\n";

	/* Character attributes */
	op->font_begin = "<font face=\"%s\">";
	op->font_end = "</font>";
	op->fontsize_begin = "<span style=\"font-size:%spt\">";
	op->fontsize_end = "</span>";
	op->fontsize8_begin = "<font size=1>";
	op->fontsize8_end = "</font>";
	op->fontsize10_begin = "<font size=2>";
	op->fontsize10_end = "</font>";
	op->fontsize12_begin = "<font size=3>";
	op->fontsize12_end = "</font>";
	op->fontsize14_begin = "<font size=4>";
	op->fontsize14_end = "</font>";
	op->fontsize18_begin = "<font size=5>";
	op->fontsize18_end = "</font>";
	op->fontsize24_begin = "<font size=6>";
	op->fontsize24_end = "</font>";
	op->smaller_begin = "<small>";
	op->smaller_end = "</small>";
	op->bigger_begin = "<big>";
	op->bigger_end = "</big>";
	op->foreground_begin = "<font color=\"%s\">";
	op->foreground_end = "</font>";
	op->background_begin = "<span style=\"background:%s\">";
	op->background_end = "</span>";
	op->bold_begin = "<b>";
	op->bold_end = "</b>";
	op->italic_begin = "<i>";
	op->italic_end = "</i>";
	op->underline_begin = "<u>";
	op->underline_end = "</u>";
	op->dbl_underline_begin = "<u>";
	op->dbl_underline_end = "</u>";
	op->superscript_begin = "<sup>";
	op->superscript_end = "</sup>";
	op->subscript_begin = "<sub>";
	op->subscript_end = "</sub>";
	op->strikethru_begin = "<s>";
	op->strikethru_end = "</s>";
	op->dbl_strikethru_begin = "<s>";
	op->dbl_strikethru_end = "</s>";
	op->emboss_begin="<span style=\"background:gray\"><font color=black>";
	op->emboss_end = "</font></span>";
	/* "navy" is the HTML colour keyword; "navyblue" is not one. */
	op->engrave_begin = "<span style=\"background:gray\"><font color=navy>";
	op->engrave_end = "</font></span>";
	op->shadow_begin= "<span style=\"background:gray\">";
	op->shadow_end= "</span>";
	op->outline_begin= "<span style=\"background:gray\">";
	op->outline_end= "</span>";
	op->expand_begin = "<span style=\"letter-spacing: %s\">";
	op->expand_end = "</span>";

	/* Lists: bulleted (point) lists are unordered, numeric lists are
	 * ordered.
	 */
	op->pointlist_begin = "<ul>\n";
	op->pointlist_end = "</ul>\n";
	op->pointlist_item_begin = "<li>";
	op->pointlist_item_end = "</li>\n";
	op->numericlist_begin = "<ol>\n";
	op->numericlist_end = "</ol>\n";
	op->numericlist_item_begin = "<li>";
	op->numericlist_item_end = "</li>\n";

	op->simulate_small_caps = TRUE;
	op->simulate_all_caps = TRUE;
	op->simulate_word_underline = TRUE;

	/* Character translation tables. Each *_first_char is the character
	 * code of the table's entry 0; each *_last_char is computed from
	 * the table's length so the range always matches the table.
	 */
	op->ascii_translation_table = ascii;

	op->ansi_translation_table = ansi;
	op->ansi_first_char = 0x82;
	op->ansi_last_char = HTML_TABLE_LAST_CHAR (0x82, ansi);

	op->cp437_translation_table = cp437;
	op->cp437_first_char = 0x80;
	op->cp437_last_char = HTML_TABLE_LAST_CHAR (0x80, cp437);

	op->cp850_translation_table = cp850;
	op->cp850_first_char = 0x80;
	op->cp850_last_char = HTML_TABLE_LAST_CHAR (0x80, cp850);

	op->mac_translation_table = mac;
	op->mac_first_char = 0xa4;
	op->mac_last_char = HTML_TABLE_LAST_CHAR (0xa4, mac);

	/* Quote characters, rendered with plain ASCII approximations */
	op->chars.right_quote = "'";
	op->chars.left_quote = "`";
	op->chars.right_dbl_quote = "''";
	op->chars.left_dbl_quote = "``";

	return op;
}