diff options
author | Brent Smith <bmsmith@src.gnome.org> | 2006-01-09 05:01:01 +0000 |
---|---|---|
committer | Brent Smith <bmsmith@src.gnome.org> | 2006-01-09 05:01:01 +0000 |
commit | 372d4c2d1b966df692e7c6b3b9a88dcc820f1ff2 (patch) | |
tree | 3ebc136059fc6f80f3af40bbbb45e8e9774a697b | |
parent | cc67df20f8e36662d72c9a229c2933d1338e60cc (diff) | |
download | yelp-372d4c2d1b966df692e7c6b3b9a88dcc820f1ff2.tar.gz |
Brent Smith <gnome@nextreality.net>
Brent Smith <gnome@nextreality.net>
* src/yelp-man-parser.c: (yelp_man_parser_parse_file),
(yelp_man_parser_free), (parser_parse_line), (args_concat_all),
(macro_ignore_handler), (macro_bold_small_italic_handler),
(macro_roman_bold_small_italic_handler),
(macro_new_paragraph_handler), (macro_insert_self_handler),
(macro_title_header_handler), (macro_section_header_handler),
(macro_spacing_handler), (macro_define_handler),
(macro_tp_handler), (macro_ip_handler),
(macro_hanging_paragraph_handler), (macro_mandoc_handler),
(macro_url_handler), (macro_rs_re_handler),
(macro_mandoc_list_handler), (macro_verbatim_handler),
(is_mandoc_manual_macro_parsed), (is_mandoc_manual_macro_callable),
(macro_mandoc_utility_handler), (macro_mandoc_listitem_handler),
(macro_template_handler), (parser_handle_linetag),
(parser_read_until), (parser_escape_tags),
(parser_append_given_text_handle_escapes), (parser_append_node),
(parser_append_node_attr), (parser_parse_table):
* stylesheets/man2html.xsl:
man page parser rework, fixes #163404, #321620, #321627, #324750,
#326058
-rw-r--r-- | ChangeLog | 23 | ||||
-rw-r--r-- | src/yelp-man-parser.c | 1655 | ||||
-rw-r--r-- | stylesheets/man2html.xsl | 157 |
3 files changed, 1292 insertions, 543 deletions
@@ -1,3 +1,26 @@ +2006-01-08 Brent Smith <gnome@nextreality.net> + + * src/yelp-man-parser.c: (yelp_man_parser_parse_file), + (yelp_man_parser_free), (parser_parse_line), (args_concat_all), + (macro_ignore_handler), (macro_bold_small_italic_handler), + (macro_roman_bold_small_italic_handler), + (macro_new_paragraph_handler), (macro_insert_self_handler), + (macro_title_header_handler), (macro_section_header_handler), + (macro_spacing_handler), (macro_define_handler), + (macro_tp_handler), (macro_ip_handler), + (macro_hanging_paragraph_handler), (macro_mandoc_handler), + (macro_url_handler), (macro_rs_re_handler), + (macro_mandoc_list_handler), (macro_verbatim_handler), + (is_mandoc_manual_macro_parsed), (is_mandoc_manual_macro_callable), + (macro_mandoc_utility_handler), (macro_mandoc_listitem_handler), + (macro_template_handler), (parser_handle_linetag), + (parser_read_until), (parser_escape_tags), + (parser_append_given_text_handle_escapes), (parser_append_node), + (parser_append_node_attr), (parser_parse_table): + * stylesheets/man2html.xsl: + man page parser rework, fixes #163404, #321620, #321627, #324750, + #326058 + 2006-01-08 Shaun McCance <shaunm@gnome.org> * src/yelp-utils.c: diff --git a/src/yelp-man-parser.c b/src/yelp-man-parser.c index 9a62ddb3..f1d9ad03 100644 --- a/src/yelp-man-parser.c +++ b/src/yelp-man-parser.c @@ -26,6 +26,7 @@ #include <glib.h> #include <glib/gi18n.h> +#include <glib/gprintf.h> #include <libxml/tree.h> #include <string.h> @@ -40,25 +41,30 @@ static void parser_parse_line (YelpManParser *parser); static void parser_handle_linetag (YelpManParser *parser); -static void parser_handle_inline (YelpManParser *parser); static void parser_ensure_P (YelpManParser *parser); static void parser_read_until (YelpManParser *parser, gchar delim); static void parser_escape_tags (YelpManParser *parser, gchar **tags, gint ntags); -static void parser_append_token (YelpManParser *parser); static xmlNodePtr parser_append_text (YelpManParser *parser); static xmlNodePtr parser_append_given_text (YelpManParser *parser, gchar *text); +static void parser_append_given_text_handle_escapes + (YelpManParser *parser, + gchar *text, + gboolean make_links); static xmlNodePtr parser_append_node (YelpManParser *parser, gchar *name); +static xmlNodePtr parser_append_node_attr (YelpManParser *parser, + gchar *name, + gchar *attr, + gchar *value); static void parser_stack_push_node (YelpManParser *parser, xmlNodePtr node); static xmlNodePtr parser_stack_pop_node (YelpManParser *parser, gchar *name); static void parser_parse_table (YelpManParser *parser); -static void parser_make_link (YelpManParser *parser); typedef struct _StackElem StackElem; struct _YelpManParser { @@ -73,11 +79,10 @@ struct _YelpManParser { gchar *anc; /* The anchor point in the document */ gchar *cur; /* Our current position in the document */ - gboolean make_links; /* Allow auto-generated hyperlinks to be disabled. */ - gboolean ignore; /* when true, ignore stream until "token" is found */ - gchar *token; /* see ignore flag; we ignore the parsing stream until * this string is found in the stream */ + gboolean make_links; /* Allow auto-generated hyperlinks to be disabled. */ + gboolean ignore; /* when true, ignore stream until "token" is found */ GSList *nodeStack; }; @@ -94,6 +99,8 @@ xmlDocPtr yelp_man_parser_parse_file (YelpManParser *parser, gchar *file) { + GError **errormsg = NULL; + parser->channel = yelp_io_channel_new_file (file, NULL); if (!parser->channel) @@ -108,7 +115,7 @@ yelp_man_parser_parse_file (YelpManParser *parser, while (g_io_channel_read_line (parser->channel, &(parser->buffer), (gsize *) &(parser->length), - NULL, NULL) + NULL, errormsg) == G_IO_STATUS_NORMAL) { parser_parse_line (parser); @@ -116,6 +123,9 @@ yelp_man_parser_parse_file (YelpManParser *parser, g_free (parser->buffer); } + if (errormsg) + g_print ("Error in g_io_channel_read_line()\n"); + g_io_channel_shutdown (parser->channel, FALSE, NULL); return parser->doc; @@ -147,7 +157,9 @@ yelp_man_parser_parse_doc (YelpManParser *parser, void yelp_man_parser_free (YelpManParser *parser) { - g_io_channel_unref (parser->channel); + if (parser->channel) + g_io_channel_unref (parser->channel); + g_free (parser); } @@ -163,8 +175,8 @@ parser_parse_line (YelpManParser *parser) { gchar *ptr; ptr = strstr (parser->buffer, parser->token); if (ptr != NULL) { - parser->cur = (ptr+2); - parser->anc = parser->cur; + while (PARSER_CUR) + parser->anc = ++parser->cur; g_free (parser->token); parser->ignore = FALSE; } else { @@ -202,349 +214,950 @@ parser_parse_line (YelpManParser *parser) { } } -static void -parser_handle_linetag (YelpManParser *parser) { - gchar c, *str; - xmlNodePtr tmpNode; +/* creates a single string from all the macro arguments */ +static gchar * +args_concat_all (GSList *args) +{ + GSList *ptr = NULL; + gchar **str_array = NULL; + gchar *retval = NULL; + gint i = 0; + + if (!args) + return NULL; - while (PARSER_CUR - && *(parser->cur) != ' ' - && ( (*parser->cur != '\\') || ((*parser->cur == '\\') && (*(parser->cur+1) == '\"')) ) - && *(parser->cur) != '\n') - parser->cur++; + str_array = g_malloc0 ((sizeof (gchar *)) * (g_slist_length (args)+1) ); - c = *(parser->cur); - *(parser->cur) = '\0'; + ptr = args; + while (ptr && ptr->data) { + str_array[i++] = ptr->data; + ptr = g_slist_next (ptr); + } + + str_array[i] = NULL; - /* skip the '.' by adding 1 */ - str = g_strdup (parser->anc + 1); - *(parser->cur) = c; + retval = g_strjoinv (" ", str_array); - if (*(parser->cur) == ' ') - parser->cur++; - parser->anc = parser->cur; + g_free (str_array); - /* \" denotes a comment, ignore it */ - if (g_str_equal (str, "\\\"")) { - while (PARSER_CUR) - parser->anc = ++parser->cur; + return retval; +} + +/* handler to ignore a macro by reading until the null character */ +static void +macro_ignore_handler (YelpManParser *parser, gchar *macro, GSList *args) +{ + //g_print ("ignoring..."); + + while (PARSER_CUR) { + parser->anc = ++parser->cur; + //g_print ("-"); } - /* handle bold, italic, and small macros */ - else if (g_str_equal (str, "B") || g_str_equal (str, "I") || - g_str_equal (str, "SM")) { - parser_ensure_P (parser); - parser->ins = parser_append_node (parser, str); + + //g_print ("\n"); +} + +static void +macro_bold_small_italic_handler (YelpManParser *parser, gchar *macro, GSList *args) +{ + gchar *str = NULL; + + parser_ensure_P (parser); + parser->ins = parser_append_node (parser, macro); + + if (args && args->data) { + str = args_concat_all (args); + parser_append_given_text_handle_escapes (parser, str, TRUE); g_free (str); + } + + parser->ins = parser->ins->parent; +} + +static void +macro_roman_bold_small_italic_handler (YelpManParser *parser, gchar *macro, GSList *args) +{ + GSList *ptr = NULL; + gchar a[2], b[2]; + gboolean toggle = TRUE; + + a[0] = macro[0]; + b[0] = macro[1]; + a[1] = b[1] = '\0'; - parser_append_token (parser); + parser_ensure_P (parser); + + ptr = args; + while (ptr && ptr->data) { + if (toggle) + parser->ins = parser_append_node (parser, a); + else + parser->ins = parser_append_node (parser, b); + + parser_append_given_text_handle_escapes (parser, ptr->data, TRUE); parser->ins = parser->ins->parent; + + toggle = (toggle) ? 0 : 1; + ptr = g_slist_next (ptr); } - /* handle roman italic, bold italic, and roman bold macros */ - else if (g_str_equal (str, "IR") || g_str_equal (str, "RI") || - g_str_equal (str, "IB") || g_str_equal (str, "BI") || - g_str_equal (str, "RB") || g_str_equal (str, "BR") ) { +} - gchar a[2], b[2]; - a[0] = str[0]; b[0] = str[1]; a[1] = b[1] = '\0'; +static void +macro_new_paragraph_handler (YelpManParser *parser, gchar *macro, GSList *args) +{ + xmlNodePtr tmpNode; + + /* Clean up from 'lists'. If this is null we don't care. */ + tmpNode = parser_stack_pop_node (parser, "IP"); + + tmpNode = parser_stack_pop_node (parser, "P"); + if (tmpNode != NULL) { + parser->ins = tmpNode->parent; + } - parser_ensure_P (parser); + parser_ensure_P (parser); +} - while (PARSER_CUR && *(parser->cur) != '\n') { - parser->ins = parser_append_node (parser, a); - parser_append_token (parser); - parser->ins = parser->ins->parent; - if (PARSER_CUR && *(parser->cur) != '\n') { - parser->ins = parser_append_node (parser, b); - parser_append_token (parser); - parser->ins = parser->ins->parent; - } else break; - } - } - /* all these are to start a new paragraph */ - else if (g_str_equal (str, "P") || g_str_equal (str, "PP") || - g_str_equal (str, "LP") || g_str_equal (str, "Pp")) { +static void +macro_insert_self_handler (YelpManParser *parser, gchar *macro, GSList *args) +{ + parser_append_node (parser, macro); +} - /* Clean up from 'lists'. If this is null we don't care. */ - tmpNode = parser_stack_pop_node (parser, "IP"); +static void +macro_title_header_handler (YelpManParser *parser, gchar *macro, GSList *args) +{ + GSList *ptr = NULL; + gchar *fields[5] = { "Title", "Section", "Date", "Commentary", "Name" }; + gint i; - tmpNode = parser_stack_pop_node (parser, "P"); - if (tmpNode != NULL) { - parser->ins = tmpNode->parent; + parser->ins = parser_append_node (parser, macro); + + ptr = args; + for (i=0; i < 5; i++) { + if (ptr && ptr->data) { + parser->ins = parser_append_node (parser, fields[i]); + parser_append_given_text_handle_escapes (parser, ptr->data, FALSE); + parser->ins = parser->ins->parent; + ptr = g_slist_next (ptr); + } else + break; + } + + parser->ins = parser->ins->parent; +} + +static void +macro_section_header_handler (YelpManParser *parser, gchar *macro, GSList *args) +{ + static gint id = 0; + GIOStatus retval; + GError **errormsg = NULL; + gchar *str = NULL; + gchar *macro_uc = g_strdup (macro); + gchar *ptr; + gchar idval[20]; + + if (!args) { + retval = g_io_channel_read_line (parser->channel, + &str, + NULL, NULL, errormsg); + if (retval != G_IO_STATUS_NORMAL) { + g_warning ("g_io_channel_read_line != G_IO_STATUS_NORMAL\n"); } + } else + str = args_concat_all (args); - parser_ensure_P (parser); + for (ptr = macro_uc; *ptr != '\0'; ptr++) + *ptr = g_ascii_toupper (*ptr); + + parser_stack_pop_node (parser, "IP"); + + g_snprintf (idval, 20, "%d", ++id); + + /* Sections should be their own, well, section */ + parser->ins = xmlDocGetRootElement (parser->doc); + parser->ins = parser_append_node_attr (parser, macro_uc, "id", idval); + parser_append_given_text_handle_escapes (parser, str, FALSE); + parser->ins = parser->ins->parent; + + if (str) + g_free (str); +} + +static void +macro_spacing_handler (YelpManParser *parser, gchar *macro, GSList *args) +{ + parser->ins = parser_append_node (parser, macro); + + if (args && args->data) { + parser->ins = parser_append_node (parser, "Count"); + parser_append_given_text (parser, args->data); + parser->ins = parser->ins->parent; } - /* this should just be a line break */ - else if (g_str_equal (str, "br")) { - parser_append_node (parser, str); + + parser->ins = parser->ins->parent; +} + +/* this is used to define or redefine a macro until ".." + * is reached. */ +static void +macro_define_handler (YelpManParser *parser, gchar *macro, GSList *args) +{ + parser->ignore = TRUE; + parser->token = g_strdup(".."); +} + +static void +macro_tp_handler (YelpManParser *parser, gchar *macro, GSList *args) +{ + xmlNodePtr tmpNode = NULL; + GError **errormsg = NULL; + + tmpNode = parser_stack_pop_node (parser, "IP"); + + if (tmpNode != NULL) + parser->ins = tmpNode->parent; + + parser->ins = parser_append_node (parser, "IP"); + + if (args && args->data) { + parser->ins = parser_append_node (parser, "Indent"); + parser_append_given_text (parser, args->data); + parser->ins = parser->ins->parent; } - else if (g_str_equal (str, "sp")) { - parser->ins = parser_append_node (parser, str); - if (PARSER_CUR && *(parser->cur) != '\n') { - parser->ins = parser_append_node (parser, "Count"); - parser_append_token (parser); - parser->ins = parser->ins->parent; - } + g_free (parser->buffer); + if (g_io_channel_read_line (parser->channel, + &(parser->buffer), + (gsize *)&(parser->length), + NULL, errormsg) + == G_IO_STATUS_NORMAL) { + parser->ins = parser_append_node (parser, "Tag"); + parser_parse_line (parser); parser->ins = parser->ins->parent; } - else if (g_str_equal (str, "SH") || g_str_equal (str, "SS") || - g_str_equal (str, "Sh") || g_str_equal (str, "Ss")) { - parser_stack_pop_node (parser, "IP"); - /* Sections should be their own, well, section */ - parser->ins = xmlDocGetRootElement (parser->doc); + parser_stack_push_node (parser, parser->ins); +} + +static void +macro_ip_handler (YelpManParser *parser, gchar *macro, GSList *args) +{ + xmlNodePtr tmpNode; + + tmpNode = parser_stack_pop_node (parser, "IP"); + + if (tmpNode != NULL) + parser->ins = tmpNode->parent; - while (PARSER_CUR && *(parser->cur) != '\n') { - parser->ins = parser_append_node (parser, str); - /* FIXME: man seems to take all arguments until the end of the line, -B.S. */ - parser_append_token (parser); + parser->ins = parser_append_node (parser, macro); + + if (args && args->data) { + parser->ins = parser_append_node (parser, "Tag"); + parser_append_given_text_handle_escapes (parser, args->data, TRUE); + parser->ins = parser->ins->parent; + + if (args->next && args->next->data) { + parser->ins = parser_append_node (parser, "Indent"); + parser_append_given_text_handle_escapes (parser, args->next->data, TRUE); parser->ins = parser->ins->parent; } } - else if (g_str_equal (str, "TH")) { - parser->ins = parser_append_node (parser, str); - if (PARSER_CUR && *(parser->cur) != '\n') { - parser->ins = parser_append_node (parser, "Title"); - parser_append_token (parser); - parser->ins = parser->ins->parent; - } - if (PARSER_CUR && *(parser->cur) != '\n') { - parser->ins = parser_append_node (parser, "Section"); - parser_append_token (parser); - parser->ins = parser->ins->parent; - } - if (PARSER_CUR && *(parser->cur) != '\n') { - parser->ins = parser_append_node (parser, "Date"); - parser_append_token (parser); - parser->ins = parser->ins->parent; - } - if (PARSER_CUR && *(parser->cur) != '\n') { - parser->ins = parser_append_node (parser, "Commentary"); - parser_append_token (parser); - parser->ins = parser->ins->parent; - } - if (PARSER_CUR && *(parser->cur) != '\n') { - parser->ins = parser_append_node (parser, "Name"); - parser_append_token (parser); - parser->ins = parser->ins->parent; - } + parser_stack_push_node (parser, parser->ins); +} + +static void +macro_hanging_paragraph_handler (YelpManParser *parser, gchar *macro, GSList *args) +{ + parser_stack_pop_node (parser, "IP"); + parser->ins = parser_append_node (parser, macro); + + if (args && args->data) { + parser->ins = parser_append_node (parser, "Indent"); + parser_append_given_text (parser, args->data); parser->ins = parser->ins->parent; } - /* Begin paragraph with hanging tag. */ - else if (g_str_equal (str, "TP")) { - tmpNode = parser_stack_pop_node (parser, "IP"); +} - if (tmpNode != NULL) - parser->ins = tmpNode->parent; +/* BSD mandoc macros + * Since mandoc man pages are required to begin with Dd, Dt, Os, + * we will use this to create the TH tag. + */ +static void +macro_mandoc_handler (YelpManParser *parser, gchar *macro, GSList *args) +{ + gchar *str = NULL; + + if (g_str_equal (macro, "Dd")) { + parser->ins = parser_append_node (parser, "TH"); - parser->ins = parser_append_node (parser, "IP"); - g_free (str); + str = args_concat_all (args); - if (PARSER_CUR && *(parser->cur) != '\n') { - parser->ins = parser_append_node (parser, "Indent"); - parser_read_until (parser, '\n'); - parser_append_token (parser); + if (args && args->data) { + parser->ins = parser_append_node (parser, "Date"); + parser_append_given_text (parser, str); parser->ins = parser->ins->parent; } - g_free (parser->buffer); + g_free (str); + } + else if (g_str_equal (macro, "Dt")) { + if (args && args->data) { + parser->ins = parser_append_node (parser, "Title"); + parser_append_given_text (parser, args->data); + parser->ins = parser->ins->parent; + } - if (g_io_channel_read_line (parser->channel, - &(parser->buffer), - (gsize *) &(parser->length), - NULL, NULL) - == G_IO_STATUS_NORMAL) { - parser->ins = parser_append_node (parser, "Tag"); - parser_parse_line (parser); - parser->ins = parser->ins->parent; + if (args && args->next && args->next->data) { + parser->ins = parser_append_node (parser, "Section"); + parser_append_given_text (parser, args->next->data); + parser->ins = parser->ins->parent; } + } + else if (g_str_equal (macro, "Os")) { + if (args && args->data) { + parser->ins = parser_append_node (parser, "Os"); + parser_append_given_text (parser, args->data); + parser->ins = parser->ins->parent; + } - parser_stack_push_node (parser, parser->ins); - } - /* indented paragraph, with optional hanging indent */ - else if (g_str_equal (str, "IP")) { - tmpNode = parser_stack_pop_node (parser, "IP"); - - if (tmpNode != NULL) - parser->ins = tmpNode->parent; - - parser->ins = parser_append_node (parser, str); - g_free (str); + /* Leave the TH tag */ + parser->ins = parser->ins->parent; + } +} - if (PARSER_CUR && *(parser->cur) != '\n') { - parser->ins = parser_append_node (parser, "Tag"); +static void +macro_url_handler (YelpManParser *parser, gchar *macro, GSList *args) +{ + xmlNodePtr tmpNode = NULL; + + if (g_str_equal (macro, "UR")) { + /* If someone wants to do automatic hyperlink wizardry outside + * for the parser, then this should instead generate a tag. + */ + if (args && args->data) { + if (g_str_equal (args->data, ":")) + parser->make_links = FALSE; + else { + parser->ins = parser_append_node (parser, macro); + + parser_stack_push_node (parser, parser->ins); + + parser->ins = parser_append_node (parser, "URI"); + parser_append_given_text (parser, args->data); + parser->ins = parser->ins->parent; + } + } + } + else if (g_str_equal (macro, "UE")) { + + if (parser->make_links) { + tmpNode = parser_stack_pop_node (parser, "UR"); - parser_append_token (parser); - parser->ins = parser->ins->parent; + if (tmpNode == NULL) + d (g_warning ("Found unexpected tag: '%s'\n", macro)); + else + parser->ins = tmpNode->parent; + } else + parser->make_links = TRUE; + + } + else if (g_str_equal (macro, "UN")) { - parser->ins = parser_append_node (parser, "Indent"); - parser_read_until (parser, '\n'); - parser_append_token (parser); - parser->ins = parser->ins->parent; + if (args && args->data) { + parser->ins = parser_append_node (parser, macro); + parser_append_given_text (parser, args->data); + parser->ins = parser->ins->parent; } - - parser_stack_push_node (parser, parser->ins); + } - /* hanging paragraph */ - else if (g_str_equal (str, "HP")) { - parser_stack_pop_node (parser, "IP"); +} - parser->ins = parser_append_node (parser, str); - g_free (str); +/* relative margin indent; FIXME: this takes a parameter that tells + * how many indents to do, which needs to be implemented to fix + * some man page formatting options */ +static void +macro_rs_re_handler (YelpManParser *parser, gchar *macro, GSList *args) +{ + xmlNodePtr tmpNode; - if (PARSER_CUR && *(parser->cur) != '\n') { - parser->ins = parser_append_node (parser, "Indent"); - parser_append_token (parser); - parser->ins = parser->ins->parent; - } - } - /* relative margin indent; FIXME: this takes a parameter that tells - * how many indents to do, which needs to be implemented to fix - * some man page formatting options */ - else if (g_str_equal (str, "RS")) { - parser->ins = parser_append_node (parser, str); - g_free (str); + if (g_str_equal (macro, "RS")) { + parser->ins = parser_append_node (parser, macro); parser_stack_push_node (parser, parser->ins); - if (PARSER_CUR && *(parser->cur) != '\n') { + if (args && args->data) { parser->ins = parser_append_node (parser, "Indent"); - parser_append_token (parser); + parser_append_given_text (parser, args->data); parser->ins = parser->ins->parent; } - } - /* end relative indent */ - else if (g_str_equal (str, "RE")) { + } + else if (g_str_equal (macro, "RE")) { parser_stack_pop_node (parser, "IP"); tmpNode = parser_stack_pop_node (parser, "RS"); if (tmpNode == NULL) - g_warning ("Found unexpected tag: '%s'\n", str); + d (g_warning ("Found unexpected tag: '%s'\n", macro)); else - parser->ins = tmpNode; - - g_free (str); + parser->ins = tmpNode->parent; } - else if (g_str_equal (str, "UR")) { - gchar *buf; +} - while (PARSER_CUR - && *(parser->cur) != ' ' - && *(parser->cur) != '\n') - parser->cur++; - - c = *(parser->cur); - *(parser->cur) = '\0'; - - buf = g_strdup (parser->anc + 1); - *(parser->cur) = c; +static void +macro_mandoc_list_handler (YelpManParser *parser, gchar *macro, GSList *args) +{ + xmlNodePtr tmpNode; - /* - * If someone wants to do automatic hyperlink wizardry outside - * for the parser, then this should instead generate a tag. - */ - if (g_str_equal (buf, ":")) - parser->make_links = FALSE; - else { - parser->ins = parser_append_node (parser, str); - - parser_stack_push_node (parser, parser->ins); + if (g_str_equal (macro, "Bl")) { + + parser->ins = parser_append_node (parser, macro); - if (PARSER_CUR && *(parser->cur) != '\n') { - parser->ins = parser_append_node (parser, "URI"); - parser_append_text (parser); - parser->ins = parser->ins->parent; + if (args && args->data) { + gchar *listtype = (gchar *)args->data; + + if (g_str_equal (listtype, "-hang") || + g_str_equal (listtype, "-ohang") || + g_str_equal (listtype, "-tag") || + g_str_equal (listtype, "-diag") || + g_str_equal (listtype, "-inset") + ) { + listtype++; + xmlNewProp (parser->ins, BAD_CAST "listtype", + BAD_CAST listtype); + /* TODO: check for -width, -offset, -compact */ + } else if (g_str_equal (listtype, "-column")) { + /* TODO: support this */; + } else if (g_str_equal (listtype, "-item") || + g_str_equal (listtype, "-bullet") || + g_str_equal (listtype, "-hyphen") || + g_str_equal (listtype, "-dash") + ) { + listtype++; + xmlNewProp (parser->ins, BAD_CAST "listtype", + BAD_CAST listtype); + /* TODO: check for -offset, -compact */ } } + + parser_stack_push_node (parser, parser->ins); + } + else if (g_str_equal (macro, "El")) { + + tmpNode = parser_stack_pop_node (parser, "It"); - g_free (str); - g_free (buf); + if (tmpNode != NULL) + parser->ins = tmpNode->parent; + + tmpNode = parser_stack_pop_node (parser, "Bl"); + + if (tmpNode == NULL) + d (g_warning ("Found unexpected tag: '%s'\n", macro)); + else + parser->ins = tmpNode->parent; } - else if (g_str_equal (str, "UE")) { - if (parser->make_links) { - tmpNode = parser_stack_pop_node (parser, "UR"); +} - if (tmpNode == NULL) - g_warning ("Found unexpected tag: '%s'\n", str); - else - parser->ins = tmpNode; - } else - parser->make_links = TRUE; +static void +macro_verbatim_handler (YelpManParser *parser, gchar *macro, GSList *args) +{ + xmlNodePtr tmpNode; + + if (g_str_equal (macro, "nf") || g_str_equal (macro, "Vb")) { + parser->ins = parser_append_node (parser, "Verbatim"); + parser_stack_push_node (parser, parser->ins); + } + else if (g_str_equal (macro, "fi") || g_str_equal (macro, "Ve")) { + tmpNode = parser_stack_pop_node (parser, "Verbatim"); - g_free (str); + if (tmpNode == NULL) + d (g_warning ("Found unexpected tag: '%s'\n", macro)); + else + parser->ins = tmpNode->parent; } - else if (g_str_equal (str, "UN")) { - parser->ins = parser_append_node (parser, str); - g_free (str); +} - parser_append_token (parser); - parser->ins = parser->ins->parent; +/* many mandoc macros have their arguments parsed so that other + * macros can be called to operate on their arguments. This table + * indicates which macros are _parsed_ for other callable macros, + * and which are _callable_ from other macros: see mdoc(7) for more + * details + */ + +#define MANDOC_NONE 0x01 +#define MANDOC_PARSED 0x01 +#define MANDOC_CALLABLE 0x02 + +struct MandocMacro { + gchar *macro; + gint flags; +}; + +struct MandocMacro manual_macros[] = { + { "Ad", MANDOC_PARSED | MANDOC_CALLABLE }, + { "An", MANDOC_PARSED | MANDOC_CALLABLE }, + { "Ar", MANDOC_PARSED | MANDOC_CALLABLE }, + { "Cd", MANDOC_NONE }, + { "Cm", MANDOC_PARSED | MANDOC_CALLABLE }, + { "Dv", MANDOC_PARSED | MANDOC_CALLABLE }, + { "Er", MANDOC_PARSED | MANDOC_CALLABLE }, + { "Ev", MANDOC_PARSED | MANDOC_CALLABLE }, + { "Fa", MANDOC_PARSED | MANDOC_CALLABLE }, + { "Fd", MANDOC_PARSED | MANDOC_CALLABLE }, + { "Fl", MANDOC_PARSED | MANDOC_CALLABLE }, + { "Fn", MANDOC_PARSED | MANDOC_CALLABLE }, + { "Ic", MANDOC_PARSED | MANDOC_CALLABLE }, + { "Li", MANDOC_PARSED | MANDOC_CALLABLE }, + { "Nd", MANDOC_PARSED | MANDOC_CALLABLE }, + { "Nm", MANDOC_PARSED | MANDOC_CALLABLE }, + { "Op", MANDOC_PARSED | MANDOC_CALLABLE }, + { "Ot", MANDOC_PARSED | MANDOC_CALLABLE }, + { "Pa", MANDOC_PARSED | MANDOC_CALLABLE }, + { "St", MANDOC_PARSED | MANDOC_CALLABLE }, + { "Tn", MANDOC_PARSED | MANDOC_CALLABLE }, + { "Va", MANDOC_PARSED | MANDOC_CALLABLE }, + { "Vt", MANDOC_PARSED | MANDOC_CALLABLE }, + { "Xr", MANDOC_PARSED | MANDOC_CALLABLE }, + { NULL, MANDOC_NONE } +}; + +static gboolean +is_mandoc_manual_macro_parsed (gchar *macro) +{ + gint i; + + for (i=0; manual_macros[i].macro != NULL; i++) { + if (g_str_equal (macro, manual_macros[i].macro) && + (manual_macros[i].flags & MANDOC_PARSED) == MANDOC_PARSED + ) { + return TRUE; + } } - /* BSD mandoc macros */ + return FALSE; +} - /* - * Since mandoc man pages are required to begin with Dd, Dt, Os, - * we will use this to create the TH tag. - */ - else if (g_str_equal (str, "Dd")) { +static gboolean +is_mandoc_manual_macro_callable (gchar *macro) +{ + gint i; + + for (i=0; manual_macros[i].macro != NULL; i++) { + if (g_str_equal (macro, manual_macros[i].macro) && + (manual_macros[i].flags & MANDOC_CALLABLE) == MANDOC_CALLABLE + ) { + return TRUE; + } + } + + return FALSE; +} + +static void +macro_mandoc_utility_handler (YelpManParser *parser, gchar *macro, GSList *args) +{ + GSList *ptr = NULL; + gchar *str = NULL; + gchar *manpage, *uri; + + g_return_if_fail (macro != NULL); + + if (is_mandoc_manual_macro_parsed (macro)) { + parser->ins = parser_append_node (parser, macro); + + ptr = args; + while (ptr && ptr->data) { + if (is_mandoc_manual_macro_callable ((gchar *)ptr->data)) { + macro_mandoc_utility_handler (parser, (gchar *)ptr->data, ptr->next); + break; + } else { + parser_append_given_text_handle_escapes (parser, (gchar *)ptr->data, TRUE); + } + ptr = ptr->next; + if (ptr && ptr->data) + parser_append_given_text (parser, " "); + } + + parser->ins = parser->ins->parent; + } else { + parser->ins = parser_append_node (parser, macro); + str = args_concat_all (args); + parser->ins = parser->ins->parent; + g_free (str); + } - parser->ins = parser_append_node (parser, "TH"); + return; + + if (g_str_equal (macro, "Op")) { + + } else if (g_str_equal (macro, "Nm")) { - if (PARSER_CUR && *(parser->cur) != '\n') { - parser->ins = parser_append_node (parser, "Date"); - parser_append_token (parser); - parser->ins = parser->ins->parent; + if (str) { + parser_ensure_P (parser); + + parser->ins = parser_append_node (parser, "B"); + parser_append_given_text_handle_escapes (parser, str, TRUE); + parser->ins = parser->ins->parent; } } - else if (g_str_equal (str, "Dt")) { - g_free (str); + else if (g_str_equal (macro, "Nd")) { + + if (str) { + parser_append_given_text (parser, " -- "); + parser_append_given_text_handle_escapes (parser, str, TRUE); + } + } + else if (g_str_equal (macro, "Xr")) { + + if (args && args->data && args->next && args->next->data) { + + manpage = g_strdup_printf ("%s(%s)", (gchar *)args->data, (gchar *)args->next->data); + uri = g_strdup_printf ("man:%s", manpage); + + parser_ensure_P (parser); + + parser->ins = parser_append_node (parser, "UR"); + parser->ins = parser_append_node (parser, "URI"); + parser_append_given_text (parser, uri); + parser->ins = parser->ins->parent; + parser_append_given_text (parser, manpage); + parser->ins = parser->ins->parent; + + ptr = args->next->next; - if (PARSER_CUR && *(parser->cur) != '\n') { - parser->ins = parser_append_node (parser, "Title"); - parser_append_token (parser); - parser->ins = parser->ins->parent; - } + while (ptr && ptr->data) { + parser_append_given_text (parser, ptr->data); + ptr = g_slist_next (ptr); + } + + g_free (uri); + g_free (manpage); + } } - else if (g_str_equal (str, "Os")) { - g_free (str); - if (PARSER_CUR && *(parser->cur) != '\n') { - parser->ins = parser_append_node (parser, "Os"); - parser_append_token (parser); - parser->ins = parser->ins->parent; - } + g_free (str); +} - /* Leave the TH tag */ +static void +macro_mandoc_listitem_handler (YelpManParser *parser, gchar *macro, GSList *args) +{ + GSList *ptr = NULL; + xmlNodePtr tmpNode; + + tmpNode = parser_stack_pop_node (parser, "It"); + + if (tmpNode != NULL) + parser->ins = tmpNode->parent; + + parser->ins = parser_append_node (parser, macro); + + if (args && args->data) { + parser->ins = parser_append_node (parser, "ItTag"); + + ptr = args; + while (ptr && ptr->data) { + if (is_mandoc_manual_macro_callable ((gchar *)ptr->data)) { + macro_mandoc_utility_handler (parser, (gchar *)ptr->data, ptr->next); + break; + } else { + parser_append_given_text (parser, (gchar *)ptr->data); + } + ptr = ptr->next; + if (ptr && ptr->data) + parser_append_given_text (parser, " "); + } + parser->ins = parser->ins->parent; } - else if (g_str_equal (str, "Bl")) { - parser->ins = parser_append_node (parser, str); - g_free (str); - parser_stack_push_node (parser, parser->ins); + parser_stack_push_node (parser, parser->ins); +} + +static void +macro_template_handler (YelpManParser *parser, gchar *macro, GSList *args) +{ +} + +/* the handler functions for each macro all have this form: + * - the calling function, parser_handle_linetag owns the "macro", and "args" + * parameters, so do not free them. + */ +typedef void (*MacroFunc)(YelpManParser *parser, gchar *macro, GSList *args); + +struct MacroHandler { + gchar *macro; + MacroFunc handler; +}; + +/* We are calling all of these macros, when in reality some of them are + * requests (lowercase, defined by groff system), and some of them are + * macros (varying case, defined by man/mdoc/ms/tbl extensions) + * + * A great resource to figure out what each of these does is the groff + * info page. Also groff(7), man(7), and mdoc(7) are useful as well. + */ +struct MacroHandler macro_handlers[] = { + { "\\\"", macro_ignore_handler }, /* groff: comment */ + { "ad", macro_ignore_handler }, /* groff: set adjusting mode */ + { "Ad", macro_mandoc_utility_handler }, /* mandoc: Address */ + { "An", macro_mandoc_utility_handler }, /* mandoc: Author name */ + { "Ar", macro_mandoc_utility_handler }, /* mandoc: Command line argument */ + { "B", macro_bold_small_italic_handler }, /* man: set bold font */ + { "Bd", macro_ignore_handler }, /* mandoc: Begin-display block */ + { "BI", macro_roman_bold_small_italic_handler }, /* man: bold italic font */ + { "Bl", macro_mandoc_list_handler }, /* mandoc: begin list */ + { "bp", macro_ignore_handler }, /* groff: break page */ + { "br", macro_insert_self_handler }, /* groff: line break */ + { "BR", macro_roman_bold_small_italic_handler }, /* man: set bold roman font */ + { "Cd", macro_mandoc_utility_handler }, /* mandoc: Configuration declaration */ + { "Cm", macro_mandoc_utility_handler }, /* mandoc: Command line argument modifier */ + { "ce", macro_ignore_handler }, /* groff: center text */ + { "Dd", macro_mandoc_handler }, /* mandoc: Document date */ + { "de", macro_define_handler }, /* groff: define macro */ + { "ds", macro_ignore_handler }, /* groff: define string variable */ + { "D1", macro_ignore_handler }, /* mandoc: Indent and display one text line */ + { "Dl", macro_ignore_handler }, /* mandoc: Indent and display one line of literal text */ + { "Dt", macro_mandoc_handler }, /* mandoc: Document title */ + { "Dv", macro_mandoc_utility_handler }, /* mandoc: Defined variable */ + { "Ed", macro_ignore_handler }, /* mandoc: End-display block */ + { "El", macro_mandoc_list_handler }, /* mandoc: end list */ + { "Er", macro_mandoc_utility_handler }, /* mandoc: Error number */ + { "Ev", macro_mandoc_utility_handler }, /* mandoc: Environment variable */ + { "Fa", macro_mandoc_utility_handler }, /* mandoc: Function argument */ + { "Fd", macro_mandoc_utility_handler }, /* mandoc: Function declaration */ + { "fi", macro_verbatim_handler }, /* groff: activate fill mode */ + { "Fl", macro_mandoc_utility_handler }, /* mandoc: ? */ + { "Fn", macro_mandoc_utility_handler }, /* mandoc: Function call */ + { "ft", macro_ignore_handler }, /* groff: change font */ + { "HP", macro_hanging_paragraph_handler }, /* man: paragraph with hanging left indentation */ + { "hy", macro_ignore_handler }, /* groff: enable hyphenation */ + { "I", macro_bold_small_italic_handler }, /* man: set italic font */ + { "Ic", macro_mandoc_utility_handler }, /* mandoc: Interactive Command */ + { "ie", macro_ignore_handler }, /* groff: else portion of if-else */ + { "if", macro_ignore_handler }, /* groff: if statement */ + { "ig", macro_ignore_handler }, /* groff: comment until '..' or '.END' */ + { "ih", macro_ignore_handler }, /* ? */ + { "IX", macro_ignore_handler }, /* ms: print index to stderr */ + { "IB", macro_roman_bold_small_italic_handler }, /* man: set italic bold font */ + { "IP", macro_ip_handler }, /* man: indented paragraph */ + { "IR", macro_roman_bold_small_italic_handler }, /* man: set italic roman font */ + { "It", macro_mandoc_listitem_handler }, /* mandoc: item in list */ + { "Li", macro_mandoc_utility_handler }, /* mandoc: Literal text */ + { "LP", macro_new_paragraph_handler }, /* man: line break and left margin and indentation are reset */ + { "na", macro_ignore_handler }, /* groff: disable adjusting */ + { "Nd", macro_mandoc_utility_handler }, /* mandoc: description of utility/program */ + { "ne", macro_ignore_handler }, /* groff: force space at bottom of page */ + { "nf", macro_verbatim_handler }, /* groff: no fill mode */ + { "nh", macro_ignore_handler }, /* groff: disable hyphenation */ + { "Nd", macro_mandoc_utility_handler }, /* mandoc: ? */ + { "Nm", macro_mandoc_utility_handler }, /* mandoc: Command/utility/program name*/ + { "Op", macro_mandoc_utility_handler }, /* mandoc: Option */ + { "Os", macro_mandoc_handler }, /* mandoc: Operating System */ + { "Ot", macro_mandoc_utility_handler }, /* mandoc: Old style function type (Fortran) */ + { "P", macro_new_paragraph_handler }, /* man: line break and left margin and indentation are reset */ + { "Pa", macro_mandoc_utility_handler }, /* mandoc: Pathname or filename */ + { "PP", macro_new_paragraph_handler }, /* man: line break and left margin and indentation are reset */ + { "Pp", macro_new_paragraph_handler }, /* man: line break and left margin and indentation are reset */ + { "ps", macro_ignore_handler }, /* groff: change type size */ + { "RB", macro_roman_bold_small_italic_handler }, /* man: set roman bold font */ + { "RE", macro_ignore_handler }, /* man: move left margin back to NNN */ + { "RI", macro_roman_bold_small_italic_handler }, /* man: set roman italic font */ + { "RS", macro_ignore_handler }, /* man: move left margin to right by NNN */ + { "SH", macro_section_header_handler }, /* man: unnumbered section heading */ + { "Sh", macro_section_header_handler }, /* man: unnumbered section heading */ + { "SM", macro_bold_small_italic_handler }, /* man: set font size one SMaller */ + { "so", macro_ignore_handler }, /* groff: include file */ + { "sp", macro_spacing_handler }, /* groff: */ + { "SS", macro_section_header_handler }, /* man: unnumbered subsection heading */ + { "Ss", macro_section_header_handler }, /* man: unnumbered subsection heading */ + { "St", macro_mandoc_utility_handler }, /* mandoc: Standards (-p1003.2, -p1003.1 or -ansiC) */ + { "TH", macro_title_header_handler }, /* man: set title of man page */ + { "TP", macro_tp_handler }, /* man: set indented paragraph with label */ + { "UR", macro_url_handler }, /* man: URL start hyperlink */ + { "UE", macro_url_handler }, /* man: URL end hyperlink */ + { "UN", macro_ignore_handler }, /* ? */ + { "TE", macro_ignore_handler }, /* ms: table */ + { "Tn", macro_mandoc_utility_handler }, /* mandoc: Trade or type name (small Caps). */ + { "ti", macro_ignore_handler }, /* groff: temporary indent */ + { "tr", macro_ignore_handler }, /* groff: translate characters */ + { "TS", macro_ignore_handler }, /* ms: table with optional header */ + { "Va", macro_mandoc_utility_handler }, /* mandoc: Variable name */ + { "Vb", macro_verbatim_handler }, /* pod2man: start of verbatim text */ + { "Ve", macro_verbatim_handler }, /* pod2man: end of verbatim text */ + { "Vt", macro_mandoc_utility_handler }, /* mandoc: Variable type (Fortran only) */ + { "Xr", macro_mandoc_utility_handler }, /* mandoc: Manual page cross reference */ + { NULL, NULL } +}; + +static void +parser_handle_linetag (YelpManParser *parser) { + gchar c, *str, *ptr, *arg; + GSList *arglist = NULL; + GSList *listptr = NULL; + MacroFunc handler_func = NULL; + + static GHashTable *macro_hash = NULL; + + /* check if we've created the hash of macros yet. If not, make it */ + if (!macro_hash) { + gint i; + + macro_hash = g_hash_table_new (g_str_hash, g_str_equal); + + for (i=0; macro_handlers[i].macro != NULL; i++) { + g_hash_table_insert (macro_hash, + macro_handlers[i].macro, + macro_handlers[i].handler); + } } - else if (g_str_equal (str, "El")) { - tmpNode = parser_stack_pop_node (parser, "Bl"); - if (tmpNode == NULL) - g_warning ("Found unexpected tag: '%s'\n", str); - else - parser->ins = tmpNode; + /* FIXME: figure out a better way to handle these cases */ + /* special case, if the line is simply ".\n" then return */ + if (*(parser->cur+1) == '\n') { + ++parser->cur; + parser->anc = ++parser->cur; + return; + } + /* special case, if the line is simply "..\n" then return */ + else if (*(parser->cur+1) == '.' && *(parser->cur+2) == '\n') { + ++parser->cur; + ++parser->cur; + parser->anc = ++parser->cur; + } + + /* skip any spaces after the control character . */ + while (PARSER_CUR && *(parser->cur) == ' ') + parser->cur++; + + while (PARSER_CUR + && *(parser->cur) != ' ' + && ( (*parser->cur != '\\') || ((*parser->cur == '\\') && (*(parser->cur+1) == '\"')) ) + && *(parser->cur) != '\n') { + if ((*parser->cur == '\\') && (*(parser->cur+1) == '\"')) { + parser->cur += 2; + break; + } + parser->cur++; + } - g_free (str); + /* copy the macro/request into str */ + c = *(parser->cur); + *(parser->cur) = '\0'; + str = g_strdup (parser->anc + 1); /* skip control character '.' by adding one */ + *(parser->cur) = c; + parser->anc = parser->cur; + + /* FIXME: need to handle escaped characters */ + /* perform argument parsing and store argument in a singly linked list */ + while (PARSER_CUR && *(parser->cur) != '\n') { + ptr = NULL; + arg = NULL; + + /* skip any whitespace */ + while (PARSER_CUR && *(parser->cur) == ' ') + parser->anc = ++parser->cur; + +get_argument: + /* search until we hit whitespace or an " */ + while (PARSER_CUR && + *(parser->cur) != '\n' && + *(parser->cur) != ' ' && + *(parser->cur) != '\"') + parser->cur++; + + /* this checks for escaped spaces */ + if (PARSER_CUR && + ((parser->cur - parser->buffer) > 0) && + *(parser->cur) == ' ' && + *(parser->cur-1) == '\\') { + parser->cur++; + goto get_argument; + } + + if (*(parser->cur) == '\n' && (parser->cur == parser->anc)) { + break; + } + + if (*(parser->cur) == '\"' && *(parser->cur-1) == ' ') { + /* quoted argument */ + ptr = strchr (parser->cur+1, '\"'); + if (ptr != NULL) { + c = *(ptr); + *(ptr) = '\0'; + arg = g_strdup (parser->anc+1); + *(ptr) = c; + parser->cur = ptr; + parser->anc = ++parser->cur; + } else { + /* unmatched double quote: include the " as part of the argument */ + parser->cur++; + goto get_argument; + } + } + else if (*(parser->cur) == '\"') { + /* quote in the middle of an argument */ + c = *(parser->cur+1); + *(parser->cur+1) = '\0'; + arg = g_strdup (parser->anc); + *(parser->cur+1) = c; + parser->anc = ++parser->cur; + } + else if (*(parser->cur) == ' ') { + /* normal space separated argument */ + c = *(parser->cur); + *(parser->cur) = '\0'; + arg = g_strdup (parser->anc); + *(parser->cur) = c; + parser->anc = ++parser->cur; + } + else if (*(parser->cur) == '\n' && *(parser->cur-1) != ' ') { + /* special case for EOL */ + c = *(parser->cur); + *(parser->cur) = '\0'; + arg = g_strdup (parser->anc); + *(parser->cur) = c; + parser->anc = parser->cur; + } else + ;//g_warning ("FIXME: need to take into account this case...\n"); + + arglist = g_slist_append (arglist, arg); } - else if (g_str_equal (str, "It")) { - parser->ins = parser_append_node (parser, str); - g_free(str); + + /* g_print ("handling macro (%s)\n", str); + + listptr = arglist; + while (listptr && listptr->data) { + g_print (" arg = %s\n", (gchar *)listptr->data); + listptr = g_slist_next (listptr); } + */ + + /* lookup the macro handler and call that function */ + handler_func = g_hash_table_lookup (macro_hash, str); + if (handler_func) + (*handler_func) (parser, str, arglist); + + /* in case macro is not defined in hash table, ignore rest of line */ + else + macro_ignore_handler (parser, str, arglist); + g_free (str); + + listptr = arglist; + while (listptr && listptr->data) { + g_free (listptr->data); + listptr = g_slist_next (listptr); + } + + return; + + if (0) { + } /* Table (tbl) macros */ else if (g_str_equal (str, "TS")) { parser->ins = parser_append_node (parser, "TABLE"); @@ -556,28 +1169,9 @@ parser_handle_linetag (YelpManParser *parser) { } else if (g_str_equal (str, "TE")) { /* We should only see this from within parser_parse_table */ - g_warning ("Found unexpected tag: '%s'\n", str); + d (g_warning ("Found unexpected tag: '%s'\n", str)); g_free (str); } - - /* this is a macro used for making index entries - * in a table of contents; ignore it for now, and skip until - * end of line; definition in /usr/share/groff/<version>/tmac/m.tmac */ - else if (g_str_equal (str, "IX")) { - /* ignore the rest of the line */ - while (PARSER_CUR) - parser->anc = ++parser->cur; - } - - /* these are pod2man extensions which are usually defined early in - * the "preamble" of the man file. They are usually defined - * for verbatim text; ignore them for now */ - else if (g_str_equal (str, "Vb") || g_str_equal (str, "Ve")) { - /* ignore the rest of the line */ - while (PARSER_CUR) - parser->anc = ++parser->cur; - } - /* "ie" and "if" are conditional macros in groff * "ds" is to define a variable; see groff(7) * ignore anything between the \{ \}, otherwise ignore until @@ -607,7 +1201,6 @@ parser_handle_linetag (YelpManParser *parser) { parser->anc = ++parser->cur; } } - /* else conditional macro */ else if (g_str_equal (str, "el")) { /* check to see if the next two characters are the @@ -623,40 +1216,6 @@ parser_handle_linetag (YelpManParser *parser) { } } - /* this is used to define or redefine a macro until ".." - * is reached. */ - else if (g_str_equal (str, "de")) { - gchar *ptr = NULL; - ptr = strstr (parser->cur, ".."); - if (ptr) { - parser->cur = (ptr+2); - parser->anc = parser->cur; - } else { - /* set the flag to ignore input until ".." */ - parser->ignore = TRUE; - parser->token = g_strdup(".."); - } - } - - /* - * From man(7): macros that many processors will simply ignore... - * so lets do that for now. - */ - else if (g_str_equal (str, "ad") || g_str_equal (str, "bp") - || g_str_equal (str, "ce") - || g_str_equal (str, "fi") || g_str_equal (str, "ft") - || g_str_equal (str, "hy") || g_str_equal (str, "ig") - || g_str_equal (str, "in") || g_str_equal (str, "na") - || g_str_equal (str, "ne") || g_str_equal (str, "nf") - || g_str_equal (str, "nh") || g_str_equal (str, "ps") - || g_str_equal (str, "so") || g_str_equal (str, "ti") - || g_str_equal (str, "tr")) { - /* Do nothing */ - } - - else { - g_warning ("No rule matching the tag '%s'\n", str); - } } static void @@ -672,16 +1231,23 @@ static void parser_read_until (YelpManParser *parser, gchar delim) { + gchar c; + while (PARSER_CUR && *(parser->cur) != '\n' && *(parser->cur) != delim) { - if (*(parser->cur) == '\\') - parser_handle_inline (parser); - else if (*(parser->cur) == '(' && parser->make_links) - parser_make_link (parser); - else parser->cur++; } + + if (parser->anc == parser->cur) + return; + + c = *(parser->cur); + *(parser->cur) = '\0'; + parser_append_given_text_handle_escapes (parser, parser->anc, TRUE); + *(parser->cur) = c; + + parser->anc = parser->cur; } static void @@ -693,9 +1259,10 @@ parser_escape_tags (YelpManParser *parser, xmlNodePtr node = NULL; xmlNodePtr cur = parser->ins; GSList *path = NULL; - + /* Find the top node we can escape from */ - while (cur->parent != (xmlNodePtr) parser->doc) { + while (cur && cur != (xmlNodePtr)parser->doc && + cur->parent && cur->parent != (xmlNodePtr) parser->doc) { for (i = 0; i < ntags; i++) if (!xmlStrcmp (cur->name, BAD_CAST tags[i])) { node = cur; @@ -730,167 +1297,229 @@ parser_escape_tags (YelpManParser *parser, } static void -parser_append_token (YelpManParser *parser) +parser_append_given_text_handle_escapes (YelpManParser *parser, gchar *text, gboolean make_links) { - while (*(parser->cur) == ' ') - parser->anc = ++parser->cur; - - if (*(parser->cur) == '"') { - parser->anc = ++parser->cur; - parser_read_until (parser, '"'); - } else { - parser_read_until (parser, ' '); - } - - parser_append_text (parser); - - if (*(parser->cur) == '"') - parser->anc = ++parser->cur; -} - -static void -parser_handle_inline (YelpManParser *parser) -{ - gchar c, *str; - gchar **escape; - - parser_append_text (parser); - parser->anc = ++parser->cur; - - switch (*(parser->cur)) { - case '\0': - break; - case '-': - case '\\': - parser->cur++; - parser_append_text (parser); - parser->anc = parser->cur; - break; - case 'f': - parser->cur++; - if (!PARSER_CUR) break; - parser->cur++; + gchar *escape[] = { "fI", "fB" }; + gchar *baseptr, *ptr, *anc, *str; + gint c, len; - c = *(parser->cur); - *(parser->cur) = '\0'; - str = g_strdup (parser->anc); - *(parser->cur) = c; - - escape = g_new0 (gchar *, 2); - escape[0] = "fB"; - escape[1] = "fI"; - parser_escape_tags (parser, escape, 2); - g_free (escape); - - /* the \f escape sequence changes the font - R is Roman, - * B is Bold, and I is italic */ - if (g_str_equal (str, "fI") || g_str_equal (str, "fB")) - parser->ins = parser_append_node (parser, str); - else if (!g_str_equal (str, "fR") && !g_str_equal (str, "fP")) - g_warning ("No rule matching the tag '%s'\n", str); - - g_free (str); - parser->anc = parser->cur; - break; - case '(': - parser->cur++; - if (!PARSER_CUR) break; - parser->cur++; - if (!PARSER_CUR) break; - parser->cur++; - - c = *(parser->cur); - *(parser->cur) = '\0'; - str = g_strdup (parser->anc); - *(parser->cur) = c; - - if (g_str_equal (str, "(co")) - parser_append_given_text (parser, "©"); - else if (g_str_equal (str, "(bu")) - parser_append_given_text (parser, "•"); - else if (g_str_equal (str, "(em")) - parser_append_given_text (parser, "—"); - - g_free (str); - parser->anc = parser->cur; - break; - case '*': - parser->cur++; - if (!PARSER_CUR) break; + g_return_if_fail (parser != NULL); + + if (!text) + return; - if (*(parser->cur) == 'R') { - parser_append_given_text (parser, "®"); - parser->cur++; - } else if (*(parser->cur) == '(') { - parser->cur++; - if (!PARSER_CUR) break; - parser->cur++; - if (!PARSER_CUR) break; - parser->cur++; + baseptr = g_strdup (text); + ptr = baseptr; + anc = baseptr; + len = strlen (baseptr); + + while (ptr && *ptr != '\0') { + + if (*ptr == '\\') { - c = *(parser->cur); - *(parser->cur) = '\0'; - str = g_strdup (parser->anc); - *(parser->cur) = c; - - if (g_str_equal (str, "*(Tm")) - parser_append_given_text (parser, "™"); - else if (g_str_equal (str, "*(lq")) - parser_append_given_text (parser, "“"); - else if (g_str_equal (str, "*(rq")) - parser_append_given_text (parser, "”"); + c = *ptr; + *ptr = '\0'; + parser_append_given_text (parser, anc); + *ptr = c; + + anc = ++ptr; + + switch (*ptr) { + case '\0': + break; + case '-': + case '\\': + ptr++; + c = *ptr; + *ptr = '\0'; + parser_append_given_text (parser, anc); + *ptr = c; + anc = ptr; + break; + case 'f': + ptr++; + if ((ptr - baseptr) > len || *ptr == '\0') break; + ptr++; + + c = *(ptr); + *(ptr) = '\0'; + str = g_strdup (anc); + *(ptr) = c; + + parser_ensure_P (parser); + parser_escape_tags (parser, escape, 2); + + /* the \f escape sequence changes the font - R is Roman, + * B is Bold, and I is italic */ + if (g_str_equal (str, "fI") || g_str_equal (str, "fB")) + parser->ins = parser_append_node (parser, str); + else if (!g_str_equal (str, "fR") && !g_str_equal (str, "fP")) + d (g_warning ("No rule matching the tag '%s'\n", str)); + + g_free (str); + anc = ptr; + break; + case '(': + ptr++; + if ((ptr - baseptr) > len || *ptr == '\0') break; + ptr++; + if ((ptr - baseptr) > len || *ptr == '\0') break; + ptr++; + + c = *(ptr); + *(ptr) = '\0'; + str = g_strdup (anc); + *(ptr) = c; + + if (g_str_equal (str, "(co")) + parser_append_given_text (parser, "©"); + else if (g_str_equal (str, "(bu")) + parser_append_given_text (parser, "•"); + else if (g_str_equal (str, "(em")) + parser_append_given_text (parser, "—"); + + g_free (str); + anc = ptr; + break; + case '*': + ptr++; + if ((ptr - baseptr) > len || *ptr == '\0') break; + + if (*(ptr) == 'R') { + parser_append_given_text (parser, "®"); + ptr++; + } else if (*(ptr) == '(') { + ptr++; + if ((ptr - baseptr) > len || *ptr == '\0') break; + ptr++; + if ((ptr - baseptr) > len || *ptr == '\0') break; + ptr++; + + c = *(ptr); + *(ptr) = '\0'; + str = g_strdup (anc); + *(ptr) = c; + + if (g_str_equal (str, "*(Tm")) + parser_append_given_text (parser, "™"); + else if (g_str_equal (str, "*(lq")) + parser_append_given_text (parser, "“"); + else if (g_str_equal (str, "*(rq")) + parser_append_given_text (parser, "”"); + + g_free (str); + } + + anc = ++ptr; + break; + case 'e': + anc = ++ptr; + parser_append_given_text (parser, "\\"); + break; + case '&': + anc = ++ptr; + break; + case 's': + /* this handles (actually ignores) the groff macros \s[+-][0-9] */ + ptr++; + if (*(ptr) == '+' || *(ptr) == '-') { + ptr++; + if (g_ascii_isdigit (*ptr)) { + ptr++; + } + } else if (g_ascii_isdigit (*ptr)) { + ptr++; + } + anc = ptr; + break; + case '"': + /* Marks comments till end of line. so we can ignore it. */ + while (ptr && *ptr != '\0') + ptr++; + anc = ptr; + break; + case '^': + case '|': + /* 1/12th and 1/16th em respectively - ignore this and simply output a space */ + anc = ++ptr; + break; + default: + ptr++; + c = *(ptr); + *(ptr) = '\0'; + parser_append_given_text (parser, anc); + *(ptr) = c; - g_free (str); + anc++; + break; + } + } + else if ((make_links) && (*ptr == '(')) { + gchar *space_pos; + gchar *tmp_cur; + gchar *url; + gchar c; + + space_pos = ptr; + + while (space_pos != anc && *(space_pos - 1) != ' ') { + space_pos--; + } + + if (space_pos != ptr && + g_ascii_isdigit(*(ptr+1)) && + *(ptr+2) == ')') { + + ptr+=3; + + parser_ensure_P (parser); + + tmp_cur = ptr; + ptr = space_pos; + + c = (*ptr); + *ptr = '\0'; + parser_append_given_text (parser, anc); + *ptr = c; + anc = ptr; + + ptr = tmp_cur; + + c = *(ptr); + *(ptr) = '\0'; + url = g_strdup_printf ("man:%s", anc); + + parser->ins = parser_append_node (parser, "UR"); - parser->cur++; - parser->anc = parser->cur; - break; - case 'e': - parser->cur++; - parser->anc = parser->cur; - parser_append_given_text (parser, "\\"); - break; - case '&': - parser->cur++; - parser->anc = parser->cur; - break; - case 's': - /* this handles (actually ignores) the troff macros \s[+-][0-9] */ - parser->cur++; - if (*(parser->cur) == '+' || *(parser->cur) == '-') { - parser->cur++; - /* can I replace with isdigit() and add #include <ctype.h> */ - if (*(parser->cur) >= 0x30 && *(parser->cur) <= 0x39) { - parser->cur++; - } - } else if (*(parser->cur) >= 0x30 && *(parser->cur) <= 0x39) { - parser->cur++; - } - parser->anc = parser->cur; - break; - case '"': - /* Marks comments till end of line. so we can ignore it. */ - parser_read_until (parser, '\n'); - parser->anc = parser->cur; - break; - default: - parser->cur++; - - c = *(parser->cur); - *(parser->cur) = '\0'; - str = g_strdup (parser->anc); - *(parser->cur) = c; + parser->ins = parser_append_node (parser, "URI"); + parser_append_given_text (parser, url); + parser->ins = parser->ins->parent; + + parser_append_given_text (parser, anc); + parser->ins = parser->ins->parent; + + *(ptr) = c; + anc = ptr; + + g_free (url); - parser->anc++; - parser_append_text (parser); + } else { + ptr++; + } + } + else { + ptr++; + } - g_warning ("No rule matching the inline tag '%s' " - "(assuming escaped text)\n", str); + } // end while - g_free (str); - break; - } + c = *(ptr); + *(ptr) = '\0'; + parser_append_given_text (parser, anc); + *(ptr) = c; + + g_free (baseptr); } static xmlNodePtr @@ -936,9 +1565,22 @@ static xmlNodePtr parser_append_node (YelpManParser *parser, gchar *name) { - xmlNodePtr node; + if (!name) + return NULL; + + return xmlNewChild (parser->ins, NULL, BAD_CAST name, NULL); +} +static xmlNodePtr +parser_append_node_attr (YelpManParser *parser, + gchar *name, + gchar *attr, + gchar *value) +{ + xmlNodePtr node = NULL; + node = xmlNewChild (parser->ins, NULL, BAD_CAST name, NULL); + xmlNewProp (node, BAD_CAST attr, BAD_CAST value); return node; } @@ -1059,7 +1701,7 @@ parser_parse_table (YelpManParser *parser) if (*(parser->buffer + 1) == 'T' && *(parser->buffer + 2) == 'E') { if (parser_stack_pop_node (parser, "TABLE") == NULL) - g_warning ("Found unexpected tag: 'TE'\n"); + d (g_warning ("Found unexpected tag: 'TE'\n")); else { parser->ins = table_start; @@ -1071,7 +1713,7 @@ parser_parse_table (YelpManParser *parser) && *(parser->buffer + 2) == 'H') { /* Do nothing */ empty_row = TRUE; - }else { + } else { parser_handle_linetag (parser); break; } @@ -1100,64 +1742,3 @@ parser_parse_table (YelpManParser *parser) } } } - -static void -parser_make_link (YelpManParser *parser) -{ - gchar *space_pos; - gchar *tmp_cur; - gchar *url; - gchar c; - - space_pos = parser->cur; - - while (space_pos != parser->anc - && *(space_pos - 1) != ' ') { - space_pos--; - } - - if (space_pos == parser->cur) { - parser->cur++; - return; - } - - /* Let's assume there are only 9 manual sections */ - parser->cur++; - - if (!g_ascii_isdigit (*(parser->cur))) - return; - - parser->cur++; - - if (*(parser->cur) != ')') - return; - - parser->cur++; - - tmp_cur = parser->cur; - parser->cur = space_pos; - - parser_ensure_P (parser); - - parser_append_text (parser); - - parser->cur = tmp_cur; - - c = *(parser->cur); - *(parser->cur) = '\0'; - - url = g_strdup_printf ("man:%s", parser->anc); - - *(parser->cur) = c; - - parser->ins = parser_append_node (parser, "UR"); - parser->ins = parser_append_node (parser, "URI"); - - parser_append_given_text (parser, url); - parser->ins = parser->ins->parent; - - parser_append_text (parser); - parser->ins = parser->ins->parent; - - g_free (url); -} diff --git a/stylesheets/man2html.xsl b/stylesheets/man2html.xsl index 71786c64..e422eca8 100644 --- a/stylesheets/man2html.xsl +++ b/stylesheets/man2html.xsl @@ -87,7 +87,10 @@ div[class~="SH"] { margin-left: 1.2em; } div[class~="SS"] { margin-left: 1.6em; } + span[class~="R"] { font-family: serif; } span[class~="Section"] { margin-left: 0.4em; } + + dd { padding-bottom: 10px; } </xsl:text> </xsl:template> @@ -130,8 +133,10 @@ <xsl:apply-templates/><br/> </xsl:template> -<!-- ignore anything in the Indent element for now --> +<!-- ignore anything in the Indent,Count,sp element for now --> <xsl:template match="Indent" /> +<xsl:template match="Count" /> +<xsl:template match="sp" /> <xsl:template match="B | fB"> <b><xsl:apply-templates/></b> @@ -145,6 +150,33 @@ <i><xsl:apply-templates/></i> </xsl:template> +<xsl:template match="R | fR"> + <span class="R"><xsl:apply-templates/></span> +</xsl:template> + +<xsl:template match="Verbatim"> + <pre> + <xsl:choose> + <xsl:when test="node()[1]/self::text()"> + <xsl:variable name="node" select="node()[1]"/> + <xsl:choose> + <xsl:when test="starts-with(string($node), '
')"> + <xsl:value-of select="substring-after(string($node), '
')"/> + <xsl:apply-templates select="node()[position() != 1]"/> + </xsl:when> + <xsl:otherwise> + <xsl:value-of select="string($node)"/> + <xsl:apply-templates select="node()[position() != 1]"/> + </xsl:otherwise> + </xsl:choose> + </xsl:when> + <xsl:otherwise> + <xsl:apply-templates /> + </xsl:otherwise> + </xsl:choose> + </pre> +</xsl:template> + <xsl:template match="IP"> <xsl:choose> <xsl:when test="preceding-sibling::*[1][self::IP]"/> @@ -158,7 +190,14 @@ <xsl:template mode="IP.mode" match="IP"> <dt> - <xsl:apply-templates select="Tag"/> + <xsl:choose> + <xsl:when test="Tag"> + <xsl:apply-templates select="Tag"/> + </xsl:when> + <xsl:otherwise> + <xsl:apply-templates/> + </xsl:otherwise> + </xsl:choose> </dt> <dd> <xsl:apply-templates select="Tag/following-sibling::node()"/> @@ -185,11 +224,13 @@ <xsl:choose> <xsl:when test="$nextSS"> <xsl:apply-templates - select="following-sibling::*[following-sibling::SS[1] = $nextSS]"/> + select="following-sibling::*[following-sibling::SS[1] = $nextSS and + following-sibling::SS[1]/@id = $nextSS/@id]"/> </xsl:when> <xsl:when test="$nextSH"> <xsl:apply-templates - select="following-sibling::*[following-sibling::SH[1] = $nextSH]"/> + select="following-sibling::*[following-sibling::SH[1] = $nextSH and + following-sibling::SH[1]/@id = $nextSH/@id]"/> </xsl:when> <xsl:otherwise> <xsl:apply-templates select="following-sibling::*"/> @@ -208,12 +249,14 @@ <xsl:choose> <xsl:when test="$nextSS"> <xsl:apply-templates - select="following-sibling::*[following-sibling::SS[1] = $nextSS[1]]"/> + select="following-sibling::*[following-sibling::SS[1] = $nextSS[1] and + following-sibling::SS[1]/@id = $nextSS[1]/@id]"/> <xsl:apply-templates select="$nextSS"/> </xsl:when> <xsl:when test="$nextSH"> <xsl:apply-templates - select="following-sibling::*[following-sibling::SH[1] = $nextSH]"/> + select="following-sibling::*[following-sibling::SH[1] = $nextSH and + following-sibling::SH[1]/@id = $nextSH/@id]"/> </xsl:when> <xsl:otherwise> <xsl:apply-templates select="following-sibling::*"/> @@ -254,6 +297,108 @@ <xsl:template match="URI"/> +<xsl:template match="UN"> + <a name="text()" id="text()"/> +</xsl:template> + +<!-- these are all for mdoc (BSD) man page support --> + +<!-- these are just printed out --> +<xsl:template match="An | Dv | Er | Ev | Ic | Li | St"> + <xsl:text> +</xsl:text> + <xsl:apply-templates/> +</xsl:template> + +<!-- these are italicized --> +<xsl:template match="Ad | Ar | Fa | Ot | Pa | Va | Vt"> + <i><xsl:apply-templates/></i> +</xsl:template> + +<!-- these are bold --> +<xsl:template match="Cd | Cm | Fd | Ic | Nm"> + <b><xsl:apply-templates/></b> +</xsl:template> + +<!-- Function call - TODO need to do the ( , ) here --> +<xsl:template match="Fn | Fo | Fc"> + <i><xsl:apply-templates/></i> +</xsl:template> + +<!-- Cross reference --> +<xsl:template match="Xr"> + <xsl:variable name="manpage" select="substring-before(string(.), ' ')"/> + <xsl:variable name="section" select="substring-before(substring-after(string(.), ' '), ' ')"/> + <xsl:variable name="extra" select="substring-after(substring-after(string(.), ' '), ' ')"/> + <a> + <xsl:attribute name="href"> + <xsl:text>man:</xsl:text> + <xsl:value-of select="$manpage"/> + <xsl:text>(</xsl:text> + <xsl:value-of select="$section"/> + <xsl:text>)</xsl:text> + </xsl:attribute> + <xsl:value-of select="$manpage"/> + <xsl:text>(</xsl:text> + <xsl:value-of select="$section"/> + <xsl:text>)</xsl:text> + </a> + <xsl:value-of select="$extra"/> +</xsl:template> + +<!-- Option --> +<xsl:template match="Op | Oo | Oc"> + <xsl:text> [</xsl:text> + <xsl:apply-templates/> + <xsl:text>]</xsl:text> +</xsl:template> + +<!-- Trade or type name (small Caps). --> +<xsl:template match="Tn"> + <xsl:variable name="txt" select="string(child::text())"/> + <xsl:text> </xsl:text> + <xsl:value-of select="translate($txt, 'abcdefghijklmnopqrstuvwxyz', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')"/> + <xsl:apply-templates select="*"/> +</xsl:template> + +<xsl:template match="Nd"> + <xsl:text> - </xsl:text> + <xsl:apply-templates /> +</xsl:template> + +<xsl:template match="Fl"> + <xsl:text>-</xsl:text> + <b><xsl:apply-templates select="child::text()"/></b> + <xsl:apply-templates select="*"/> +</xsl:template> + +<xsl:template match="Bl"> + <dl> + <xsl:for-each select="It"> + <xsl:choose> + <xsl:when test="ItTag"> + <dt><xsl:apply-templates select="ItTag"/></dt> + <dd> + <xsl:apply-templates select="ItTag/following-sibling::node()"/> + </dd> + </xsl:when> + <xsl:otherwise> + <dt> + <xsl:text>•</xsl:text> + </dt> + <dd> + <xsl:apply-templates /> + </dd> + </xsl:otherwise> + </xsl:choose> + </xsl:for-each> + </dl> +</xsl:template> + +<xsl:template match="ItTag"> + <xsl:apply-templates/> +</xsl:template> + <xsl:template match="*"> <xsl:message> <xsl:text>Unmatched element: </xsl:text> |