diff options
author | Rupert Swarbrick <rswarbrick@gmail.com> | 2010-12-21 23:59:43 +0000 |
---|---|---|
committer | Shaun McCance <shaunm@gnome.org> | 2011-01-10 09:33:39 -0500 |
commit | 520e28b8171cfa2ef954347d6a0726a116b66f03 (patch) | |
tree | c8549c07596ccd29cac4d443fc570da984d5d1a9 | |
parent | 8aad9a6c291dc12fe56d6e75d5fad19efa261132 (diff) | |
download | yelp-520e28b8171cfa2ef954347d6a0726a116b66f03.tar.gz |
Make links from strings of the form "blah(2)" in man pages.
Note that the match requires that there is no space before the "(2)":
strings with a space there tend to be things like "Copyright (C)" rather than links.
-rw-r--r-- | libyelp/yelp-man-parser.c | 322 | ||||
-rw-r--r-- | stylesheets/man2html.xsl.in | 12 |
2 files changed, 333 insertions, 1 deletions
diff --git a/libyelp/yelp-man-parser.c b/libyelp/yelp-man-parser.c index 501b9183..2e381f85 100644 --- a/libyelp/yelp-man-parser.c +++ b/libyelp/yelp-man-parser.c @@ -27,6 +27,7 @@ #include <glib.h> #include <glib/gi18n.h> #include <libxml/tree.h> +#include <libxml/xpath.h> #include <gio/gio.h> #include <gio/gunixinputstream.h> #include <string.h> @@ -191,6 +192,31 @@ static void cleanup_parsed_page (YelpManParser *parser); static gboolean parse_last_line (YelpManParser *parser, gchar* line); static void unicode_strstrip (gchar *str); +/* + A link_inserter takes + (1) an array of offsets for the different spans within the string + (2) the match info from the regex match + + It's then responsible for mangling the XML tree to insert the actual + link. Finally, it should return the offset into the string of the + end of what it's just dealt with. If necessary, it should also fix + up offsets to point correctly at the last node inserted. + */ +typedef struct { + gsize start, end; + xmlNodePtr elt; +} offset_elt_pair; + +typedef gsize (*link_inserter)(offset_elt_pair *, + const GMatchInfo *); + +static void fixup_links (YelpManParser *parser, + const GRegex *matcher, + link_inserter inserter); + +static gsize man_link_inserter (offset_elt_pair *offsets, + const GMatchInfo *match_info); + /******************************************************************************/ /* Translations for the 'C' command. This is indeed hackish, but the * -Tutf8 output doesn't seem to give include files so we can do this @@ -1065,6 +1091,7 @@ cleanup_parsed_page (YelpManParser *parser) * tag) */ gchar *lastline; + GRegex *regex; if (xmlChildElementCount (parser->section_node) == 1) { lastline = (gchar *)xmlNodeGetContent (parser->section_node); @@ -1087,6 +1114,17 @@ cleanup_parsed_page (YelpManParser *parser) xmlFree (lastline); } + + /* Next job: Go through and stick the links in. 
Text that looks + * like man(1) should be converted to a link to man:man(1) and + * urls should also be linkified. + */ + regex = g_regex_new ("([a-zA-Z0-9\\-_.]+)" + "\\(([a-zA-Z0-9]{1,2})\\)", + 0, 0, NULL); + g_return_if_fail (regex); + fixup_links (parser, regex, man_link_inserter); + g_regex_unref (regex); } static gchar * @@ -1198,3 +1236,287 @@ unicode_strstrip (gchar *str) g_memmove (str, start, end - start); *(str + (end - start)) = '\0'; } + +static void +sheet_fixup_links (xmlNodePtr sheet, + const GRegex *regex, link_inserter inserter) +{ + /* + This works as follows: grab (<span>) nodes from a sheet in + order and stick their contents into a string. Since a sheet + won't be ludicrously long, we can just grab everything and then + work over it, but we need to keep track of which node points at + which bit of the string so we can call inserter helpfully. To do + so, use byte offsets, since that seems less likely to go + horribly wrong! + */ + GString *accumulator = g_string_new (""); + xmlNodePtr span; + xmlChar *tmp; + gsize offset = 0; + gsize len; + offset_elt_pair pair; + GMatchInfo *match_info; + + /* Make pairs zero-terminated so that code can iterate through it + * looking for something with elt = NULL. 
*/ + GArray *pairs = g_array_new (TRUE, FALSE, + sizeof (offset_elt_pair)); + + g_return_if_fail (regex); + g_return_if_fail (inserter); + g_return_if_fail (sheet); + + for (span = sheet->children; span != NULL; span = span->next) { + if (span->type != XML_ELEMENT_NODE) continue; + + if (strcmp ((const char*) span->name, "span") != 0) { + + if ((strcmp ((const char*) span->name, "br") == 0) || + (strcmp ((const char*) span->name, "a") == 0)) + continue; + + g_warning ("Expected all child elements to be " + "<span>, <br> or <a>, but " + "have found a <%s>.", + (gchar *) span->name); + continue; + } + + tmp = xmlNodeGetContent (span); + g_string_append (accumulator, (gchar *) tmp); + len = strlen ((const char*) tmp); + + pair.start = offset; + pair.end = offset + len; + pair.elt = span; + + g_array_append_val (pairs, pair); + + offset += len; + xmlFree (tmp); + } + + /* We've got the data. Now try to match the regex against it as + * many times as possible + */ + offset = 0; + g_regex_match_full (regex, accumulator->str, + -1, offset, 0, &match_info, NULL); + while (g_match_info_matches (match_info)) { + offset = inserter ((offset_elt_pair *)pairs->data, + match_info); + + g_match_info_free (match_info); + + g_regex_match_full (regex, accumulator->str, + -1, offset, 0, &match_info, NULL); + } + + g_string_free (accumulator, TRUE); + g_array_unref (pairs); +} + +static void +fixup_links (YelpManParser *parser, + const GRegex *regex, link_inserter inserter) +{ + /* Iterate over all the <sheet>'s in the xml document */ + xmlXPathContextPtr context; + xmlXPathObjectPtr path_obj; + xmlNodeSetPtr nodeset; + guint i; + + context = xmlXPathNewContext (parser->doc); + g_return_if_fail (context); + + path_obj = xmlXPathEvalExpression (BAD_CAST "//sheet", context); + g_return_if_fail (path_obj); + + nodeset = path_obj->nodesetval; + g_return_if_fail (nodeset); + + for (i = 0; i < nodeset->nodeNr; ++i) { + sheet_fixup_links (nodeset->nodeTab[i], regex, inserter); + } + + 
xmlXPathFreeObject (path_obj); + xmlXPathFreeContext (context); +} + +/* + This inserts new_child under parent. If older_sibling is non-NULL, + we stick it immediately after it. Otherwise, insert as the first + child of the parent. + + Returns the inserted child. + */ +static xmlNodePtr +insert_child_after (xmlNodePtr parent, xmlNodePtr older_sibling, + xmlNodePtr new_child) +{ + g_return_val_if_fail (parent && new_child, new_child); + + if (older_sibling) { + xmlAddNextSibling (older_sibling, new_child); + } + else if (parent->children == NULL) { + xmlAddChild (parent, new_child); + } + else { + xmlAddPrevSibling (parent->children, new_child); + } + + return new_child; +} + +static void +copy_prop (xmlNodePtr to, xmlNodePtr from, const xmlChar *name) +{ + xmlChar *prop = xmlGetProp (from, name); + g_return_if_fail (prop); + xmlSetProp (to, name, prop); + xmlFree (prop); +} + +static gsize +do_node_replacement (xmlNodePtr anchor_node, + offset_elt_pair *offsets, + gsize startpos, gsize endpos) +{ + xmlNodePtr node, sibling_before; + gchar *gtmp; + xmlChar *xtmp, *xshort; + gsize look_from; + + /* Find the first element by searching through offsets. I suppose + * a binary search would be cleverer, but I doubt that this will + * take significant amounts of time. + * + * We should never fall off the end, but (just in case) the GArray + * that holds the offsets is zero-terminated and elt should never + * be NULL so we can stop if necessary + */ + while ((offsets->end <= startpos) && offsets->elt) { + offsets++; + } + g_return_val_if_fail (offsets->elt, endpos); + + /* xtmp is NULL by default, but we do this here so that if we read + * the node in the if block below, we don't have to do it a second + * time. + */ + xtmp = NULL; + sibling_before = offsets->elt->prev; + look_from = startpos; + + /* Maybe there's text in the relevant span before the start of + * the stuff we want to replace with a link. 
+ */ + if (startpos > offsets->start) { + node = xmlNewNode (NULL, BAD_CAST "span"); + copy_prop (node, offsets->elt, BAD_CAST "class"); + + xtmp = xmlNodeGetContent (offsets->elt); + gtmp = g_strndup ((const gchar*)xtmp, startpos - offsets->start); + xmlNodeAddContent (node, BAD_CAST gtmp); + g_free (gtmp); + + sibling_before = insert_child_after (offsets->elt->parent, + sibling_before, node); + } + + insert_child_after (offsets->elt->parent, + sibling_before, anchor_node); + + /* The main loop. Here we work over each span that overlaps with + * the link we're adding. We add a similar span as a child of the + * anchor node and then delete the existing one. */ + while (look_from < endpos) { + if (!xtmp) xtmp = xmlNodeGetContent (offsets->elt); + + if (endpos < offsets->end) { + xshort = BAD_CAST g_strndup ((const gchar*)xtmp, + endpos - offsets->start); + + node = xmlNewChild (anchor_node, NULL, BAD_CAST "span", + xshort + (look_from-offsets->start)); + copy_prop (node, offsets->elt, BAD_CAST "class"); + + node = xmlNewNode (NULL, BAD_CAST "span"); + xmlNodeAddContent (node, + xtmp + (endpos - offsets->start)); + copy_prop (node, offsets->elt, BAD_CAST "class"); + xmlAddNextSibling (anchor_node, node); + + xmlFree (xshort); + + xmlUnlinkNode (offsets->elt); + xmlFreeNode (offsets->elt); + xmlFree (xtmp); + xtmp = NULL; + + offsets->start = endpos; + offsets->elt = node; + } + else { + node = xmlNewChild (anchor_node, NULL, BAD_CAST "span", + xtmp + (look_from - offsets->start)); + copy_prop (node, offsets->elt, BAD_CAST "class"); + + xmlUnlinkNode (offsets->elt); + xmlFreeNode (offsets->elt); + xmlFree (xtmp); + xtmp = NULL; + offsets++; + } + + if (!offsets->elt) { + /* We got to the end of a sheet and of the stuff we're + * doing at the same time + */ + return endpos; + } + + look_from = offsets->start; + } + + return offsets->start; +} + +static gsize +do_link_insertion (const gchar *url, + offset_elt_pair *offsets, + gsize startpos, gsize endpos) +{ + 
xmlNodePtr anchor_node = xmlNewNode (NULL, BAD_CAST "a"); + + xmlNewProp (anchor_node, BAD_CAST "href", BAD_CAST url); + + return do_node_replacement (anchor_node, offsets, + startpos, endpos); +} + +static gsize +man_link_inserter (offset_elt_pair *offsets, + const GMatchInfo *match_info) +{ + gchar *name, *section; + gchar url[1024]; + + gint startpos, endpos; + + g_match_info_fetch_pos (match_info, 0, &startpos, &endpos); + + name = g_match_info_fetch (match_info, 1); + section = g_match_info_fetch (match_info, 2); + + g_return_val_if_fail (name && section, endpos); + + snprintf (url, 1024, "man:%s(%s)", name, section); + + g_free (name); + g_free (section); + + return do_link_insertion (url, offsets, startpos, endpos); +} diff --git a/stylesheets/man2html.xsl.in b/stylesheets/man2html.xsl.in index cc97e8aa..8785195b 100644 --- a/stylesheets/man2html.xsl.in +++ b/stylesheets/man2html.xsl.in @@ -74,7 +74,7 @@ margin-bottom: 0px; margin-top: <xsl:value-of select="@jump"/>em; </xsl:attribute> - <p><xsl:apply-templates select="span|br"/></p> + <p><xsl:apply-templates select="span|br|a"/></p> </xsl:element> </xsl:template> @@ -97,4 +97,14 @@ </xsl:element> </xsl:template> +<xsl:template match="a"> + <xsl:element name="a"> + <xsl:attribute name="href"> + <xsl:value-of select="@href"/> + </xsl:attribute> + + <xsl:apply-templates select="span|br"/> + </xsl:element> +</xsl:template> + </xsl:stylesheet> |