diff options
author | Rupert Swarbrick <rswarbrick@gmail.com> | 2010-12-16 00:46:23 +0000 |
---|---|---|
committer | Shaun McCance <shaunm@gnome.org> | 2010-12-20 10:45:24 -0500 |
commit | c02ee6105f5fce508bf74935c538058a6e417e7e (patch) | |
tree | 840841d9f9a41562cab2cbec56bef583d3dd64ce /libyelp/yelp-man-parser.c | |
parent | 3c258b109b4bdba894df608f5b1e2d2896c0080a (diff) | |
download | yelp-c02ee6105f5fce508bf74935c538058a6e417e7e.tar.gz |
Treat 'C' and 'N' lines more carefully. Yay! Russian works properly at last!
Diffstat (limited to 'libyelp/yelp-man-parser.c')
-rw-r--r-- | libyelp/yelp-man-parser.c | 121 |
1 files changed, 80 insertions, 41 deletions
diff --git a/libyelp/yelp-man-parser.c b/libyelp/yelp-man-parser.c index 350dc814..a821d3a8 100644 --- a/libyelp/yelp-man-parser.c +++ b/libyelp/yelp-man-parser.c @@ -182,6 +182,10 @@ static void new_sheet (YelpManParser *parser); static void register_title (YelpManParser *parser, const gchar* name, const gchar* section); static void right_truncate_common (gchar *dst, const gchar *src); +static gboolean cheeky_call_parse_line (YelpManParser *parser, + GError **error, + gchar first_char, + const gchar *text); /******************************************************************************/ /* Translations for the 'C' command. This is indeed hackish, but the @@ -607,7 +611,13 @@ parse_text (YelpManParser *parser, GError **error) gchar *text, *section, *tmp; const gchar *acc; - g_assert (parser->buffer[0] == 't'); + /* + Sneakily, this might get called with something other than t + starting the buffer: see parse_C and parse_N. + */ + if (parser->buffer[0] == 't') { + parser->N_count = 0; + } if (parser->state == START) { /* This should be the 'Title String(1)' line. It might come in @@ -669,29 +679,6 @@ parse_text (YelpManParser *parser, GError **error) } } -/* - w is a sort of prefix argument. It indicates a space, so we register - that here, then call parser_parse_line again on the rest of the - string to deal with that. - */ -static gboolean -parse_w (YelpManParser *parser, GError **error) -{ - gboolean ret; - - if (parser->state != START) { - g_string_append_c (parser->accumulator, ' '); - } - - parser->buffer++; - parser->last_char_was_space = TRUE; - - ret = parser_parse_line (parser, error); - - parser->buffer--; - return ret; -} - static gboolean parse_body_text (YelpManParser *parser, GError **error) { @@ -708,7 +695,8 @@ parse_body_text (YelpManParser *parser, GError **error) It's possible to have spaces in section titles, so we carry on accumulating the section title until the next newline. */ - if (parser->section_state != SECTION_TITLE && parser->hpos == 0) { + if (parser->section_state == SECTION_BODY && + (!parser->section_node || (parser->hpos == 0))) { g_string_truncate (parser->accumulator, 0); /* End the current sheet & section */ parser->section_state = SECTION_TITLE; @@ -726,12 +714,35 @@ parse_body_text (YelpManParser *parser, GError **error) /* Move hpos forward per char */ parser->hpos += strlen (parser->buffer+1) * parser->char_width; + parser->last_char_was_space = FALSE; - parser->N_count = 0; return TRUE; } +/* + w is a sort of prefix argument. It indicates a space, so we register + that here, then call parser_parse_line again on the rest of the + string to deal with that. + */ +static gboolean +parse_w (YelpManParser *parser, GError **error) +{ + gboolean ret; + + if (parser->state != START) { + g_string_append_c (parser->accumulator, ' '); + } + + parser->buffer++; + parser->last_char_was_space = TRUE; + + ret = parser_parse_line (parser, error); + + parser->buffer--; + return ret; +} + static gboolean parse_n (YelpManParser *parser, GError **error) { @@ -767,12 +778,13 @@ parse_n (YelpManParser *parser, GError **error) parser->accumulator->str); g_string_truncate (parser->accumulator, 0); parser->state = BODY; + parser->section_state = SECTION_BODY; return TRUE; } /* parser->state == BODY */ - if (parser->section_state == SECTION_TITLE) { + g_strchomp (parser->accumulator->str); xmlNewTextChild (parser->section_node, NULL, BAD_CAST "title", parser->accumulator->str); @@ -824,6 +836,8 @@ static gboolean parse_N (YelpManParser *parser, GError **error) { gint n; + gchar tmp[2]; + if (SSCANF ("N%i", 1, &n)) { RAISE_PARSE_ERROR ("Strange format for N line: %s"); } @@ -840,13 +854,15 @@ parse_N (YelpManParser *parser, GError **error) if (n < 0) { append_nbsps (parser, -n); parser->N_count += -n; - } - else { - g_string_append_c (parser->accumulator, (gchar)n); - parser->N_count++; + return TRUE; } - return TRUE; + parser->N_count++; + + tmp[0] = (gchar)n; + tmp[1] = '\0'; + + return cheeky_call_parse_line (parser, error, 'N', tmp); } static void @@ -887,17 +903,13 @@ parse_C (YelpManParser *parser, GError **error) code = 65533; /* Unicode replacement character */ } - deal_with_newlines (parser); - parser->last_char_was_space = FALSE; - /* Output buffer must be length >= 6. 16 >= 6, so we're ok. */ len = g_unichar_to_utf8 (code, name); name[len] = '\0'; - g_string_append (parser->accumulator, name); parser->N_count++; - return TRUE; + return cheeky_call_parse_line (parser, error, 'C', name); } static void @@ -967,10 +979,10 @@ parse_p (YelpManParser *parser, GError **error) static void new_sheet (YelpManParser *parser) { - /* We don't need to worry about finishing the current sheet, - since the accumulator etc. get cleared on newlines and we - know we're at the start of a line. - */ + /* We don't need to worry about finishing the current sheet, + since the accumulator etc. get cleared on newlines and we + know we're at the start of a line. + */ parser->sheet_node = xmlAddChild (parser->section_node, xmlNewNode (NULL, BAD_CAST "sheet")); @@ -1007,3 +1019,30 @@ right_truncate_common (gchar *dst, const gchar *src) src--; } } + +static gboolean +cheeky_call_parse_line (YelpManParser *parser, GError **error, + gchar first_char, const gchar* text) +{ + /* Do a cunning trick. There's all sorts of code that parse_text + * does, which we don't want to duplicate in parse_N and + * parse_C. So feed a buffer back to parse_text. Tada! Start it + * with "C" or "N" rather than "t" so clever stuff in parse_text + * can tell the difference. + */ + gchar *tmp; + gboolean ret; + guint len = strlen (text); + + tmp = parser->buffer; + parser->buffer = g_new (gchar, 2 + len); + parser->buffer[0] = first_char; + strncpy (parser->buffer + 1, text, len + 1); + + ret = parse_text (parser, error); + + g_free (parser->buffer); + parser->buffer = tmp; + + return ret; +} |