diff options
author | Bastien Nocera <hadess@hadess.net> | 2014-08-29 00:01:31 +0200 |
---|---|---|
committer | Bastien Nocera <hadess@hadess.net> | 2014-08-31 23:14:10 +0200 |
commit | 3e993a9e59a32f5ce29206c8e3b917ae3a2ca346 (patch) | |
tree | a0e409e19c8f2240bfd020e2aaf1301533cdc8ed | |
parent | ba23d6eb960c18e2e92bfe808db364be09b4480f (diff) | |
download | tracker-3e993a9e59a32f5ce29206c8e3b917ae3a2ca346.tar.gz |
tracker-extract: Try harder when getting EPub contents
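GMarkup is really not that good at parsing XML, so a parse error in one
content file of an EPub should not abort extraction of the remaining
files. Use a fresh parse context for each content file, log and clear
any error instead of bailing out, and populate the index with *some* data.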
https://bugzilla.gnome.org/show_bug.cgi?id=735645
-rw-r--r-- | src/tracker-extract/tracker-extract-epub.c | 16 |
1 files changed, 8 insertions, 8 deletions
```diff
diff --git a/src/tracker-extract/tracker-extract-epub.c b/src/tracker-extract/tracker-extract-epub.c
index 40d9fdcb9..64b0859f3 100644
--- a/src/tracker-extract/tracker-extract-epub.c
+++ b/src/tracker-extract/tracker-extract-epub.c
@@ -551,7 +551,6 @@ extract_opf_contents (const gchar *uri,
                       GList *content_files)
 {
 	OPFContentData content_data = { 0 };
-	GMarkupParseContext *context;
 	TrackerConfig *config;
 	GError *error = NULL;
 	GList *l;
@@ -562,7 +561,6 @@ extract_opf_contents (const gchar *uri,
 	};
 
 	config = tracker_main_get_config ();
-	context = g_markup_parse_context_new (&xml_parser, 0, &content_data, NULL);
 
 	content_data.contents = g_string_new ("");
 	content_data.limit = (gsize) tracker_config_get_max_bytes (config);
@@ -570,28 +568,30 @@ extract_opf_contents (const gchar *uri,
 	g_debug ("Extracting up to %" G_GSIZE_FORMAT " bytes of content", content_data.limit);
 
 	for (l = content_files; l; l = l->next) {
+		GMarkupParseContext *context;
 		gchar *path;
 
+		context = g_markup_parse_context_new (&xml_parser, 0, &content_data, NULL);
+
 		/* Page file is relative to OPF file location */
 		path = g_build_filename (content_prefix, l->data, NULL);
 		tracker_gsf_parse_xml_in_zip (uri, path, context, &error);
 
 		if (error) {
-			g_warning ("Error extracting EPUB contents (%s): %s\n",
-			           path, error->message);
-			g_free (path);
-			break;
+			g_warning ("Error extracting EPUB contents (%s): %s",
+			           path, error->message);
+			g_clear_error (&error);
 		}
 		g_free (path);
 
+		g_markup_parse_context_free (context);
+
 		if (content_data.limit <= 0) {
 			/* Reached plain text extraction limit */
 			break;
 		}
 	}
 
-	g_markup_parse_context_free (context);
-
 	return g_string_free (content_data.contents, FALSE);
 }
 
```