summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Bastien Nocera <hadess@hadess.net> 2014-08-29 00:01:31 +0200
committer Bastien Nocera <hadess@hadess.net> 2014-08-31 23:14:10 +0200
commit 3e993a9e59a32f5ce29206c8e3b917ae3a2ca346 (patch)
tree a0e409e19c8f2240bfd020e2aaf1301533cdc8ed
parent ba23d6eb960c18e2e92bfe808db364be09b4480f (diff)
download tracker-3e993a9e59a32f5ce29206c8e3b917ae3a2ca346.tar.gz
tracker-extract: Try harder when getting EPub contents
GMarkup is really not that good at parsing XML, so we need to try harder to ignore errors when parsing the contents of EPub files, and populate the index with *some* data.

https://bugzilla.gnome.org/show_bug.cgi?id=735645
-rw-r--r-- src/tracker-extract/tracker-extract-epub.c 16
1 file changed, 8 insertions, 8 deletions
diff --git a/src/tracker-extract/tracker-extract-epub.c b/src/tracker-extract/tracker-extract-epub.c
index 40d9fdcb9..64b0859f3 100644
--- a/src/tracker-extract/tracker-extract-epub.c
+++ b/src/tracker-extract/tracker-extract-epub.c
@@ -551,7 +551,6 @@ extract_opf_contents (const gchar *uri,
GList *content_files)
{
OPFContentData content_data = { 0 };
- GMarkupParseContext *context;
TrackerConfig *config;
GError *error = NULL;
GList *l;
@@ -562,7 +561,6 @@ extract_opf_contents (const gchar *uri,
};
config = tracker_main_get_config ();
- context = g_markup_parse_context_new (&xml_parser, 0, &content_data, NULL);
content_data.contents = g_string_new ("");
content_data.limit = (gsize) tracker_config_get_max_bytes (config);
@@ -570,28 +568,30 @@ extract_opf_contents (const gchar *uri,
g_debug ("Extracting up to %" G_GSIZE_FORMAT " bytes of content", content_data.limit);
for (l = content_files; l; l = l->next) {
+ GMarkupParseContext *context;
gchar *path;
+ context = g_markup_parse_context_new (&xml_parser, 0, &content_data, NULL);
+
/* Page file is relative to OPF file location */
path = g_build_filename (content_prefix, l->data, NULL);
tracker_gsf_parse_xml_in_zip (uri, path, context, &error);
if (error) {
- g_warning ("Error extracting EPUB contents (%s): %s\n",
- path, error->message);
- g_free (path);
- break;
+ g_warning ("Error extracting EPUB contents (%s): %s",
+ path, error->message);
+ g_clear_error (&error);
}
g_free (path);
+ g_markup_parse_context_free (context);
+
if (content_data.limit <= 0) {
/* Reached plain text extraction limit */
break;
}
}
- g_markup_parse_context_free (context);
-
return g_string_free (content_data.contents, FALSE);
}