tracker-extract: Try harder when getting EPub contents

GMarkup is really not that good at parsing XML, so we need to try harder to ignore errors when parsing the contents of EPub files, and still populate the index with *some* data. See https://bugzilla.gnome.org/show_bug.cgi?id=735645
author: Bastien Nocera <hadess@hadess.net> 2014-08-29 00:01:31 +0200
committer: Bastien Nocera <hadess@hadess.net> 2014-08-31 23:14:10 +0200
commit: 3e993a9e59a32f5ce29206c8e3b917ae3a2ca346 (patch)
tree: a0e409e19c8f2240bfd020e2aaf1301533cdc8ed
parent: ba23d6eb960c18e2e92bfe808db364be09b4480f (diff)
download: tracker-3e993a9e59a32f5ce29206c8e3b917ae3a2ca346.tar.gz
1 file changed, 8 insertions, 8 deletions
diff --git a/src/tracker-extract/tracker-extract-epub.c b/src/tracker-extract/tracker-extract-epub.c
index 40d9fdcb9..64b0859f3 100644
--- a/src/tracker-extract/tracker-extract-epub.c
+++ b/src/tracker-extract/tracker-extract-epub.c
@@ -551,7 +551,6 @@ extract_opf_contents (const gchar *uri,
                       GList       *content_files)
 {
 	OPFContentData content_data = { 0 };
-	GMarkupParseContext *context;
 	TrackerConfig *config;
 	GError *error = NULL;
 	GList *l;
@@ -562,7 +561,6 @@ extract_opf_contents (const gchar *uri,
 	};
 
 	config = tracker_main_get_config ();
-	context = g_markup_parse_context_new (&xml_parser, 0, &content_data, NULL);
 
 	content_data.contents = g_string_new ("");
 	content_data.limit = (gsize) tracker_config_get_max_bytes (config);
@@ -570,28 +568,30 @@ extract_opf_contents (const gchar *uri,
 	g_debug ("Extracting up to %" G_GSIZE_FORMAT " bytes of content", content_data.limit);
 
 	for (l = content_files; l; l = l->next) {
+		GMarkupParseContext *context;
 		gchar *path;
 
+		context = g_markup_parse_context_new (&xml_parser, 0, &content_data, NULL);
+
 		/* Page file is relative to OPF file location */
 		path = g_build_filename (content_prefix, l->data, NULL);
 		tracker_gsf_parse_xml_in_zip (uri, path, context, &error);
 
 		if (error) {
-			g_warning ("Error extracting EPUB contents (%s): %s\n",
-			           path, error->message);
-			g_free (path);
-			break;
+			g_warning ("Error extracting EPUB contents (%s): %s",
+				   path, error->message);
+			g_clear_error (&error);
 		}
 		g_free (path);
 
+		g_markup_parse_context_free (context);
+
 		if (content_data.limit <= 0) {
 			/* Reached plain text extraction limit */
 			break;
 		}
 	}
 
-	g_markup_parse_context_free (context);
-
 	return g_string_free (content_data.contents, FALSE);
 }
author	Bastien Nocera <hadess@hadess.net>	2014-08-29 00:01:31 +0200
committer	Bastien Nocera <hadess@hadess.net>	2014-08-31 23:14:10 +0200
commit	3e993a9e59a32f5ce29206c8e3b917ae3a2ca346 (patch)
tree	a0e409e19c8f2240bfd020e2aaf1301533cdc8ed
parent	ba23d6eb960c18e2e92bfe808db364be09b4480f (diff)
download	tracker-3e993a9e59a32f5ce29206c8e3b917ae3a2ca346.tar.gz