summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAleksander Morgado <aleksander@lanedo.com>2010-11-24 13:07:27 +0100
committerAleksander Morgado <aleksander@lanedo.com>2010-11-24 13:23:39 +0100
commitb636b3c7b2ef656f5cb5b4ae065eb053adea7c7a (patch)
treeff709834dd577713dfd2d1b8bba9d2bbf87a7979
parentd633d02dc9bfa35a4d7abc5f82fb72bcd12c9242 (diff)
downloadtracker-b636b3c7b2ef656f5cb5b4ae065eb053adea7c7a.tar.gz
tracker-extract, msoffice-xml: If max bytes reached don't even start parser
-rw-r--r--src/tracker-extract/tracker-extract-msoffice-xml.c47
1 files changed, 21 insertions, 26 deletions
diff --git a/src/tracker-extract/tracker-extract-msoffice-xml.c b/src/tracker-extract/tracker-extract-msoffice-xml.c
index 7746aec72..79d275564 100644
--- a/src/tracker-extract/tracker-extract-msoffice-xml.c
+++ b/src/tracker-extract/tracker-extract-msoffice-xml.c
@@ -606,13 +606,11 @@ msoffice_xml_content_types_parse_start (GMarkupParseContext *context,
gpointer user_data,
GError **error)
{
- MsOfficeXMLParserInfo *info;
+ MsOfficeXMLParserInfo *info = user_data;
const gchar *part_name = NULL;
const gchar *content_type = NULL;
gint i;
- info = user_data;
-
if (g_ascii_strcasecmp (element_name, "Override") != 0) {
return;
}
@@ -641,32 +639,29 @@ msoffice_xml_content_types_parse_start (GMarkupParseContext *context,
return;
}
- /* Content part? */
- switch (info->file_type) {
- case FILE_TYPE_DOCX:
- if (g_ascii_strcasecmp (content_type, "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml") == 0) {
- xml_read (info, part_name + 1, MS_OFFICE_XML_TAG_DOCUMENT_TEXT_DATA);
- }
- break;
-
- case FILE_TYPE_PPTX:
- case FILE_TYPE_PPSX:
- if ((g_ascii_strcasecmp (content_type, "application/vnd.openxmlformats-officedocument.presentationml.slide+xml") == 0) ||
- (g_ascii_strcasecmp (content_type, "application/vnd.openxmlformats-officedocument.drawingml.diagramData+xml") == 0)) {
- xml_read (info, part_name + 1, MS_OFFICE_XML_TAG_DOCUMENT_TEXT_DATA);
- }
- break;
+ /* If the file type is unknown, skip trying to extract content */
+ if (info->file_type == FILE_TYPE_INVALID) {
+ g_message ("Invalid file type, not extracting content from '%s'",
+ part_name + 1);
+ return;
+ }
- case FILE_TYPE_XLSX:
- if ((g_ascii_strcasecmp (content_type, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml") == 0) ||
- (g_ascii_strcasecmp (content_type, "application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml") == 0)) {
+ /* Content part? */
+ if ((info->file_type == FILE_TYPE_DOCX &&
+ g_ascii_strcasecmp (content_type, "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml") == 0) ||
+ ((info->file_type == FILE_TYPE_PPTX || info->file_type == FILE_TYPE_PPSX) &&
+ (g_ascii_strcasecmp (content_type, "application/vnd.openxmlformats-officedocument.presentationml.slide+xml") == 0 ||
+ g_ascii_strcasecmp (content_type, "application/vnd.openxmlformats-officedocument.drawingml.diagramData+xml") == 0)) ||
+ (info->file_type == FILE_TYPE_XLSX &&
+ (g_ascii_strcasecmp (content_type, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml") == 0 ||
+ g_ascii_strcasecmp (content_type, "application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml") == 0))) {
+ /* If reached max bytes to extract, don't event start parsing the file... just return */
+ if (info->bytes_pending == 0) {
+ g_debug ("Skipping '%s' as already reached max bytes to extract",
+ part_name + 1);
+ } else {
xml_read (info, part_name + 1, MS_OFFICE_XML_TAG_DOCUMENT_TEXT_DATA);
}
- break;
-
- case FILE_TYPE_INVALID:
- g_message ("Invalid file type:'%d'", info->file_type);
- break;
}
}