diff options
author | Aleksander Morgado <aleksander@lanedo.com> | 2010-11-24 13:07:27 +0100 |
---|---|---|
committer | Aleksander Morgado <aleksander@lanedo.com> | 2010-11-24 13:23:39 +0100 |
commit | b636b3c7b2ef656f5cb5b4ae065eb053adea7c7a (patch) | |
tree | ff709834dd577713dfd2d1b8bba9d2bbf87a7979 | |
parent | d633d02dc9bfa35a4d7abc5f82fb72bcd12c9242 (diff) | |
download | tracker-b636b3c7b2ef656f5cb5b4ae065eb053adea7c7a.tar.gz |
tracker-extract, msoffice-xml: If max bytes reached don't even start parser
-rw-r--r-- | src/tracker-extract/tracker-extract-msoffice-xml.c | 47 |
1 file changed, 21 insertions, 26 deletions
diff --git a/src/tracker-extract/tracker-extract-msoffice-xml.c b/src/tracker-extract/tracker-extract-msoffice-xml.c index 7746aec72..79d275564 100644 --- a/src/tracker-extract/tracker-extract-msoffice-xml.c +++ b/src/tracker-extract/tracker-extract-msoffice-xml.c @@ -606,13 +606,11 @@ msoffice_xml_content_types_parse_start (GMarkupParseContext *context, gpointer user_data, GError **error) { - MsOfficeXMLParserInfo *info; + MsOfficeXMLParserInfo *info = user_data; const gchar *part_name = NULL; const gchar *content_type = NULL; gint i; - info = user_data; - if (g_ascii_strcasecmp (element_name, "Override") != 0) { return; } @@ -641,32 +639,29 @@ msoffice_xml_content_types_parse_start (GMarkupParseContext *context, return; } - /* Content part? */ - switch (info->file_type) { - case FILE_TYPE_DOCX: - if (g_ascii_strcasecmp (content_type, "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml") == 0) { - xml_read (info, part_name + 1, MS_OFFICE_XML_TAG_DOCUMENT_TEXT_DATA); - } - break; - - case FILE_TYPE_PPTX: - case FILE_TYPE_PPSX: - if ((g_ascii_strcasecmp (content_type, "application/vnd.openxmlformats-officedocument.presentationml.slide+xml") == 0) || - (g_ascii_strcasecmp (content_type, "application/vnd.openxmlformats-officedocument.drawingml.diagramData+xml") == 0)) { - xml_read (info, part_name + 1, MS_OFFICE_XML_TAG_DOCUMENT_TEXT_DATA); - } - break; + /* If the file type is unknown, skip trying to extract content */ + if (info->file_type == FILE_TYPE_INVALID) { + g_message ("Invalid file type, not extracting content from '%s'", + part_name + 1); + return; + } - case FILE_TYPE_XLSX: - if ((g_ascii_strcasecmp (content_type, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml") == 0) || - (g_ascii_strcasecmp (content_type, "application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml") == 0)) { + /* Content part? 
*/ + if ((info->file_type == FILE_TYPE_DOCX && + g_ascii_strcasecmp (content_type, "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml") == 0) || + ((info->file_type == FILE_TYPE_PPTX || info->file_type == FILE_TYPE_PPSX) && + (g_ascii_strcasecmp (content_type, "application/vnd.openxmlformats-officedocument.presentationml.slide+xml") == 0 || + g_ascii_strcasecmp (content_type, "application/vnd.openxmlformats-officedocument.drawingml.diagramData+xml") == 0)) || + (info->file_type == FILE_TYPE_XLSX && + (g_ascii_strcasecmp (content_type, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml") == 0 || + g_ascii_strcasecmp (content_type, "application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml") == 0))) { + /* If reached max bytes to extract, don't event start parsing the file... just return */ + if (info->bytes_pending == 0) { + g_debug ("Skipping '%s' as already reached max bytes to extract", + part_name + 1); + } else { xml_read (info, part_name + 1, MS_OFFICE_XML_TAG_DOCUMENT_TEXT_DATA); } - break; - - case FILE_TYPE_INVALID: - g_message ("Invalid file type:'%d'", info->file_type); - break; } } |