diff options
author | Aleksander Morgado <aleksander@lanedo.com> | 2010-11-24 11:15:54 +0100 |
---|---|---|
committer | Aleksander Morgado <aleksander@lanedo.com> | 2010-11-24 13:23:39 +0100 |
commit | c9029dac1e4a752aa950fab2aae3c553e1f6809e (patch) | |
tree | e44937acf64443359832e4fa193e5b185c3cbe79 | |
parent | b98fa7891390ef13f4478319afcf4952b41d7578 (diff) | |
download | tracker-c9029dac1e4a752aa950fab2aae3c553e1f6809e.tar.gz |
tracker-extract, msoffice-xml: Move querying content type to a new method
-rw-r--r-- | src/tracker-extract/tracker-extract-msoffice-xml.c | 99 |
1 file changed, 57 insertions, 42 deletions
diff --git a/src/tracker-extract/tracker-extract-msoffice-xml.c b/src/tracker-extract/tracker-extract-msoffice-xml.c index 7e34dd34b..a8652a444 100644 --- a/src/tracker-extract/tracker-extract-msoffice-xml.c +++ b/src/tracker-extract/tracker-extract-msoffice-xml.c @@ -214,7 +214,7 @@ xml_end_element_handler_document_data (GMarkupParseContext *context, } static void -xml_start_element_handler_core_data (GMarkupParseContext *context, +xml_start_element_handler_core_data (GMarkupParseContext *context, const gchar *element_name, const gchar **attribute_names, const gchar **attribute_values, @@ -600,69 +600,84 @@ xml_start_element_handler_content_types (GMarkupParseContext *context, } } -static void -extract_msoffice_xml (const gchar *uri, - TrackerSparqlBuilder *preupdate, - TrackerSparqlBuilder *metadata) +static MsOfficeXMLFileType +msoffice_xml_get_file_type (const gchar *uri) { - MsOfficeXMLParserInfo info; - MsOfficeXMLFileType file_type; - TrackerConfig *config; GFile *file; GFileInfo *file_info; - GMarkupParseContext *context = NULL; - GError *error = NULL; - gulong total_bytes; - GMarkupParser parser = { - xml_start_element_handler_content_types, - xml_end_element_handler_document_data, - NULL, - NULL, - NULL - }; const gchar *mime_used; - if (G_UNLIKELY (maximum_size_error_quark == 0)) { - maximum_size_error_quark = g_quark_from_static_string ("maximum_size_error"); - } - + /* Map the content type of the file at uri to a MsOfficeXMLFileType; FILE_TYPE_INVALID is returned when the GFile or GFileInfo cannot be obtained or the mime type matches none of the strings below. First step: get GFile from uri... */ file = g_file_new_for_uri (uri); - if (!file) { - g_warning ("Could not create GFile for URI:'%s'", - uri); - return; + g_warning ("Could not create GFile for URI:'%s'", uri); + return FILE_TYPE_INVALID; } + /* Get GFileInfo from GFile... 
(synchronous); only G_FILE_ATTRIBUTE_STANDARD_CONTENT_TYPE is requested, and the GFile is released right after the query */ file_info = g_file_query_info (file, G_FILE_ATTRIBUTE_STANDARD_CONTENT_TYPE, G_FILE_QUERY_INFO_NONE, NULL, NULL); g_object_unref (file); - if (!file_info) { - g_warning ("Could not get GFileInfo for URI:'%s'", - uri); - return; + g_warning ("Could not get GFileInfo for URI:'%s'", uri); + return FILE_TYPE_INVALID; } + /* Get Content Type from GFileInfo. NOTE(review): file_info is unreffed immediately after this assignment, yet mime_used is still read by every comparison and by the g_message below; if the returned string is owned by file_info this is a use-after-free -- confirm against the GIO documentation and move the unref after the last use of mime_used */ mime_used = g_file_info_get_content_type (file_info); + g_object_unref (file_info); + /* MsOffice Word document? */ if (g_ascii_strcasecmp (mime_used, "application/vnd.openxmlformats-officedocument.wordprocessingml.document") == 0) { - file_type = FILE_TYPE_DOCX; - } else if (g_ascii_strcasecmp (mime_used, "application/vnd.openxmlformats-officedocument.presentationml.presentation") == 0) { - file_type = FILE_TYPE_PPTX; - } else if (g_ascii_strcasecmp (mime_used, "application/vnd.openxmlformats-officedocument.presentationml.slideshow") == 0) { - file_type = FILE_TYPE_PPSX; - } else if (g_ascii_strcasecmp (mime_used, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet") == 0) { - file_type = FILE_TYPE_XLSX; - } else { - g_message ("Mime type was not recognised:'%s'", mime_used); - file_type = FILE_TYPE_INVALID; + return FILE_TYPE_DOCX; } - g_object_unref (file_info); + /* MsOffice Powerpoint document? (presentation maps to FILE_TYPE_PPTX, slideshow maps to FILE_TYPE_PPSX) */ + if (g_ascii_strcasecmp (mime_used, "application/vnd.openxmlformats-officedocument.presentationml.presentation") == 0) { + return FILE_TYPE_PPTX; + } + if (g_ascii_strcasecmp (mime_used, "application/vnd.openxmlformats-officedocument.presentationml.slideshow") == 0) { + return FILE_TYPE_PPSX; + } + + /* MsOffice Excel document? 
*/ + if (g_ascii_strcasecmp (mime_used, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet") == 0) { + return FILE_TYPE_XLSX; + } + + g_message ("Mime type was not recognised:'%s'", mime_used); + return FILE_TYPE_INVALID; +} + +static void +extract_msoffice_xml (const gchar *uri, + TrackerSparqlBuilder *preupdate, + TrackerSparqlBuilder *metadata) +{ + MsOfficeXMLParserInfo info; + MsOfficeXMLFileType file_type; + TrackerConfig *config; + + GMarkupParseContext *context = NULL; + GError *error = NULL; + gulong total_bytes; + GMarkupParser parser = { + xml_start_element_handler_content_types, + xml_end_element_handler_document_data, + NULL, + NULL, + NULL + }; + + if (G_UNLIKELY (maximum_size_error_quark == 0)) { + maximum_size_error_quark = g_quark_from_static_string ("maximum_size_error"); + } + + /* Get current Content Type. NOTE(review): file_type can be FILE_TYPE_INVALID at this point; how that value is handled is not visible in this hunk -- verify in the rest of the function */ + file_type = msoffice_xml_get_file_type (uri); /* Setup conf */ config = tracker_main_get_config (); @@ -688,7 +703,7 @@ extract_msoffice_xml (const gchar *uri, tracker_gsf_parse_xml_in_zip (uri, "[Content_Types].xml", context, - &error); + &error); /* If we got any content, add it */ if (info.content) { |