/*
 * Copyright (C) 2011, Nokia
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA  02110-1301, USA.
 *
 * Author: Martyn Russell
 */

#include "config.h"

/* NOTE(review): the system include targets below are inferred from the
 * symbols this file uses (strlen; htmlSAXHandler / htmlSAXParseFile;
 * tracker_locale_get / TRACKER_OWN_GRAPH_URN) -- confirm against the build.
 */
#include <string.h>

#include <libxml/HTMLparser.h>
#include <libtracker-common/tracker-common.h>

#include "tracker-miner-user-guides.h"

/* Name of the cache file remembering the locale used for the last crawl. */
#define LOCALE_FILENAME   "locale-for-miner-user-guides.txt"

/* FIXME: get this value from tracker conf */
#define MAX_EXTRACT_SIZE  (1024 * 1024) /* 1 MiB */

/* Upper bound, in bytes, on the title collected from a single document. */
#define MAX_TITLE_LENGTH  1000

/* State shared between the libxml2 SAX callbacks while parsing one file. */
typedef struct {
	gchar *uri;           /* URI being parsed, used in error messages */
	GString *title;       /* text seen while in_title is set */
	gboolean in_text;     /* set once a <body> start tag has been seen */
	gboolean in_title;    /* set between <title> and </title> */
	GString *plain_text;  /* text seen while in_text is set */
	gssize max_length;    /* maximum number of bytes kept in plain_text */
} ParserContext;

/* Everything process_file_cb() needs once the async file query completes. */
typedef struct {
	TrackerMinerFS *miner;
	GFile *file;
	TrackerSparqlBuilder *sparql;
	GCancellable *cancellable;
	gchar *type;          /* not referenced elsewhere in this file */
} ProcessUserguideData;

static void     miner_userguides_initable_iface_init     (GInitableIface       *iface);
static gboolean miner_userguides_initable_init           (GInitable            *initable,
                                                          GCancellable         *cancellable,
                                                          GError              **error);
static gboolean miner_userguides_process_file            (TrackerMinerFS       *fs,
                                                          GFile                *file,
                                                          TrackerSparqlBuilder *sparql,
                                                          GCancellable         *cancellable);
static gboolean miner_userguides_process_file_attributes (TrackerMinerFS       *fs,
                                                          GFile                *file,
                                                          TrackerSparqlBuilder *sparql,
                                                          GCancellable         *cancellable);
static void     parser_get_file_content                  (const gchar          *uri,
                                                          gssize                max_extract_size,
                                                          gchar               **content,
                                                          gchar               **title);

static GQuark miner_userguides_error_quark = 0;

static GInitableIface *miner_userguides_initable_parent_iface;

G_DEFINE_TYPE_WITH_CODE (TrackerMinerUserguides, tracker_miner_userguides, TRACKER_TYPE_MINER_FS,
                         G_IMPLEMENT_INTERFACE (G_TYPE_INITABLE,
                                                miner_userguides_initable_iface_init));

/* Class initializer: installs the TrackerMinerFS virtual methods and
 * registers the error quark used by this miner.
 */
static void
tracker_miner_userguides_class_init (TrackerMinerUserguidesClass *klass)
{
	TrackerMinerFSClass *miner_fs_class = TRACKER_MINER_FS_CLASS (klass);

	miner_fs_class->process_file = miner_userguides_process_file;
	miner_fs_class->process_file_attributes = miner_userguides_process_file_attributes;

	miner_userguides_error_quark = g_quark_from_static_string ("TrackerMinerUserguides");
}

/* Instance initializer: there is no per-instance state to set up. */
static void
tracker_miner_userguides_init (TrackerMinerUserguides *ma)
{
}

/* GInitable interface initializer: remembers the parent implementation so
 * that miner_userguides_initable_init() can chain up, then overrides init.
 */
static void
miner_userguides_initable_iface_init (GInitableIface *iface)
{
	miner_userguides_initable_parent_iface = g_type_interface_peek_parent (iface);
	iface->init = miner_userguides_initable_init;
}

/* Adds @path to the indexing tree of @fs if it is a directory.
 *
 * @locale is only used for the log message emitted when @path is missing.
 *
 * Returns: TRUE if @path was added, FALSE otherwise.
 */
static inline gboolean
miner_userguides_basedir_add_path (TrackerMinerFS *fs,
                                   const gchar    *path,
                                   const gchar    *locale)
{
	TrackerIndexingTree *indexing_tree;
	GFile *file;

	/* g_file_test() succeeds when ANY of the OR-ed tests passes, so
	 * combining EXISTS with IS_DIR would also accept regular files.
	 */
	if (!g_file_test (path, G_FILE_TEST_IS_DIR)) {
		g_message (" No user guide found for locale:'%s' in this prefix", locale);
		return FALSE;
	}

	indexing_tree = tracker_miner_fs_get_indexing_tree (fs);

	g_message (" Adding:'%s'", path);

	file = g_file_new_for_path (path);
	tracker_indexing_tree_add (indexing_tree,
	                           file,
	                           TRACKER_DIRECTORY_FLAG_RECURSE |
	                           TRACKER_DIRECTORY_FLAG_MONITOR |
	                           TRACKER_DIRECTORY_FLAG_CHECK_MTIME,
	                           "tracker-miner-user-guides");
	g_object_unref (file);

	return TRUE;
}

/* Registers the user guide directories found below @basedir.
 *
 * With HAVE_MEEGOTOUCH, a locale specific sub-directory of
 * $basedir/userguide/contents is looked up; if none is added (or when
 * MeeGoTouch support is compiled out) the whole contents directory is added.
 */
static void
miner_userguides_basedir_add (TrackerMinerFS *fs,
                              const gchar    *basedir)
{
	gchar *path;
	gboolean added = FALSE;
#ifdef HAVE_MEEGOTOUCH
	gchar *locale;
#endif /* HAVE_MEEGOTOUCH */

	/* Do preliminary check on basedir; see the note about g_file_test()
	 * in miner_userguides_basedir_add_path().
	 */
	path = g_build_filename (basedir, "userguide", "contents", NULL);

	if (!g_file_test (path, G_FILE_TEST_IS_DIR)) {
		g_message (" No user guides in prefix:'%s'", path);
		g_free (path);
		return;
	}

	g_free (path);

	/* Without MeeGoTouch, we simply index ALL content. */
#ifdef HAVE_MEEGOTOUCH
	locale = tracker_meego_get_locale ();

	/* Order of which we try here:
	 * 1, make sure locale is set, otherwise default to 'en'
	 * 2, try the "xx_YY" of the current locale
	 * 3, try the "xx" of the current locale
	 * 4, default to 'en' for cases where current locale has no userguide
	 */

	/* Step 1 */
	if (locale == NULL || *locale == '\0') {
		g_message (" Locale was not set which is unexpected, defaulting to 'en'");
		g_free (locale);
		locale = g_strdup ("en");
	}

	/* Step 2 */
	path = g_build_filename (basedir, "userguide", "contents", locale, NULL);
	added = miner_userguides_basedir_add_path (fs, path, locale);
	g_free (path);

	/* Step 3: cut "xx_YY" down to "xx" in place */
	if (!added && strlen (locale) > 2) {
		locale[2] = '\0';
		path = g_build_filename (basedir, "userguide", "contents", locale, NULL);
		added = miner_userguides_basedir_add_path (fs, path, locale);
		g_free (path);
	}

	/* Step 4 */
	if (!added) {
		g_message (" Locale has no user guide currently, defaulting to 'en'");
		path = g_build_filename (basedir, "userguide", "contents", "en", NULL);
		added = miner_userguides_basedir_add_path (fs, path, "en");
		g_free (path);
	}

	if (!added) {
		g_message (" Default locale 'en' has no user guide in this prefix");
	}

	g_free (locale);
#endif /* HAVE_MEEGOTOUCH */

	/* NOTE(review): this branch is also taken when MeeGoTouch IS enabled
	 * but none of the steps above added a directory, in which case the
	 * message below is misleading -- confirm whether that is intended.
	 */
	if (!added) {
		/* Add $dir/userguide/contents */
		g_message (" MeeGoTouch is disabled, indexing all user guides...");
		path = g_build_filename (basedir, "userguide", "contents", NULL);
		miner_userguides_basedir_add_path (fs, path, "");
		g_free (path);
	}
}

/* Walks every XDG system data directory and registers its user guides. */
static void
miner_userguides_add_directories (TrackerMinerFS *fs)
{
	const gchar * const *xdg_dirs;
	gint i;

	g_message ("Setting up user guides to iterate from XDG system directories");

	/* Add all XDG system and local dirs */
	xdg_dirs = g_get_system_data_dirs ();

	for (i = 0; xdg_dirs[i]; i++) {
		miner_userguides_basedir_add (fs, xdg_dirs[i]);
	}
}

/* "finished" signal handler: stores the current locale in the cache file so
 * that detect_locale_changed() can compare against it on the next start.
 * A NULL locale is stored as an empty file.
 */
static void
miner_finished_cb (TrackerMinerFS *fs,
                   gdouble         seconds_elapsed,
                   guint           total_directories_found,
                   guint           total_directories_ignored,
                   guint           total_files_found,
                   guint           total_files_ignored,
                   gpointer        user_data)
{
	/* Save locale, if it changes the user guides need to be
	 * re-indexed.
	 */
	GError *error = NULL;
	gchar *locale = tracker_locale_get (TRACKER_LOCALE_LANGUAGE);
	gchar *locale_file = g_build_filename (g_get_user_cache_dir (),
	                                       "tracker",
	                                       LOCALE_FILENAME,
	                                       NULL);

	g_message ("Saving locale used to index applications");
	g_message (" Creating locale file '%s'", locale_file);

	if (locale == NULL) {
		locale = g_strdup ("");
	}

	if (!g_file_set_contents (locale_file, locale, -1, &error)) {
		g_message (" Could not set file contents, %s",
		           error ? error->message : "no error given");
		g_clear_error (&error);
	}

	g_free (locale);
	g_free (locale_file);
}

/* If a reset is requested, we will remove from the store all items previously
 * inserted by the tracker-miner-userguides, this is:
 *  (a) Remove all resources which are a nfo:HelpDocument
 *  (b) Remove all unnecessary directories
 */
static void
miner_userguides_reset (TrackerMiner *miner)
{
	GError *error = NULL;
	TrackerSparqlBuilder *sparql;

	sparql = tracker_sparql_builder_new_update ();

	/* (a) Remove all resources which are a nfo:HelpDocument */
	tracker_sparql_builder_delete_open (sparql, TRACKER_OWN_GRAPH_URN);
	tracker_sparql_builder_subject_variable (sparql, "userguide");
	tracker_sparql_builder_predicate (sparql, "a");
	tracker_sparql_builder_object (sparql, "rdfs:Resource");
	tracker_sparql_builder_delete_close (sparql);

	tracker_sparql_builder_where_open (sparql);
	tracker_sparql_builder_subject_variable (sparql, "userguide");
	tracker_sparql_builder_predicate (sparql, "a");
	tracker_sparql_builder_object (sparql, "nfo:HelpDocument");
	tracker_sparql_builder_where_close (sparql);

	/* (b) Remove all unnecessary directories */
	/* TODO: Finish */

	/* Execute a sync update, we don't want the userguides miner to start before
	 * we finish this. */
	tracker_sparql_connection_update (tracker_miner_get_connection (miner),
	                                  tracker_sparql_builder_get_result (sparql),
	                                  G_PRIORITY_HIGH,
	                                  NULL,
	                                  &error);

	if (error) {
		/* Some error happened performing the query, not good */
		g_critical ("Couldn't reset mined userguides: %s", error->message);
		g_clear_error (&error);
	}

	g_object_unref (sparql);
}

/* Compares the locale stored by miner_finished_cb() with the current one
 * and resets the miner's data when they differ.
 *
 * Returns: TRUE if a change was detected (and a reset was issued).
 */
static gboolean
detect_locale_changed (TrackerMiner *miner)
{
	gchar *locale_file;
	gchar *previous_locale = NULL;
	gchar *current_locale;
	gboolean changed;

	locale_file = g_build_filename (g_get_user_cache_dir (),
	                                "tracker",
	                                LOCALE_FILENAME,
	                                NULL);

	if (G_LIKELY (g_file_test (locale_file, G_FILE_TEST_EXISTS))) {
		gchar *contents;

		/* Check locale is correct */
		if (G_LIKELY (g_file_get_contents (locale_file, &contents, NULL, NULL))) {
			if (contents && contents[0] == '\0') {
				g_critical (" Empty locale file found at '%s'", locale_file);
				g_free (contents);
			} else {
				/* Re-use contents */
				previous_locale = contents;
			}
		} else {
			g_critical (" Could not get content of file '%s'", locale_file);
		}
	} else {
		g_message (" Could not find locale file:'%s'", locale_file);
	}

	g_free (locale_file);

	current_locale = tracker_locale_get (TRACKER_LOCALE_LANGUAGE);

	/* Note that having both to NULL is actually valid, they would default
	 * to the unicode collation without locale-specific stuff.
	 *
	 * Either string may be NULL here; passing NULL for "%s" is undefined
	 * in C, so substitute an explicit placeholder when logging.
	 */
	if (g_strcmp0 (previous_locale, current_locale) != 0) {
		g_message ("Locale change detected from '%s' to '%s'...",
		           previous_locale ? previous_locale : "(null)",
		           current_locale ? current_locale : "(null)");
		changed = TRUE;
	} else {
		g_message ("Current and previous locales match: '%s'",
		           previous_locale ? previous_locale : "(null)");
		changed = FALSE;
	}

	g_free (current_locale);
	g_free (previous_locale);

	if (changed) {
		g_message ("Locale change detected, so resetting miner to "
		           "remove all previously created items...");
		miner_userguides_reset (miner);
	}

	return changed;
}

/* GInitable::init implementation.
 *
 * Restricts indexing to "*.html" files, chains up to the parent
 * implementation, hooks the "finished" signal, registers the user guide
 * directories and finally checks whether the locale changed since last run.
 *
 * Returns: FALSE (with @error set) if the parent initializer failed.
 */
static gboolean
miner_userguides_initable_init (GInitable     *initable,
                                GCancellable  *cancellable,
                                GError       **error)
{
	TrackerMinerFS *fs;
	GError *inner_error = NULL;
	TrackerIndexingTree *indexing_tree;

	fs = TRACKER_MINER_FS (initable);
	indexing_tree = tracker_miner_fs_get_indexing_tree (fs);

	/* Set up files filter, deny every file, but
	 * those with a .html extension
	 */
	tracker_indexing_tree_set_default_policy (indexing_tree,
	                                          TRACKER_FILTER_FILE,
	                                          TRACKER_FILTER_POLICY_DENY);
	tracker_indexing_tree_add_filter (indexing_tree,
	                                  TRACKER_FILTER_FILE,
	                                  "*.html");

	/* Chain up parent's initable callback before calling child's one */
	if (!miner_userguides_initable_parent_iface->init (initable, cancellable, &inner_error)) {
		g_propagate_error (error, inner_error);
		return FALSE;
	}

	g_signal_connect (fs, "finished",
	                  G_CALLBACK (miner_finished_cb),
	                  NULL);

	miner_userguides_add_directories (fs);

	/* If the locales changed, we need to reset things first */
	detect_locale_changed (TRACKER_MINER (fs));

	return TRUE;
}

/* Returns the URN to use as SPARQL subject for @file.
 *
 * @is_iri is set to TRUE when the miner already knows a URN for @file, and
 * to FALSE when the anonymous node "_:file" is returned instead.
 */
static const gchar *
get_file_urn (TrackerMinerFS *miner,
              GFile          *file,
              gboolean       *is_iri)
{
	const gchar *urn;

	urn = tracker_miner_fs_get_urn (miner, file);
	*is_iri = TRUE;

	if (!urn) {
		/* This is a new insertion, use anonymous URNs to store files */
		urn = "_:file";
		*is_iri = FALSE;
	}

	return urn;
}

/* Appends to data->sparql the INSERT describing data->file.
 *
 * Directories become nfo:Folder resources; anything else becomes a
 * nfo:HelpDocument whose title and plain text are extracted from the HTML.
 * @error is accepted but never set by this function.
 */
static inline void
process_item (ProcessUserguideData  *data,
              GFileInfo             *file_info,
              gboolean               is_dir,
              GError               **error)
{
	TrackerSparqlBuilder *sparql;
	gchar *uri;
	const gchar *mime_type;
	const gchar *urn;
	const gchar *parent_urn;
	gboolean is_iri;
	guint64 stamp;
	time_t when;

	sparql = data->sparql;

	uri = g_file_get_uri (data->file);
	mime_type = g_file_info_get_content_type (file_info);

	urn = get_file_urn (data->miner, data->file, &is_iri);

	tracker_sparql_builder_insert_silent_open (sparql, NULL);
	tracker_sparql_builder_graph_open (sparql, TRACKER_OWN_GRAPH_URN);

	if (is_iri) {
		tracker_sparql_builder_subject_iri (sparql, urn);
	} else {
		tracker_sparql_builder_subject (sparql, urn);
	}

	tracker_sparql_builder_predicate (sparql, "a");
	tracker_sparql_builder_object (sparql, "nfo:FileDataObject");
	tracker_sparql_builder_object (sparql, "nie:InformationElement");

	if (is_dir) {
		tracker_sparql_builder_object (sparql, "nfo:Folder");
	} else {
		tracker_sparql_builder_object (sparql, "nfo:HelpDocument");
	}

	parent_urn = tracker_miner_fs_get_parent_urn (TRACKER_MINER_FS (data->miner), data->file);

	if (parent_urn) {
		tracker_sparql_builder_predicate (sparql, "nfo:belongsToContainer");
		tracker_sparql_builder_object_iri (sparql, parent_urn);
	}

	tracker_sparql_builder_predicate (sparql, "nfo:fileName");
	tracker_sparql_builder_object_string (sparql, g_file_info_get_display_name (file_info));

	tracker_sparql_builder_predicate (sparql, "nfo:fileSize");
	tracker_sparql_builder_object_int64 (sparql, g_file_info_get_size (file_info));

	/* The attributes are 64 bit unsigned, while the builder expects a
	 * pointer to a real time_t: convert by value instead of casting the
	 * pointer, which would read the wrong bytes where the sizes differ.
	 */
	stamp = g_file_info_get_attribute_uint64 (file_info, G_FILE_ATTRIBUTE_TIME_MODIFIED);
	when = (time_t) stamp;
	tracker_sparql_builder_predicate (sparql, "nfo:fileLastModified");
	tracker_sparql_builder_object_date (sparql, &when);

	stamp = g_file_info_get_attribute_uint64 (file_info, G_FILE_ATTRIBUTE_TIME_ACCESS);
	when = (time_t) stamp;
	tracker_sparql_builder_predicate (sparql, "nfo:fileLastAccessed");
	tracker_sparql_builder_object_date (sparql, &when);

	/* Laying the link between the IE and the DO. We use IE = DO */
	tracker_sparql_builder_predicate (sparql, "nie:isStoredAs");
	if (is_iri) {
		tracker_sparql_builder_object_iri (sparql, urn);
	} else {
		tracker_sparql_builder_object (sparql, urn);
	}

	/* The URL of the DataObject (because IE = DO, this is correct) */
	tracker_sparql_builder_predicate (sparql, "nie:url");
	tracker_sparql_builder_object_string (sparql, uri);

	tracker_sparql_builder_predicate (sparql, "nie:mimeType");
	tracker_sparql_builder_object_string (sparql, mime_type);

	/* FIXME: Add nie:dataSource for switching different userguides? */
	tracker_sparql_builder_predicate (sparql, "tracker:available");
	tracker_sparql_builder_object_boolean (sparql, TRUE);

	if (!is_dir) {
		gchar *content = NULL;
		gchar *title = NULL;

		/* Get content; both outputs stay NULL if the URI can't be opened */
		parser_get_file_content (uri, MAX_EXTRACT_SIZE, &content, &title);

		g_message ("Adding userguide:'%s', uri:'%s'",
		           title ? title : "(null)",
		           uri);

		if (title && title[0]) {
			tracker_sparql_builder_predicate (sparql, "nie:title");
			tracker_sparql_builder_object_unvalidated (sparql, title);
		}

		if (content && content[0]) {
			tracker_sparql_builder_predicate (sparql, "nie:plainTextContent");
			tracker_sparql_builder_object_unvalidated (sparql, content);
		}

		g_free (content);
		g_free (title);
	} else {
		g_message ("Adding userguide directory:'%s'", uri);
	}

	tracker_sparql_builder_graph_close (sparql);
	tracker_sparql_builder_insert_close (sparql);

	g_free (uri);
}

/* Drops the references held by @data and releases the structure itself. */
static void
process_userguide_data_free (ProcessUserguideData *data)
{
	g_object_unref (data->miner);
	g_object_unref (data->file);
	g_object_unref (data->sparql);

	/* The cancellable is optional, see miner_userguides_process_file() */
	if (data->cancellable) {
		g_object_unref (data->cancellable);
	}

	g_slice_free (ProcessUserguideData, data);
}

/* Completion callback of the g_file_query_info_async() call issued by
 * miner_userguides_process_file(): builds the SPARQL for the file and
 * notifies the miner, passing along any error from the query.
 */
static void
process_file_cb (GObject      *object,
                 GAsyncResult *result,
                 gpointer      user_data)
{
	ProcessUserguideData *data;
	GFileInfo *file_info;
	GError *error = NULL;
	GFile *file;
	gboolean is_dir;

	data = user_data;
	file = G_FILE (object);
	file_info = g_file_query_info_finish (file, result, &error);

	if (error) {
		tracker_miner_fs_file_notify (TRACKER_MINER_FS (data->miner), file, error);
		process_userguide_data_free (data);
		g_error_free (error);
		return;
	}

	is_dir = g_file_info_get_file_type (file_info) == G_FILE_TYPE_DIRECTORY;
	process_item (data, file_info, is_dir, &error);

	tracker_miner_fs_file_notify (TRACKER_MINER_FS (data->miner), data->file, error);
	process_userguide_data_free (data);

	if (error) {
		g_error_free (error);
	}

	if (file_info) {
		g_object_unref (file_info);
	}
}

/* TrackerMinerFS::process_file implementation.
 *
 * Starts an asynchronous query for the attributes process_item() needs;
 * the actual work happens in process_file_cb().
 *
 * Returns: always TRUE.
 */
static gboolean
miner_userguides_process_file (TrackerMinerFS       *fs,
                               GFile                *file,
                               TrackerSparqlBuilder *sparql,
                               GCancellable         *cancellable)
{
	ProcessUserguideData *data;
	const gchar *attrs;

	data = g_slice_new0 (ProcessUserguideData);
	data->miner = g_object_ref (fs);
	data->sparql = g_object_ref (sparql);
	data->file = g_object_ref (file);
	/* g_object_ref() must not be handed NULL, and GIO accepts a NULL
	 * cancellable, so only take a reference when there is one.
	 */
	data->cancellable = cancellable ? g_object_ref (cancellable) : NULL;

	attrs = G_FILE_ATTRIBUTE_STANDARD_TYPE ","
	        G_FILE_ATTRIBUTE_STANDARD_CONTENT_TYPE ","
	        G_FILE_ATTRIBUTE_STANDARD_DISPLAY_NAME ","
	        G_FILE_ATTRIBUTE_STANDARD_SIZE ","
	        G_FILE_ATTRIBUTE_TIME_MODIFIED ","
	        G_FILE_ATTRIBUTE_TIME_ACCESS;

	g_file_query_info_async (file,
	                         attrs,
	                         G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS,
	                         G_PRIORITY_DEFAULT,
	                         cancellable,
	                         process_file_cb,
	                         data);

	return TRUE;
}

/* TrackerMinerFS::process_file_attributes implementation: attribute-only
 * changes are logged and otherwise ignored.
 *
 * Returns: always FALSE.
 */
static gboolean
miner_userguides_process_file_attributes (TrackerMinerFS       *fs,
                                          GFile                *file,
                                          TrackerSparqlBuilder *sparql,
                                          GCancellable         *cancellable)
{
	gchar *uri;

	/* We don't care about file attribute changes here */
	uri = g_file_get_uri (file);
	g_debug ("Ignoring file attribute changes in '%s'", uri);
	g_free (uri);

	return FALSE;
}

/* SAX start-element callback: flags entry into <title> and <body>. */
static void
parser_start_element (void           *ctx,
                      const xmlChar  *name,
                      const xmlChar **atts G_GNUC_UNUSED)
{
	const gchar *elem = (const gchar *) name;
	ParserContext *pctx = ctx;

	if (g_strcmp0 (elem, "title") == 0) {
		pctx->in_title = TRUE;
		return;
	}

	if (g_strcmp0 (elem, "body") == 0) {
		pctx->in_text = TRUE;
		return;
	}
}

/* SAX end-element callback: only the title flag is cleared here; in_text
 * stays set once a <body> start tag has been seen.
 */
static void
parser_end_element (void          *ctx,
                    const xmlChar *name)
{
	const gchar *elem = (const gchar *) name;
	ParserContext *pctx = ctx;

	if (g_strcmp0 (elem, "title") == 0) {
		pctx->in_title = FALSE;
	}
}

/* SAX characters callback: collects title and body text.
 *
 * @ch holds @len bytes and is not guaranteed to be NUL-terminated by
 * libxml2, so @len (never strlen()) bounds every copy. The title is capped
 * at MAX_TITLE_LENGTH bytes in total and the body at pctx->max_length bytes.
 */
static void
parser_characters (void          *ctx,
                   const xmlChar *ch,
                   int            len)
{
	ParserContext *pctx = ctx;
	const gchar *text = (const gchar *) ch;
	gssize remaining;

	if (text == NULL || len <= 0) {
		return;
	}

	if (pctx->in_title && pctx->title->len < MAX_TITLE_LENGTH) {
		gsize room = MAX_TITLE_LENGTH - pctx->title->len;

		g_string_append_len (pctx->title, text, MIN ((gsize) len, room));
	}

	if (!pctx->in_text) {
		return;
	}

	if ((gssize) pctx->plain_text->len >= pctx->max_length) {
		return;
	}

	remaining = pctx->max_length - (gssize) pctx->plain_text->len;

	g_string_append_len (pctx->plain_text, text, MIN ((gssize) len, remaining));
}

/* SAX error callback: formats the parser message and logs it together with
 * the URI of the file being parsed.
 */
static void
parser_error (void       *ctx,
              const char *msg,
              ...)
{
	ParserContext *pctx = ctx;
	va_list args;
	gchar *str;

	va_start (args, msg);
	str = g_strdup_vprintf (msg, args);
	va_end (args);

	g_critical ("Could not parse file '%s': %s", pctx->uri, str);
	g_free (str);
}

/* Extracts the title and the body text of the HTML file at @uri.
 *
 * @max_extract_size: size limit for the extracted body text, including the
 *                    terminating NUL.
 * @content: (out): newly allocated, whitespace-stripped body text, or NULL
 *                  if @uri could not be converted to a filename.
 * @title:   (out): newly allocated, whitespace-stripped title, or NULL if
 *                  @uri could not be converted to a filename.
 *
 * The caller owns both returned strings and frees them with g_free().
 */
static void
parser_get_file_content (const gchar  *uri,
                         gssize        max_extract_size,
                         gchar       **content,
                         gchar       **title)
{
	GError *error = NULL;
	gchar *filename;
	ParserContext parser_ctx = { 0 };
	htmlSAXHandler sax_handler = { 0 };
	htmlDocPtr doc;

	/* Give the outputs a defined value on every path */
	*content = NULL;
	*title = NULL;

	/* TODO: utf8 sanitization */

	filename = g_filename_from_uri (uri, NULL, &error);

	if (error) {
		g_message ("Could not open '%s': %s", uri, error->message);
		g_error_free (error);
		g_free (filename);
		return;
	}

	parser_ctx.uri = g_strdup (uri);
	parser_ctx.title = g_string_new (NULL);
	parser_ctx.in_text = FALSE;
	parser_ctx.in_title = FALSE;
	parser_ctx.plain_text = g_string_new (NULL);
	/* leave space for terminating 0 char */
	parser_ctx.max_length = max_extract_size - 1;

	sax_handler.startElement = parser_start_element;
	sax_handler.endElement = parser_end_element;
	sax_handler.characters = parser_characters;
	sax_handler.error = parser_error;

	doc = htmlSAXParseFile (filename, "utf-8", &sax_handler, &parser_ctx);
	g_free (filename);

	if (doc) {
		xmlFreeDoc (doc);
	}

	g_free (parser_ctx.uri);

	*title = g_string_free (parser_ctx.title, FALSE);
	g_strstrip (*title);

	*content = g_string_free (parser_ctx.plain_text, FALSE);
	g_strstrip (*content);
}

/* Creates and initializes a new user guides miner.
 *
 * Returns: the new miner, or NULL with @error set if initialization failed.
 */
TrackerMiner *
tracker_miner_userguides_new (GError **error)
{
	return g_initable_new (TRACKER_TYPE_MINER_USERGUIDES,
	                       NULL,
	                       error,
	                       "name", "Userguides",
	                       "processing-pool-wait-limit", 10,
	                       "processing-pool-ready-limit", 100,
	                       NULL);
}