diff options
Diffstat (limited to 'src/trackerd/tracker-query-tree.c')
-rw-r--r-- | src/trackerd/tracker-query-tree.c | 810 |
1 files changed, 0 insertions, 810 deletions
diff --git a/src/trackerd/tracker-query-tree.c b/src/trackerd/tracker-query-tree.c deleted file mode 100644 index 898a9a7c5..000000000 --- a/src/trackerd/tracker-query-tree.c +++ /dev/null @@ -1,810 +0,0 @@ -/* Tracker - indexer and metadata database engine - * Copyright (C) 2007-2008 Nokia - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this library; if not, write to the - * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - */ - -#include <glib-object.h> -#include <string.h> -#include <math.h> -#include <depot.h> -#include "tracker-query-tree.h" -#include "tracker-parser.h" -#include "tracker-utils.h" -#include "tracker-service-manager.h" - -#define TRACKER_QUERY_TREE_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), TRACKER_TYPE_QUERY_TREE, TrackerQueryTreePrivate)) -#define MAX_HIT_BUFFER 480000 -#define SCORE_MULTIPLIER 100000 - -typedef enum OperationType OperationType; -typedef enum TreeNodeType TreeNodeType; -typedef struct TreeNode TreeNode; -typedef struct TrackerQueryTreePrivate TrackerQueryTreePrivate; -typedef struct ComposeHitsData ComposeHitsData; -typedef struct SearchHitData SearchHitData; - -enum OperationType { - OP_NONE, - OP_AND, - OP_OR -}; - -struct TreeNode { - TreeNode *left; - TreeNode *right; - OperationType op; - gchar *term; -}; - -struct TrackerQueryTreePrivate { - gchar *query_str; - TreeNode *tree; - Indexer *indexer; - GArray *services; -}; - -struct ComposeHitsData { - OperationType op; - GHashTable *other_table; - GHashTable *dest_table; -}; - -struct SearchHitData { - guint32 service_type_id; - guint32 score; -}; - -enum { - PROP_0, - PROP_QUERY, - PROP_INDEXER, - PROP_SERVICES -}; - -static void tracker_query_tree_class_init (TrackerQueryTreeClass *class); -static void tracker_query_tree_init (TrackerQueryTree *tree); -static void tracker_query_tree_finalize (GObject *object); - -static void tracker_query_tree_set_property (GObject *object, - guint prop_id, - const GValue *value, - GParamSpec *pspec); -static void tracker_query_tree_get_property (GObject *object, - guint prop_id, - GValue *value, - GParamSpec *pspec); - - -G_DEFINE_TYPE (TrackerQueryTree, tracker_query_tree, G_TYPE_OBJECT) - -static void -tracker_query_tree_class_init (TrackerQueryTreeClass *klass) -{ - GObjectClass *object_class = G_OBJECT_CLASS (klass); - - object_class->finalize = tracker_query_tree_finalize; - object_class->set_property = tracker_query_tree_set_property; - object_class->get_property = tracker_query_tree_get_property; - - g_object_class_install_property (object_class, - PROP_QUERY, - g_param_spec_string ("query", - "Query", - "Query", - NULL, - G_PARAM_READWRITE)); - g_object_class_install_property (object_class, - PROP_INDEXER, - g_param_spec_pointer ("indexer", - "Indexer", - "Indexer", - G_PARAM_READWRITE)); - g_object_class_install_property (object_class, - PROP_SERVICES, - g_param_spec_pointer ("services", - "Services", - "GArray of services", - G_PARAM_READWRITE)); - g_type_class_add_private (object_class, - sizeof (TrackerQueryTreePrivate)); -} - -static void -tracker_query_tree_init (TrackerQueryTree *tree) -{ -} - -static TreeNode * -tree_node_leaf_new (const gchar *term) -{ - TreeNode *node; - - node = g_slice_new0 (TreeNode); - node->term = g_strdup (term); - node->op = OP_NONE; - - return node; -} - -static TreeNode * -tree_node_operator_new (OperationType op) -{ - TreeNode *node; - - node = g_slice_new0 (TreeNode); - node->op = op; - - return node; -} - -static void -tree_node_free (TreeNode *node) -{ - if (!node) - return; - - /* Free string if any */ - g_free (node->term); - - /* free subnodes */ - tree_node_free (node->left); - tree_node_free (node->right); - - g_slice_free (TreeNode, node); -} - -static void -tracker_query_tree_finalize (GObject *object) -{ - TrackerQueryTreePrivate *priv; - - priv = TRACKER_QUERY_TREE_GET_PRIVATE (object); - - tree_node_free (priv->tree); - g_free (priv->query_str); - - G_OBJECT_CLASS (tracker_query_tree_parent_class)->finalize (object); -} - -static void -tracker_query_tree_set_property (GObject *object, - guint prop_id, - const GValue *value, - GParamSpec *pspec) -{ - switch (prop_id) { - case PROP_QUERY: - tracker_query_tree_set_query (TRACKER_QUERY_TREE (object), - g_value_get_string (value)); - break; - case PROP_INDEXER: - tracker_query_tree_set_indexer (TRACKER_QUERY_TREE (object), - g_value_get_pointer (value)); - break; - case PROP_SERVICES: - tracker_query_tree_set_services (TRACKER_QUERY_TREE (object), - g_value_get_pointer (value)); - break; - default: - G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); - } -} - -static void -tracker_query_tree_get_property (GObject *object, - guint prop_id, - GValue *value, - GParamSpec *pspec) -{ - TrackerQueryTreePrivate *priv; - - priv = TRACKER_QUERY_TREE_GET_PRIVATE (object); - - switch (prop_id) { - case PROP_QUERY: - g_value_set_string (value, priv->query_str); - break; - case PROP_INDEXER: - g_value_set_pointer (value, priv->indexer); - break; - case PROP_SERVICES: - g_value_set_pointer (value, priv->services); - break; - default: - G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); - } -} - -TrackerQueryTree * -tracker_query_tree_new (const gchar *query_str, - Indexer *indexer, - GArray *services) -{ - g_return_val_if_fail (query_str != NULL, NULL); - g_return_val_if_fail (indexer != NULL, NULL); - - return g_object_new (TRACKER_TYPE_QUERY_TREE, - "query", query_str, - "indexer", indexer, - "services", services, - NULL); -} - -# if 0 -static void -print_tree (TreeNode *node) -{ - if (!node) { - g_print ("NULL "); - return; - } - - switch (node->op) { - case OP_AND: - g_print ("AND "); - print_tree (node->left); - print_tree (node->right); - break; - case OP_OR: - g_print ("OR "); - print_tree (node->left); - print_tree (node->right); - break; - default: - g_print ("%s ", node->term); - } -} -#endif - -static void -create_query_tree (TrackerQueryTree *tree) -{ - TrackerQueryTreePrivate *priv; - TreeNode *node, *stack_node; - GQueue *queue, *stack; - gboolean last_element_is_term = FALSE; - gchar *parsed_str, **strings; - gint i; - - priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree); - - strings = g_strsplit (priv->query_str, " ", -1); - queue = g_queue_new (); - stack = g_queue_new (); - - /* Create a parse tree that recognizes queries such as: - * "foo" - * "foo and bar" - * "foo or bar" - * "foo and bar or baz" - * "foo or bar and baz" - * - * The operator "and" will have higher priority than the operator "or", - * and will also be assumed to exist if there is no operator between two - * search terms. - */ - - /* Step 1. Create polish notation for the search term, the - * stack will be used to store operators temporarily - */ - for (i = 0; strings[i]; i++) { - OperationType op; - - if (!strings[i] || !*strings[i]) - continue; - - /* get operator type */ - if (strcmp (strings[i], "and") == 0) { - op = OP_AND; - } else if (strcmp (strings [i], "or") == 0) { - op = OP_OR; - } else { - if (last_element_is_term) { - /* last element was a search term, assume the "and" - * operator between these two elements, and wait - * for actually parsing the second term */ - op = OP_AND; - i--; - } else { - op = OP_NONE; - } - } - - /* create node for the operation type */ - switch (op) { - case OP_AND: - node = tree_node_operator_new (OP_AND); - stack_node = g_queue_peek_head (stack); - - /* push in the queue operators with fewer priority, "or" in this case */ - while (stack_node && stack_node->op == OP_OR) { - stack_node = g_queue_pop_head (stack); - g_queue_push_head (queue, stack_node); - - stack_node = g_queue_peek_head (stack); - } - - g_queue_push_head (stack, node); - last_element_is_term = FALSE; - break; - case OP_OR: - node = tree_node_operator_new (OP_OR); - g_queue_push_head (stack, node); - last_element_is_term = FALSE; - break; - default: - /* search term */ - parsed_str = tracker_parse_text_to_string (strings[i], TRUE, FALSE, FALSE); - node = tree_node_leaf_new (g_strstrip (parsed_str)); - g_queue_push_head (queue, node); - last_element_is_term = TRUE; - - g_free (parsed_str); - } - } - - /* we've finished parsing, queue all remaining operators in the stack */ - while ((stack_node = g_queue_pop_head (stack)) != NULL) { - g_queue_push_head (queue, stack_node); - } - - /* step 2: run through the reverse polish notation and connect nodes to - * create a tree, the stack will be used to store temporarily nodes - * until they're connected to a parent node */ - while ((node = g_queue_pop_tail (queue)) != NULL) { - switch (node->op) { - case OP_AND: - case OP_OR: - node->left = g_queue_pop_head (stack); - node->right = g_queue_pop_head (stack); - g_queue_push_head (stack, node); - break; - default: - g_queue_push_head (stack, node); - break; - } - - priv->tree = node; - } - - g_strfreev (strings); - g_queue_free (stack); - g_queue_free (queue); -} - -void -tracker_query_tree_set_query (TrackerQueryTree *tree, - const gchar *query_str) -{ - TrackerQueryTreePrivate *priv; - gchar *str; - - g_return_if_fail (TRACKER_IS_QUERY_TREE (tree)); - g_return_if_fail (query_str != NULL); - - priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree); - - str = g_strdup (query_str); - g_free (priv->query_str); - priv->query_str = str; - - /* construct the parse tree */ - create_query_tree (tree); - - g_object_notify (G_OBJECT (tree), "query"); -} - -G_CONST_RETURN gchar * -tracker_query_tree_get_query (TrackerQueryTree *tree) -{ - TrackerQueryTreePrivate *priv; - - g_return_val_if_fail (TRACKER_IS_QUERY_TREE (tree), NULL); - - priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree); - return priv->query_str; -} - -void -tracker_query_tree_set_indexer (TrackerQueryTree *tree, - Indexer *indexer) -{ - TrackerQueryTreePrivate *priv; - - g_return_if_fail (TRACKER_IS_QUERY_TREE (tree)); - g_return_if_fail (indexer != NULL); - - priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree); - priv->indexer = indexer; - - g_object_notify (G_OBJECT (tree), "indexer"); -} - -Indexer * -tracker_query_tree_get_indexer (TrackerQueryTree *tree) -{ - TrackerQueryTreePrivate *priv; - - g_return_val_if_fail (TRACKER_IS_QUERY_TREE (tree), NULL); - - priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree); - return priv->indexer; -} - -void -tracker_query_tree_set_services (TrackerQueryTree *tree, - GArray *services) -{ - TrackerQueryTreePrivate *priv; - GArray *copy = NULL; - - g_return_if_fail (TRACKER_IS_QUERY_TREE (tree)); - - priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree); - - if (priv->services != services) { - if (services) { - copy = g_array_new (TRUE, TRUE, sizeof (gint)); - g_array_append_vals (copy, services->data, services->len); - } - - if (priv->services) - g_array_free (priv->services, TRUE); - - priv->services = copy; - g_object_notify (G_OBJECT (tree), "services"); - } -} - -GArray * -tracker_query_tree_get_services (TrackerQueryTree *tree) -{ - TrackerQueryTreePrivate *priv; - - g_return_val_if_fail (TRACKER_IS_QUERY_TREE (tree), NULL); - - priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree); - return priv->services; -} - -static void -get_tree_words (TreeNode *node, GSList **list) -{ - if (!node) - return; - - if (node->op == OP_NONE) - *list = g_slist_prepend (*list, node->term); - else { - get_tree_words (node->left, list); - get_tree_words (node->right, list); - } -} - -GSList * -tracker_query_tree_get_words (TrackerQueryTree *tree) -{ - TrackerQueryTreePrivate *priv; - GSList *list = NULL; - - g_return_val_if_fail (TRACKER_IS_QUERY_TREE (tree), NULL); - - priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree); - get_tree_words (priv->tree, &list); - - return list; -} - -static gint -get_idf_score (WordDetails *details, float idf) -{ - guint32 score = tracker_word_details_get_score (details); - float f = idf * score * SCORE_MULTIPLIER; - - return (f > 1.0) ? lrintf (f) : 1; -} - -static gboolean -in_array (GArray *array, gint element) -{ - guint i; - - if (!array) - return TRUE; - - for (i = 0; i < array->len; i++) { - if (g_array_index (array, gint, i) == element) - return TRUE; - } - - return FALSE; -} - -static void -search_hit_data_free (SearchHitData *search_hit) -{ - g_slice_free (SearchHitData, search_hit); -} - -static GHashTable * -get_search_term_hits (TrackerQueryTree *tree, - const gchar *term) -{ - TrackerQueryTreePrivate *priv; - GHashTable *result; - WordDetails *details; - guint count, i; - - priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree); - result = g_hash_table_new_full (NULL, NULL, NULL, - (GDestroyNotify) search_hit_data_free); - - details = tracker_indexer_get_word_hits (priv->indexer, term, &count); - - if (!details) - return result; - - for (i = 0; i < count; i++) { - SearchHitData *data; - gint service; - - service = tracker_word_details_get_service_type (&details[i]); - - if (in_array (priv->services, service)) { - data = g_slice_new (SearchHitData); - data->service_type_id = service; - data->score = get_idf_score (&details[i], (float) 1 / count); - - g_hash_table_insert (result, GINT_TO_POINTER (details[i].id), data); - } - } - - g_free (details); - - return result; -} - -static void -compose_hits_foreach (gpointer key, - gpointer value, - gpointer user_data) -{ - SearchHitData *hit1, *hit2, *hit; - ComposeHitsData *data; - - data = (ComposeHitsData *) user_data; - hit1 = (SearchHitData *) value; - hit2 = g_hash_table_lookup (data->other_table, key); - - if (data->op == OP_OR) { - /* compose both scores in the same entry */ - hit = g_slice_dup (SearchHitData, hit1); - - if (hit2) - hit->score += hit2->score; - - g_hash_table_insert (data->dest_table, key, hit); - } else if (data->op == OP_AND) { - /* only compose if the key is in both tables */ - if (hit2) { - hit = g_slice_dup (SearchHitData, hit1); - hit->score += hit2->score; - g_hash_table_insert (data->dest_table, key, hit); - } - } else { - g_assert_not_reached (); - } -} - -static GHashTable * -compose_hits (OperationType op, - GHashTable *left_table, - GHashTable *right_table) -{ - ComposeHitsData data; - GHashTable *foreach_table; - - data.op = op; - - /* try to run the foreach in the table with less hits */ - if (g_hash_table_size (left_table) < g_hash_table_size (right_table)) { - foreach_table = left_table; - data.other_table = right_table; - } else { - foreach_table = right_table; - data.other_table = left_table; - } - - if (op == OP_OR) { - data.dest_table = g_hash_table_ref (data.other_table); - } else { - data.dest_table = g_hash_table_new_full (NULL, NULL, NULL, - (GDestroyNotify) search_hit_data_free); - } - - g_hash_table_foreach (foreach_table, (GHFunc) compose_hits_foreach, &data); - - return data.dest_table; -} - -static GHashTable * -get_node_hits (TrackerQueryTree *tree, - TreeNode *node) -{ - GHashTable *left_table, *right_table, *result; - - if (!node) - return NULL; - - switch (node->op) { - case OP_NONE: - result = get_search_term_hits (tree, node->term); - break; - case OP_AND: - case OP_OR: - left_table = get_node_hits (tree, node->left); - right_table = get_node_hits (tree, node->right); - result = compose_hits (node->op, left_table, right_table); - - g_hash_table_unref (left_table); - g_hash_table_unref (right_table); - break; - default: - g_assert_not_reached (); - } - - return result; -} - -static void -get_hits_foreach (gpointer key, - gpointer value, - gpointer user_data) -{ - GArray *array; - TrackerSearchHit hit; - SearchHitData *hit_data; - - array = (GArray *) user_data; - hit_data = (SearchHitData *) value; - - hit.service_id = GPOINTER_TO_UINT (key); - hit.service_type_id = hit_data->service_type_id; - hit.score = hit_data->score; - - g_array_append_val (array, hit); -} - -static gint -compare_search_hits (gconstpointer a, - gconstpointer b) -{ - TrackerSearchHit *ap, *bp; - - ap = (TrackerSearchHit *) a; - bp = (TrackerSearchHit *) b; - - return (bp->score - ap->score); -} - -GArray * -tracker_query_tree_get_hits (TrackerQueryTree *tree, - guint offset, - guint limit) -{ - TrackerQueryTreePrivate *priv; - GHashTable *table; - GArray *array; - - g_return_val_if_fail (TRACKER_IS_QUERY_TREE (tree), NULL); - - priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree); - - g_return_val_if_fail (priv->tree != NULL, NULL); - - table = get_node_hits (tree, priv->tree); - array = g_array_sized_new (TRUE, TRUE, sizeof (TrackerSearchHit), - g_hash_table_size (table)); - - g_hash_table_foreach (table, (GHFunc) get_hits_foreach, array); - g_array_sort (array, compare_search_hits); - - if (offset > 0) { - g_array_remove_range (array, 0, CLAMP (offset, 0, array->len)); - } - - if (limit > 0 && limit < array->len) { - g_array_remove_range (array, limit, array->len - limit); - } - - return array; -} - -gint -tracker_query_tree_get_hit_count (TrackerQueryTree *tree) -{ - TrackerQueryTreePrivate *priv; - GHashTable *table; - gint count; - - g_return_val_if_fail (TRACKER_IS_QUERY_TREE (tree), 0); - - priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree); - table = get_node_hits (tree, priv->tree); - - if (!table) - return 0; - - count = g_hash_table_size (table); - g_hash_table_destroy (table); - - return count; -} - -static void -get_hit_count_foreach (gpointer key, - gpointer value, - gpointer user_data) -{ - GArray *array = (GArray *) user_data; - TrackerHitCount count; - - count.service_type_id = GPOINTER_TO_INT (key); - count.count = GPOINTER_TO_INT (value); - - g_array_append_val (array, count); -} - -GArray * -tracker_query_tree_get_hit_counts (TrackerQueryTree *tree) -{ - GHashTable *table; - GArray *hits, *counts; - guint i; - - g_return_val_if_fail (TRACKER_IS_QUERY_TREE (tree), NULL); - - table = g_hash_table_new (NULL, NULL); - hits = tracker_query_tree_get_hits (tree, 0, 0); - counts = g_array_sized_new (TRUE, TRUE, sizeof (TrackerHitCount), 10); - - for (i = 0; i < hits->len; i++) { - TrackerSearchHit hit; - gint count, parent_id; - - hit = g_array_index (hits, TrackerSearchHit, i); - count = GPOINTER_TO_INT (g_hash_table_lookup (table, GINT_TO_POINTER (hit.service_type_id))); - count++; - - g_hash_table_insert (table, GINT_TO_POINTER (hit.service_type_id), GINT_TO_POINTER (count)); - - /* update service's parent count too (if it has a parent) */ - parent_id = tracker_service_manager_get_parent_id_for_service_id (hit.service_type_id); - - if (parent_id != -1) { - count = GPOINTER_TO_INT (g_hash_table_lookup (table, GINT_TO_POINTER (parent_id))); - count++; - - g_hash_table_insert (table, GINT_TO_POINTER (parent_id), GINT_TO_POINTER (count)); - } - } - - g_hash_table_foreach (table, (GHFunc) get_hit_count_foreach, counts); - g_array_free (hits, TRUE); - - return counts; -} |