From 896c8d489c542f1f2f4f802f616ad2ea447327b6 Mon Sep 17 00:00:00 2001 From: Richard Hughes Date: Mon, 14 Nov 2016 20:06:16 +0000 Subject: Deduplicate the AsNode attribute key and value using a hash table This saves about 1Mb of RSS when loading the Fedora AppStream file. --- Makefile.am | 6 +++ libappstream-glib/as-node.c | 121 ++++++++++++++++++++++++++++++++------------ 2 files changed, 96 insertions(+), 31 deletions(-) diff --git a/Makefile.am b/Makefile.am index a513f48..2f116eb 100644 --- a/Makefile.am +++ b/Makefile.am @@ -81,6 +81,12 @@ DISTCHECK_CONFIGURE_FLAGS = \ --enable-builder \ --with-bashcompletiondir=/${prefix}/share/bash-completion/completions +massif: + make && \ + time ./client/appstream-util search gimp --help > /dev/null && \ + valgrind --massif-out-file=massif.out --tool=massif ./client/.libs/lt-appstream-util search gimp --profile && \ + ms_print massif.out | head -n 30 + .PHONY: ChangeLog -include $(top_srcdir)/git.mk diff --git a/libappstream-glib/as-node.c b/libappstream-glib/as-node.c index cbf2364..127081e 100644 --- a/libappstream-glib/as-node.c +++ b/libappstream-glib/as-node.c @@ -41,19 +41,30 @@ #include "as-node-private.h" #include "as-utils-private.h" +typedef struct { + GHashTable *interned_hash; +} AsNodeRoot; + typedef struct { - GList *attrs; - gchar *name; /* only used if tag == AS_TAG_UNKNOWN */ - gchar *cdata; - guint8 cdata_escaped:1; - guint8 cdata_ignore:1; - AsTag tag; + GList *attrs; + union { + AsTag tag; + gchar *name; /* only used if tag_is_valid = FALSE */ + }; + union { + AsNodeRoot *root; /* only used if is_root_node = TRUE */ + gchar *cdata; + }; + guint8 is_root_node:1; + guint8 cdata_escaped:1; + guint8 cdata_ignore:1; + guint8 tag_is_valid:1; } AsNodeData; typedef struct { - const gchar *key; - gchar *value; + const gchar *key; + const gchar *value; } AsNodeAttr; /** @@ -71,23 +82,45 @@ as_node_new (void) AsNodeData *data; data = g_slice_new0 (AsNodeData); data->tag = AS_TAG_LAST; + data->tag_is_valid = TRUE; + data->is_root_node = TRUE; + data->root = g_new0 (AsNodeRoot, 1); + data->root->interned_hash = g_hash_table_new_full (g_str_hash, + g_str_equal, + g_free, + NULL); return g_node_new (data); } static void as_node_attr_free (AsNodeAttr *attr) { - g_free (attr->value); g_slice_free (AsNodeAttr, attr); } +static const gchar * +as_node_intern (GHashTable *hash, const gchar *key) +{ + gchar *value = g_hash_table_lookup (hash, key); + if (value == NULL) { + value = g_strdup (key); + g_hash_table_insert (hash, value, value); + } + return value; +} + static AsNodeAttr * -as_node_attr_insert (AsNodeData *data, const gchar *key, const gchar *value) +as_node_attr_insert (AsNode *root, + AsNodeData *data, + const gchar *key, + const gchar *value) { AsNodeAttr *attr; + AsNodeRoot *root_data = ((AsNodeData *)root->data)->root; + attr = g_slice_new0 (AsNodeAttr); - attr->key = g_intern_string (key); - attr->value = g_strdup (value); + attr->key = as_node_intern (root_data->interned_hash, key); + attr->value = as_node_intern (root_data->interned_hash, value); data->attrs = g_list_prepend (data->attrs, attr); return attr; } @@ -122,8 +155,14 @@ as_node_destroy_node_cb (AsNode *node, gpointer user_data) AsNodeData *data = node->data; if (data == NULL) return FALSE; - g_free (data->name); - g_free (data->cdata); + if (!data->tag_is_valid) + g_free (data->name); + if (data->is_root_node) { + g_hash_table_unref (data->root->interned_hash); + g_free (data->root); + } else { + g_free (data->cdata); + } g_list_free_full (data->attrs, (GDestroyNotify) as_node_attr_free); g_slice_free (AsNodeData, data); return FALSE; @@ -183,6 +222,8 @@ as_node_string_replace_inplace (gchar *text, static void as_node_cdata_to_raw (AsNodeData *data) { + if (data->is_root_node) + return; if (!data->cdata_escaped) return; as_node_string_replace_inplace (data->cdata, "&", '&'); @@ -195,6 +236,8 @@ static void as_node_cdata_to_escaped (AsNodeData *data) { GString *str; + if (data->is_root_node) + return; if (data->cdata_escaped) return; str = g_string_new (data->cdata); @@ -254,7 +297,7 @@ as_node_get_attr_string (AsNodeData *data) static const gchar * as_tag_data_get_name (AsNodeData *data) { - if (data->name == NULL) + if (data->tag_is_valid) return as_tag_to_string (data->tag); return data->name; } @@ -264,12 +307,18 @@ as_node_data_set_name (AsNodeData *data, const gchar *name, AsNodeInsertFlags fl { if ((flags & AS_NODE_INSERT_FLAG_MARK_TRANSLATABLE) == 0) { /* only store the name if the tag is not recognised */ - data->tag = as_tag_from_string (name); - if (data->tag == AS_TAG_UNKNOWN) + AsTag tag = as_tag_from_string (name); + if (tag == AS_TAG_UNKNOWN) { data->name = g_strdup (name); + data->tag_is_valid = FALSE; + } else { + data->tag = tag; + data->tag_is_valid = TRUE; + } } else { /* always store the translated tag */ data->name = g_strdup_printf ("_%s", name); + data->tag_is_valid = FALSE; } } @@ -517,9 +566,11 @@ as_node_start_element_cb (GMarkupParseContext *context, /* create the new node data */ if (!data->cdata_ignore) { + AsNode *root = g_node_get_root (helper->current); + g_assert (root != NULL); as_node_data_set_name (data, element_name, AS_NODE_INSERT_FLAG_NONE); for (i = 0; attribute_names[i] != NULL; i++) { - as_node_attr_insert (data, + as_node_attr_insert (root, data, attribute_names[i], attribute_values[i]); } @@ -929,8 +980,10 @@ as_node_set_name (AsNode *node, const gchar *name) return; /* overwrite */ - g_free (data->name); - data->name = NULL; + if (!data->tag_is_valid) { + g_free (data->name); + data->name = NULL; + } as_node_data_set_name (data, name, AS_NODE_INSERT_FLAG_NONE); } @@ -952,6 +1005,8 @@ as_node_get_data (const AsNode *node) if (node->data == NULL) return NULL; data = (AsNodeData *) node->data; + if (data->is_root_node) + return NULL; if (data->cdata == NULL || data->cdata[0] == '\0') return NULL; as_node_cdata_to_raw (data); @@ -997,7 +1052,7 @@ as_node_get_tag (const AsNode *node) return AS_TAG_UNKNOWN; /* try to match with a fallback */ - if (data->tag == AS_TAG_UNKNOWN) { + if (!data->tag_is_valid) { tmp = as_tag_data_get_name (data); return as_tag_from_string_full (tmp, AS_TAG_FLAG_USE_FALLBACKS); } @@ -1028,6 +1083,8 @@ as_node_set_data (AsNode *node, return; data = (AsNodeData *) node->data; + if (data->is_root_node) + return; g_free (data->cdata); data->cdata = g_strdup (cdata); data->cdata_escaped = insert_flags & AS_NODE_INSERT_FLAG_PRE_ESCAPED; @@ -1067,6 +1124,8 @@ as_node_take_data (const AsNode *node) if (node->data == NULL) return NULL; data = (AsNodeData *) node->data; + if (data->is_root_node) + return NULL; if (data->cdata == NULL || data->cdata[0] == '\0') return NULL; as_node_cdata_to_raw (data); @@ -1185,9 +1244,7 @@ as_node_take_attribute (const AsNode *node, const gchar *key) attr = as_node_attr_find (data, key); if (attr == NULL) return NULL; - tmp = attr->value; - attr->value = NULL; - return tmp; + return g_strdup (attr->value); } /** @@ -1234,7 +1291,7 @@ as_node_add_attribute (AsNode *node, const gchar *value) { AsNodeData *data; - AsNodeAttr *attr; + AsNode *root = g_node_get_root (node); g_return_if_fail (node != NULL); g_return_if_fail (key != NULL); @@ -1242,8 +1299,7 @@ as_node_add_attribute (AsNode *node, if (node->data == NULL) return; data = (AsNodeData *) node->data; - attr = as_node_attr_insert (data, key, NULL); - attr->value = g_strdup (value); + as_node_attr_insert (root, data, key, value); } /** @@ -1393,6 +1449,7 @@ as_node_insert (AsNode *parent, const gchar *key; const gchar *value; AsNodeData *data; + AsNode *root = g_node_get_root (parent); guint i; va_list args; @@ -1417,7 +1474,7 @@ as_node_insert (AsNode *parent, value = va_arg (args, const gchar *); if (value == NULL) break; - as_node_attr_insert (data, key, value); + as_node_attr_insert (root, data, key, value); } va_end (args); @@ -1448,6 +1505,7 @@ as_node_insert_localized (AsNode *parent, AsNodeInsertFlags insert_flags) { AsNodeData *data; + AsNode *root = g_node_get_root (parent); GList *l; const gchar *key; const gchar *value; @@ -1485,7 +1543,7 @@ as_node_insert_localized (AsNode *parent, g_strcmp0 (value_c, value) == 0) continue; data = g_slice_new0 (AsNodeData); - as_node_attr_insert (data, "xml:lang", key); + as_node_attr_insert (root, data, "xml:lang", key); as_node_data_set_name (data, name, insert_flags); if (insert_flags & AS_NODE_INSERT_FLAG_NO_MARKUP) { data->cdata = as_markup_convert_simple (value, NULL); @@ -1518,6 +1576,7 @@ as_node_insert_hash (AsNode *parent, AsNodeInsertFlags insert_flags) { AsNodeData *data; + AsNode *root = g_node_get_root (parent); GList *l; GList *list; const gchar *key; @@ -1537,10 +1596,10 @@ as_node_insert_hash (AsNode *parent, data->cdata_escaped = insert_flags & AS_NODE_INSERT_FLAG_PRE_ESCAPED; if (!swapped) { if (key != NULL && key[0] != '\0') - as_node_attr_insert (data, attr_key, key); + as_node_attr_insert (root, data, attr_key, key); } else { if (value != NULL && value[0] != '\0') - as_node_attr_insert (data, attr_key, value); + as_node_attr_insert (root, data, attr_key, value); } g_node_insert_data (parent, -1, data); } -- cgit v1.2.1