summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Hughes <richard@hughsie.com>2016-11-14 20:06:16 +0000
committerRichard Hughes <richard@hughsie.com>2016-11-14 20:06:16 +0000
commit896c8d489c542f1f2f4f802f616ad2ea447327b6 (patch)
tree7833575f53b8a03901982d5e39bc7ff748fb9d97
parent0d0b735d8a50593c4472c7e86a82e4f78e007797 (diff)
downloadappstream-glib-896c8d489c542f1f2f4f802f616ad2ea447327b6.tar.gz
Deduplicate the AsNode attribute key and value using a hash table
This saves about 1Mb of RSS when loading the Fedora AppStream file.
-rw-r--r--Makefile.am6
-rw-r--r--libappstream-glib/as-node.c121
2 files changed, 96 insertions, 31 deletions
diff --git a/Makefile.am b/Makefile.am
index a513f48..2f116eb 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -81,6 +81,12 @@ DISTCHECK_CONFIGURE_FLAGS = \
--enable-builder \
--with-bashcompletiondir=/${prefix}/share/bash-completion/completions
+massif:
+ make && \
+ time ./client/appstream-util search gimp --help > /dev/null && \
+ valgrind --massif-out-file=massif.out --tool=massif ./client/.libs/lt-appstream-util search gimp --profile && \
+ ms_print massif.out | head -n 30
+
.PHONY: ChangeLog
-include $(top_srcdir)/git.mk
diff --git a/libappstream-glib/as-node.c b/libappstream-glib/as-node.c
index cbf2364..127081e 100644
--- a/libappstream-glib/as-node.c
+++ b/libappstream-glib/as-node.c
@@ -41,19 +41,30 @@
#include "as-node-private.h"
#include "as-utils-private.h"
+typedef struct {
+ GHashTable *interned_hash;
+} AsNodeRoot;
+
typedef struct
{
- GList *attrs;
- gchar *name; /* only used if tag == AS_TAG_UNKNOWN */
- gchar *cdata;
- guint8 cdata_escaped:1;
- guint8 cdata_ignore:1;
- AsTag tag;
+ GList *attrs;
+ union {
+ AsTag tag;
+ gchar *name; /* only used if tag_is_valid = FALSE */
+ };
+ union {
+ AsNodeRoot *root; /* only used if is_root_node = TRUE */
+ gchar *cdata;
+ };
+ guint8 is_root_node:1;
+ guint8 cdata_escaped:1;
+ guint8 cdata_ignore:1;
+ guint8 tag_is_valid:1;
} AsNodeData;
typedef struct {
- const gchar *key;
- gchar *value;
+ const gchar *key;
+ const gchar *value;
} AsNodeAttr;
/**
@@ -71,23 +82,45 @@ as_node_new (void)
AsNodeData *data;
data = g_slice_new0 (AsNodeData);
data->tag = AS_TAG_LAST;
+ data->tag_is_valid = TRUE;
+ data->is_root_node = TRUE;
+ data->root = g_new0 (AsNodeRoot, 1);
+ data->root->interned_hash = g_hash_table_new_full (g_str_hash,
+ g_str_equal,
+ g_free,
+ NULL);
return g_node_new (data);
}
static void
as_node_attr_free (AsNodeAttr *attr)
{
- g_free (attr->value);
g_slice_free (AsNodeAttr, attr);
}
+static const gchar *
+as_node_intern (GHashTable *hash, const gchar *key)
+{
+ gchar *value = g_hash_table_lookup (hash, key);
+ if (value == NULL) {
+ value = g_strdup (key);
+ g_hash_table_insert (hash, value, value);
+ }
+ return value;
+}
+
static AsNodeAttr *
-as_node_attr_insert (AsNodeData *data, const gchar *key, const gchar *value)
+as_node_attr_insert (AsNode *root,
+ AsNodeData *data,
+ const gchar *key,
+ const gchar *value)
{
AsNodeAttr *attr;
+ AsNodeRoot *root_data = ((AsNodeData *)root->data)->root;
+
attr = g_slice_new0 (AsNodeAttr);
- attr->key = g_intern_string (key);
- attr->value = g_strdup (value);
+ attr->key = as_node_intern (root_data->interned_hash, key);
+ attr->value = as_node_intern (root_data->interned_hash, value);
data->attrs = g_list_prepend (data->attrs, attr);
return attr;
}
@@ -122,8 +155,14 @@ as_node_destroy_node_cb (AsNode *node, gpointer user_data)
AsNodeData *data = node->data;
if (data == NULL)
return FALSE;
- g_free (data->name);
- g_free (data->cdata);
+ if (!data->tag_is_valid)
+ g_free (data->name);
+ if (data->is_root_node) {
+ g_hash_table_unref (data->root->interned_hash);
+ g_free (data->root);
+ } else {
+ g_free (data->cdata);
+ }
g_list_free_full (data->attrs, (GDestroyNotify) as_node_attr_free);
g_slice_free (AsNodeData, data);
return FALSE;
@@ -183,6 +222,8 @@ as_node_string_replace_inplace (gchar *text,
static void
as_node_cdata_to_raw (AsNodeData *data)
{
+ if (data->is_root_node)
+ return;
if (!data->cdata_escaped)
return;
as_node_string_replace_inplace (data->cdata, "&amp;", '&');
@@ -195,6 +236,8 @@ static void
as_node_cdata_to_escaped (AsNodeData *data)
{
GString *str;
+ if (data->is_root_node)
+ return;
if (data->cdata_escaped)
return;
str = g_string_new (data->cdata);
@@ -254,7 +297,7 @@ as_node_get_attr_string (AsNodeData *data)
static const gchar *
as_tag_data_get_name (AsNodeData *data)
{
- if (data->name == NULL)
+ if (data->tag_is_valid)
return as_tag_to_string (data->tag);
return data->name;
}
@@ -264,12 +307,18 @@ as_node_data_set_name (AsNodeData *data, const gchar *name, AsNodeInsertFlags fl
{
if ((flags & AS_NODE_INSERT_FLAG_MARK_TRANSLATABLE) == 0) {
/* only store the name if the tag is not recognised */
- data->tag = as_tag_from_string (name);
- if (data->tag == AS_TAG_UNKNOWN)
+ AsTag tag = as_tag_from_string (name);
+ if (tag == AS_TAG_UNKNOWN) {
data->name = g_strdup (name);
+ data->tag_is_valid = FALSE;
+ } else {
+ data->tag = tag;
+ data->tag_is_valid = TRUE;
+ }
} else {
/* always store the translated tag */
data->name = g_strdup_printf ("_%s", name);
+ data->tag_is_valid = FALSE;
}
}
@@ -517,9 +566,11 @@ as_node_start_element_cb (GMarkupParseContext *context,
/* create the new node data */
if (!data->cdata_ignore) {
+ AsNode *root = g_node_get_root (helper->current);
+ g_assert (root != NULL);
as_node_data_set_name (data, element_name, AS_NODE_INSERT_FLAG_NONE);
for (i = 0; attribute_names[i] != NULL; i++) {
- as_node_attr_insert (data,
+ as_node_attr_insert (root, data,
attribute_names[i],
attribute_values[i]);
}
@@ -929,8 +980,10 @@ as_node_set_name (AsNode *node, const gchar *name)
return;
/* overwrite */
- g_free (data->name);
- data->name = NULL;
+ if (!data->tag_is_valid) {
+ g_free (data->name);
+ data->name = NULL;
+ }
as_node_data_set_name (data, name, AS_NODE_INSERT_FLAG_NONE);
}
@@ -952,6 +1005,8 @@ as_node_get_data (const AsNode *node)
if (node->data == NULL)
return NULL;
data = (AsNodeData *) node->data;
+ if (data->is_root_node)
+ return NULL;
if (data->cdata == NULL || data->cdata[0] == '\0')
return NULL;
as_node_cdata_to_raw (data);
@@ -997,7 +1052,7 @@ as_node_get_tag (const AsNode *node)
return AS_TAG_UNKNOWN;
/* try to match with a fallback */
- if (data->tag == AS_TAG_UNKNOWN) {
+ if (!data->tag_is_valid) {
tmp = as_tag_data_get_name (data);
return as_tag_from_string_full (tmp, AS_TAG_FLAG_USE_FALLBACKS);
}
@@ -1028,6 +1083,8 @@ as_node_set_data (AsNode *node,
return;
data = (AsNodeData *) node->data;
+ if (data->is_root_node)
+ return;
g_free (data->cdata);
data->cdata = g_strdup (cdata);
data->cdata_escaped = insert_flags & AS_NODE_INSERT_FLAG_PRE_ESCAPED;
@@ -1067,6 +1124,8 @@ as_node_take_data (const AsNode *node)
if (node->data == NULL)
return NULL;
data = (AsNodeData *) node->data;
+ if (data->is_root_node)
+ return NULL;
if (data->cdata == NULL || data->cdata[0] == '\0')
return NULL;
as_node_cdata_to_raw (data);
@@ -1185,9 +1244,7 @@ as_node_take_attribute (const AsNode *node, const gchar *key)
attr = as_node_attr_find (data, key);
if (attr == NULL)
return NULL;
- tmp = attr->value;
- attr->value = NULL;
- return tmp;
+ return g_strdup (attr->value);
}
/**
@@ -1234,7 +1291,7 @@ as_node_add_attribute (AsNode *node,
const gchar *value)
{
AsNodeData *data;
- AsNodeAttr *attr;
+ AsNode *root = g_node_get_root (node);
g_return_if_fail (node != NULL);
g_return_if_fail (key != NULL);
@@ -1242,8 +1299,7 @@ as_node_add_attribute (AsNode *node,
if (node->data == NULL)
return;
data = (AsNodeData *) node->data;
- attr = as_node_attr_insert (data, key, NULL);
- attr->value = g_strdup (value);
+ as_node_attr_insert (root, data, key, value);
}
/**
@@ -1393,6 +1449,7 @@ as_node_insert (AsNode *parent,
const gchar *key;
const gchar *value;
AsNodeData *data;
+ AsNode *root = g_node_get_root (parent);
guint i;
va_list args;
@@ -1417,7 +1474,7 @@ as_node_insert (AsNode *parent,
value = va_arg (args, const gchar *);
if (value == NULL)
break;
- as_node_attr_insert (data, key, value);
+ as_node_attr_insert (root, data, key, value);
}
va_end (args);
@@ -1448,6 +1505,7 @@ as_node_insert_localized (AsNode *parent,
AsNodeInsertFlags insert_flags)
{
AsNodeData *data;
+ AsNode *root = g_node_get_root (parent);
GList *l;
const gchar *key;
const gchar *value;
@@ -1485,7 +1543,7 @@ as_node_insert_localized (AsNode *parent,
g_strcmp0 (value_c, value) == 0)
continue;
data = g_slice_new0 (AsNodeData);
- as_node_attr_insert (data, "xml:lang", key);
+ as_node_attr_insert (root, data, "xml:lang", key);
as_node_data_set_name (data, name, insert_flags);
if (insert_flags & AS_NODE_INSERT_FLAG_NO_MARKUP) {
data->cdata = as_markup_convert_simple (value, NULL);
@@ -1518,6 +1576,7 @@ as_node_insert_hash (AsNode *parent,
AsNodeInsertFlags insert_flags)
{
AsNodeData *data;
+ AsNode *root = g_node_get_root (parent);
GList *l;
GList *list;
const gchar *key;
@@ -1537,10 +1596,10 @@ as_node_insert_hash (AsNode *parent,
data->cdata_escaped = insert_flags & AS_NODE_INSERT_FLAG_PRE_ESCAPED;
if (!swapped) {
if (key != NULL && key[0] != '\0')
- as_node_attr_insert (data, attr_key, key);
+ as_node_attr_insert (root, data, attr_key, key);
} else {
if (value != NULL && value[0] != '\0')
- as_node_attr_insert (data, attr_key, value);
+ as_node_attr_insert (root, data, attr_key, value);
}
g_node_insert_data (parent, -1, data);
}