summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorCarsten Haitzler (Rasterman) <raster@rasterman.com>2021-10-23 12:15:17 +0100
committerCarsten Haitzler (Rasterman) <raster@rasterman.com>2021-10-23 12:15:17 +0100
commitd08bb7425552853eba6e1c27a33021741b7cb9b1 (patch)
tree8dbcfc05c31be61faf8a44d37b5eddf6be6b0e35 /src
parent05bee4b2abcf321a8c9c67d953159d94cd40285b (diff)
downloadefl-d08bb7425552853eba6e1c27a33021741b7cb9b1.tar.gz
eet - optimize eet dictionary building
This massively speeds up efreet's icon cache building for huge icon themes. In my nasty test case - several insanely huge icon themes with 50,000 - 100,000 files each, where the icon cache has to scan all of them and build the cache files - the build time drops from 80 seconds to 15-16 seconds, so it is over 5 times faster. The dictionary is now built in a temporary eina superfast string hash in RAM and is flattened out into the regular eet dict format just before writing. @opt
Diffstat (limited to 'src')
-rw-r--r--src/lib/eet/Eet_private.h31
-rw-r--r--src/lib/eet/eet_dictionary.c156
-rw-r--r--src/lib/eet/eet_lib.c1
-rw-r--r--src/lib/eet/eet_utils.c19
4 files changed, 170 insertions, 37 deletions
diff --git a/src/lib/eet/Eet_private.h b/src/lib/eet/Eet_private.h
index ef274a07ad..8b85a9377f 100644
--- a/src/lib/eet/Eet_private.h
+++ b/src/lib/eet/Eet_private.h
@@ -40,19 +40,29 @@ struct _Eet_Dictionary
unsigned char *all_hash;
unsigned char *all_allocated;
+ const char *start;
+ const char *end;
+
Eina_Hash *converts;
Eina_RWLock rwlock;
int size;
int offset;
+ int count;
+ int total;
int hash[256];
- int count;
- int total;
+ Eina_Hash *add_hash;
- const char *start;
- const char *end;
+ // This is a quick and dirty speedup when building a dictionary
+ // or looking stuff up especially when looking up the same set of
+ // strings again and again one after the other
+ struct {
+ int hash, len, current, previous;
+ const char *str;
+ } cache[16];
+ int cache_id;
};
struct _Eet_Node
@@ -240,13 +250,13 @@ int
eet_dictionary_string_get_size_unlocked(const Eet_Dictionary *ed,
int index);
int
-eet_dictionary_string_get_size(const Eet_Dictionary *ed,
+eet_dictionary_string_get_size(Eet_Dictionary *ed,
int index);
const char *
eet_dictionary_string_get_char_unlocked(const Eet_Dictionary *ed,
int index);
const char *
-eet_dictionary_string_get_char(const Eet_Dictionary *ed,
+eet_dictionary_string_get_char(Eet_Dictionary *ed,
int index);
Eina_Bool
eet_dictionary_string_get_float_unlocked(const Eet_Dictionary *ed,
@@ -276,9 +286,16 @@ int
eet_dictionary_string_get_hash_unlocked(const Eet_Dictionary *ed,
int index);
int
-eet_dictionary_string_get_hash(const Eet_Dictionary *ed,
+eet_dictionary_string_get_hash(Eet_Dictionary *ed,
int index);
+void
+eet_dictionary_write_prepare(Eet_Dictionary *ed);
+
+int
+_eet_hash_gen_len(const char *key,
+ int hash_size,
+ int *len_ret);
int _eet_hash_gen(const char *key,
int hash_size);
diff --git a/src/lib/eet/eet_dictionary.c b/src/lib/eet/eet_dictionary.c
index c8756f7b00..a334167075 100644
--- a/src/lib/eet/eet_dictionary.c
+++ b/src/lib/eet/eet_dictionary.c
@@ -43,9 +43,77 @@ eet_dictionary_free(Eet_Dictionary *ed)
free(ed->all_allocated);
if (ed->converts) eina_hash_free(ed->converts);
+ if (ed->add_hash) eina_hash_free(ed->add_hash);
eet_dictionary_mp_free(ed);
}
+/* Take the dictionary's rwlock for reading. The const qualifier on ed is
+ * cast away because acquiring the lock needs a mutable Eina_RWLock. */
+void
+eet_dictionary_lock_read(const Eet_Dictionary *ed)
+{
+   Eina_RWLock *lock = (Eina_RWLock *)&ed->rwlock;
+
+   eina_rwlock_take_read(lock);
+}
+
+/* Take the dictionary's rwlock for writing. */
+void
+eet_dictionary_lock_write(Eet_Dictionary *ed)
+{
+   Eina_RWLock *lock = &ed->rwlock;
+
+   eina_rwlock_take_write(lock);
+}
+
+/* Release the dictionary's rwlock. The const qualifier on ed is cast away
+ * because releasing the lock needs a mutable Eina_RWLock. */
+void
+eet_dictionary_unlock(const Eet_Dictionary *ed)
+{
+   Eina_RWLock *lock = (Eina_RWLock *)&ed->rwlock;
+
+   eina_rwlock_release(lock);
+}
+
+/* eina_hash_foreach() callback used while flattening add_hash.
+ *
+ * key   - the string stored in add_hash
+ * value - the dictionary index + 1, smuggled through the pointer
+ * data  - the Eet_Dictionary being filled in
+ *
+ * Writes the string into slot (value - 1) of ed->all, marks the slot in the
+ * all_allocated bitmap, records its 8-bit hash and pushes the slot onto the
+ * head of the matching ed->hash[] chain. If the stringshare cannot be
+ * created the slot is left untouched. Always returns EINA_TRUE so the
+ * iteration continues over every entry. */
+static Eina_Bool
+_eet_dictionary_write_prepare_hash_cb(const Eina_Hash *hashtab EINA_UNUSED, const void *key, void *value, void *data)
+{
+   Eet_Dictionary *dict = data;
+   const char *text = key;
+   const char *shared;
+   Eet_String *slot;
+   int bucket, size;
+   int pos = (int)((uintptr_t)value) - 1;
+
+   bucket = _eet_hash_gen_len(text, 8, &size);
+   size++; // stored length includes the terminating NUL
+
+   shared = eina_stringshare_add(text);
+   if (shared)
+     {
+        slot = dict->all + pos;
+
+        dict->all_allocated[pos >> 3] |= (1 << (pos & 0x7));
+        dict->all_hash[pos] = bucket;
+
+        slot->str = shared;
+        slot->len = size;
+
+        // link at the head of this bucket's chain
+        slot->next = dict->hash[bucket];
+        dict->hash[bucket] = pos;
+     }
+
+   return EINA_TRUE;
+}
+
+/* Flatten the temporary add_hash (filled by the fast path of
+ * eet_dictionary_string_add()) into the ed->all, ed->all_hash and
+ * ed->all_allocated arrays, then drop the hash.
+ *
+ * Does nothing when there is no add_hash. The whole operation runs under
+ * the dictionary's write lock.
+ *
+ * The arrays are zero-initialised: the per-entry callback ORs bits into
+ * all_allocated, so the bitmap must start out cleared, and an entry whose
+ * stringshare could not be created is left as zeroes instead of garbage.
+ *
+ * If any allocation fails nothing is changed: add_hash is kept so that a
+ * later call can retry, and the callback never runs against NULL arrays. */
+void
+eet_dictionary_write_prepare(Eet_Dictionary *ed)
+{
+   Eet_String *all;
+   unsigned char *all_hash, *all_allocated;
+
+   eina_rwlock_take_write(&ed->rwlock);
+   if (!ed->add_hash) goto done;
+
+   all = calloc(ed->count, sizeof(Eet_String));
+   all_hash = calloc(ed->count, 1);
+   all_allocated = calloc((ed->count >> 3) + 1, 1);
+   if ((!all) || (!all_hash) || (!all_allocated))
+     {
+        free(all);
+        free(all_hash);
+        free(all_allocated);
+        goto done;
+     }
+
+   ed->all = all;
+   ed->all_hash = all_hash;
+   ed->all_allocated = all_allocated;
+   // The arrays now hold exactly ed->count slots, so record that capacity.
+   // NOTE(review): presumably the slow path of eet_dictionary_string_add()
+   // compares total against count to decide when to grow - confirm.
+   ed->total = ed->count;
+
+   eina_hash_foreach(ed->add_hash, _eet_dictionary_write_prepare_hash_cb, ed);
+   eina_hash_free(ed->add_hash);
+   ed->add_hash = NULL;
+
+done:
+   eina_rwlock_release(&ed->rwlock);
+}
+
static int
_eet_dictionary_lookup(Eet_Dictionary *ed,
const char *string,
@@ -53,7 +121,21 @@ _eet_dictionary_lookup(Eet_Dictionary *ed,
int hash,
int *previous)
{
- int prev = -1, current;
+ int prev = -1, current, i;
+
+ for (i = 0; i < 16; i++)
+ {
+ if ((ed->cache[i].hash == hash) &&
+ (((ed->cache[i].str) &&
+ (ed->cache[i].str == string)) ||
+ ((ed->cache[i].len == len) &&
+ (!strcmp(ed->cache[i].str, string)))))
+ {
+ if (previous) *previous = ed->cache[i].previous;
+ current = ed->cache[i].current;
+ return current;
+ }
+ }
current = ed->hash[hash];
while (current != -1)
@@ -63,6 +145,13 @@ _eet_dictionary_lookup(Eet_Dictionary *ed,
((ed->all[current].len == len) &&
(!strcmp(ed->all[current].str, string)))))
{
+ ed->cache[ed->cache_id].hash = hash;
+ ed->cache[ed->cache_id].current = current;
+ ed->cache[ed->cache_id].previous = prev;
+ ed->cache[ed->cache_id].str = ed->all[current].str;
+ ed->cache[ed->cache_id].len = len;
+ ed->cache_id++;
+ if (ed->cache_id >= 16) ed->cache_id = 0;
break;
}
prev = current;
@@ -72,24 +161,6 @@ _eet_dictionary_lookup(Eet_Dictionary *ed,
return current;
}
-void
-eet_dictionary_lock_read(const Eet_Dictionary *ed)
-{
- eina_rwlock_take_read((Eina_RWLock *)&ed->rwlock);
-}
-
-void
-eet_dictionary_lock_write(Eet_Dictionary *ed)
-{
- eina_rwlock_take_write((Eina_RWLock *)&ed->rwlock);
-}
-
-void
-eet_dictionary_unlock(const Eet_Dictionary *ed)
-{
- eina_rwlock_release((Eina_RWLock *)&ed->rwlock);
-}
-
int
eet_dictionary_string_add(Eet_Dictionary *ed,
const char *string)
@@ -97,11 +168,38 @@ eet_dictionary_string_add(Eet_Dictionary *ed,
Eet_String *current;
const char *str;
int hash, idx, pidx, len, cnt;
+ uintptr_t ret;
if (!ed) return -1;
- hash = _eet_hash_gen(string, 8);
- len = strlen(string) + 1;
+ // fast path in initial dict build - add hashes to eina hash not eet one
+ // as eina is much faster - prepare for write later
+ eina_rwlock_take_write(&ed->rwlock);
+ if (ed->count == 0)
+ {
+ if (!ed->add_hash) ed->add_hash = eina_hash_string_superfast_new(NULL);
+ }
+ if (ed->add_hash)
+ {
+ ret = (uintptr_t)eina_hash_find(ed->add_hash, string);
+ if (ret > 0)
+ {
+ idx = (int)(ret - 1);
+ eina_rwlock_release(&ed->rwlock);
+ return idx;
+ }
+ ret = ed->count + 1;
+ eina_hash_add(ed->add_hash, string, (void *)ret);
+ ed->count++;
+ eina_rwlock_release(&ed->rwlock);
+ return (int)(ret - 1);
+ }
+ eina_rwlock_release(&ed->rwlock);
+
+ // fall back - we converted/prepared the dict for writing so go to slow
+ // mode - we still have a little cache for looking it up though
+ hash = _eet_hash_gen_len(string, 8, &len);
+ len++;
eina_rwlock_take_read(&ed->rwlock);
@@ -175,11 +273,12 @@ done:
}
int
-eet_dictionary_string_get_size(const Eet_Dictionary *ed,
- int idx)
+eet_dictionary_string_get_size(Eet_Dictionary *ed,
+ int idx)
{
int length;
+ eet_dictionary_write_prepare(ed);
eina_rwlock_take_read((Eina_RWLock *)&ed->rwlock);
length = eet_dictionary_string_get_size_unlocked(ed, idx);
eina_rwlock_release((Eina_RWLock *)&ed->rwlock);
@@ -214,11 +313,12 @@ done:
}
int
-eet_dictionary_string_get_hash(const Eet_Dictionary *ed,
- int idx)
+eet_dictionary_string_get_hash(Eet_Dictionary *ed,
+ int idx)
{
int hash;
+ eet_dictionary_write_prepare(ed);
eina_rwlock_take_read((Eina_RWLock *)&ed->rwlock);
hash = eet_dictionary_string_get_hash_unlocked(ed, idx);
eina_rwlock_release((Eina_RWLock *)&ed->rwlock);
@@ -251,11 +351,12 @@ done:
}
const char *
-eet_dictionary_string_get_char(const Eet_Dictionary *ed,
- int idx)
+eet_dictionary_string_get_char(Eet_Dictionary *ed,
+ int idx)
{
const char *s = NULL;
+ eet_dictionary_write_prepare(ed);
eina_rwlock_take_read((Eina_RWLock *)&ed->rwlock);
s = eet_dictionary_string_get_char_unlocked(ed, idx);
eina_rwlock_release((Eina_RWLock *)&ed->rwlock);
@@ -497,6 +598,7 @@ eet_dictionary_string_check(Eet_Dictionary *ed,
if ((!ed) || (!string)) return 0;
+ eet_dictionary_write_prepare(ed);
eina_rwlock_take_read((Eina_RWLock *)&ed->rwlock);
if ((ed->start <= string) && (string < ed->end)) res = 1;
diff --git a/src/lib/eet/eet_lib.c b/src/lib/eet/eet_lib.c
index 1aa2a1c9bb..0ed786e1d7 100644
--- a/src/lib/eet/eet_lib.c
+++ b/src/lib/eet/eet_lib.c
@@ -358,6 +358,7 @@ eet_flush2(Eet_File *ef)
}
if (ef->ed)
{
+ eet_dictionary_write_prepare(ef->ed);
num_dictionary_entries = ef->ed->count;
for (i = 0; i < num_dictionary_entries; ++i)
diff --git a/src/lib/eet/eet_utils.c b/src/lib/eet/eet_utils.c
index b04ad1b525..6951eeec7e 100644
--- a/src/lib/eet/eet_utils.c
+++ b/src/lib/eet/eet_utils.c
@@ -9,8 +9,9 @@
#include "Eet_private.h"
int
-_eet_hash_gen(const char *key,
- int hash_size)
+_eet_hash_gen_len(const char *key,
+ int hash_size,
+ int *len_ret)
{
int hash_num = 0;
int value, i;
@@ -19,13 +20,17 @@ _eet_hash_gen(const char *key,
/* no string - index 0 */
if (!key)
- return 0;
+ {
+ *len_ret = 0;
+ return 0;
+ }
/* calc hash num */
for (i = 0, ptr = (unsigned char *)key, value = (int)(*ptr);
value;
ptr++, i++, value = (int)(*ptr))
hash_num ^= (value | (value << 8)) >> (i & 0x7);
+ *len_ret = i;
/* mask it */
mask = (1 << hash_size) - 1;
@@ -34,3 +39,11 @@ _eet_hash_gen(const char *key,
return hash_num;
}
+/* Hash key into hash_size bits. Convenience wrapper around
+ * _eet_hash_gen_len() for callers that do not need the string length. */
+int
+_eet_hash_gen(const char *key,
+              int hash_size)
+{
+   int unused_len = 0;
+
+   return _eet_hash_gen_len(key, hash_size, &unused_len);
+}
+