summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorRan Benita <ran@unusedvar.com>2021-03-29 16:05:14 +0300
committerRan Benita <ran@unusedvar.com>2021-03-31 12:10:52 +0300
commit02b9cabf9827dd004912fb0b002c2a37a1aa90e0 (patch)
tree68366cff22aea14237138d1f38ac88ff61ee4121 /src
parent3a6c3b2c48614d6ce94cc89a349c3f422e429931 (diff)
downloadxorg-lib-libxkbcommon-02b9cabf9827dd004912fb0b002c2a37a1aa90e0.tar.gz
compose: use a ternary tree instead of a regular trie
Previously we used a simple trie with a linked list for each chain. Unfortunately most compose files have very long chains which means the constructions performs an almost quadratic number of comparisons. Switch to using a ternary search tree instead. This is very similar to a trie, only the linked list is essentially replaced with a binary tree. On the en_US/Compose file, the perf diff is the following (the modified function is `parse`): Event 'cycles:u' Baseline Delta Abs Shared Object Symbol ........ ......... ................ ................................. 39.91% -17.62% bench-compose [.] parse.constprop.0 20.54% +6.47% bench-compose [.] lex 17.28% +5.55% libc-2.33.so [.] __strcmp_avx2 12.78% +4.01% bench-compose [.] xkb_keysym_from_name 2.30% +0.83% libc-2.33.so [.] __GI_____strtoull_l_internal 3.36% +0.78% bench-compose [.] strcmp@plt Thanks to some careful packing, the memory usage is pretty much the same. Signed-off-by: Ran Benita <ran@unusedvar.com>
Diffstat (limited to 'src')
-rw-r--r--src/compose/parser.c165
-rw-r--r--src/compose/state.c17
-rw-r--r--src/compose/table.c15
-rw-r--r--src/compose/table.h70
4 files changed, 139 insertions, 128 deletions
diff --git a/src/compose/parser.c b/src/compose/parser.c
index a8458ac..a47ae4b 100644
--- a/src/compose/parser.c
+++ b/src/compose/parser.c
@@ -327,114 +327,107 @@ struct production {
xkb_mod_mask_t mods;
};
-static uint16_t
-add_node(struct xkb_compose_table *table, xkb_keysym_t keysym)
-{
- struct compose_node new = {
- .keysym = keysym,
- .next = 0,
- .is_leaf = true,
- };
- darray_append(table->nodes, new);
- return darray_size(table->nodes) - 1;
-}
-
static void
add_production(struct xkb_compose_table *table, struct scanner *s,
const struct production *production)
{
- unsigned lhs_pos;
- uint16_t curr;
- struct compose_node *node;
+ unsigned lhs_pos = 0;
+ uint16_t curr = darray_size(table->nodes) == 1 ? 0 : 1;
+ uint16_t *pptr = NULL;
+ struct compose_node *node = NULL;
if (darray_size(table->nodes) + 1 == MAX_COMPOSE_NODES)
scanner_warn(s, "too many sequences for one Compose file; will ignore further lines");
if (darray_size(table->nodes) >= MAX_COMPOSE_NODES)
return;
- curr = 0;
- node = &darray_item(table->nodes, curr);
-
/*
- * Insert the sequence to the trie, creating new nodes as needed.
+ * Insert the sequence to the ternary search tree, creating new nodes as
+ * needed.
*
- * TODO: This can be sped up a bit by first trying the path that the
- * previous production took, and only then doing the linear search
- * through the trie levels. This will work because sequences in the
- * Compose files are often clustered by a common prefix; especially
- * in the 1st and 2nd keysyms, which is where the largest variation
- * (thus, longest search) is.
+ * TODO: We insert in the order given, this means some inputs can create
+ * long O(n) chains, which results in total O(n^2) parsing time. We should
+ * ensure the tree is reasonably balanced somehow.
*/
- for (lhs_pos = 0; lhs_pos < production->len; lhs_pos++) {
- while (production->lhs[lhs_pos] != node->keysym) {
- if (node->next == 0) {
- uint16_t next = add_node(table, production->lhs[lhs_pos]);
- /* Refetch since add_node could have realloc()ed. */
- node = &darray_item(table->nodes, curr);
- node->next = next;
+ while (true) {
+ const xkb_keysym_t keysym = production->lhs[lhs_pos];
+ const bool last = lhs_pos + 1 == production->len;
+
+ if (curr == 0) {
+ /*
+ * Create a new node and update the parent pointer to it.
+ * Update the pointer first because the append invalidates it.
+ */
+ struct compose_node new = {
+ .keysym = keysym,
+ .lokid = 0,
+ .hikid = 0,
+ .internal = {
+ .eqkid = 0,
+ .is_leaf = false,
+ },
+ };
+ curr = darray_size(table->nodes);
+ if (pptr != NULL) {
+ *pptr = curr;
+ pptr = NULL;
}
-
- curr = node->next;
- node = &darray_item(table->nodes, curr);
+ darray_append(table->nodes, new);
}
- if (lhs_pos + 1 == production->len)
- break;
+ node = &darray_item(table->nodes, curr);
- if (node->is_leaf) {
- if (node->leaf.utf8 != 0 ||
- node->leaf.keysym != XKB_KEY_NoSymbol) {
+ if (keysym < node->keysym) {
+ pptr = &node->lokid;
+ curr = node->lokid;
+ } else if (keysym > node->keysym) {
+ pptr = &node->hikid;
+ curr = node->hikid;
+ } else if (!last) {
+ if (node->is_leaf) {
scanner_warn(s, "a sequence already exists which is a prefix of this sequence; overriding");
- node->leaf.utf8 = 0;
- node->leaf.keysym = XKB_KEY_NoSymbol;
+ node->internal.eqkid = node->lokid = node->hikid = 0;
+ node->internal.is_leaf = false;
}
-
- {
- uint16_t successor = add_node(table, production->lhs[lhs_pos + 1]);
- /* Refetch since add_node could have realloc()ed. */
- node = &darray_item(table->nodes, curr);
- node->is_leaf = false;
- node->internal.successor = successor;
+ lhs_pos++;
+ pptr = &node->internal.eqkid;
+ curr = node->internal.eqkid;
+ } else {
+ if (node->is_leaf) {
+ bool same_string =
+ (node->leaf.utf8 == 0 && !production->has_string) ||
+ (
+ node->leaf.utf8 != 0 && production->has_string &&
+ streq(&darray_item(table->utf8, node->leaf.utf8),
+ production->string)
+ );
+ bool same_keysym =
+ (node->leaf.keysym == XKB_KEY_NoSymbol && !production->has_keysym) ||
+ (
+ node->leaf.keysym != XKB_KEY_NoSymbol && production->has_keysym &&
+ node->leaf.keysym == production->keysym
+ );
+ if (same_string && same_keysym) {
+ scanner_warn(s, "this compose sequence is a duplicate of another; skipping line");
+ return;
+ } else {
+ scanner_warn(s, "this compose sequence already exists; overriding");
+ }
+ } else if (node->internal.eqkid != 0) {
+ scanner_warn(s, "this compose sequence is a prefix of another; skipping line");
+ return;
+ }
+ node->is_leaf = true;
+ if (production->has_string) {
+ node->leaf.utf8 = darray_size(table->utf8);
+ darray_append_items(table->utf8, production->string,
+ strlen(production->string) + 1);
+ }
+ if (production->has_keysym) {
+ node->leaf.keysym = production->keysym;
}
- }
-
- curr = node->internal.successor;
- node = &darray_item(table->nodes, curr);
- }
-
- if (!node->is_leaf) {
- scanner_warn(s, "this compose sequence is a prefix of another; skipping line");
- return;
- }
-
- if (node->leaf.utf8 != 0 || node->leaf.keysym != XKB_KEY_NoSymbol) {
- bool same_string =
- (node->leaf.utf8 == 0 && !production->has_string) ||
- (
- node->leaf.utf8 != 0 && production->has_string &&
- streq(&darray_item(table->utf8, node->leaf.utf8),
- production->string)
- );
- bool same_keysym =
- (node->leaf.keysym == XKB_KEY_NoSymbol && !production->has_keysym) ||
- (
- node->leaf.keysym != XKB_KEY_NoSymbol && production->has_keysym &&
- node->leaf.keysym == production->keysym
- );
- if (same_string && same_keysym) {
- scanner_warn(s, "this compose sequence is a duplicate of another; skipping line");
return;
}
- scanner_warn(s, "this compose sequence already exists; overriding");
- }
-
- if (production->has_string) {
- node->leaf.utf8 = darray_size(table->utf8);
- darray_append_items(table->utf8, production->string,
- strlen(production->string) + 1);
- }
- if (production->has_keysym) {
- node->leaf.keysym = production->keysym;
}
}
diff --git a/src/compose/state.c b/src/compose/state.c
index de08a90..6ba0344 100644
--- a/src/compose/state.c
+++ b/src/compose/state.c
@@ -109,17 +109,20 @@ xkb_compose_state_feed(struct xkb_compose_state *state, xkb_keysym_t keysym)
node = &darray_item(state->table->nodes, state->context);
- context = (node->is_leaf ? 0 : node->internal.successor);
- node = &darray_item(state->table->nodes, context);
+ context = (node->is_leaf ? 1 : node->internal.eqkid);
+ if (context == 1 && darray_size(state->table->nodes) == 1)
+ context = 0;
- while (node->keysym != keysym && node->next != 0) {
- context = node->next;
+ while (context != 0) {
node = &darray_item(state->table->nodes, context);
+ if (keysym < node->keysym)
+ context = node->lokid;
+ else if (keysym > node->keysym)
+ context = node->hikid;
+ else
+ break;
}
- if (node->keysym != keysym)
- context = 0;
-
state->prev_context = state->context;
state->context = context;
return XKB_COMPOSE_FEED_ACCEPTED;
diff --git a/src/compose/table.c b/src/compose/table.c
index dbd255e..1691555 100644
--- a/src/compose/table.c
+++ b/src/compose/table.c
@@ -1,5 +1,5 @@
/*
- * Copyright © 2013 Ran Benita <ran234@gmail.com>
+ * Copyright © 2013,2021 Ran Benita <ran234@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -36,7 +36,7 @@ xkb_compose_table_new(struct xkb_context *ctx,
{
char *resolved_locale;
struct xkb_compose_table *table;
- struct compose_node root;
+ struct compose_node dummy;
resolved_locale = resolve_locale(locale);
if (!resolved_locale)
@@ -58,12 +58,11 @@ xkb_compose_table_new(struct xkb_context *ctx,
darray_init(table->nodes);
darray_init(table->utf8);
- root.keysym = XKB_KEY_NoSymbol;
- root.next = 0;
- root.is_leaf = true;
- root.leaf.utf8 = 0;
- root.leaf.keysym = XKB_KEY_NoSymbol;
- darray_append(table->nodes, root);
+ dummy.keysym = XKB_KEY_NoSymbol;
+ dummy.leaf.is_leaf = true;
+ dummy.leaf.utf8 = 0;
+ dummy.leaf.keysym = XKB_KEY_NoSymbol;
+ darray_append(table->nodes, dummy);
darray_append(table->utf8, '\0');
diff --git a/src/compose/table.h b/src/compose/table.h
index 467ccf7..6be4348 100644
--- a/src/compose/table.h
+++ b/src/compose/table.h
@@ -1,5 +1,5 @@
/*
- * Copyright © 2013 Ran Benita <ran234@gmail.com>
+ * Copyright © 2013,2021 Ran Benita <ran234@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -29,36 +29,43 @@
#include "context.h"
/*
- * The compose table data structure is a simple trie. An example will
- * help. Given these sequences:
+ * The compose table data structure is a ternary search tree.
*
- * <A> <B> : "first" dead_a
- * <A> <C> <D> : "second" dead_b
- * <E> <F> : "third" dead_c
+ * Reference: https://www.drdobbs.com/database/ternary-search-trees/184410528
+ * Visualization: https://www.cs.usfca.edu/~galles/visualization/TST.html
*
- * the trie would look like:
+ * Short example. Given these sequences:
+ *
+ * <B> <C> : "first" dead_a
+ * <B> <D> <E> : "second" dead_b
+ * <A> <F> : "third" dead_c
+ *
+ * the tree would look like:
+ *
+ * -------- [<B>]---------
+ * | | #
+ * v V
+ * -- [<A>] -- [<C>] --------
+ * # | # | |
+ * v # -- [<D>] --
+ * -- [<F>] -- # | #
+ * # | # v
+ * # -- [<E>] --
+ * # | #
+ * #
*
- * [root] ---> [<A>] -----------------> [<E>] -#
- * | | |
- * # v v
- * [<B>] ---> [<C>] -# [<F>] -#
- * | | -
- * # v #
- * [<D>] -#
- * |
- * #
* where:
- * - [root] is a special empty root node.
* - [<X>] is a node for a sequence keysym <X>.
- * - right arrows are `next` pointers.
- * - down arrows are `successor` pointers.
+ * - right arrows are `hikid` pointers.
+ * - left arrows are `lokid` pointers.
+ * - down arrows are `eqkid` pointers.
* - # is a nil pointer.
*
* The nodes are all kept in a contiguous array. Pointers are represented
* as integer offsets into this array. A nil pointer is represented as 0
- * (which, helpfully, is the offset of the empty root node).
+ * (which, helpfully, is the offset of an empty dummy node).
*
- * Nodes without a successor are leaf nodes. Since a sequence cannot be a
+ * Nodes without an eqkid are leaf nodes. Since a sequence cannot be a
* prefix of another, these are exactly the nodes which terminate the
* sequences (in a bijective manner).
*
@@ -73,18 +80,27 @@
struct compose_node {
xkb_keysym_t keysym;
- /* Offset into xkb_compose_table::nodes. */
- uint16_t next;
- bool is_leaf;
+
+ /* Offset into xkb_compose_table::nodes or 0. */
+ uint16_t lokid;
+ /* Offset into xkb_compose_table::nodes or 0. */
+ uint16_t hikid;
union {
struct {
- /* Offset into xkb_compose_table::nodes. */
- uint16_t successor;
+ uint32_t _pad:31;
+ bool is_leaf:1;
+ };
+ struct {
+ uint32_t _pad:31;
+ bool is_leaf:1;
+ /* Offset into xkb_compose_table::nodes or 0. */
+ uint16_t eqkid;
} internal;
struct {
/* Offset into xkb_compose_table::utf8. */
- uint32_t utf8;
+ uint32_t utf8:31;
+ bool is_leaf:1;
xkb_keysym_t keysym;
} leaf;
};