summaryrefslogtreecommitdiff
path: root/subversion/libsvn_subr/prefix_string.c
diff options
context:
space:
mode:
Diffstat (limited to 'subversion/libsvn_subr/prefix_string.c')
-rw-r--r--subversion/libsvn_subr/prefix_string.c315
1 files changed, 315 insertions, 0 deletions
diff --git a/subversion/libsvn_subr/prefix_string.c b/subversion/libsvn_subr/prefix_string.c
new file mode 100644
index 0000000..fcf11bd
--- /dev/null
+++ b/subversion/libsvn_subr/prefix_string.c
@@ -0,0 +1,315 @@
+/* prefix_string.c --- implement strings based on a prefix tree
+ *
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ */
+
+#include <assert.h>
+#include "private/svn_string_private.h"
+
+/* A node in the tree represents a common prefix. The root node is the
+ * empty prefix. Nodes may have up to 256 sub-nodes, each starting with
+ * a different character (possibly '\0').
+ *
+ * The nodes in the tree store only up to 8 chars of the respective common
+ * prefix, i.e. longer common prefixes must be drawn out over multiple
+ * hierarchy levels. This is a space <-> efficiency trade-off.
+ *
+ * Strings are the leaf nodes in the tree and use a specialized, smaller
+ * data structure. They may add 0 to 7 extra chars to the prefix. Both
+ * data types can be discerned by the last char in the data buffer. This
+ * must be 0 for strings (leaves) and non-0 otherwise. Please note that
+ * ordinary nodes have a length information so that no terminating 0 is
+ * required for them.
+ */
+
+/* forward declaration */
+typedef struct node_t node_t;
+
+/* String type and tree leaf.
+ */
+struct svn_prefix_string__t
+{
+ /* mandatory prefix */
+ node_t *prefix;
+
+ /* 0 ..7 chars to add the the prefix. NUL-terminated. */
+ char data[8];
+};
+
+/* A node inside the tree, i.e. not a string and not a leaf (unless this is
+ * the root node).
+ *
+ * Note: keep the ordering to minimize size / alignment overhead on 64 bit
+ * machines.
+ */
+struct node_t
+{
+ /* pointer to the parent prefix plus the 1 .. 8 extra chars.
+ * Only the root will provide 0 extra chars. */
+ svn_prefix_string__t key;
+
+ /* Length of the prefix from the root down to and including this one.
+ * 0 for the root node. Only then will key.prefix be NULL. */
+ apr_uint32_t length;
+
+ /* Number of entries used in SUB_NODES. */
+ apr_uint32_t sub_node_count;
+
+ /* The sub-nodes, ordered by first char. node_t and svn_prefix_string__t
+ * may be mixed here. May be NULL.
+ * The number of allocated entries is always a power-of-two and only
+ * given implicitly by SUB_NODE_COUNT. */
+ struct node_t **sub_nodes;
+};
+
+/* The actual tree structure. */
+struct svn_prefix_tree__t
+{
+ /* the common tree root (represents the empty prefix). */
+ node_t *root;
+
+ /* all sub-nodes & strings will be allocated from this pool */
+ apr_pool_t *pool;
+};
+
+/* Return TRUE, iff NODE is a leaf node.
+ */
+static svn_boolean_t
+is_leaf(node_t *node)
+{
+ return node->key.data[7] == 0;
+}
+
+/* Ensure that the sub-nodes array of NODE within TREE has at least one
+ * unused entry. Re-allocate as necessary.
+ */
+static void
+auto_realloc_sub_nodes(svn_prefix_tree__t *tree,
+ node_t *node)
+{
+ if (node->sub_node_count & (node->sub_node_count - 1))
+ return;
+
+ if (node->sub_node_count == 0)
+ {
+ node->sub_nodes = apr_pcalloc(tree->pool, sizeof(*node->sub_nodes));
+ }
+ else
+ {
+ node_t **sub_nodes
+ = apr_pcalloc(tree->pool,
+ 2 * node->sub_node_count * sizeof(*sub_nodes));
+ memcpy(sub_nodes, node->sub_nodes,
+ node->sub_node_count * sizeof(*sub_nodes));
+ node->sub_nodes = sub_nodes;
+ }
+}
+
+/* Given the COUNT pointers in the SUB_NODES array, return the location at
+ * which KEY is either located or would be inserted.
+ */
+static int
+search_lower_bound(node_t **sub_nodes,
+ unsigned char key,
+ int count)
+{
+ int lower = 0;
+ int upper = count - 1;
+
+ /* Binary search for the lowest position at which to insert KEY. */
+ while (lower <= upper)
+ {
+ int current = lower + (upper - lower) / 2;
+
+ if ((unsigned char)sub_nodes[current]->key.data[0] < key)
+ lower = current + 1;
+ else
+ upper = current - 1;
+ }
+
+ return lower;
+}
+
+svn_prefix_tree__t *
+svn_prefix_tree__create(apr_pool_t *pool)
+{
+ svn_prefix_tree__t *tree = apr_pcalloc(pool, sizeof(*tree));
+ tree->pool = pool;
+
+ tree->root = apr_pcalloc(pool, sizeof(*tree->root));
+ tree->root->key.data[7] = '\xff';
+
+ return tree;
+}
+
+svn_prefix_string__t *
+svn_prefix_string__create(svn_prefix_tree__t *tree,
+ const char *s)
+{
+ svn_prefix_string__t *new_string;
+ apr_size_t len = strlen(s);
+ node_t *node = tree->root;
+ node_t *new_node;
+ int idx;
+
+ /* walk the existing tree until we either find S or the node at which S
+ * has to be inserted */
+ while (TRUE)
+ {
+ node_t *sub_node;
+ int match = 1;
+
+ /* index of the matching sub-node */
+ idx = node->sub_node_count
+ ? search_lower_bound(node->sub_nodes,
+ (unsigned char)s[node->length],
+ node->sub_node_count)
+ : 0;
+
+ /* any (partially) matching sub-nodes? */
+ if (idx == (int)node->sub_node_count
+ || node->sub_nodes[idx]->key.data[0] != s[node->length])
+ break;
+
+ sub_node = node->sub_nodes[idx];
+
+ /* fully matching sub-node? */
+ if (is_leaf(sub_node))
+ {
+ if (strcmp(sub_node->key.data, s + node->length) == 0)
+ return &sub_node->key;
+ }
+ else
+ {
+ apr_size_t sub_node_len = sub_node->length - node->length;
+ if (strncmp(sub_node->key.data, s + node->length,
+ sub_node_len) == 0)
+ {
+ node = sub_node;
+ continue;
+ }
+ }
+
+ /* partial match -> split */
+ while (sub_node->key.data[match] == s[node->length + match])
+ ++match;
+
+ new_node = apr_pcalloc(tree->pool, sizeof(*new_node));
+ new_node->key = sub_node->key;
+ new_node->length = node->length + match;
+ new_node->key.data[7] = '\xff';
+ new_node->sub_node_count = 1;
+ new_node->sub_nodes = apr_palloc(tree->pool, sizeof(node_t *));
+ new_node->sub_nodes[0] = sub_node;
+
+ memmove(sub_node->key.data, sub_node->key.data + match, 8 - match);
+
+ /* replace old sub-node with new one and continue lookup */
+ sub_node->key.prefix = new_node;
+ node->sub_nodes[idx] = new_node;
+ node = new_node;
+ }
+
+ /* add sub-node(s) and final string */
+ while (node->length + 7 < len)
+ {
+ new_node = apr_pcalloc(tree->pool, sizeof(*new_node));
+ new_node->key.prefix = node;
+ new_node->length = node->length + 8;
+ memcpy(new_node->key.data, s + node->length, 8);
+
+ auto_realloc_sub_nodes(tree, node);
+ memmove(node->sub_nodes + idx + 1, node->sub_nodes + idx,
+ (node->sub_node_count - idx) * sizeof(node_t *));
+
+ /* replace old sub-node with new one and continue lookup */
+ node->sub_nodes[idx] = new_node;
+ node->sub_node_count++;
+ node = new_node;
+ idx = 0;
+ }
+
+ new_string = apr_pcalloc(tree->pool, sizeof(*new_string));
+ new_string->prefix = node;
+ memcpy(new_string->data, s + node->length, len - node->length);
+
+ auto_realloc_sub_nodes(tree, node);
+ memmove(node->sub_nodes + idx + 1, node->sub_nodes + idx,
+ (node->sub_node_count - idx) * sizeof(node_t *));
+
+ node->sub_nodes[idx] = (node_t *)new_string;
+ node->sub_node_count++;
+ return new_string;
+}
+
+svn_string_t *
+svn_prefix_string__expand(const svn_prefix_string__t *s,
+ apr_pool_t *pool)
+{
+ apr_size_t s_len = strlen(s->data);
+ apr_size_t len = s->prefix->length + s_len;
+ char *buffer = apr_palloc(pool, len + 1);
+
+ svn_string_t *result = apr_pcalloc(pool, sizeof(*result));
+ result->data = buffer;
+ result->len = len;
+ buffer[len] = '\0';
+
+ while (s->prefix)
+ {
+ memcpy(buffer + s->prefix->length, s->data, len - s->prefix->length);
+ len = s->prefix->length;
+ s = &s->prefix->key;
+ }
+
+ return result;
+}
+
+int
+svn_prefix_string__compare(const svn_prefix_string__t *lhs,
+ const svn_prefix_string__t *rhs)
+{
+ const node_t *lhs_parent = lhs->prefix;
+ const node_t *rhs_parent = rhs->prefix;
+
+ if (lhs == rhs)
+ return 0;
+
+ /* find the common root */
+ while (lhs_parent != rhs_parent)
+ {
+ if (lhs_parent->length <= rhs_parent->length)
+ {
+ rhs = &rhs_parent->key;
+ rhs_parent = rhs_parent->key.prefix;
+ }
+ else if (rhs_parent->length <= lhs_parent->length)
+ {
+ lhs = &lhs_parent->key;
+ lhs_parent = lhs_parent->key.prefix;
+ }
+
+ /* same tree? */
+ assert(lhs_parent && rhs_parent);
+ }
+
+ /* at the common root, strings will differ in the first follow-up char */
+ return (int)(unsigned char)lhs->data[0] - (int)(unsigned char)rhs->data[0];
+}