/* prefix_string.c --- implement strings based on a prefix tree * * ==================================================================== * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. * ==================================================================== */ #include #include "private/svn_string_private.h" /* A node in the tree represents a common prefix. The root node is the * empty prefix. Nodes may have up to 256 sub-nodes, each starting with * a different character (possibly '\0'). * * The nodes in the tree store only up to 8 chars of the respective common * prefix, i.e. longer common prefixes must be drawn out over multiple * hierarchy levels. This is a space <-> efficiency trade-off. * * Strings are the leaf nodes in the tree and use a specialized, smaller * data structure. They may add 0 to 7 extra chars to the prefix. Both * data types can be discerned by the last char in the data buffer. This * must be 0 for strings (leaves) and non-0 otherwise. Please note that * ordinary nodes have a length information so that no terminating 0 is * required for them. */ /* forward declaration */ typedef struct node_t node_t; /* String type and tree leaf. */ struct svn_prefix_string__t { /* mandatory prefix */ node_t *prefix; /* 0 ..7 chars to add the the prefix. NUL-terminated. */ char data[8]; }; /* A node inside the tree, i.e. not a string and not a leaf (unless this is * the root node). * * Note: keep the ordering to minimize size / alignment overhead on 64 bit * machines. */ struct node_t { /* pointer to the parent prefix plus the 1 .. 8 extra chars. * Only the root will provide 0 extra chars. */ svn_prefix_string__t key; /* Length of the prefix from the root down to and including this one. * 0 for the root node. Only then will key.prefix be NULL. */ apr_uint32_t length; /* Number of entries used in SUB_NODES. */ apr_uint32_t sub_node_count; /* The sub-nodes, ordered by first char. node_t and svn_prefix_string__t * may be mixed here. May be NULL. * The number of allocated entries is always a power-of-two and only * given implicitly by SUB_NODE_COUNT. */ struct node_t **sub_nodes; }; /* The actual tree structure. */ struct svn_prefix_tree__t { /* the common tree root (represents the empty prefix). */ node_t *root; /* all sub-nodes & strings will be allocated from this pool */ apr_pool_t *pool; }; /* Return TRUE, iff NODE is a leaf node. */ static svn_boolean_t is_leaf(node_t *node) { return node->key.data[7] == 0; } /* Ensure that the sub-nodes array of NODE within TREE has at least one * unused entry. Re-allocate as necessary. */ static void auto_realloc_sub_nodes(svn_prefix_tree__t *tree, node_t *node) { if (node->sub_node_count & (node->sub_node_count - 1)) return; if (node->sub_node_count == 0) { node->sub_nodes = apr_pcalloc(tree->pool, sizeof(*node->sub_nodes)); } else { node_t **sub_nodes = apr_pcalloc(tree->pool, 2 * node->sub_node_count * sizeof(*sub_nodes)); memcpy(sub_nodes, node->sub_nodes, node->sub_node_count * sizeof(*sub_nodes)); node->sub_nodes = sub_nodes; } } /* Given the COUNT pointers in the SUB_NODES array, return the location at * which KEY is either located or would be inserted. */ static int search_lower_bound(node_t **sub_nodes, unsigned char key, int count) { int lower = 0; int upper = count - 1; /* Binary search for the lowest position at which to insert KEY. */ while (lower <= upper) { int current = lower + (upper - lower) / 2; if ((unsigned char)sub_nodes[current]->key.data[0] < key) lower = current + 1; else upper = current - 1; } return lower; } svn_prefix_tree__t * svn_prefix_tree__create(apr_pool_t *pool) { svn_prefix_tree__t *tree = apr_pcalloc(pool, sizeof(*tree)); tree->pool = pool; tree->root = apr_pcalloc(pool, sizeof(*tree->root)); tree->root->key.data[7] = '\xff'; return tree; } svn_prefix_string__t * svn_prefix_string__create(svn_prefix_tree__t *tree, const char *s) { svn_prefix_string__t *new_string; apr_size_t len = strlen(s); node_t *node = tree->root; node_t *new_node; int idx; /* walk the existing tree until we either find S or the node at which S * has to be inserted */ while (TRUE) { node_t *sub_node; int match = 1; /* index of the matching sub-node */ idx = node->sub_node_count ? search_lower_bound(node->sub_nodes, (unsigned char)s[node->length], node->sub_node_count) : 0; /* any (partially) matching sub-nodes? */ if (idx == (int)node->sub_node_count || node->sub_nodes[idx]->key.data[0] != s[node->length]) break; sub_node = node->sub_nodes[idx]; /* fully matching sub-node? */ if (is_leaf(sub_node)) { if (strcmp(sub_node->key.data, s + node->length) == 0) return &sub_node->key; } else { apr_size_t sub_node_len = sub_node->length - node->length; if (strncmp(sub_node->key.data, s + node->length, sub_node_len) == 0) { node = sub_node; continue; } } /* partial match -> split */ while (sub_node->key.data[match] == s[node->length + match]) ++match; new_node = apr_pcalloc(tree->pool, sizeof(*new_node)); new_node->key = sub_node->key; new_node->length = node->length + match; new_node->key.data[7] = '\xff'; new_node->sub_node_count = 1; new_node->sub_nodes = apr_palloc(tree->pool, sizeof(node_t *)); new_node->sub_nodes[0] = sub_node; memmove(sub_node->key.data, sub_node->key.data + match, 8 - match); /* replace old sub-node with new one and continue lookup */ sub_node->key.prefix = new_node; node->sub_nodes[idx] = new_node; node = new_node; } /* add sub-node(s) and final string */ while (node->length + 7 < len) { new_node = apr_pcalloc(tree->pool, sizeof(*new_node)); new_node->key.prefix = node; new_node->length = node->length + 8; memcpy(new_node->key.data, s + node->length, 8); auto_realloc_sub_nodes(tree, node); memmove(node->sub_nodes + idx + 1, node->sub_nodes + idx, (node->sub_node_count - idx) * sizeof(node_t *)); /* replace old sub-node with new one and continue lookup */ node->sub_nodes[idx] = new_node; node->sub_node_count++; node = new_node; idx = 0; } new_string = apr_pcalloc(tree->pool, sizeof(*new_string)); new_string->prefix = node; memcpy(new_string->data, s + node->length, len - node->length); auto_realloc_sub_nodes(tree, node); memmove(node->sub_nodes + idx + 1, node->sub_nodes + idx, (node->sub_node_count - idx) * sizeof(node_t *)); node->sub_nodes[idx] = (node_t *)new_string; node->sub_node_count++; return new_string; } svn_string_t * svn_prefix_string__expand(const svn_prefix_string__t *s, apr_pool_t *pool) { apr_size_t s_len = strlen(s->data); apr_size_t len = s->prefix->length + s_len; char *buffer = apr_palloc(pool, len + 1); svn_string_t *result = apr_pcalloc(pool, sizeof(*result)); result->data = buffer; result->len = len; buffer[len] = '\0'; while (s->prefix) { memcpy(buffer + s->prefix->length, s->data, len - s->prefix->length); len = s->prefix->length; s = &s->prefix->key; } return result; } int svn_prefix_string__compare(const svn_prefix_string__t *lhs, const svn_prefix_string__t *rhs) { const node_t *lhs_parent = lhs->prefix; const node_t *rhs_parent = rhs->prefix; if (lhs == rhs) return 0; /* find the common root */ while (lhs_parent != rhs_parent) { if (lhs_parent->length <= rhs_parent->length) { rhs = &rhs_parent->key; rhs_parent = rhs_parent->key.prefix; } else if (rhs_parent->length <= lhs_parent->length) { lhs = &lhs_parent->key; lhs_parent = lhs_parent->key.prefix; } /* same tree? */ assert(lhs_parent && rhs_parent); } /* at the common root, strings will differ in the first follow-up char */ return (int)(unsigned char)lhs->data[0] - (int)(unsigned char)rhs->data[0]; }