summary | refs | log | tree | commit | diff
path: root/subversion/svn/similarity.c
diff options
context:
space:
mode:
Diffstat (limited to 'subversion/svn/similarity.c')
-rw-r--r-- subversion/svn/similarity.c 126
1 files changed, 126 insertions, 0 deletions
diff --git a/subversion/svn/similarity.c b/subversion/svn/similarity.c
new file mode 100644
index 0000000..0bcf0f5
--- /dev/null
+++ b/subversion/svn/similarity.c
@@ -0,0 +1,126 @@
+/*
+ * similarity.c: Utility functions for finding similar strings in lists
+ *
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ */
+
+/* ==================================================================== */
+
+
+
+/*** Includes. ***/
+
+#include <stdlib.h>
+
+#include "svn_string.h"
+#include "cl.h"
+
+#include "private/svn_string_private.h"
+
+#include "svn_private_config.h"
+
+
+/* State shared by every comparison made during one similarity check */
+struct svn_cl__simcheck_context_t
+{
+ svn_string_t key; /* The key that every token is compared against */
+ svn_membuf_t buffer; /* Scratch buffer handed to svn_string__similarity() */
+};
+
+
+/* Measure how similar KEY is to CTX, using BUFFER as scratch space.
+ * Return the svn_string__similarity() score, and set *DIFF to the length
+ * of the longer string minus the match length that call reports. */
+static APR_INLINE apr_size_t
+simcheck_key_diff(const svn_string_t *key, const svn_string_t *ctx,
+                  svn_membuf_t *buffer, apr_size_t *diff)
+{
+  apr_size_t common;
+  const apr_size_t rating = svn_string__similarity(key, ctx, buffer, &common);
+  const apr_size_t longest = (key->len > ctx->len ? key->len : ctx->len);
+  *diff = longest - common;
+  return rating;
+}
+
+
+/* qsort() comparator over svn_cl__simcheck_t pointers: higher score sorts
+ * first, equal scores are ordered by ascending diff.  A score still set
+ * to -1 is computed here on first use, via the context of the first key. */
+static int
+simcheck_compare(const void *pkeya, const void *pkeyb)
+{
+  svn_cl__simcheck_t *const lhs = *(svn_cl__simcheck_t *const *)pkeya;
+  svn_cl__simcheck_t *const rhs = *(svn_cl__simcheck_t *const *)pkeyb;
+  svn_cl__simcheck_context_t *const shared = lhs->context;
+
+  if (lhs->score == -1)
+    lhs->score = simcheck_key_diff(&lhs->token, &shared->key,
+                                   &shared->buffer, &lhs->diff);
+  if (rhs->score == -1)
+    rhs->score = simcheck_key_diff(&rhs->token, &shared->key,
+                                   &shared->buffer, &rhs->diff);
+  if (lhs->score != rhs->score)
+    return (lhs->score < rhs->score ? 1 : -1);
+  return (lhs->diff > rhs->diff) - (lhs->diff < rhs->diff);
+}
+
+/* Sort TOKENS (TOKEN_COUNT entries) by descending similarity to KEY, using
+ * SCRATCH_POOL for temporaries.  Return 0 if the best token matches KEY
+ * exactly, else 1 + the count of tokens >= two-thirds similar (1 if empty). */
+apr_size_t
+svn_cl__similarity_check(const char *key,
+                         svn_cl__simcheck_t **tokens,
+                         apr_size_t token_count,
+                         apr_pool_t *scratch_pool)
+{
+  apr_size_t result = 1;
+  apr_size_t i;
+
+  svn_cl__simcheck_context_t context;
+  context.key.data = key;
+  context.key.len = strlen(key);
+  svn_membuf__create(&context.buffer, 0, scratch_pool);
+
+  /* Score every token up front: qsort() has nothing to compare for a lone
+     element, so lazy scoring would leave it at score -1, diff 0 (bogus). */
+  for (i = 0; i < token_count; ++i)
+    {
+      svn_cl__simcheck_t *const token = tokens[i];
+      token->context = &context;
+      token->score = simcheck_key_diff(&token->token, &context.key,
+                                       &context.buffer, &token->diff);
+    }
+
+  qsort(tokens, token_count, sizeof(*tokens), simcheck_compare); /* best first */
+
+  /* Drop the references to the stack-allocated context and count the tokens
+     at least two-thirds similar to the key.  If you update this factor,
+     consider updating ../libsvn_subr/cmdline.c:most_similar(). */
+  for (i = 0; i < token_count; ++i)
+    {
+      tokens[i]->context = NULL;
+      if (tokens[i]->score >= (2 * SVN_STRING__SIM_RANGE_MAX + 1) / 3)
+        ++result;
+    }
+
+  if (token_count > 0 && 0 == tokens[0]->diff)
+    return 0; /* We found an exact match. */
+  return result;
+}