From 85976974581060716311d6807b03a671cb71cbde Mon Sep 17 00:00:00 2001
From: Junio C Hamano
Date: Tue, 24 May 2005 12:09:32 -0700
Subject: [PATCH] Update rename/copy similarity estimator.

The second round similarity estimator simply used the size of the
xdelta itself to estimate the extent of damage. This patch keeps
that logic to detect big insertions to terminate the check early,
but otherwise looks at the generated delta in order to estimate
the extent of edit more accurately.

Signed-off-by: Junio C Hamano
Signed-off-by: Linus Torvalds
---
 diffcore-rename.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'diffcore-rename.c')

diff --git a/diffcore-rename.c b/diffcore-rename.c
index 34e83dac8d..07782f4b7b 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -5,6 +5,7 @@
 #include "diff.h"
 #include "diffcore.h"
 #include "delta.h"
+#include "count-delta.h"
 
 /* Table of rename/copy destinations */
 
@@ -158,13 +159,18 @@ static int estimate_similarity(struct diff_filespec *src,
 	delta = diff_delta(src->data, src->size,
 			   dst->data, dst->size,
 			   &delta_size);
-	/*
-	 * We currently punt here, but we may later end up parsing the
-	 * delta to really assess the extent of damage. A big consecutive
-	 * remove would produce small delta_size that affects quite a
-	 * big portion of the file.
+
+	/* A delta that has a lot of literal additions would have
+	 * big delta_size no matter what else it does.
 	 */
+	if (minimum_score < MAX_SCORE * delta_size / base_size)
+		return 0;
+
+	/* Estimate the edit size by interpreting delta. */
+	delta_size = count_delta(delta, delta_size);
 	free(delta);
+	if (delta_size == UINT_MAX)
+		return 0;
 
 	/*
 	 * Now we will give some score to it. 100% edit gets 0 points
--
cgit v1.2.1