git notes merge: Add another auto-resolving strategy: "cat_sort_uniq"

This new strategy is similar to "concatenate", but in addition to concatenating the two note candidates, this strategy sorts the resulting lines, and removes duplicate lines from the result. This is equivalent to applying the "cat | sort | uniq" shell pipeline to the two note candidates. This strategy is useful if the notes follow a line-based format where one wants to avoid duplicate lines in the merge result.

Note that if either of the note candidates contains duplicate lines _prior_ to the merge, these will also be removed by this merge strategy.

The patch also contains tests and documentation for the new strategy.

Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
author: Johan Herland <johan@herland.net> 2010-11-15 00:57:17 +0100
committer: Junio C Hamano <gitster@pobox.com> 2010-11-17 13:22:53 -0800
commit: a6a09095a08339afc8468d053ff978ed4662a1d5 (patch)
tree: 915beb0e1f4fbe5fe5932b8188a1ac983c7ed786 /notes.c
parent: 6cfd6a9dea889707fa207ee2003010c3b56b2131 (diff)
download: git-a6a09095a08339afc8468d053ff978ed4662a1d5.tar.gz
1 files changed, 76 insertions, 0 deletions
diff --git a/notes.c b/notes.c
index 09a93abca1..96cde42134 100644
--- a/notes.c
+++ b/notes.c
@@ -845,6 +845,82 @@ int combine_notes_ignore(unsigned char *cur_sha1,
 	return 0;
 }
 
+/*
+ * Add each non-empty line of note blob 'sha1' to 'sort_uniq_list', skipping
+ * duplicates.  Returns 1 if 'sha1' cannot be read as a blob, otherwise 0.
+ */
+static int string_list_add_note_lines(struct string_list *sort_uniq_list,
+				      const unsigned char *sha1)
+{
+	char *data;
+	unsigned long len;
+	enum object_type t;
+	struct strbuf buf = STRBUF_INIT, **lines;
+	int i, list_index;
+
+	if (is_null_sha1(sha1))
+		return 0;
+
+	/* read_sha1_file NUL-terminates; t is only trusted once data is set */
+	data = read_sha1_file(sha1, &t, &len);
+	if (!data || t != OBJ_BLOB || !len) {
+		free(data);
+		return !data || t != OBJ_BLOB;
+	}
+
+	strbuf_attach(&buf, data, len, len + 1);
+	lines = strbuf_split(&buf, '\n');
+	for (i = 0; lines[i]; i++) {
+		if (lines[i]->buf[lines[i]->len - 1] == '\n')
+			strbuf_setlen(lines[i], lines[i]->len - 1);
+		if (!lines[i]->len)
+			continue; /* skip empty lines */
+		list_index = string_list_find_insert_index(sort_uniq_list,
+							   lines[i]->buf, 0);
+		if (list_index >= 0) /* negative: line is already listed */
+			string_list_insert_at_index(sort_uniq_list, list_index,
+						    lines[i]->buf);
+	}
+	strbuf_list_free(lines);
+	strbuf_release(&buf);
+	return 0;
+}
+
+static int string_list_join_lines_helper(struct string_list_item *item,
+					 void *cb_data)
+{
+	struct strbuf *out = cb_data; /* cb_data is the strbuf being built */
+	strbuf_addstr(out, item->string);
+	strbuf_addch(out, '\n'); /* every appended string gets its own '\n' */
+	return 0; /* unconditionally 0 */
+}
+
+/*
+ * "cat | sort | uniq" merge: collect the lines of both note blobs in
+ * sort_uniq_list, join them and write the result with blob_type.  Returns
+ * write_sha1_file()'s result, or 1 if an earlier step failed.
+ */
+int combine_notes_cat_sort_uniq(unsigned char *cur_sha1,
+		const unsigned char *new_sha1)
+{
+	struct string_list sort_uniq_list = { NULL, 0, 0, 1 };
+	struct strbuf sb = STRBUF_INIT;
+	int ret = 1;
+
+	if (string_list_add_note_lines(&sort_uniq_list, cur_sha1) ||
+	    string_list_add_note_lines(&sort_uniq_list, new_sha1))
+		goto out;
+
+	/* create a new blob object from sort_uniq_list */
+	if (!for_each_string_list(&sort_uniq_list,
+				  string_list_join_lines_helper, &sb))
+		ret = write_sha1_file(sb.buf, sb.len, blob_type, cur_sha1);
+out:
+	strbuf_release(&sb);
+	string_list_clear(&sort_uniq_list, 0);
+	return ret;
+}
+
 static int string_list_add_one_ref(const char *path, const unsigned char *sha1,
 				   int flag, void *cb)
 {
author	Johan Herland <johan@herland.net>	2010-11-15 00:57:17 +0100
committer	Junio C Hamano <gitster@pobox.com>	2010-11-17 13:22:53 -0800
commit	a6a09095a08339afc8468d053ff978ed4662a1d5 (patch)
tree	915beb0e1f4fbe5fe5932b8188a1ac983c7ed786 /notes.c
parent	6cfd6a9dea889707fa207ee2003010c3b56b2131 (diff)
download	git-a6a09095a08339afc8468d053ff978ed4662a1d5.tar.gz