From a97a74686d70a318cd802003498054cc1e8b0ae2 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 9 Oct 2009 12:21:57 +0200 Subject: Introduce commit notes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit notes are blobs which are shown together with the commit message. These blobs are taken from the notes ref, which you can configure by the config variable core.notesRef, which in turn can be overridden by the environment variable GIT_NOTES_REF. The notes ref is a branch which contains "files" whose names are the names of the corresponding commits (i.e. the SHA-1). The rationale for putting this information into a ref is this: we want to be able to fetch and possibly union-merge the notes, maybe even look at the date when a note was introduced, and we want to store them efficiently together with the other objects. This patch has been improved by the following contributions: - Thomas Rast: fix core.notesRef documentation - Tor Arne Vestbø: fix printing of multi-line notes - Alex Riesen: Using char array instead of char pointer costs less BSS - Johan Herland: Plug leak when msg is good, but msglen or type causes return Signed-off-by: Johannes Schindelin Signed-off-by: Thomas Rast Signed-off-by: Tor Arne Vestbø Signed-off-by: Johan Herland Signed-off-by: Junio C Hamano get_commit_notes(): Plug memory leak when 'if' triggers, but not because of read_sha1_file() failure --- notes.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 notes.c (limited to 'notes.c') diff --git a/notes.c b/notes.c new file mode 100644 index 0000000000..66379ffd22 --- /dev/null +++ b/notes.c @@ -0,0 +1,70 @@ +#include "cache.h" +#include "commit.h" +#include "notes.h" +#include "refs.h" +#include "utf8.h" +#include "strbuf.h" + +static int initialized; + +void get_commit_notes(const struct commit *commit, struct strbuf *sb, + const char *output_encoding) +{ + static const char utf8[] = "utf-8"; + struct strbuf name = STRBUF_INIT; + unsigned char sha1[20]; + char *msg, *msg_p; + unsigned long linelen, msglen; + enum object_type type; + + if (!initialized) { + const char *env = getenv(GIT_NOTES_REF_ENVIRONMENT); + if (env) + notes_ref_name = getenv(GIT_NOTES_REF_ENVIRONMENT); + else if (!notes_ref_name) + notes_ref_name = GIT_NOTES_DEFAULT_REF; + if (notes_ref_name && read_ref(notes_ref_name, sha1)) + notes_ref_name = NULL; + initialized = 1; + } + + if (!notes_ref_name) + return; + + strbuf_addf(&name, "%s:%s", notes_ref_name, + sha1_to_hex(commit->object.sha1)); + if (get_sha1(name.buf, sha1)) + return; + + if (!(msg = read_sha1_file(sha1, &type, &msglen)) || !msglen || + type != OBJ_BLOB) { + free(msg); + return; + } + + if (output_encoding && *output_encoding && + strcmp(utf8, output_encoding)) { + char *reencoded = reencode_string(msg, output_encoding, utf8); + if (reencoded) { + free(msg); + msg = reencoded; + msglen = strlen(msg); + } + } + + /* we will end the annotation by a newline anyway */ + if (msglen && msg[msglen - 1] == '\n') + msglen--; + + strbuf_addstr(sb, "\nNotes:\n"); + + for (msg_p = msg; msg_p < msg + msglen; msg_p += linelen + 1) { + linelen = strchrnul(msg_p, '\n') - msg_p; + + strbuf_addstr(sb, " "); + strbuf_add(sb, msg_p, linelen); + strbuf_addch(sb, '\n'); + } + + free(msg); +} -- cgit v1.2.1 From fd53c9eb445815696bf84c4701b9af73b5d7f50d Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 9 Oct 2009 12:21:59 +0200 Subject: Speed up git notes lookup To avoid looking up each and every commit in the notes ref's tree object, which is very expensive, speed things up by slurping the tree object's contents into a hash_map. The idea for the hashmap singleton is from David Reiss, initial benchmarking by Jeff King. Note: the implementation allows for arbitrary entries in the notes tree object, ignoring those that do not reference a valid object. This allows you to annotate arbitrary branches, or objects. This patch has been improved by the following contributions: - Junio C Hamano: fixed an obvious error in initialize_hash_map() Signed-off-by: Johannes Schindelin Signed-off-by: Johan Herland Signed-off-by: Junio C Hamano --- notes.c | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 102 insertions(+), 10 deletions(-) (limited to 'notes.c') diff --git a/notes.c b/notes.c index 66379ffd22..2b66723f5f 100644 --- a/notes.c +++ b/notes.c @@ -4,15 +4,112 @@ #include "refs.h" #include "utf8.h" #include "strbuf.h" +#include "tree-walk.h" + +struct entry { + unsigned char commit_sha1[20]; + unsigned char notes_sha1[20]; +}; + +struct hash_map { + struct entry *entries; + off_t count, size; +}; static int initialized; +static struct hash_map hash_map; + +static int hash_index(struct hash_map *map, const unsigned char *sha1) +{ + int i = ((*(unsigned int *)sha1) % map->size); + + for (;;) { + unsigned char *current = map->entries[i].commit_sha1; + + if (!hashcmp(sha1, current)) + return i; + + if (is_null_sha1(current)) + return -1 - i; + + if (++i == map->size) + i = 0; + } +} + +static void add_entry(const unsigned char *commit_sha1, + const unsigned char *notes_sha1) +{ + int index; + + if (hash_map.count + 1 > hash_map.size >> 1) { + int i, old_size = hash_map.size; + struct entry *old = hash_map.entries; + + hash_map.size = old_size ? old_size << 1 : 64; + hash_map.entries = (struct entry *) + xcalloc(sizeof(struct entry), hash_map.size); + + for (i = 0; i < old_size; i++) + if (!is_null_sha1(old[i].commit_sha1)) { + index = -1 - hash_index(&hash_map, + old[i].commit_sha1); + memcpy(hash_map.entries + index, old + i, + sizeof(struct entry)); + } + free(old); + } + + index = hash_index(&hash_map, commit_sha1); + if (index < 0) { + index = -1 - index; + hash_map.count++; + } + + hashcpy(hash_map.entries[index].commit_sha1, commit_sha1); + hashcpy(hash_map.entries[index].notes_sha1, notes_sha1); +} + +static void initialize_hash_map(const char *notes_ref_name) +{ + unsigned char sha1[20], commit_sha1[20]; + unsigned mode; + struct tree_desc desc; + struct name_entry entry; + void *buf; + + if (!notes_ref_name || read_ref(notes_ref_name, commit_sha1) || + get_tree_entry(commit_sha1, "", sha1, &mode)) + return; + + buf = fill_tree_descriptor(&desc, sha1); + if (!buf) + die("Could not read %s for notes-index", sha1_to_hex(sha1)); + + while (tree_entry(&desc, &entry)) + if (!get_sha1(entry.path, commit_sha1)) + add_entry(commit_sha1, entry.sha1); + free(buf); +} + +static unsigned char *lookup_notes(const unsigned char *commit_sha1) +{ + int index; + + if (!hash_map.size) + return NULL; + + index = hash_index(&hash_map, commit_sha1); + if (index < 0) + return NULL; + return hash_map.entries[index].notes_sha1; +} void get_commit_notes(const struct commit *commit, struct strbuf *sb, const char *output_encoding) { static const char utf8[] = "utf-8"; - struct strbuf name = STRBUF_INIT; - unsigned char sha1[20]; + unsigned char *sha1; char *msg, *msg_p; unsigned long linelen, msglen; enum object_type type; @@ -23,17 +120,12 @@ void get_commit_notes(const struct commit *commit, struct strbuf *sb, notes_ref_name = getenv(GIT_NOTES_REF_ENVIRONMENT); else if (!notes_ref_name) notes_ref_name = GIT_NOTES_DEFAULT_REF; - if (notes_ref_name && read_ref(notes_ref_name, sha1)) - notes_ref_name = NULL; + initialize_hash_map(notes_ref_name); initialized = 1; } - if (!notes_ref_name) - return; - - strbuf_addf(&name, "%s:%s", notes_ref_name, - sha1_to_hex(commit->object.sha1)); - if (get_sha1(name.buf, sha1)) + sha1 = lookup_notes(commit->object.sha1); + if (!sha1) return; if (!(msg = read_sha1_file(sha1, &type, &msglen)) || !msglen || -- cgit v1.2.1 From c56fcc89b951f3e8c9240ea02676b2eef5417da6 Mon Sep 17 00:00:00 2001 From: Johan Herland Date: Fri, 9 Oct 2009 12:22:04 +0200 Subject: Add flags to get_commit_notes() to control the format of the note string This patch adds the following flags to get_commit_notes() for adjusting the format of the produced note string: - NOTES_SHOW_HEADER: Print "Notes:" line before the notes contents - NOTES_INDENT: Indent notes contents by 4 spaces Suggested-by: Johannes Schindelin Signed-off-by: Johan Herland Signed-off-by: Junio C Hamano --- notes.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'notes.c') diff --git a/notes.c b/notes.c index 2b66723f5f..b7d79e1900 100644 --- a/notes.c +++ b/notes.c @@ -106,7 +106,7 @@ static unsigned char *lookup_notes(const unsigned char *commit_sha1) } void get_commit_notes(const struct commit *commit, struct strbuf *sb, - const char *output_encoding) + const char *output_encoding, int flags) { static const char utf8[] = "utf-8"; unsigned char *sha1; @@ -148,12 +148,14 @@ void get_commit_notes(const struct commit *commit, struct strbuf *sb, if (msglen && msg[msglen - 1] == '\n') msglen--; - strbuf_addstr(sb, "\nNotes:\n"); + if (flags & NOTES_SHOW_HEADER) + strbuf_addstr(sb, "\nNotes:\n"); for (msg_p = msg; msg_p < msg + msglen; msg_p += linelen + 1) { linelen = strchrnul(msg_p, '\n') - msg_p; - strbuf_addstr(sb, " "); + if (flags & NOTES_INDENT) + strbuf_addstr(sb, " "); strbuf_add(sb, msg_p, linelen); strbuf_addch(sb, '\n'); } -- cgit v1.2.1 From 27d57564102a98950bf4398daeeb14a15154478f Mon Sep 17 00:00:00 2001 From: Johan Herland Date: Fri, 9 Oct 2009 12:22:06 +0200 Subject: Teach notes code to free its internal data structures on request There's no need to be rude to memory-concious callers... This patch has been improved by the following contributions: - Junio C Hamano: avoid old-style declaration Signed-off-by: Junio C Hamano Signed-off-by: Johan Herland Signed-off-by: Junio C Hamano --- notes.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'notes.c') diff --git a/notes.c b/notes.c index b7d79e1900..a5d888d772 100644 --- a/notes.c +++ b/notes.c @@ -105,6 +105,13 @@ static unsigned char *lookup_notes(const unsigned char *commit_sha1) return hash_map.entries[index].notes_sha1; } +void free_notes(void) +{ + free(hash_map.entries); + memset(&hash_map, 0, sizeof(struct hash_map)); + initialized = 0; +} + void get_commit_notes(const struct commit *commit, struct strbuf *sb, const char *output_encoding, int flags) { -- cgit v1.2.1 From 23123aecf8418a6b0ec23378555ed78c438ae894 Mon Sep 17 00:00:00 2001 From: Johan Herland Date: Fri, 9 Oct 2009 12:22:07 +0200 Subject: Teach the notes lookup code to parse notes trees with various fanout schemes The semantics used when parsing notes trees (with regards to fanout subtrees) follow Dscho's proposal fairly closely: - No concatenation/merging of notes is performed. If there are several notes objects referencing a given commit, only one of those objects are used. - If a notes object for a given commit is present in the "root" notes tree, no subtrees are consulted; the object in the root tree is used directly. - If there are more than one subtree that prefix-matches the given commit, only the subtree with the longest matching prefix is consulted. This means that if the given commit is e.g. "deadbeef", and the notes tree have subtrees "de" and "dead", then the following paths in the notes tree are searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched. - Fanout directories (subtrees) must references a whole number of bytes from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are acceptable; "d" and "dea" are not. - Multiple levels of fanout are allowed. All the above rules apply recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc. This patch changes the in-memory datastructure for holding parsed notes: Instead of holding all note (and subtree) entries in a hash table, a simple 16-tree structure is used instead. The tree structure consists of 16-arrays as internal nodes, and note/subtree entries as leaf nodes. The tree is traversed by indexing subsequent nibbles of the search key until a leaf node is encountered. If a subtree entry is encountered while searching for a note, the subtree is unpacked into the 16-tree structure, and the search continues into that subtree. The new algorithm performs significantly better in the cases where only a fraction of the notes need to be looked up (this is assumed to be the common case for notes lookup). The new code even performs marginally better in the worst case (where _all_ the notes are looked up). In addition to this, comes the massive performance win associated with organizing the notes tree according to some fanout scheme. Even a simple 2/38 fanout scheme is dramatically quicker to traverse (going from tens of seconds to sub-second runtimes). As for memory usage, the new code is marginally better than the old code in the worst case, but in the case of looking up only some notes from a notes tree with proper fanout, the new code uses only a small fraction of the memory needed to hold the entire notes tree. However, there is one casualty of this patch. The old notes lookup code was able to parse notes that were associated with non-SHA1s (e.g. refs). The new code requires the referenced object to be named by a SHA1 sum. Still, this is not considered a major setback, since the notes infrastructure was not originally intended to annotate objects outside the Git object database. Cc: Johannes Schindelin Signed-off-by: Johan Herland Signed-off-by: Junio C Hamano --- notes.c | 317 ++++++++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 248 insertions(+), 69 deletions(-) (limited to 'notes.c') diff --git a/notes.c b/notes.c index a5d888d772..210c4b2263 100644 --- a/notes.c +++ b/notes.c @@ -6,109 +6,288 @@ #include "strbuf.h" #include "tree-walk.h" -struct entry { - unsigned char commit_sha1[20]; - unsigned char notes_sha1[20]; +/* + * Use a non-balancing simple 16-tree structure with struct int_node as + * internal nodes, and struct leaf_node as leaf nodes. Each int_node has a + * 16-array of pointers to its children. + * The bottom 2 bits of each pointer is used to identify the pointer type + * - ptr & 3 == 0 - NULL pointer, assert(ptr == NULL) + * - ptr & 3 == 1 - pointer to next internal node - cast to struct int_node * + * - ptr & 3 == 2 - pointer to note entry - cast to struct leaf_node * + * - ptr & 3 == 3 - pointer to subtree entry - cast to struct leaf_node * + * + * The root node is a statically allocated struct int_node. + */ +struct int_node { + void *a[16]; }; -struct hash_map { - struct entry *entries; - off_t count, size; +/* + * Leaf nodes come in two variants, note entries and subtree entries, + * distinguished by the LSb of the leaf node pointer (see above). + * As a note entry, the key is the SHA1 of the referenced commit, and the + * value is the SHA1 of the note object. + * As a subtree entry, the key is the prefix SHA1 (w/trailing NULs) of the + * referenced commit, using the last byte of the key to store the length of + * the prefix. The value is the SHA1 of the tree object containing the notes + * subtree. + */ +struct leaf_node { + unsigned char key_sha1[20]; + unsigned char val_sha1[20]; }; -static int initialized; -static struct hash_map hash_map; +#define PTR_TYPE_NULL 0 +#define PTR_TYPE_INTERNAL 1 +#define PTR_TYPE_NOTE 2 +#define PTR_TYPE_SUBTREE 3 -static int hash_index(struct hash_map *map, const unsigned char *sha1) -{ - int i = ((*(unsigned int *)sha1) % map->size); +#define GET_PTR_TYPE(ptr) ((uintptr_t) (ptr) & 3) +#define CLR_PTR_TYPE(ptr) ((void *) ((uintptr_t) (ptr) & ~3)) +#define SET_PTR_TYPE(ptr, type) ((void *) ((uintptr_t) (ptr) | (type))) - for (;;) { - unsigned char *current = map->entries[i].commit_sha1; +#define GET_NIBBLE(n, sha1) (((sha1[n >> 1]) >> ((~n & 0x01) << 2)) & 0x0f) - if (!hashcmp(sha1, current)) - return i; +#define SUBTREE_SHA1_PREFIXCMP(key_sha1, subtree_sha1) \ + (memcmp(key_sha1, subtree_sha1, subtree_sha1[19])) - if (is_null_sha1(current)) - return -1 - i; +static struct int_node root_node; - if (++i == map->size) - i = 0; +static int initialized; + +static void load_subtree(struct leaf_node *subtree, struct int_node *node, + unsigned int n); + +/* + * To find a leaf_node: + * 1. Start at the root node, with n = 0 + * 2. Use the nth nibble of the key as an index into a: + * - If a[n] is an int_node, recurse into that node and increment n + * - If a leaf_node with matching key, return leaf_node (assert note entry) + * - If a matching subtree entry, unpack that subtree entry (and remove it); + * restart search at the current level. + * - Otherwise, we end up at a NULL pointer, or a non-matching leaf_node. + * Backtrack out of the recursion, one level at a time and check a[0]: + * - If a[0] at the current level is a matching subtree entry, unpack that + * subtree entry (and remove it); restart search at the current level. + */ +static struct leaf_node *note_tree_find(struct int_node *tree, unsigned char n, + const unsigned char *key_sha1) +{ + struct leaf_node *l; + unsigned char i = GET_NIBBLE(n, key_sha1); + void *p = tree->a[i]; + + switch(GET_PTR_TYPE(p)) { + case PTR_TYPE_INTERNAL: + l = note_tree_find(CLR_PTR_TYPE(p), n + 1, key_sha1); + if (l) + return l; + break; + case PTR_TYPE_NOTE: + l = (struct leaf_node *) CLR_PTR_TYPE(p); + if (!hashcmp(key_sha1, l->key_sha1)) + return l; /* return note object matching given key */ + break; + case PTR_TYPE_SUBTREE: + l = (struct leaf_node *) CLR_PTR_TYPE(p); + if (!SUBTREE_SHA1_PREFIXCMP(key_sha1, l->key_sha1)) { + /* unpack tree and resume search */ + tree->a[i] = NULL; + load_subtree(l, tree, n); + free(l); + return note_tree_find(tree, n, key_sha1); + } + break; + case PTR_TYPE_NULL: + default: + assert(!p); + break; } + + /* + * Did not find key at this (or any lower) level. + * Check if there's a matching subtree entry in tree->a[0]. + * If so, unpack tree and resume search. + */ + p = tree->a[0]; + if (GET_PTR_TYPE(p) != PTR_TYPE_SUBTREE) + return NULL; + l = (struct leaf_node *) CLR_PTR_TYPE(p); + if (!SUBTREE_SHA1_PREFIXCMP(key_sha1, l->key_sha1)) { + /* unpack tree and resume search */ + tree->a[0] = NULL; + load_subtree(l, tree, n); + free(l); + return note_tree_find(tree, n, key_sha1); + } + return NULL; } -static void add_entry(const unsigned char *commit_sha1, - const unsigned char *notes_sha1) +/* + * To insert a leaf_node: + * 1. Start at the root node, with n = 0 + * 2. Use the nth nibble of the key as an index into a: + * - If a[n] is NULL, store the tweaked pointer directly into a[n] + * - If a[n] is an int_node, recurse into that node and increment n + * - If a[n] is a leaf_node: + * 1. Check if they're equal, and handle that (abort? overwrite?) + * 2. Create a new int_node, and store both leaf_nodes there + * 3. Store the new int_node into a[n]. + */ +static int note_tree_insert(struct int_node *tree, unsigned char n, + const struct leaf_node *entry, unsigned char type) { - int index; - - if (hash_map.count + 1 > hash_map.size >> 1) { - int i, old_size = hash_map.size; - struct entry *old = hash_map.entries; - - hash_map.size = old_size ? old_size << 1 : 64; - hash_map.entries = (struct entry *) - xcalloc(sizeof(struct entry), hash_map.size); - - for (i = 0; i < old_size; i++) - if (!is_null_sha1(old[i].commit_sha1)) { - index = -1 - hash_index(&hash_map, - old[i].commit_sha1); - memcpy(hash_map.entries + index, old + i, - sizeof(struct entry)); - } - free(old); + struct int_node *new_node; + const struct leaf_node *l; + int ret; + unsigned char i = GET_NIBBLE(n, entry->key_sha1); + void *p = tree->a[i]; + assert(GET_PTR_TYPE(entry) == PTR_TYPE_NULL); + switch(GET_PTR_TYPE(p)) { + case PTR_TYPE_NULL: + assert(!p); + tree->a[i] = SET_PTR_TYPE(entry, type); + return 0; + case PTR_TYPE_INTERNAL: + return note_tree_insert(CLR_PTR_TYPE(p), n + 1, entry, type); + default: + assert(GET_PTR_TYPE(p) == PTR_TYPE_NOTE || + GET_PTR_TYPE(p) == PTR_TYPE_SUBTREE); + l = (const struct leaf_node *) CLR_PTR_TYPE(p); + if (!hashcmp(entry->key_sha1, l->key_sha1)) + return -1; /* abort insert on matching key */ + new_node = (struct int_node *) + xcalloc(sizeof(struct int_node), 1); + ret = note_tree_insert(new_node, n + 1, + CLR_PTR_TYPE(p), GET_PTR_TYPE(p)); + if (ret) { + free(new_node); + return -1; + } + tree->a[i] = SET_PTR_TYPE(new_node, PTR_TYPE_INTERNAL); + return note_tree_insert(new_node, n + 1, entry, type); } +} - index = hash_index(&hash_map, commit_sha1); - if (index < 0) { - index = -1 - index; - hash_map.count++; +/* Free the entire notes data contained in the given tree */ +static void note_tree_free(struct int_node *tree) +{ + unsigned int i; + for (i = 0; i < 16; i++) { + void *p = tree->a[i]; + switch(GET_PTR_TYPE(p)) { + case PTR_TYPE_INTERNAL: + note_tree_free(CLR_PTR_TYPE(p)); + /* fall through */ + case PTR_TYPE_NOTE: + case PTR_TYPE_SUBTREE: + free(CLR_PTR_TYPE(p)); + } } +} - hashcpy(hash_map.entries[index].commit_sha1, commit_sha1); - hashcpy(hash_map.entries[index].notes_sha1, notes_sha1); +/* + * Convert a partial SHA1 hex string to the corresponding partial SHA1 value. + * - hex - Partial SHA1 segment in ASCII hex format + * - hex_len - Length of above segment. Must be multiple of 2 between 0 and 40 + * - sha1 - Partial SHA1 value is written here + * - sha1_len - Max #bytes to store in sha1, Must be >= hex_len / 2, and < 20 + * Returns -1 on error (invalid arguments or invalid SHA1 (not in hex format). + * Otherwise, returns number of bytes written to sha1 (i.e. hex_len / 2). + * Pads sha1 with NULs up to sha1_len (not included in returned length). + */ +static int get_sha1_hex_segment(const char *hex, unsigned int hex_len, + unsigned char *sha1, unsigned int sha1_len) +{ + unsigned int i, len = hex_len >> 1; + if (hex_len % 2 != 0 || len > sha1_len) + return -1; + for (i = 0; i < len; i++) { + unsigned int val = (hexval(hex[0]) << 4) | hexval(hex[1]); + if (val & ~0xff) + return -1; + *sha1++ = val; + hex += 2; + } + for (; i < sha1_len; i++) + *sha1++ = 0; + return len; } -static void initialize_hash_map(const char *notes_ref_name) +static void load_subtree(struct leaf_node *subtree, struct int_node *node, + unsigned int n) { - unsigned char sha1[20], commit_sha1[20]; - unsigned mode; + unsigned char commit_sha1[20]; + unsigned int prefix_len; + int status; + void *buf; struct tree_desc desc; struct name_entry entry; - void *buf; + + buf = fill_tree_descriptor(&desc, subtree->val_sha1); + if (!buf) + die("Could not read %s for notes-index", + sha1_to_hex(subtree->val_sha1)); + + prefix_len = subtree->key_sha1[19]; + assert(prefix_len * 2 >= n); + memcpy(commit_sha1, subtree->key_sha1, prefix_len); + while (tree_entry(&desc, &entry)) { + int len = get_sha1_hex_segment(entry.path, strlen(entry.path), + commit_sha1 + prefix_len, 20 - prefix_len); + if (len < 0) + continue; /* entry.path is not a SHA1 sum. Skip */ + len += prefix_len; + + /* + * If commit SHA1 is complete (len == 20), assume note object + * If commit SHA1 is incomplete (len < 20), assume note subtree + */ + if (len <= 20) { + unsigned char type = PTR_TYPE_NOTE; + struct leaf_node *l = (struct leaf_node *) + xcalloc(sizeof(struct leaf_node), 1); + hashcpy(l->key_sha1, commit_sha1); + hashcpy(l->val_sha1, entry.sha1); + if (len < 20) { + l->key_sha1[19] = (unsigned char) len; + type = PTR_TYPE_SUBTREE; + } + status = note_tree_insert(node, n, l, type); + assert(!status); + } + } + free(buf); +} + +static void initialize_notes(const char *notes_ref_name) +{ + unsigned char sha1[20], commit_sha1[20]; + unsigned mode; + struct leaf_node root_tree; if (!notes_ref_name || read_ref(notes_ref_name, commit_sha1) || get_tree_entry(commit_sha1, "", sha1, &mode)) return; - buf = fill_tree_descriptor(&desc, sha1); - if (!buf) - die("Could not read %s for notes-index", sha1_to_hex(sha1)); - - while (tree_entry(&desc, &entry)) - if (!get_sha1(entry.path, commit_sha1)) - add_entry(commit_sha1, entry.sha1); - free(buf); + hashclr(root_tree.key_sha1); + hashcpy(root_tree.val_sha1, sha1); + load_subtree(&root_tree, &root_node, 0); } static unsigned char *lookup_notes(const unsigned char *commit_sha1) { - int index; - - if (!hash_map.size) - return NULL; - - index = hash_index(&hash_map, commit_sha1); - if (index < 0) - return NULL; - return hash_map.entries[index].notes_sha1; + struct leaf_node *found = note_tree_find(&root_node, 0, commit_sha1); + if (found) + return found->val_sha1; + return NULL; } void free_notes(void) { - free(hash_map.entries); - memset(&hash_map, 0, sizeof(struct hash_map)); + note_tree_free(&root_node); + memset(&root_node, 0, sizeof(struct int_node)); initialized = 0; } @@ -127,7 +306,7 @@ void get_commit_notes(const struct commit *commit, struct strbuf *sb, notes_ref_name = getenv(GIT_NOTES_REF_ENVIRONMENT); else if (!notes_ref_name) notes_ref_name = GIT_NOTES_DEFAULT_REF; - initialize_hash_map(notes_ref_name); + initialize_notes(notes_ref_name); initialized = 1; } -- cgit v1.2.1 From ef8db638cc96abaf166bbafe31752219f3d2cdc2 Mon Sep 17 00:00:00 2001 From: Johan Herland Date: Fri, 9 Oct 2009 12:22:09 +0200 Subject: Refactor notes code to concatenate multiple notes annotating the same object Currently, having multiple notes referring to the same commit from various locations in the notes tree is strongly discouraged, since only one of those notes will be parsed and shown. This patch teaches the notes code to _concatenate_ multiple notes that annotate the same commit. Notes are concatenated by creating a new blob object containing the concatenation of the notes in question, and replacing them with the concatenated note in the internal notes tree structure. Getting the concatenation right requires being more proactive in unpacking subtree entries in the internal notes tree structure, so that we don't return a note prematurely (i.e. before having found all other notes that annotate the same object). As such, this patch may incur a small performance penalty. Suggested-by: Sam Vilain Re-suggested-by: Johannes Schindelin Signed-off-by: Johan Herland Signed-off-by: Junio C Hamano --- notes.c | 243 ++++++++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 161 insertions(+), 82 deletions(-) (limited to 'notes.c') diff --git a/notes.c b/notes.c index 210c4b2263..50a4672d7c 100644 --- a/notes.c +++ b/notes.c @@ -59,115 +59,196 @@ static void load_subtree(struct leaf_node *subtree, struct int_node *node, unsigned int n); /* - * To find a leaf_node: + * Search the tree until the appropriate location for the given key is found: * 1. Start at the root node, with n = 0 - * 2. Use the nth nibble of the key as an index into a: - * - If a[n] is an int_node, recurse into that node and increment n - * - If a leaf_node with matching key, return leaf_node (assert note entry) + * 2. If a[0] at the current level is a matching subtree entry, unpack that + * subtree entry and remove it; restart search at the current level. + * 3. Use the nth nibble of the key as an index into a: + * - If a[n] is an int_node, recurse from #2 into that node and increment n * - If a matching subtree entry, unpack that subtree entry (and remove it); * restart search at the current level. - * - Otherwise, we end up at a NULL pointer, or a non-matching leaf_node. - * Backtrack out of the recursion, one level at a time and check a[0]: - * - If a[0] at the current level is a matching subtree entry, unpack that - * subtree entry (and remove it); restart search at the current level. + * - Otherwise, we have found one of the following: + * - a subtree entry which does not match the key + * - a note entry which may or may not match the key + * - an unused leaf node (NULL) + * In any case, set *tree and *n, and return pointer to the tree location. */ -static struct leaf_node *note_tree_find(struct int_node *tree, unsigned char n, - const unsigned char *key_sha1) +static void **note_tree_search(struct int_node **tree, + unsigned char *n, const unsigned char *key_sha1) { struct leaf_node *l; - unsigned char i = GET_NIBBLE(n, key_sha1); - void *p = tree->a[i]; + unsigned char i; + void *p = (*tree)->a[0]; + if (GET_PTR_TYPE(p) == PTR_TYPE_SUBTREE) { + l = (struct leaf_node *) CLR_PTR_TYPE(p); + if (!SUBTREE_SHA1_PREFIXCMP(key_sha1, l->key_sha1)) { + /* unpack tree and resume search */ + (*tree)->a[0] = NULL; + load_subtree(l, *tree, *n); + free(l); + return note_tree_search(tree, n, key_sha1); + } + } + + i = GET_NIBBLE(*n, key_sha1); + p = (*tree)->a[i]; switch(GET_PTR_TYPE(p)) { case PTR_TYPE_INTERNAL: - l = note_tree_find(CLR_PTR_TYPE(p), n + 1, key_sha1); - if (l) - return l; - break; - case PTR_TYPE_NOTE: - l = (struct leaf_node *) CLR_PTR_TYPE(p); - if (!hashcmp(key_sha1, l->key_sha1)) - return l; /* return note object matching given key */ - break; + *tree = CLR_PTR_TYPE(p); + (*n)++; + return note_tree_search(tree, n, key_sha1); case PTR_TYPE_SUBTREE: l = (struct leaf_node *) CLR_PTR_TYPE(p); if (!SUBTREE_SHA1_PREFIXCMP(key_sha1, l->key_sha1)) { /* unpack tree and resume search */ - tree->a[i] = NULL; - load_subtree(l, tree, n); + (*tree)->a[i] = NULL; + load_subtree(l, *tree, *n); free(l); - return note_tree_find(tree, n, key_sha1); + return note_tree_search(tree, n, key_sha1); } - break; - case PTR_TYPE_NULL: + /* fall through */ default: - assert(!p); - break; + return &((*tree)->a[i]); } +} - /* - * Did not find key at this (or any lower) level. - * Check if there's a matching subtree entry in tree->a[0]. - * If so, unpack tree and resume search. - */ - p = tree->a[0]; - if (GET_PTR_TYPE(p) != PTR_TYPE_SUBTREE) - return NULL; - l = (struct leaf_node *) CLR_PTR_TYPE(p); - if (!SUBTREE_SHA1_PREFIXCMP(key_sha1, l->key_sha1)) { - /* unpack tree and resume search */ - tree->a[0] = NULL; - load_subtree(l, tree, n); - free(l); - return note_tree_find(tree, n, key_sha1); +/* + * To find a leaf_node: + * Search to the tree location appropriate for the given key: + * If a note entry with matching key, return the note entry, else return NULL. + */ +static struct leaf_node *note_tree_find(struct int_node *tree, unsigned char n, + const unsigned char *key_sha1) +{ + void **p = note_tree_search(&tree, &n, key_sha1); + if (GET_PTR_TYPE(*p) == PTR_TYPE_NOTE) { + struct leaf_node *l = (struct leaf_node *) CLR_PTR_TYPE(*p); + if (!hashcmp(key_sha1, l->key_sha1)) + return l; } return NULL; } +/* Create a new blob object by concatenating the two given blob objects */ +static int concatenate_notes(unsigned char *cur_sha1, + const unsigned char *new_sha1) +{ + char *cur_msg, *new_msg, *buf; + unsigned long cur_len, new_len, buf_len; + enum object_type cur_type, new_type; + int ret; + + /* read in both note blob objects */ + new_msg = read_sha1_file(new_sha1, &new_type, &new_len); + if (!new_msg || !new_len || new_type != OBJ_BLOB) { + free(new_msg); + return 0; + } + cur_msg = read_sha1_file(cur_sha1, &cur_type, &cur_len); + if (!cur_msg || !cur_len || cur_type != OBJ_BLOB) { + free(cur_msg); + free(new_msg); + hashcpy(cur_sha1, new_sha1); + return 0; + } + + /* we will separate the notes by a newline anyway */ + if (cur_msg[cur_len - 1] == '\n') + cur_len--; + + /* concatenate cur_msg and new_msg into buf */ + buf_len = cur_len + 1 + new_len; + buf = (char *) xmalloc(buf_len); + memcpy(buf, cur_msg, cur_len); + buf[cur_len] = '\n'; + memcpy(buf + cur_len + 1, new_msg, new_len); + + free(cur_msg); + free(new_msg); + + /* create a new blob object from buf */ + ret = write_sha1_file(buf, buf_len, "blob", cur_sha1); + free(buf); + return ret; +} + /* * To insert a leaf_node: - * 1. Start at the root node, with n = 0 - * 2. Use the nth nibble of the key as an index into a: - * - If a[n] is NULL, store the tweaked pointer directly into a[n] - * - If a[n] is an int_node, recurse into that node and increment n - * - If a[n] is a leaf_node: - * 1. Check if they're equal, and handle that (abort? overwrite?) - * 2. Create a new int_node, and store both leaf_nodes there - * 3. Store the new int_node into a[n]. + * Search to the tree location appropriate for the given leaf_node's key: + * - If location is unused (NULL), store the tweaked pointer directly there + * - If location holds a note entry that matches the note-to-be-inserted, then + * concatenate the two notes. + * - If location holds a note entry that matches the subtree-to-be-inserted, + * then unpack the subtree-to-be-inserted into the location. + * - If location holds a matching subtree entry, unpack the subtree at that + * location, and restart the insert operation from that level. + * - Else, create a new int_node, holding both the node-at-location and the + * node-to-be-inserted, and store the new int_node into the location. */ -static int note_tree_insert(struct int_node *tree, unsigned char n, - const struct leaf_node *entry, unsigned char type) +static void note_tree_insert(struct int_node *tree, unsigned char n, + struct leaf_node *entry, unsigned char type) { struct int_node *new_node; - const struct leaf_node *l; - int ret; - unsigned char i = GET_NIBBLE(n, entry->key_sha1); - void *p = tree->a[i]; - assert(GET_PTR_TYPE(entry) == PTR_TYPE_NULL); - switch(GET_PTR_TYPE(p)) { + struct leaf_node *l; + void **p = note_tree_search(&tree, &n, entry->key_sha1); + + assert(GET_PTR_TYPE(entry) == 0); /* no type bits set */ + l = (struct leaf_node *) CLR_PTR_TYPE(*p); + switch(GET_PTR_TYPE(*p)) { case PTR_TYPE_NULL: - assert(!p); - tree->a[i] = SET_PTR_TYPE(entry, type); - return 0; - case PTR_TYPE_INTERNAL: - return note_tree_insert(CLR_PTR_TYPE(p), n + 1, entry, type); - default: - assert(GET_PTR_TYPE(p) == PTR_TYPE_NOTE || - GET_PTR_TYPE(p) == PTR_TYPE_SUBTREE); - l = (const struct leaf_node *) CLR_PTR_TYPE(p); - if (!hashcmp(entry->key_sha1, l->key_sha1)) - return -1; /* abort insert on matching key */ - new_node = (struct int_node *) - xcalloc(sizeof(struct int_node), 1); - ret = note_tree_insert(new_node, n + 1, - CLR_PTR_TYPE(p), GET_PTR_TYPE(p)); - if (ret) { - free(new_node); - return -1; + assert(!*p); + *p = SET_PTR_TYPE(entry, type); + return; + case PTR_TYPE_NOTE: + switch (type) { + case PTR_TYPE_NOTE: + if (!hashcmp(l->key_sha1, entry->key_sha1)) { + /* skip concatenation if l == entry */ + if (!hashcmp(l->val_sha1, entry->val_sha1)) + return; + + if (concatenate_notes(l->val_sha1, + entry->val_sha1)) + die("failed to concatenate note %s " + "into note %s for commit %s", + sha1_to_hex(entry->val_sha1), + sha1_to_hex(l->val_sha1), + sha1_to_hex(l->key_sha1)); + free(entry); + return; + } + break; + case PTR_TYPE_SUBTREE: + if (!SUBTREE_SHA1_PREFIXCMP(l->key_sha1, + entry->key_sha1)) { + /* unpack 'entry' */ + load_subtree(entry, tree, n); + free(entry); + return; + } + break; + } + break; + case PTR_TYPE_SUBTREE: + if (!SUBTREE_SHA1_PREFIXCMP(entry->key_sha1, l->key_sha1)) { + /* unpack 'l' and restart insert */ + *p = NULL; + load_subtree(l, tree, n); + free(l); + note_tree_insert(tree, n, entry, type); + return; } - tree->a[i] = SET_PTR_TYPE(new_node, PTR_TYPE_INTERNAL); - return note_tree_insert(new_node, n + 1, entry, type); + break; } + + /* non-matching leaf_node */ + assert(GET_PTR_TYPE(*p) == PTR_TYPE_NOTE || + GET_PTR_TYPE(*p) == PTR_TYPE_SUBTREE); + new_node = (struct int_node *) xcalloc(sizeof(struct int_node), 1); + note_tree_insert(new_node, n + 1, l, GET_PTR_TYPE(*p)); + *p = SET_PTR_TYPE(new_node, PTR_TYPE_INTERNAL); + note_tree_insert(new_node, n + 1, entry, type); } /* Free the entire notes data contained in the given tree */ @@ -220,7 +301,6 @@ static void load_subtree(struct leaf_node *subtree, struct int_node *node, { unsigned char commit_sha1[20]; unsigned int prefix_len; - int status; void *buf; struct tree_desc desc; struct name_entry entry; @@ -254,8 +334,7 @@ static void load_subtree(struct leaf_node *subtree, struct int_node *node, l->key_sha1[19] = (unsigned char) len; type = PTR_TYPE_SUBTREE; } - status = note_tree_insert(node, n, l, type); - assert(!status); + note_tree_insert(node, n, l, type); } } free(buf); -- cgit v1.2.1