diff options
author | Junio C Hamano <gitster@pobox.com> | 2014-07-16 11:25:40 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2014-07-16 11:25:40 -0700 |
commit | 788cef81d40070d5755490441abad1a27bc120b7 (patch) | |
tree | ebc44e5c83def9855aac4559e87f201882d8f955 /read-cache.c | |
parent | e0a064a10749a8039f28e9bd3de0ffb5cf99e678 (diff) | |
parent | 3e52f70b1505956bf91cb7b107e526d1c67da0a6 (diff) | |
download | git-788cef81d40070d5755490441abad1a27bc120b7.tar.gz |
Merge branch 'nd/split-index'
An experiment to use two files (the base file and incremental
changes relative to it) to represent the index to reduce I/O cost
of rewriting a large index when only small part of the working tree
changes.
* nd/split-index: (32 commits)
t1700: new tests for split-index mode
t2104: make sure split index mode is off for the version test
read-cache: force split index mode with GIT_TEST_SPLIT_INDEX
read-tree: note about dropping split-index mode or index version
read-tree: force split-index mode off on --index-output
rev-parse: add --shared-index-path to get shared index path
update-index --split-index: do not split if $GIT_DIR is read only
update-index: new options to enable/disable split index mode
split-index: strip pathname of on-disk replaced entries
split-index: do not invalidate cache-tree at read time
split-index: the reading part
split-index: the writing part
read-cache: mark updated entries for split index
read-cache: save deleted entries in split index
read-cache: mark new entries for split index
read-cache: split-index mode
read-cache: save index SHA-1 after reading
entry.c: update cache_changed if refresh_cache is set in checkout_entry()
cache-tree: mark istate->cache_changed on prime_cache_tree()
cache-tree: mark istate->cache_changed on cache tree update
...
Diffstat (limited to 'read-cache.c')
-rw-r--r-- | read-cache.c | 272 |
1 files changed, 244 insertions, 28 deletions
diff --git a/read-cache.c b/read-cache.c index 6a45966ec4..5d3c8bd4aa 100644 --- a/read-cache.c +++ b/read-cache.c @@ -14,6 +14,8 @@ #include "resolve-undo.h" #include "strbuf.h" #include "varint.h" +#include "split-index.h" +#include "sigchain.h" static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, unsigned int options); @@ -34,8 +36,15 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, #define CACHE_EXT(s) ( (s[0]<<24)|(s[1]<<16)|(s[2]<<8)|(s[3]) ) #define CACHE_EXT_TREE 0x54524545 /* "TREE" */ #define CACHE_EXT_RESOLVE_UNDO 0x52455543 /* "REUC" */ +#define CACHE_EXT_LINK 0x6c696e6b /* "link" */ + +/* changes that can be kept in $GIT_DIR/index (basically all extensions) */ +#define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \ + CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED | \ + SPLIT_INDEX_ORDERED) struct index_state the_index; +static const char *alternate_index_output; static void set_index_entry(struct index_state *istate, int nr, struct cache_entry *ce) { @@ -47,10 +56,12 @@ static void replace_index_entry(struct index_state *istate, int nr, struct cache { struct cache_entry *old = istate->cache[nr]; + replace_index_entry_in_base(istate, old, ce); remove_name_hash(istate, old); free(old); set_index_entry(istate, nr, ce); - istate->cache_changed = 1; + ce->ce_flags |= CE_UPDATE_IN_BASE; + istate->cache_changed |= CE_ENTRY_CHANGED; } void rename_index_entry_at(struct index_state *istate, int nr, const char *new_name) @@ -62,9 +73,10 @@ void rename_index_entry_at(struct index_state *istate, int nr, const char *new_n copy_cache_entry(new, old); new->ce_flags &= ~CE_HASHED; new->ce_namelen = namelen; + new->index = 0; memcpy(new->name, new_name, namelen + 1); - cache_tree_invalidate_path(istate->cache_tree, old->name); + cache_tree_invalidate_path(istate, old->name); remove_index_entry_at(istate, nr); add_index_entry(istate, new, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE); } @@ -483,8 +495,8 @@ int remove_index_entry_at(struct index_state *istate, int pos) record_resolve_undo(istate, ce); remove_name_hash(istate, ce); - free(ce); - istate->cache_changed = 1; + save_or_free_index_entry(istate, ce); + istate->cache_changed |= CE_ENTRY_REMOVED; istate->cache_nr--; if (pos >= istate->cache_nr) return 0; @@ -507,12 +519,14 @@ void remove_marked_cache_entries(struct index_state *istate) for (i = j = 0; i < istate->cache_nr; i++) { if (ce_array[i]->ce_flags & CE_REMOVE) { remove_name_hash(istate, ce_array[i]); - free(ce_array[i]); + save_or_free_index_entry(istate, ce_array[i]); } else ce_array[j++] = ce_array[i]; } - istate->cache_changed = 1; + if (j == istate->cache_nr) + return; + istate->cache_changed |= CE_ENTRY_REMOVED; istate->cache_nr = j; } @@ -521,7 +535,7 @@ int remove_file_from_index(struct index_state *istate, const char *path) int pos = index_name_pos(istate, path, strlen(path)); if (pos < 0) pos = -pos-1; - cache_tree_invalidate_path(istate->cache_tree, path); + cache_tree_invalidate_path(istate, path); while (pos < istate->cache_nr && !strcmp(istate->cache[pos]->name, path)) remove_index_entry_at(istate, pos); return 0; @@ -570,7 +584,9 @@ static int different_name(struct cache_entry *ce, struct cache_entry *alias) * So we use the CE_ADDED flag to verify that the alias was an old * one before we accept it as */ -static struct cache_entry *create_alias_ce(struct cache_entry *ce, struct cache_entry *alias) +static struct cache_entry *create_alias_ce(struct index_state *istate, + struct cache_entry *ce, + struct cache_entry *alias) { int len; struct cache_entry *new; @@ -583,7 +599,7 @@ static struct cache_entry *create_alias_ce(struct cache_entry *ce, struct cache_ new = xcalloc(1, cache_entry_size(len)); memcpy(new->name, alias->name, len); copy_cache_entry(new, ce); - free(ce); + save_or_free_index_entry(istate, ce); return new; } @@ -676,7 +692,7 @@ int add_to_index(struct index_state *istate, const char *path, struct stat *st, set_object_name_for_intent_to_add_entry(ce); if (ignore_case && alias && different_name(ce, alias)) - ce = create_alias_ce(ce, alias); + ce = create_alias_ce(istate, ce, alias); ce->ce_flags |= CE_ADDED; /* It was suspected to be racily clean, but it turns out to be Ok */ @@ -939,7 +955,8 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e int skip_df_check = option & ADD_CACHE_SKIP_DFCHECK; int new_only = option & ADD_CACHE_NEW_ONLY; - cache_tree_invalidate_path(istate->cache_tree, ce->name); + if (!(option & ADD_CACHE_KEEP_CACHE_TREE)) + cache_tree_invalidate_path(istate, ce->name); pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce)); /* existing match? Just replace it. */ @@ -1002,7 +1019,7 @@ int add_index_entry(struct index_state *istate, struct cache_entry *ce, int opti istate->cache + pos, (istate->cache_nr - pos - 1) * sizeof(ce)); set_index_entry(istate, pos, ce); - istate->cache_changed = 1; + istate->cache_changed |= CE_ENTRY_ADDED; return 0; } @@ -1101,6 +1118,7 @@ static struct cache_entry *refresh_cache_ent(struct index_state *istate, !(ce->ce_flags & CE_VALID)) updated->ce_flags &= ~CE_VALID; + /* istate->cache_changed is updated in the caller */ return updated; } @@ -1182,7 +1200,8 @@ int refresh_index(struct index_state *istate, unsigned int flags, * means the index is not valid anymore. */ ce->ce_flags &= ~CE_VALID; - istate->cache_changed = 1; + ce->ce_flags |= CE_UPDATE_IN_BASE; + istate->cache_changed |= CE_ENTRY_CHANGED; } if (quiet) continue; @@ -1334,6 +1353,10 @@ static int read_index_extension(struct index_state *istate, case CACHE_EXT_RESOLVE_UNDO: istate->resolve_undo = resolve_undo_read(data, sz); break; + case CACHE_EXT_LINK: + if (read_link_extension(istate, data, sz)) + return -1; + break; default: if (*ext < 'A' || 'Z' < *ext) return error("index uses %.4s extension, which we do not understand", @@ -1368,6 +1391,7 @@ static struct cache_entry *cache_entry_from_ondisk(struct ondisk_cache_entry *on ce->ce_stat_data.sd_size = get_be32(&ondisk->size); ce->ce_flags = flags & ~CE_NAMEMASK; ce->ce_namelen = len; + ce->index = 0; hashcpy(ce->sha1, ondisk->sha1); memcpy(ce->name, name, len); ce->name[len] = '\0'; @@ -1442,7 +1466,7 @@ static struct cache_entry *create_from_disk(struct ondisk_cache_entry *ondisk, } /* remember to discard_cache() before reading a different cache! */ -int read_index_from(struct index_state *istate, const char *path) +int do_read_index(struct index_state *istate, const char *path, int must_exist) { int fd, i; struct stat st; @@ -1459,9 +1483,9 @@ int read_index_from(struct index_state *istate, const char *path) istate->timestamp.nsec = 0; fd = open(path, O_RDONLY); if (fd < 0) { - if (errno == ENOENT) + if (!must_exist && errno == ENOENT) return 0; - die_errno("index file open failed"); + die_errno("%s: index file open failed", path); } if (fstat(fd, &st)) @@ -1480,7 +1504,7 @@ int read_index_from(struct index_state *istate, const char *path) if (verify_hdr(hdr, mmap_size) < 0) goto unmap; - hashcpy(istate->sha1, (unsigned char *)hdr + mmap_size - 20); + hashcpy(istate->sha1, (const unsigned char *)hdr + mmap_size - 20); istate->version = ntohl(hdr->hdr_version); istate->cache_nr = ntohl(hdr->hdr_entries); istate->cache_alloc = alloc_nr(istate->cache_nr); @@ -1534,6 +1558,40 @@ unmap: die("index file corrupt"); } +int read_index_from(struct index_state *istate, const char *path) +{ + struct split_index *split_index; + int ret; + + /* istate->initialized covers both .git/index and .git/sharedindex.xxx */ + if (istate->initialized) + return istate->cache_nr; + + ret = do_read_index(istate, path, 0); + split_index = istate->split_index; + if (!split_index) + return ret; + + if (is_null_sha1(split_index->base_sha1)) + return ret; + + if (split_index->base) + discard_index(split_index->base); + else + split_index->base = xcalloc(1, sizeof(*split_index->base)); + ret = do_read_index(split_index->base, + git_path("sharedindex.%s", + sha1_to_hex(split_index->base_sha1)), 1); + if (hashcmp(split_index->base_sha1, split_index->base->sha1)) + die("broken index, expect %s in %s, got %s", + sha1_to_hex(split_index->base_sha1), + git_path("sharedindex.%s", + sha1_to_hex(split_index->base_sha1)), + sha1_to_hex(split_index->base->sha1)); + merge_base_index(istate); + return ret; +} + int is_index_unborn(struct index_state *istate) { return (!istate->cache_nr && !istate->timestamp.sec); @@ -1543,8 +1601,15 @@ int discard_index(struct index_state *istate) { int i; - for (i = 0; i < istate->cache_nr; i++) + for (i = 0; i < istate->cache_nr; i++) { + if (istate->cache[i]->index && + istate->split_index && + istate->split_index->base && + istate->cache[i]->index <= istate->split_index->base->cache_nr && + istate->cache[i] == istate->split_index->base->cache[istate->cache[i]->index - 1]) + continue; free(istate->cache[i]); + } resolve_undo_clear_index(istate); istate->cache_nr = 0; istate->cache_changed = 0; @@ -1556,6 +1621,7 @@ int discard_index(struct index_state *istate) free(istate->cache); istate->cache = NULL; istate->cache_alloc = 0; + discard_split_index(istate); return 0; } @@ -1616,7 +1682,7 @@ static int write_index_ext_header(git_SHA_CTX *context, int fd, (ce_write(context, fd, &sz, 4) < 0)) ? -1 : 0; } -static int ce_flush(git_SHA_CTX *context, int fd) +static int ce_flush(git_SHA_CTX *context, int fd, unsigned char *sha1) { unsigned int left = write_buffer_len; @@ -1634,6 +1700,7 @@ static int ce_flush(git_SHA_CTX *context, int fd) /* Append the SHA1 signature at the end */ git_SHA1_Final(write_buffer + left, context); + hashcpy(sha1, write_buffer + left); left += 20; return (write_in_full(fd, write_buffer, left) != left) ? -1 : 0; } @@ -1705,7 +1772,7 @@ static char *copy_cache_entry_to_ondisk(struct ondisk_cache_entry *ondisk, ondisk->size = htonl(ce->ce_stat_data.sd_size); hashcpy(ondisk->sha1, ce->sha1); - flags = ce->ce_flags; + flags = ce->ce_flags & ~CE_NAMEMASK; flags |= (ce_namelen(ce) >= CE_NAMEMASK ? CE_NAMEMASK : ce_namelen(ce)); ondisk->flags = htons(flags); if (ce->ce_flags & CE_EXTENDED) { @@ -1724,9 +1791,15 @@ static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce, { int size; struct ondisk_cache_entry *ondisk; + int saved_namelen = saved_namelen; /* compiler workaround */ char *name; int result; + if (ce->ce_flags & CE_STRIP_NAME) { + saved_namelen = ce_namelen(ce); + ce->ce_namelen = 0; + } + if (!previous_name) { size = ondisk_ce_size(ce); ondisk = xcalloc(1, size); @@ -1758,6 +1831,10 @@ static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce, strbuf_splice(previous_name, common, to_remove, ce->name + common, ce_namelen(ce) - common); } + if (ce->ce_flags & CE_STRIP_NAME) { + ce->ce_namelen = saved_namelen; + ce->ce_flags &= ~CE_STRIP_NAME; + } result = ce_write(c, fd, ondisk, size); free(ondisk); @@ -1827,13 +1904,13 @@ static int has_racy_timestamp(struct index_state *istate) void update_index_if_able(struct index_state *istate, struct lock_file *lockfile) { if ((istate->cache_changed || has_racy_timestamp(istate)) && - verify_index(istate) && !write_index(istate, lockfile->fd)) - commit_locked_index(lockfile); - else + verify_index(istate) && + write_locked_index(istate, lockfile, COMMIT_LOCK)) rollback_lock_file(lockfile); } -int write_index(struct index_state *istate, int newfd) +static int do_write_index(struct index_state *istate, int newfd, + int strip_extensions) { git_SHA_CTX c; struct cache_header hdr; @@ -1855,8 +1932,11 @@ int write_index(struct index_state *istate, int newfd) } } - if (!istate->version) + if (!istate->version) { istate->version = get_index_format_default(); + if (getenv("GIT_TEST_SPLIT_INDEX")) + init_split_index(istate); + } /* demote version 3 to version 2 when the latter suffices */ if (istate->version == 3 || istate->version == 2) @@ -1896,7 +1976,18 @@ int write_index(struct index_state *istate, int newfd) strbuf_release(&previous_name_buf); /* Write extension data here */ - if (istate->cache_tree) { + if (!strip_extensions && istate->split_index) { + struct strbuf sb = STRBUF_INIT; + + err = write_link_extension(&sb, istate) < 0 || + write_index_ext_header(&c, newfd, CACHE_EXT_LINK, + sb.len) < 0 || + ce_write(&c, newfd, sb.buf, sb.len) < 0; + strbuf_release(&sb); + if (err) + return -1; + } + if (!strip_extensions && istate->cache_tree) { struct strbuf sb = STRBUF_INIT; cache_tree_write(&sb, istate->cache_tree); @@ -1906,7 +1997,7 @@ int write_index(struct index_state *istate, int newfd) if (err) return -1; } - if (istate->resolve_undo) { + if (!strip_extensions && istate->resolve_undo) { struct strbuf sb = STRBUF_INIT; resolve_undo_write(&sb, istate->resolve_undo); @@ -1918,13 +2009,138 @@ int write_index(struct index_state *istate, int newfd) return -1; } - if (ce_flush(&c, newfd) || fstat(newfd, &st)) + if (ce_flush(&c, newfd, istate->sha1) || fstat(newfd, &st)) return -1; istate->timestamp.sec = (unsigned int)st.st_mtime; istate->timestamp.nsec = ST_MTIME_NSEC(st); return 0; } +void set_alternate_index_output(const char *name) +{ + alternate_index_output = name; +} + +static int commit_locked_index(struct lock_file *lk) +{ + if (alternate_index_output) { + if (lk->fd >= 0 && close_lock_file(lk)) + return -1; + if (rename(lk->filename, alternate_index_output)) + return -1; + lk->filename[0] = 0; + return 0; + } else { + return commit_lock_file(lk); + } +} + +static int do_write_locked_index(struct index_state *istate, struct lock_file *lock, + unsigned flags) +{ + int ret = do_write_index(istate, lock->fd, 0); + if (ret) + return ret; + assert((flags & (COMMIT_LOCK | CLOSE_LOCK)) != + (COMMIT_LOCK | CLOSE_LOCK)); + if (flags & COMMIT_LOCK) + return commit_locked_index(lock); + else if (flags & CLOSE_LOCK) + return close_lock_file(lock); + else + return ret; +} + +static int write_split_index(struct index_state *istate, + struct lock_file *lock, + unsigned flags) +{ + int ret; + prepare_to_write_split_index(istate); + ret = do_write_locked_index(istate, lock, flags); + finish_writing_split_index(istate); + return ret; +} + +static char *temporary_sharedindex; + +static void remove_temporary_sharedindex(void) +{ + if (temporary_sharedindex) { + unlink_or_warn(temporary_sharedindex); + free(temporary_sharedindex); + temporary_sharedindex = NULL; + } +} + +static void remove_temporary_sharedindex_on_signal(int signo) +{ + remove_temporary_sharedindex(); + sigchain_pop(signo); + raise(signo); +} + +static int write_shared_index(struct index_state *istate, + struct lock_file *lock, unsigned flags) +{ + struct split_index *si = istate->split_index; + static int installed_handler; + int fd, ret; + + temporary_sharedindex = git_pathdup("sharedindex_XXXXXX"); + fd = mkstemp(temporary_sharedindex); + if (fd < 0) { + free(temporary_sharedindex); + temporary_sharedindex = NULL; + hashclr(si->base_sha1); + return do_write_locked_index(istate, lock, flags); + } + if (!installed_handler) { + atexit(remove_temporary_sharedindex); + sigchain_push_common(remove_temporary_sharedindex_on_signal); + } + move_cache_to_base_index(istate); + ret = do_write_index(si->base, fd, 1); + close(fd); + if (ret) { + remove_temporary_sharedindex(); + return ret; + } + ret = rename(temporary_sharedindex, + git_path("sharedindex.%s", sha1_to_hex(si->base->sha1))); + free(temporary_sharedindex); + temporary_sharedindex = NULL; + if (!ret) + hashcpy(si->base_sha1, si->base->sha1); + return ret; +} + +int write_locked_index(struct index_state *istate, struct lock_file *lock, + unsigned flags) +{ + struct split_index *si = istate->split_index; + + if (!si || alternate_index_output || + (istate->cache_changed & ~EXTMASK)) { + if (si) + hashclr(si->base_sha1); + return do_write_locked_index(istate, lock, flags); + } + + if (getenv("GIT_TEST_SPLIT_INDEX")) { + int v = si->base_sha1[0]; + if ((v & 15) < 6) + istate->cache_changed |= SPLIT_INDEX_ORDERED; + } + if (istate->cache_changed & SPLIT_INDEX_ORDERED) { + int ret = write_shared_index(istate, lock, flags); + if (ret) + return ret; + } + + return write_split_index(istate, lock, flags); +} + /* * Read the index file that is potentially unmerged into given * index_state, dropping any unmerged entries. Returns true if |