summaryrefslogtreecommitdiff
path: root/submodule.c
diff options
context:
space:
mode:
authorGlen Choo <chooglen@google.com>2022-03-07 16:14:32 -0800
committerJunio C Hamano <gitster@pobox.com>2022-03-16 16:08:59 -0700
commitb90d9f7632d380d3f16197ae657ab57075acd1eb (patch)
treea89794a4a8f9add7069a79d86e1854fff540d04f /submodule.c
parent5370b91f3fae9d7a511e23142b55082200152cef (diff)
downloadgit-b90d9f7632d380d3f16197ae657ab57075acd1eb.tar.gz
fetch: fetch unpopulated, changed submodules
"git fetch --recurse-submodules" only considers populated submodules (i.e. submodules that can be found by iterating the index), which makes "git fetch" behave differently based on which commit is checked out. As a result, even if the user has initialized all submodules correctly, they may not fetch the necessary submodule commits, and commands like "git checkout --recurse-submodules" might fail. Teach "git fetch" to fetch cloned, changed submodules regardless of whether they are populated. This is in addition to the current behavior of fetching populated submodules (which is always attempted regardless of what was fetched in the superproject, or even if nothing was fetched in the superproject). A submodule may be encountered multiple times (via the list of populated submodules or via the list of changed submodules). When this happens, "git fetch" only reads the 'populated copy' and ignores the 'changed copy'. Amend the verify_fetch_result() test helper so that we can assert on which 'copy' is being read. Signed-off-by: Glen Choo <chooglen@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'submodule.c')
-rw-r--r--submodule.c193
1 files changed, 177 insertions, 16 deletions
diff --git a/submodule.c b/submodule.c
index b36ef26752..2b8a99409e 100644
--- a/submodule.c
+++ b/submodule.c
@@ -808,9 +808,40 @@ static const char *default_name_or_path(const char *path_or_name)
/*
* Holds relevant information for a changed submodule. Used as the .util
- * member of the changed submodule string_list_item.
+ * member of the changed submodule name string_list_item.
+ *
+ * (super_oid, path) allows the submodule config to be read from _some_
+ * .gitmodules file. We store this information the first time we find a
+ * superproject commit that points to the submodule, but this is
+ * arbitrary - we can choose any (super_oid, path) that matches the
+ * submodule's name.
+ *
+ * NEEDSWORK: Storing an arbitrary commit is undesirable because we can't
+ * guarantee that we're reading the commit that the user would expect. A better
+ * scheme would be to just fetch a submodule by its name. This requires two
+ * steps:
+ * - Create a function that behaves like repo_submodule_init(), but accepts a
+ * submodule name instead of treeish_name and path. This should be easy
+ * because repo_submodule_init() internally uses the submodule's name.
+ *
+ * - Replace most instances of 'struct submodule' (which is the .gitmodules
+ * config) with just the submodule name. This is OK because we expect
+ * submodule settings to be stored in .git/config (via "git submodule init"),
+ * not .gitmodules. This also lets us delete get_non_gitmodules_submodule(),
+ * which constructs a bogus 'struct submodule' for the sake of giving a
+ * placeholder name to a gitlink.
*/
struct changed_submodule_data {
+ /*
+ * The first superproject commit in the rev walk that points to
+ * the submodule.
+ */
+ const struct object_id *super_oid;
+ /*
+ * Path to the submodule in the superproject commit referenced
+ * by 'super_oid'.
+ */
+ char *path;
/* The submodule commits that have changed in the rev walk. */
struct oid_array new_commits;
};
@@ -818,6 +849,7 @@ struct changed_submodule_data {
static void changed_submodule_data_clear(struct changed_submodule_data *cs_data)
{
oid_array_clear(&cs_data->new_commits);
+ free(cs_data->path);
}
static void collect_changed_submodules_cb(struct diff_queue_struct *q,
@@ -862,9 +894,14 @@ static void collect_changed_submodules_cb(struct diff_queue_struct *q,
continue;
item = string_list_insert(changed, name);
- if (!item->util)
+ if (item->util)
+ cs_data = item->util;
+ else {
item->util = xcalloc(1, sizeof(struct changed_submodule_data));
- cs_data = item->util;
+ cs_data = item->util;
+ cs_data->super_oid = commit_oid;
+ cs_data->path = xstrdup(p->two->path);
+ }
oid_array_append(&cs_data->new_commits, &p->two->oid);
}
}
@@ -1253,14 +1290,36 @@ void check_for_new_submodule_commits(struct object_id *oid)
oid_array_append(&ref_tips_after_fetch, oid);
}
+/*
+ * Returns 1 if there is at least one submodule gitdir in
+ * $GIT_DIR/modules and 0 otherwise. This follows
+ * submodule_name_to_gitdir(), which looks for submodules in
+ * $GIT_DIR/modules, not $GIT_COMMON_DIR.
+ *
+ * A submodule can be moved to $GIT_DIR/modules manually by running "git
+ * submodule absorbgitdirs", or it may be initialized there by "git
+ * submodule update".
+ */
+static int repo_has_absorbed_submodules(struct repository *r)
+{
+ int ret;
+ struct strbuf buf = STRBUF_INIT;
+
+ strbuf_repo_git_path(&buf, r, "modules/");
+ ret = file_exists(buf.buf) && !is_empty_dir(buf.buf);
+ strbuf_release(&buf);
+ return ret;
+}
+
static void calculate_changed_submodule_paths(struct repository *r,
struct string_list *changed_submodule_names)
{
struct strvec argv = STRVEC_INIT;
struct string_list_item *name;
- /* No need to check if there are no submodules configured */
- if (!submodule_from_path(r, NULL, NULL))
+ /* No need to check if no submodules would be fetched */
+ if (!submodule_from_path(r, NULL, NULL) &&
+ !repo_has_absorbed_submodules(r))
return;
strvec_push(&argv, "--"); /* argv[0] program name */
@@ -1333,7 +1392,16 @@ int submodule_touches_in_range(struct repository *r,
}
struct submodule_parallel_fetch {
- int count;
+ /*
+ * The index of the last index entry processed by
+ * get_fetch_task_from_index().
+ */
+ int index_count;
+ /*
+ * The index of the last string_list entry processed by
+ * get_fetch_task_from_changed().
+ */
+ int changed_count;
struct strvec args;
struct repository *r;
const char *prefix;
@@ -1342,7 +1410,16 @@ struct submodule_parallel_fetch {
int quiet;
int result;
+ /*
+ * Names of submodules that have new commits. Generated by
+ * walking the newly fetched superproject commits.
+ */
struct string_list changed_submodule_names;
+ /*
+ * Names of submodules that have already been processed. Lets us
+ * avoid fetching the same submodule more than once.
+ */
+ struct string_list seen_submodule_names;
/* Pending fetches by OIDs */
struct fetch_task **oid_fetch_tasks;
@@ -1353,6 +1430,7 @@ struct submodule_parallel_fetch {
#define SPF_INIT { \
.args = STRVEC_INIT, \
.changed_submodule_names = STRING_LIST_INIT_DUP, \
+ .seen_submodule_names = STRING_LIST_INIT_DUP, \
.submodules_with_errors = STRBUF_INIT, \
}
@@ -1390,6 +1468,7 @@ struct fetch_task {
const struct submodule *sub;
unsigned free_sub : 1; /* Do we need to free the submodule? */
const char *default_argv; /* The default fetch mode. */
+ struct strvec git_args; /* Args for the child git process. */
struct oid_array *commits; /* Ensure these commits are fetched */
};
@@ -1425,6 +1504,8 @@ static void fetch_task_release(struct fetch_task *p)
if (p->repo)
repo_clear(p->repo);
FREE_AND_NULL(p->repo);
+
+ strvec_clear(&p->git_args);
}
static struct repository *get_submodule_repo_for(struct repository *r,
@@ -1463,6 +1544,9 @@ static struct fetch_task *fetch_task_create(struct submodule_parallel_fetch *spf
task->free_sub = 1;
}
+ if (string_list_lookup(&spf->seen_submodule_names, task->sub->name))
+ goto cleanup;
+
switch (get_fetch_recurse_config(task->sub, spf))
{
default:
@@ -1493,10 +1577,12 @@ static struct fetch_task *fetch_task_create(struct submodule_parallel_fetch *spf
}
static struct fetch_task *
-get_fetch_task(struct submodule_parallel_fetch *spf, struct strbuf *err)
+get_fetch_task_from_index(struct submodule_parallel_fetch *spf,
+ struct strbuf *err)
{
- for (; spf->count < spf->r->index->cache_nr; spf->count++) {
- const struct cache_entry *ce = spf->r->index->cache[spf->count];
+ for (; spf->index_count < spf->r->index->cache_nr; spf->index_count++) {
+ const struct cache_entry *ce =
+ spf->r->index->cache[spf->index_count];
struct fetch_task *task;
if (!S_ISGITLINK(ce->ce_mode))
@@ -1511,7 +1597,7 @@ get_fetch_task(struct submodule_parallel_fetch *spf, struct strbuf *err)
strbuf_addf(err, _("Fetching submodule %s%s\n"),
spf->prefix, ce->name);
- spf->count++;
+ spf->index_count++;
return task;
} else {
struct strbuf empty_submodule_path = STRBUF_INIT;
@@ -1539,11 +1625,83 @@ get_fetch_task(struct submodule_parallel_fetch *spf, struct strbuf *err)
return NULL;
}
+static struct fetch_task *
+get_fetch_task_from_changed(struct submodule_parallel_fetch *spf,
+ struct strbuf *err)
+{
+ for (; spf->changed_count < spf->changed_submodule_names.nr;
+ spf->changed_count++) {
+ struct string_list_item item =
+ spf->changed_submodule_names.items[spf->changed_count];
+ struct changed_submodule_data *cs_data = item.util;
+ struct fetch_task *task;
+
+ if (!is_tree_submodule_active(spf->r, cs_data->super_oid,cs_data->path))
+ continue;
+
+ task = fetch_task_create(spf, cs_data->path,
+ cs_data->super_oid);
+ if (!task)
+ continue;
+
+ if (!task->repo) {
+ strbuf_addf(err, _("Could not access submodule '%s' at commit %s\n"),
+ cs_data->path,
+ find_unique_abbrev(cs_data->super_oid, DEFAULT_ABBREV));
+
+ fetch_task_release(task);
+ free(task);
+ continue;
+ }
+
+ if (!spf->quiet)
+ strbuf_addf(err,
+ _("Fetching submodule %s%s at commit %s\n"),
+ spf->prefix, task->sub->path,
+ find_unique_abbrev(cs_data->super_oid,
+ DEFAULT_ABBREV));
+
+ spf->changed_count++;
+ /*
+ * NEEDSWORK: Submodules set/unset a value for
+ * core.worktree when they are populated/unpopulated by
+ * "git checkout" (and similar commands, see
+ * submodule_move_head() and
+ * connect_work_tree_and_git_dir()), but if the
+ * submodule is unpopulated in another way (e.g. "git
+ * rm", "rm -r"), core.worktree will still be set even
+ * though the directory doesn't exist, and the child
+ * process will crash while trying to chdir into the
+ * nonexistent directory.
+ *
+ * In this case, we know that the submodule has no
+ * working tree, so we can work around this by
+ * setting "--work-tree=." (--bare does not work because
+ * worktree settings take precedence over bare-ness).
+ * However, this is not necessarily true in other cases,
+ * so a generalized solution is still necessary.
+ *
+ * Possible solutions:
+ * - teach "git [add|rm]" to unset core.worktree and
+ * discourage users from removing submodules without
+ * using a Git command.
+ * - teach submodule child processes to ignore stale
+ * core.worktree values.
+ */
+ strvec_push(&task->git_args, "--work-tree=.");
+ return task;
+ }
+ return NULL;
+}
+
static int get_next_submodule(struct child_process *cp, struct strbuf *err,
void *data, void **task_cb)
{
struct submodule_parallel_fetch *spf = data;
- struct fetch_task *task = get_fetch_task(spf, err);
+ struct fetch_task *task =
+ get_fetch_task_from_index(spf, err);
+ if (!task)
+ task = get_fetch_task_from_changed(spf, err);
if (task) {
struct strbuf submodule_prefix = STRBUF_INIT;
@@ -1553,6 +1711,8 @@ static int get_next_submodule(struct child_process *cp, struct strbuf *err,
prepare_submodule_repo_env_in_gitdir(&cp->env_array);
cp->git_cmd = 1;
strvec_init(&cp->args);
+ if (task->git_args.nr)
+ strvec_pushv(&cp->args, task->git_args.v);
strvec_pushv(&cp->args, spf->args.v);
strvec_push(&cp->args, task->default_argv);
strvec_push(&cp->args, "--submodule-prefix");
@@ -1564,6 +1724,7 @@ static int get_next_submodule(struct child_process *cp, struct strbuf *err,
*task_cb = task;
strbuf_release(&submodule_prefix);
+ string_list_insert(&spf->seen_submodule_names, task->sub->name);
return 1;
}
@@ -1678,11 +1839,11 @@ out:
return 0;
}
-int fetch_populated_submodules(struct repository *r,
- const struct strvec *options,
- const char *prefix, int command_line_option,
- int default_option,
- int quiet, int max_parallel_jobs)
+int fetch_submodules(struct repository *r,
+ const struct strvec *options,
+ const char *prefix, int command_line_option,
+ int default_option,
+ int quiet, int max_parallel_jobs)
{
int i;
struct submodule_parallel_fetch spf = SPF_INIT;