author     Junio C Hamano <gitster@pobox.com>   2018-02-13 13:39:04 -0800
committer  Junio C Hamano <gitster@pobox.com>   2018-02-13 13:39:04 -0800
commit     6bed209a20a06f2d6b7142216dabff456de798e1
tree       ee1b6980108adb6cd4cb03c5749256434f01a809
parent     f3d618d2bf4099abe99babc8b56dcd483c5eec71
parent     3aa6694fb3d38a3afe623ccbdf59fb15f338a94d
download   git-6bed209a20a06f2d6b7142216dabff456de798e1.tar.gz
Merge branch 'jh/partial-clone'
The machinery to clone & fetch, which in turn involves packing and
unpacking objects, has been taught how to omit certain objects using
the filtering mechanism introduced by the jh/object-filtering topic,
and to mark the resulting pack as a promisor pack so that missing
objects are tolerated, taking advantage of the mechanism introduced
by the jh/fsck-promisors topic.
* jh/partial-clone:
t5616: test bulk prefetch after partial fetch
fetch: inherit filter-spec from partial clone
t5616: end-to-end tests for partial clone
fetch-pack: restore save_commit_buffer after use
unpack-trees: batch fetching of missing blobs
clone: partial clone
partial-clone: define partial clone settings in config
fetch: support filters
fetch: refactor calculation of remote list
fetch-pack: test support excluding large blobs
fetch-pack: add --no-filter
fetch-pack, index-pack, transport: partial clone
upload-pack: add object filtering for partial clone
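
The shortlog above adds up to a usable partial-clone workflow. As a point
of reference for the diff that follows, here is a minimal sketch of that
flow, modeled on the new t5601/t5616 tests; the repository names
(srv.bare, pc1) and the file:// URL are placeholders.

    # Server side: let upload-pack honor object filters; allowing any
    # sha1 in "want" lets the client later fetch individual missing
    # objects on demand, as the tests do.
    git -C srv.bare config uploadpack.allowfilter 1
    git -C srv.bare config uploadpack.allowanysha1inwant 1

    # Client side: a partial clone that omits all blobs up front.
    git clone --filter=blob:none "file:///path/to/srv.bare" pc1

    # The clone registers the promisor remote and records the
    # filter-spec as the default for later fetches.
    git -C pc1 config --local extensions.partialclone     # origin
    git -C pc1 config --local core.partialclonefilter     # blob:none

The initial checkout still succeeds because the blobs needed for HEAD are
fetched back from the promisor remote on demand.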
-rw-r--r-- | Documentation/config.txt                          |   4
-rw-r--r-- | Documentation/technical/pack-protocol.txt         |   8
-rw-r--r-- | Documentation/technical/protocol-capabilities.txt |   8
-rw-r--r-- | builtin/clone.c                                   |  22
-rw-r--r-- | builtin/fetch-pack.c                              |   8
-rw-r--r-- | builtin/fetch.c                                   |  83
-rw-r--r-- | builtin/rev-list.c                                |   2
-rw-r--r-- | cache.h                                           |   1
-rw-r--r-- | config.c                                          |   5
-rw-r--r-- | connected.c                                       |   2
-rw-r--r-- | environment.c                                     |   1
-rw-r--r-- | fetch-object.c                                    |  26
-rw-r--r-- | fetch-object.h                                    |   5
-rw-r--r-- | fetch-pack.c                                      |  17
-rw-r--r-- | fetch-pack.h                                      |   2
-rw-r--r-- | list-objects-filter-options.c                     |  92
-rw-r--r-- | list-objects-filter-options.h                     |  18
-rw-r--r-- | remote-curl.c                                     |   6
-rwxr-xr-x | t/t5500-fetch-pack.sh                             |  63
-rwxr-xr-x | t/t5601-clone.sh                                  | 101
-rwxr-xr-x | t/t5616-partial-clone.sh                          | 146
-rw-r--r-- | transport-helper.c                                |   5
-rw-r--r-- | transport.c                                       |   4
-rw-r--r-- | transport.h                                       |   5
-rw-r--r-- | unpack-trees.c                                    |  22
-rw-r--r-- | upload-pack.c                                     |  31
26 files changed, 657 insertions, 30 deletions
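
Most of the builtin/fetch.c churn in the diffstat implements the
inheritance rules spelled out in fetch_one_setup_partial() below: fetches
from the promisor remote reuse the recorded filter-spec unless one is
given explicitly, and --no-filter turns filtering off for that fetch.
Continuing the sketch above (same placeholder names; the blob:limit value
is only illustrative):

    # A plain fetch from the promisor remote inherits the filter-spec
    # recorded in core.partialclonefilter, so new blobs stay omitted.
    git -C pc1 fetch origin

    # An explicit filter-spec overrides the default for this fetch only.
    git -C pc1 fetch --filter=blob:limit=1k origin

    # --no-filter disables filtering, so the commits fetched this time
    # arrive with all of their objects.
    git -C pc1 fetch --no-filter origin

    # Objects that are still missing locally are reported with a "?"
    # prefix by rev-list's --missing=print.
    git -C pc1 rev-list --objects --missing=print HEAD | grep "^?"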
diff --git a/Documentation/config.txt b/Documentation/config.txt index 0e25b2c92b..f57e9cf10c 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -3343,6 +3343,10 @@ uploadpack.packObjectsHook:: was run. I.e., `upload-pack` will feed input intended for `pack-objects` to the hook, and expects a completed packfile on stdout. + +uploadpack.allowFilter:: + If this option is set, `upload-pack` will advertise partial + clone and partial fetch object filtering. + Note that this configuration variable is ignored if it is seen in the repository-level config (this is a safety measure against fetching from diff --git a/Documentation/technical/pack-protocol.txt b/Documentation/technical/pack-protocol.txt index cd31edc91e..7fee6b780a 100644 --- a/Documentation/technical/pack-protocol.txt +++ b/Documentation/technical/pack-protocol.txt @@ -241,6 +241,7 @@ out of what the server said it could do with the first 'want' line. upload-request = want-list *shallow-line *1depth-request + [filter-request] flush-pkt want-list = first-want @@ -256,6 +257,8 @@ out of what the server said it could do with the first 'want' line. additional-want = PKT-LINE("want" SP obj-id) depth = 1*DIGIT + + filter-request = PKT-LINE("filter" SP filter-spec) ---- Clients MUST send all the obj-ids it wants from the reference @@ -278,6 +281,11 @@ complete those commits. Commits whose parents are not received as a result are defined as shallow and marked as such in the server. This information is sent back to the client in the next step. +The client can optionally request that pack-objects omit various +objects from the packfile using one of several filtering techniques. +These are intended for use with partial clone and partial fetch +operations. See `rev-list` for possible "filter-spec" values. + Once all the 'want's and 'shallow's (and optional 'deepen') are transferred, clients MUST send a flush-pkt, to tell the server side that it is done sending the list. diff --git a/Documentation/technical/protocol-capabilities.txt b/Documentation/technical/protocol-capabilities.txt index 26dcc6f502..332d209b58 100644 --- a/Documentation/technical/protocol-capabilities.txt +++ b/Documentation/technical/protocol-capabilities.txt @@ -309,3 +309,11 @@ to accept a signed push certificate, and asks the <nonce> to be included in the push certificate. A send-pack client MUST NOT send a push-cert packet unless the receive-pack server advertises this capability. + +filter +------ + +If the upload-pack server advertises the 'filter' capability, +fetch-pack may send "filter" commands to request a partial clone +or partial fetch and request that the server omit various objects +from the packfile. 
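
The protocol pieces documented above can be observed on the wire. A rough
sketch, assuming the server from the earlier example has
uploadpack.allowFilter set (the clone target name is a placeholder); the
trace is only inspected with grep because its exact formatting is
incidental:

    # upload-pack advertises the "filter" capability, and fetch-pack
    # answers with a "filter <filter-spec>" request line; both show up
    # in a packet trace of the clone.
    GIT_TRACE_PACKET="$(pwd)/trace" git clone --filter=blob:none "file:///path/to/srv.bare" pc-trace
    grep filter trace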
diff --git a/builtin/clone.c b/builtin/clone.c index 284651797e..101c27a593 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -26,6 +26,7 @@ #include "run-command.h" #include "connected.h" #include "packfile.h" +#include "list-objects-filter-options.h" /* * Overall FIXMEs: @@ -60,6 +61,7 @@ static struct string_list option_optional_reference = STRING_LIST_INIT_NODUP; static int option_dissociate; static int max_jobs = -1; static struct string_list option_recurse_submodules = STRING_LIST_INIT_NODUP; +static struct list_objects_filter_options filter_options; static int recurse_submodules_cb(const struct option *opt, const char *arg, int unset) @@ -135,6 +137,7 @@ static struct option builtin_clone_options[] = { TRANSPORT_FAMILY_IPV4), OPT_SET_INT('6', "ipv6", &family, N_("use IPv6 addresses only"), TRANSPORT_FAMILY_IPV6), + OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), OPT_END() }; @@ -893,6 +896,8 @@ int cmd_clone(int argc, const char **argv, const char *prefix) struct refspec *refspec; const char *fetch_pattern; + fetch_if_missing = 0; + packet_trace_identity("clone"); argc = parse_options(argc, argv, prefix, builtin_clone_options, builtin_clone_usage, 0); @@ -1090,6 +1095,8 @@ int cmd_clone(int argc, const char **argv, const char *prefix) warning(_("--shallow-since is ignored in local clones; use file:// instead.")); if (option_not.nr) warning(_("--shallow-exclude is ignored in local clones; use file:// instead.")); + if (filter_options.choice) + warning(_("--filter is ignored in local clones; use file:// instead.")); if (!access(mkpath("%s/shallow", path), F_OK)) { if (option_local > 0) warning(_("source repository is shallow, ignoring --local")); @@ -1118,7 +1125,13 @@ int cmd_clone(int argc, const char **argv, const char *prefix) transport_set_option(transport, TRANS_OPT_UPLOADPACK, option_upload_pack); - if (transport->smart_options && !deepen) + if (filter_options.choice) { + transport_set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER, + filter_options.filter_spec); + transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1"); + } + + if (transport->smart_options && !deepen && !filter_options.choice) transport->smart_options->check_self_contained_and_connected = 1; refs = transport_get_remote_refs(transport); @@ -1178,13 +1191,17 @@ int cmd_clone(int argc, const char **argv, const char *prefix) write_refspec_config(src_ref_prefix, our_head_points_at, remote_head_points_at, &branch_top); + if (filter_options.choice) + partial_clone_register("origin", &filter_options); + if (is_local) clone_local(path, git_dir); else if (refs && complete_refs_before_fetch) transport_fetch_refs(transport, mapped_refs); update_remote_refs(refs, mapped_refs, remote_head_points_at, - branch_top.buf, reflog_msg.buf, transport, !is_local); + branch_top.buf, reflog_msg.buf, transport, + !is_local && !filter_options.choice); update_head(our_head_points_at, remote_head, reflog_msg.buf); @@ -1205,6 +1222,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix) } junk_mode = JUNK_LEAVE_REPO; + fetch_if_missing = 1; err = checkout(submodule_progress); strbuf_release(&reflog_msg); diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c index 15eeed7b17..a7bc1366ab 100644 --- a/builtin/fetch-pack.c +++ b/builtin/fetch-pack.c @@ -153,6 +153,14 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix) args.no_dependents = 1; continue; } + if (skip_prefix(arg, ("--" CL_ARG__FILTER "="), &arg)) { + parse_list_objects_filter(&args.filter_options, arg); + continue; + } + if 
(!strcmp(arg, ("--no-" CL_ARG__FILTER))) { + list_objects_filter_set_no_filter(&args.filter_options); + continue; + } usage(fetch_pack_usage); } if (deepen_not.nr) diff --git a/builtin/fetch.c b/builtin/fetch.c index 7bbcd26faf..8ee998ea2e 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -19,6 +19,7 @@ #include "argv-array.h" #include "utf8.h" #include "packfile.h" +#include "list-objects-filter-options.h" static const char * const builtin_fetch_usage[] = { N_("git fetch [<options>] [<repository> [<refspec>...]]"), @@ -56,6 +57,7 @@ static int recurse_submodules_default = RECURSE_SUBMODULES_ON_DEMAND; static int shown_url = 0; static int refmap_alloc, refmap_nr; static const char **refmap_array; +static struct list_objects_filter_options filter_options; static int git_fetch_config(const char *k, const char *v, void *cb) { @@ -161,6 +163,7 @@ static struct option builtin_fetch_options[] = { TRANSPORT_FAMILY_IPV4), OPT_SET_INT('6', "ipv6", &family, N_("use IPv6 addresses only"), TRANSPORT_FAMILY_IPV6), + OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), OPT_END() }; @@ -1045,6 +1048,11 @@ static struct transport *prepare_transport(struct remote *remote, int deepen) set_option(transport, TRANS_OPT_DEEPEN_RELATIVE, "yes"); if (update_shallow) set_option(transport, TRANS_OPT_UPDATE_SHALLOW, "yes"); + if (filter_options.choice) { + set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER, + filter_options.filter_spec); + set_option(transport, TRANS_OPT_FROM_PROMISOR, "1"); + } return transport; } @@ -1265,6 +1273,56 @@ static int fetch_multiple(struct string_list *list) return result; } +/* + * Fetching from the promisor remote should use the given filter-spec + * or inherit the default filter-spec from the config. + */ +static inline void fetch_one_setup_partial(struct remote *remote) +{ + /* + * Explicit --no-filter argument overrides everything, regardless + * of any prior partial clones and fetches. + */ + if (filter_options.no_filter) + return; + + /* + * If no prior partial clone/fetch and the current fetch DID NOT + * request a partial-fetch, do a normal fetch. + */ + if (!repository_format_partial_clone && !filter_options.choice) + return; + + /* + * If this is the FIRST partial-fetch request, we enable partial + * on this repo and remember the given filter-spec as the default + * for subsequent fetches to this remote. + */ + if (!repository_format_partial_clone && filter_options.choice) { + partial_clone_register(remote->name, &filter_options); + return; + } + + /* + * We are currently limited to only ONE promisor remote and only + * allow partial-fetches from the promisor remote. + */ + if (strcmp(remote->name, repository_format_partial_clone)) { + if (filter_options.choice) + die(_("--filter can only be used with the remote configured in core.partialClone")); + return; + } + + /* + * Do a partial-fetch from the promisor remote using either the + * explicitly given filter-spec or inherit the filter-spec from + * the config. 
+ */ + if (!filter_options.choice) + partial_clone_get_default_filter_spec(&filter_options); + return; +} + static int fetch_one(struct remote *remote, int argc, const char **argv) { static const char **refs = NULL; @@ -1320,12 +1378,14 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) { int i; struct string_list list = STRING_LIST_INIT_DUP; - struct remote *remote; + struct remote *remote = NULL; int result = 0; struct argv_array argv_gc_auto = ARGV_ARRAY_INIT; packet_trace_identity("fetch"); + fetch_if_missing = 0; + /* Record the command line for the reflog */ strbuf_addstr(&default_rla, "fetch"); for (i = 1; i < argc; i++) @@ -1359,23 +1419,23 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) if (depth || deepen_since || deepen_not.nr) deepen = 1; + if (filter_options.choice && !repository_format_partial_clone) + die("--filter can only be used when extensions.partialClone is set"); + if (all) { if (argc == 1) die(_("fetch --all does not take a repository argument")); else if (argc > 1) die(_("fetch --all does not make sense with refspecs")); (void) for_each_remote(get_one_remote_for_fetch, &list); - result = fetch_multiple(&list); } else if (argc == 0) { /* No arguments -- use default remote */ remote = remote_get(NULL); - result = fetch_one(remote, argc, argv); } else if (multiple) { /* All arguments are assumed to be remotes or groups */ for (i = 0; i < argc; i++) if (!add_remote_or_group(argv[i], &list)) die(_("No such remote or remote group: %s"), argv[i]); - result = fetch_multiple(&list); } else { /* Single remote or group */ (void) add_remote_or_group(argv[0], &list); @@ -1383,14 +1443,25 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) /* More than one remote */ if (argc > 1) die(_("Fetching a group and specifying refspecs does not make sense")); - result = fetch_multiple(&list); } else { /* Zero or one remotes */ remote = remote_get(argv[0]); - result = fetch_one(remote, argc-1, argv+1); + argc--; + argv++; } } + if (remote) { + if (filter_options.choice || repository_format_partial_clone) + fetch_one_setup_partial(remote); + result = fetch_one(remote, argc, argv); + } else { + if (filter_options.choice) + die(_("--filter can only be used with the remote configured in core.partialClone")); + /* TODO should this also die if we have a previous partial-clone? */ + result = fetch_multiple(&list); + } + if (!result && (recurse_submodules != RECURSE_SUBMODULES_OFF)) { struct argv_array options = ARGV_ARRAY_INIT; diff --git a/builtin/rev-list.c b/builtin/rev-list.c index e27aa1fc07..48300d9e11 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -460,7 +460,7 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) continue; } if (!strcmp(arg, ("--no-" CL_ARG__FILTER))) { - list_objects_filter_release(&filter_options); + list_objects_filter_set_no_filter(&filter_options); continue; } if (!strcmp(arg, "--filter-print-omitted")) { @@ -915,6 +915,7 @@ extern int grafts_replace_parents; #define GIT_REPO_VERSION_READ 1 extern int repository_format_precious_objects; extern char *repository_format_partial_clone; +extern const char *core_partial_clone_filter_default; struct repository_format { int version; @@ -1251,6 +1251,11 @@ static int git_default_core_config(const char *var, const char *value) return 0; } + if (!strcmp(var, "core.partialclonefilter")) { + return git_config_string(&core_partial_clone_filter_default, + var, value); + } + /* Add other config variables here and to Documentation/config.txt. 
*/ return 0; } diff --git a/connected.c b/connected.c index 4a47f33270..91feb78815 100644 --- a/connected.c +++ b/connected.c @@ -56,6 +56,8 @@ int check_connected(oid_iterate_fn fn, void *cb_data, argv_array_push(&rev_list.args,"rev-list"); argv_array_push(&rev_list.args, "--objects"); argv_array_push(&rev_list.args, "--stdin"); + if (repository_format_partial_clone) + argv_array_push(&rev_list.args, "--exclude-promisor-objects"); argv_array_push(&rev_list.args, "--not"); argv_array_push(&rev_list.args, "--all"); argv_array_push(&rev_list.args, "--quiet"); diff --git a/environment.c b/environment.c index 835bb75c4e..0adc71ebe5 100644 --- a/environment.c +++ b/environment.c @@ -28,6 +28,7 @@ int warn_on_object_refname_ambiguity = 1; int ref_paranoia = -1; int repository_format_precious_objects; char *repository_format_partial_clone; +const char *core_partial_clone_filter_default; const char *git_commit_encoding; const char *git_log_output_encoding; const char *apply_default_whitespace; diff --git a/fetch-object.c b/fetch-object.c index 258fcfac75..853624f811 100644 --- a/fetch-object.c +++ b/fetch-object.c @@ -5,11 +5,10 @@ #include "transport.h" #include "fetch-object.h" -void fetch_object(const char *remote_name, const unsigned char *sha1) +static void fetch_refs(const char *remote_name, struct ref *ref) { struct remote *remote; struct transport *transport; - struct ref *ref; int original_fetch_if_missing = fetch_if_missing; fetch_if_missing = 0; @@ -18,10 +17,29 @@ void fetch_object(const char *remote_name, const unsigned char *sha1) die(_("Remote with no URL")); transport = transport_get(remote, remote->url[0]); - ref = alloc_ref(sha1_to_hex(sha1)); - hashcpy(ref->old_oid.hash, sha1); transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1"); transport_set_option(transport, TRANS_OPT_NO_DEPENDENTS, "1"); transport_fetch_refs(transport, ref); fetch_if_missing = original_fetch_if_missing; } + +void fetch_object(const char *remote_name, const unsigned char *sha1) +{ + struct ref *ref = alloc_ref(sha1_to_hex(sha1)); + hashcpy(ref->old_oid.hash, sha1); + fetch_refs(remote_name, ref); +} + +void fetch_objects(const char *remote_name, const struct oid_array *to_fetch) +{ + struct ref *ref = NULL; + int i; + + for (i = 0; i < to_fetch->nr; i++) { + struct ref *new_ref = alloc_ref(oid_to_hex(&to_fetch->oid[i])); + oidcpy(&new_ref->old_oid, &to_fetch->oid[i]); + new_ref->next = ref; + ref = new_ref; + } + fetch_refs(remote_name, ref); +} diff --git a/fetch-object.h b/fetch-object.h index f371300c88..4b269d07ed 100644 --- a/fetch-object.h +++ b/fetch-object.h @@ -1,6 +1,11 @@ #ifndef FETCH_OBJECT_H #define FETCH_OBJECT_H +#include "sha1-array.h" + extern void fetch_object(const char *remote_name, const unsigned char *sha1); +extern void fetch_objects(const char *remote_name, + const struct oid_array *to_fetch); + #endif diff --git a/fetch-pack.c b/fetch-pack.c index 7aa1f58995..8253d746e0 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -29,6 +29,7 @@ static int deepen_not_ok; static int fetch_fsck_objects = -1; static int transfer_fsck_objects = -1; static int agent_supported; +static int server_supports_filtering; static struct lock_file shallow_lock; static const char *alternate_shallow_file; @@ -379,6 +380,8 @@ static int find_common(struct fetch_pack_args *args, if (deepen_not_ok) strbuf_addstr(&c, " deepen-not"); if (agent_supported) strbuf_addf(&c, " agent=%s", git_user_agent_sanitized()); + if (args->filter_options.choice) + strbuf_addstr(&c, " filter"); packet_buf_write(&req_buf, 
"want %s%s\n", remote_hex, c.buf); strbuf_release(&c); } else @@ -407,6 +410,9 @@ static int find_common(struct fetch_pack_args *args, packet_buf_write(&req_buf, "deepen-not %s", s->string); } } + if (server_supports_filtering && args->filter_options.choice) + packet_buf_write(&req_buf, "filter %s", + args->filter_options.filter_spec); packet_buf_flush(&req_buf); state_len = req_buf.len; @@ -711,6 +717,7 @@ static int everything_local(struct fetch_pack_args *args, { struct ref *ref; int retval; + int old_save_commit_buffer = save_commit_buffer; timestamp_t cutoff = 0; save_commit_buffer = 0; @@ -781,6 +788,9 @@ static int everything_local(struct fetch_pack_args *args, print_verbose(args, _("already have %s (%s)"), oid_to_hex(remote), ref->name); } + + save_commit_buffer = old_save_commit_buffer; + return retval; } @@ -970,6 +980,13 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args, else prefer_ofs_delta = 0; + if (server_supports("filter")) { + server_supports_filtering = 1; + print_verbose(args, _("Server supports filter")); + } else if (args->filter_options.choice) { + warning("filtering not recognized by server, ignoring"); + } + if ((agent_feature = server_feature_value("agent", &agent_len))) { agent_supported = 1; if (agent_len) diff --git a/fetch-pack.h b/fetch-pack.h index aeac152644..3e224a1822 100644 --- a/fetch-pack.h +++ b/fetch-pack.h @@ -3,6 +3,7 @@ #include "string-list.h" #include "run-command.h" +#include "list-objects-filter-options.h" struct oid_array; @@ -12,6 +13,7 @@ struct fetch_pack_args { int depth; const char *deepen_since; const struct string_list *deepen_not; + struct list_objects_filter_options filter_options; unsigned deepen_relative:1; unsigned quiet:1; unsigned keep_pack:1; diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index 4c5b34e949..6a3cc985c4 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -21,29 +21,36 @@ * subordinate commands when necessary. We also "intern" the arg for * the convenience of the current command. 
*/ -int parse_list_objects_filter(struct list_objects_filter_options *filter_options, - const char *arg) +static int gently_parse_list_objects_filter( + struct list_objects_filter_options *filter_options, + const char *arg, + struct strbuf *errbuf) { const char *v0; - if (filter_options->choice) - die(_("multiple object filter types cannot be combined")); + if (filter_options->choice) { + if (errbuf) { + strbuf_init(errbuf, 0); + strbuf_addstr( + errbuf, + _("multiple filter-specs cannot be combined")); + } + return 1; + } filter_options->filter_spec = strdup(arg); if (!strcmp(arg, "blob:none")) { filter_options->choice = LOFC_BLOB_NONE; return 0; - } - if (skip_prefix(arg, "blob:limit=", &v0)) { - if (!git_parse_ulong(v0, &filter_options->blob_limit_value)) - die(_("invalid filter-spec expression '%s'"), arg); - filter_options->choice = LOFC_BLOB_LIMIT; - return 0; - } + } else if (skip_prefix(arg, "blob:limit=", &v0)) { + if (git_parse_ulong(v0, &filter_options->blob_limit_value)) { + filter_options->choice = LOFC_BLOB_LIMIT; + return 0; + } - if (skip_prefix(arg, "sparse:oid=", &v0)) { + } else if (skip_prefix(arg, "sparse:oid=", &v0)) { struct object_context oc; struct object_id sparse_oid; @@ -57,15 +64,27 @@ int parse_list_objects_filter(struct list_objects_filter_options *filter_options filter_options->sparse_oid_value = oiddup(&sparse_oid); filter_options->choice = LOFC_SPARSE_OID; return 0; - } - if (skip_prefix(arg, "sparse:path=", &v0)) { + } else if (skip_prefix(arg, "sparse:path=", &v0)) { filter_options->choice = LOFC_SPARSE_PATH; filter_options->sparse_path_value = strdup(v0); return 0; } - die(_("invalid filter-spec expression '%s'"), arg); + if (errbuf) { + strbuf_init(errbuf, 0); + strbuf_addf(errbuf, "invalid filter-spec '%s'", arg); + } + memset(filter_options, 0, sizeof(*filter_options)); + return 1; +} + +int parse_list_objects_filter(struct list_objects_filter_options *filter_options, + const char *arg) +{ + struct strbuf buf = STRBUF_INIT; + if (gently_parse_list_objects_filter(filter_options, arg, &buf)) + die("%s", buf.buf); return 0; } @@ -75,7 +94,7 @@ int opt_parse_list_objects_filter(const struct option *opt, struct list_objects_filter_options *filter_options = opt->value; if (unset || !arg) { - list_objects_filter_release(filter_options); + list_objects_filter_set_no_filter(filter_options); return 0; } @@ -90,3 +109,44 @@ void list_objects_filter_release( free(filter_options->sparse_path_value); memset(filter_options, 0, sizeof(*filter_options)); } + +void partial_clone_register( + const char *remote, + const struct list_objects_filter_options *filter_options) +{ + /* + * Record the name of the partial clone remote in the + * config and in the global variable -- the latter is + * used throughout to indicate that partial clone is + * enabled and to expect missing objects. + */ + if (repository_format_partial_clone && + *repository_format_partial_clone && + strcmp(remote, repository_format_partial_clone)) + die(_("cannot change partial clone promisor remote")); + + git_config_set("core.repositoryformatversion", "1"); + git_config_set("extensions.partialclone", remote); + + repository_format_partial_clone = xstrdup(remote); + + /* + * Record the initial filter-spec in the config as + * the default for subsequent fetches from this remote. 
+ */ + core_partial_clone_filter_default = + xstrdup(filter_options->filter_spec); + git_config_set("core.partialclonefilter", + core_partial_clone_filter_default); +} + +void partial_clone_get_default_filter_spec( + struct list_objects_filter_options *filter_options) +{ + /* + * Parse default value, but silently ignore it if it is invalid. + */ + gently_parse_list_objects_filter(filter_options, + core_partial_clone_filter_default, + NULL); +} diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h index eea44a1a51..0000a61f82 100644 --- a/list-objects-filter-options.h +++ b/list-objects-filter-options.h @@ -31,6 +31,11 @@ struct list_objects_filter_options { enum list_objects_filter_choice choice; /* + * Choice is LOFC_DISABLED because "--no-filter" was requested. + */ + unsigned int no_filter : 1; + + /* * Parsed values (fields) from within the filter-spec. These are * choice-specific; not all values will be defined for any given * choice. @@ -58,4 +63,17 @@ int opt_parse_list_objects_filter(const struct option *opt, void list_objects_filter_release( struct list_objects_filter_options *filter_options); +static inline void list_objects_filter_set_no_filter( + struct list_objects_filter_options *filter_options) +{ + list_objects_filter_release(filter_options); + filter_options->no_filter = 1; +} + +void partial_clone_register( + const char *remote, + const struct list_objects_filter_options *filter_options); +void partial_clone_get_default_filter_spec( + struct list_objects_filter_options *filter_options); + #endif /* LIST_OBJECTS_FILTER_OPTIONS_H */ diff --git a/remote-curl.c b/remote-curl.c index 431839111a..6ec5352435 100644 --- a/remote-curl.c +++ b/remote-curl.c @@ -24,6 +24,7 @@ struct options { char *deepen_since; struct string_list deepen_not; struct string_list push_options; + char *filter; unsigned progress : 1, check_self_contained_and_connected : 1, cloning : 1, @@ -165,6 +166,9 @@ static int set_option(const char *name, const char *value) } else if (!strcmp(name, "no-dependents")) { options.no_dependents = 1; return 0; + } else if (!strcmp(name, "filter")) { + options.filter = xstrdup(value);; + return 0; } else { return 1 /* unsupported */; } @@ -834,6 +838,8 @@ static int fetch_git(struct discovery *heads, argv_array_push(&args, "--from-promisor"); if (options.no_dependents) argv_array_push(&args, "--no-dependents"); + if (options.filter) + argv_array_pushf(&args, "--filter=%s", options.filter); argv_array_push(&args, url.buf); for (i = 0; i < nr_heads; i++) { diff --git a/t/t5500-fetch-pack.sh b/t/t5500-fetch-pack.sh index 80a1a3239a..ec9ba9bf6e 100755 --- a/t/t5500-fetch-pack.sh +++ b/t/t5500-fetch-pack.sh @@ -755,4 +755,67 @@ test_expect_success 'fetching deepen' ' ) ' +test_expect_success 'filtering by size' ' + rm -rf server client && + test_create_repo server && + test_commit -C server one && + test_config -C server uploadpack.allowfilter 1 && + + test_create_repo client && + git -C client fetch-pack --filter=blob:limit=0 ../server HEAD && + + # Ensure that object is not inadvertently fetched + test_must_fail git -C client cat-file -e $(git hash-object server/one.t) +' + +test_expect_success 'filtering by size has no effect if support for it is not advertised' ' + rm -rf server client && + test_create_repo server && + test_commit -C server one && + + test_create_repo client && + git -C client fetch-pack --filter=blob:limit=0 ../server HEAD 2> err && + + # Ensure that object is fetched + git -C client cat-file -e $(git hash-object server/one.t) && + + 
test_i18ngrep "filtering not recognized by server" err +' + +fetch_filter_blob_limit_zero () { + SERVER="$1" + URL="$2" + + rm -rf "$SERVER" client && + test_create_repo "$SERVER" && + test_commit -C "$SERVER" one && + test_config -C "$SERVER" uploadpack.allowfilter 1 && + + git clone "$URL" client && + test_config -C client extensions.partialclone origin && + + test_commit -C "$SERVER" two && + + git -C client fetch --filter=blob:limit=0 origin HEAD:somewhere && + + # Ensure that commit is fetched, but blob is not + test_config -C client extensions.partialclone "arbitrary string" && + git -C client cat-file -e $(git -C "$SERVER" rev-parse two) && + test_must_fail git -C client cat-file -e $(git hash-object "$SERVER/two.t") +} + +test_expect_success 'fetch with --filter=blob:limit=0' ' + fetch_filter_blob_limit_zero server server +' + +. "$TEST_DIRECTORY"/lib-httpd.sh +start_httpd + +test_expect_success 'fetch with --filter=blob:limit=0 and HTTP' ' + fetch_filter_blob_limit_zero "$HTTPD_DOCUMENT_ROOT_PATH/server" "$HTTPD_URL/smart/server" +' + +stop_httpd + + test_done diff --git a/t/t5601-clone.sh b/t/t5601-clone.sh index 8c437bf872..0b62037744 100755 --- a/t/t5601-clone.sh +++ b/t/t5601-clone.sh @@ -628,4 +628,105 @@ test_expect_success 'clone on case-insensitive fs' ' ) ' +partial_clone () { + SERVER="$1" && + URL="$2" && + + rm -rf "$SERVER" client && + test_create_repo "$SERVER" && + test_commit -C "$SERVER" one && + HASH1=$(git hash-object "$SERVER/one.t") && + git -C "$SERVER" revert HEAD && + test_commit -C "$SERVER" two && + HASH2=$(git hash-object "$SERVER/two.t") && + test_config -C "$SERVER" uploadpack.allowfilter 1 && + test_config -C "$SERVER" uploadpack.allowanysha1inwant 1 && + + git clone --filter=blob:limit=0 "$URL" client && + + git -C client fsck && + + # Ensure that unneeded blobs are not inadvertently fetched. + test_config -C client extensions.partialclone "not a remote" && + test_must_fail git -C client cat-file -e "$HASH1" && + + # But this blob was fetched, because clone performs an initial checkout + git -C client cat-file -e "$HASH2" +} + +test_expect_success 'partial clone' ' + partial_clone server "file://$(pwd)/server" +' + +test_expect_success 'partial clone: warn if server does not support object filtering' ' + rm -rf server client && + test_create_repo server && + test_commit -C server one && + + git clone --filter=blob:limit=0 "file://$(pwd)/server" client 2> err && + + test_i18ngrep "filtering not recognized by server" err +' + +test_expect_success 'batch missing blob request during checkout' ' + rm -rf server client && + + test_create_repo server && + echo a >server/a && + echo b >server/b && + git -C server add a b && + + git -C server commit -m x && + echo aa >server/a && + echo bb >server/b && + git -C server add a b && + git -C server commit -m x && + + test_config -C server uploadpack.allowfilter 1 && + test_config -C server uploadpack.allowanysha1inwant 1 && + + git clone --filter=blob:limit=0 "file://$(pwd)/server" client && + + # Ensure that there is only one negotiation by checking that there is + # only "done" line sent. ("done" marks the end of negotiation.) 
+ GIT_TRACE_PACKET="$(pwd)/trace" git -C client checkout HEAD^ && + grep "git> done" trace >done_lines && + test_line_count = 1 done_lines +' + +test_expect_success 'batch missing blob request does not inadvertently try to fetch gitlinks' ' + rm -rf server client && + + test_create_repo repo_for_submodule && + test_commit -C repo_for_submodule x && + + test_create_repo server && + echo a >server/a && + echo b >server/b && + git -C server add a b && + git -C server commit -m x && + + echo aa >server/a && + echo bb >server/b && + # Also add a gitlink pointing to an arbitrary repository + git -C server submodule add "$(pwd)/repo_for_submodule" c && + git -C server add a b c && + git -C server commit -m x && + + test_config -C server uploadpack.allowfilter 1 && + test_config -C server uploadpack.allowanysha1inwant 1 && + + # Make sure that it succeeds + git clone --filter=blob:limit=0 "file://$(pwd)/server" client +' + +. "$TEST_DIRECTORY"/lib-httpd.sh +start_httpd + +test_expect_success 'partial clone using HTTP' ' + partial_clone "$HTTPD_DOCUMENT_ROOT_PATH/server" "$HTTPD_URL/smart/server" +' + +stop_httpd + test_done diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh new file mode 100755 index 0000000000..29d8631184 --- /dev/null +++ b/t/t5616-partial-clone.sh @@ -0,0 +1,146 @@ +#!/bin/sh + +test_description='git partial clone' + +. ./test-lib.sh + +# create a normal "src" repo where we can later create new commits. +# expect_1.oids will contain a list of the OIDs of all blobs. +test_expect_success 'setup normal src repo' ' + echo "{print \$1}" >print_1.awk && + echo "{print \$2}" >print_2.awk && + + git init src && + for n in 1 2 3 4 + do + echo "This is file: $n" > src/file.$n.txt + git -C src add file.$n.txt + git -C src commit -m "file $n" + git -C src ls-files -s file.$n.txt >>temp + done && + awk -f print_2.awk <temp | sort >expect_1.oids && + test_line_count = 4 expect_1.oids +' + +# bare clone "src" giving "srv.bare" for use as our server. +test_expect_success 'setup bare clone for server' ' + git clone --bare "file://$(pwd)/src" srv.bare && + git -C srv.bare config --local uploadpack.allowfilter 1 && + git -C srv.bare config --local uploadpack.allowanysha1inwant 1 +' + +# do basic partial clone from "srv.bare" +# confirm we are missing all of the known blobs. +# confirm partial clone was registered in the local config. +test_expect_success 'do partial clone 1' ' + git clone --no-checkout --filter=blob:none "file://$(pwd)/srv.bare" pc1 && + git -C pc1 rev-list HEAD --quiet --objects --missing=print \ + | awk -f print_1.awk \ + | sed "s/?//" \ + | sort >observed.oids && + test_cmp expect_1.oids observed.oids && + test "$(git -C pc1 config --local core.repositoryformatversion)" = "1" && + test "$(git -C pc1 config --local extensions.partialclone)" = "origin" && + test "$(git -C pc1 config --local core.partialclonefilter)" = "blob:none" +' + +# checkout master to force dynamic object fetch of blobs at HEAD. +test_expect_success 'verify checkout with dynamic object fetch' ' + git -C pc1 rev-list HEAD --quiet --objects --missing=print >observed && + test_line_count = 4 observed && + git -C pc1 checkout master && + git -C pc1 rev-list HEAD --quiet --objects --missing=print >observed && + test_line_count = 0 observed +' + +# create new commits in "src" repo to establish a blame history on file.1.txt +# and push to "srv.bare". 
+test_expect_success 'push new commits to server' ' + git -C src remote add srv "file://$(pwd)/srv.bare" && + for x in a b c d e + do + echo "Mod file.1.txt $x" >>src/file.1.txt + git -C src add file.1.txt + git -C src commit -m "mod $x" + done && + git -C src blame master -- file.1.txt >expect.blame && + git -C src push -u srv master +' + +# (partial) fetch in the partial clone repo from the promisor remote. +# verify that fetch inherited the filter-spec from the config and DOES NOT +# have the new blobs. +test_expect_success 'partial fetch inherits filter settings' ' + git -C pc1 fetch origin && + git -C pc1 rev-list master..origin/master --quiet --objects --missing=print >observed && + test_line_count = 5 observed +' + +# force dynamic object fetch using diff. +# we should only get 1 new blob (for the file in origin/master). +test_expect_success 'verify diff causes dynamic object fetch' ' + git -C pc1 diff master..origin/master -- file.1.txt && + git -C pc1 rev-list master..origin/master --quiet --objects --missing=print >observed && + test_line_count = 4 observed +' + +# force full dynamic object fetch of the file's history using blame. +# we should get the intermediate blobs for the file. +test_expect_success 'verify blame causes dynamic object fetch' ' + git -C pc1 blame origin/master -- file.1.txt >observed.blame && + test_cmp expect.blame observed.blame && + git -C pc1 rev-list master..origin/master --quiet --objects --missing=print >observed && + test_line_count = 0 observed +' + +# create new commits in "src" repo to establish a history on file.2.txt +# and push to "srv.bare". +test_expect_success 'push new commits to server for file.2.txt' ' + for x in a b c d e f + do + echo "Mod file.2.txt $x" >>src/file.2.txt + git -C src add file.2.txt + git -C src commit -m "mod $x" + done && + git -C src push -u srv master +' + +# Do FULL fetch by disabling inherited filter-spec using --no-filter. +# Verify we have all the new blobs. +test_expect_success 'override inherited filter-spec using --no-filter' ' + git -C pc1 fetch --no-filter origin && + git -C pc1 rev-list master..origin/master --quiet --objects --missing=print >observed && + test_line_count = 0 observed +' + +# create new commits in "src" repo to establish a history on file.3.txt +# and push to "srv.bare". +test_expect_success 'push new commits to server for file.3.txt' ' + for x in a b c d e f + do + echo "Mod file.3.txt $x" >>src/file.3.txt + git -C src add file.3.txt + git -C src commit -m "mod $x" + done && + git -C src push -u srv master +' + +# Do a partial fetch and then try to manually fetch the missing objects. +# This can be used as the basis of a pre-command hook to bulk fetch objects +# perhaps combined with a command in dry-run mode. 
+test_expect_success 'manual prefetch of missing objects' ' + git -C pc1 fetch --filter=blob:none origin && + git -C pc1 rev-list master..origin/master --quiet --objects --missing=print \ + | awk -f print_1.awk \ + | sed "s/?//" \ + | sort >observed.oids && + test_line_count = 6 observed.oids && + git -C pc1 fetch-pack --stdin "file://$(pwd)/srv.bare" <observed.oids && + git -C pc1 rev-list master..origin/master --quiet --objects --missing=print \ + | awk -f print_1.awk \ + | sed "s/?//" \ + | sort >observed.oids && + test_line_count = 0 observed.oids +' + +test_done diff --git a/transport-helper.c b/transport-helper.c index 5080150231..3f380d87d9 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -672,6 +672,11 @@ static int fetch(struct transport *transport, if (data->transport_options.update_shallow) set_helper_option(transport, "update-shallow", "true"); + if (data->transport_options.filter_options.choice) + set_helper_option( + transport, "filter", + data->transport_options.filter_options.filter_spec); + if (data->fetch) return fetch_with_fetch(transport, nr_heads, to_fetch); diff --git a/transport.c b/transport.c index e82db773fd..00d48b5b56 100644 --- a/transport.c +++ b/transport.c @@ -167,6 +167,9 @@ static int set_git_option(struct git_transport_options *opts, } else if (!strcmp(name, TRANS_OPT_NO_DEPENDENTS)) { opts->no_dependents = !!value; return 0; + } else if (!strcmp(name, TRANS_OPT_LIST_OBJECTS_FILTER)) { + parse_list_objects_filter(&opts->filter_options, value); + return 0; } return 1; } @@ -237,6 +240,7 @@ static int fetch_refs_via_pack(struct transport *transport, args.update_shallow = data->options.update_shallow; args.from_promisor = data->options.from_promisor; args.no_dependents = data->options.no_dependents; + args.filter_options = data->options.filter_options; if (!data->got_remote_heads) { connect_setup(transport, 0); diff --git a/transport.h b/transport.h index 8c3430a5b9..3c68d73b21 100644 --- a/transport.h +++ b/transport.h @@ -4,6 +4,7 @@ #include "cache.h" #include "run-command.h" #include "remote.h" +#include "list-objects-filter-options.h" struct string_list; @@ -23,6 +24,7 @@ struct git_transport_options { const char *uploadpack; const char *receivepack; struct push_cas_option *cas; + struct list_objects_filter_options filter_options; }; enum transport_family { @@ -170,6 +172,9 @@ void transport_check_allowed(const char *type); */ #define TRANS_OPT_NO_DEPENDENTS "no-dependents" +/* Filter objects for partial clone and fetch */ +#define TRANS_OPT_LIST_OBJECTS_FILTER "filter" + /** * Returns 0 if the option was used, non-zero otherwise. Prints a * message to stderr if the option is not used. diff --git a/unpack-trees.c b/unpack-trees.c index 96c3327f19..e6a15bbe44 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -15,6 +15,7 @@ #include "submodule.h" #include "submodule-config.h" #include "fsmonitor.h" +#include "fetch-object.h" /* * Error messages expected by scripts out of plumbing commands such as @@ -370,6 +371,27 @@ static int check_updates(struct unpack_trees_options *o) load_gitmodules_file(index, &state); enable_delayed_checkout(&state); + if (repository_format_partial_clone && o->update && !o->dry_run) { + /* + * Prefetch the objects that are to be checked out in the loop + * below. 
+ */ + struct oid_array to_fetch = OID_ARRAY_INIT; + int fetch_if_missing_store = fetch_if_missing; + fetch_if_missing = 0; + for (i = 0; i < index->cache_nr; i++) { + struct cache_entry *ce = index->cache[i]; + if ((ce->ce_flags & CE_UPDATE) && + !S_ISGITLINK(ce->ce_mode)) { + if (!has_object_file(&ce->oid)) + oid_array_append(&to_fetch, &ce->oid); + } + } + if (to_fetch.nr) + fetch_objects(repository_format_partial_clone, + &to_fetch); + fetch_if_missing = fetch_if_missing_store; + } for (i = 0; i < index->cache_nr; i++) { struct cache_entry *ce = index->cache[i]; diff --git a/upload-pack.c b/upload-pack.c index d5de18127c..f51b6cfca9 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -10,6 +10,8 @@ #include "diff.h" #include "revision.h" #include "list-objects.h" +#include "list-objects-filter.h" +#include "list-objects-filter-options.h" #include "run-command.h" #include "connect.h" #include "sigchain.h" @@ -19,6 +21,7 @@ #include "argv-array.h" #include "prio-queue.h" #include "protocol.h" +#include "quote.h" static const char * const upload_pack_usage[] = { N_("git upload-pack [<options>] <dir>"), @@ -65,6 +68,10 @@ static int advertise_refs; static int stateless_rpc; static const char *pack_objects_hook; +static int filter_capability_requested; +static int filter_advertise; +static struct list_objects_filter_options filter_options; + static void reset_timeout(void) { alarm(timeout); @@ -132,6 +139,17 @@ static void create_pack_file(void) argv_array_push(&pack_objects.args, "--delta-base-offset"); if (use_include_tag) argv_array_push(&pack_objects.args, "--include-tag"); + if (filter_options.filter_spec) { + if (pack_objects.use_shell) { + struct strbuf buf = STRBUF_INIT; + sq_quote_buf(&buf, filter_options.filter_spec); + argv_array_pushf(&pack_objects.args, "--filter=%s", buf.buf); + strbuf_release(&buf); + } else { + argv_array_pushf(&pack_objects.args, "--filter=%s", + filter_options.filter_spec); + } + } pack_objects.in = -1; pack_objects.out = -1; @@ -795,6 +813,12 @@ static void receive_needs(void) deepen_rev_list = 1; continue; } + if (skip_prefix(line, "filter ", &arg)) { + if (!filter_capability_requested) + die("git upload-pack: filtering capability not negotiated"); + parse_list_objects_filter(&filter_options, arg); + continue; + } if (!skip_prefix(line, "want ", &arg) || get_oid_hex(arg, &oid_buf)) die("git upload-pack: protocol error, " @@ -822,6 +846,8 @@ static void receive_needs(void) no_progress = 1; if (parse_feature_request(features, "include-tag")) use_include_tag = 1; + if (parse_feature_request(features, "filter")) + filter_capability_requested = 1; o = parse_object(&oid_buf); if (!o) { @@ -941,7 +967,7 @@ static int send_ref(const char *refname, const struct object_id *oid, struct strbuf symref_info = STRBUF_INIT; format_symref_info(&symref_info, cb_data); - packet_write_fmt(1, "%s %s%c%s%s%s%s%s agent=%s\n", + packet_write_fmt(1, "%s %s%c%s%s%s%s%s%s agent=%s\n", oid_to_hex(oid), refname_nons, 0, capabilities, (allow_unadvertised_object_request & ALLOW_TIP_SHA1) ? @@ -950,6 +976,7 @@ static int send_ref(const char *refname, const struct object_id *oid, " allow-reachable-sha1-in-want" : "", stateless_rpc ? " no-done" : "", symref_info.buf, + filter_advertise ? 
" filter" : "", git_user_agent_sanitized()); strbuf_release(&symref_info); } else { @@ -1028,6 +1055,8 @@ static int upload_pack_config(const char *var, const char *value, void *unused) } else if (current_config_scope() != CONFIG_SCOPE_REPO) { if (!strcmp("uploadpack.packobjectshook", var)) return git_config_string(&pack_objects_hook, var, value); + } else if (!strcmp("uploadpack.allowfilter", var)) { + filter_advertise = git_config_bool(var, value); } return parse_hide_refs_config(var, value, "uploadpack"); } |