#!/usr/bin/perl
# Copyright (C) 2018 The Qt Company Ltd.
# Contact: http://www.qt.io/licensing/
#
# You may use this file under the terms of the 3-clause BSD license.
# See the file LICENSE from this package for details.
#

use v5.14;
use strict;
use warnings;
no warnings qw(io);

our ($script, $script_path);
BEGIN {
    use Cwd qw(abs_path);
    # On msys, convert a native Windows path in $0 (backslashes, drive
    # letter) to the /<drive>/... form before resolving it.
    if ($^O eq "msys") {
        $0 =~ s,\\,/,g;
        $0 =~ s,^(.):/,/$1/,g;
    }
    # Make the directory containing this script a module search path,
    # so that git_gpush can be loaded from next to it.
    $script_path = $script = abs_path($0);
    $script_path =~ s,/[^/]+$,,;
    unshift @INC, $script_path;
}
use git_gpush;

use JSON;

# Print the command line help to stdout.
sub usage()
{
    print << "EOM";
Usage:
    git ggc [options]

    This program cleans up state from git-gpush that became stale due
    to local Changes disappearing (most likely being upstreamed).

    You should use git-gpull instead of 'git pull --rebase', to make
    sure that git-ggc is called at regular intervals (default 30 days,
    configurable with gpush.gcInterval).

    The only recognized options are --help (-h, -?), --dry-run (-n),
    --verbose (-v), --quiet (-q), and --debug with the usual meanings
    (see git-gpush help).

Copyright:
    Copyright (C) 2018 The Qt Company Ltd.
    Contact: http://www.qt.io/licensing/

License:
    You may use this file under the terms of the 3-clause BSD license.
EOM
}

# Process the command line arguments passed in @_.
# Sets the option flags $verbose, $quiet, $debug and $dry_run (these are
# not declared in this file; presumably they are exported by git_gpush).
# --help prints the usage and exits; any unrecognized argument and the
# combination of --quiet with --verbose/--debug are reported via fail().
sub parse_arguments(@)
{
    while (scalar @_) {
        my $arg = shift @_;

        if ($arg eq "-v" || $arg eq "--verbose") {
            $verbose = 1;
        } elsif ($arg eq "-q" || $arg eq "--quiet") {
            $quiet = 1;
        } elsif ($arg eq "--debug") {
            # --debug implies --verbose.
            $debug = 1;
            $verbose = 1;
        } elsif ($arg eq "-n" || $arg eq "--dry-run") {
            $dry_run = 1;
        } elsif ($arg eq "-?" || $arg eq "--?" || $arg eq "-h" || $arg eq "--help") {
            usage();
            exit;
        } else {
            fail("Unrecognized command line argument '$arg'.\n");
        }
    }

    fail("--quiet and --verbose/--debug are mutually exclusive.\n")
        if ($quiet && $verbose);
}

# Remotes which are used as upstreams of local branches.
# Populated by perform_gc(), consumed by get_gerrit_config().
my %upstream_remotes;

# Determine which remote to use for talking to Gerrit, store it in
# $remote, and pass it on to set_gerrit_config().
sub get_gerrit_config()
{
    $remote = git_config('gpush.remote');
    # If a remote is specified, use exactly that one.
    if (!$remote) {
        # Otherwise try 'gerrit', and fall back to the origin.
        $remote = 'gerrit';
        if (!git_config('remote.'.$remote.'.url')) {
            my @remotes = keys %upstream_remotes;
            if (@remotes == 1) {
                $remote = shift @remotes;
            } else {
                $remote = git_config('gpush.upstream', 'origin');
                werr("Notice: Upstream remote is ambiguous; defaulting to '$remote'.\n")
                    if (!$quiet);
            }
        }
    }
    set_gerrit_config($remote);
}

# Run 'git log' over the revisions passed in @_ (they are fed to git
# via stdin), with @upstream_excludes appended to the command line.
# Returns a hash reference { change-id => 1 } of the Change-Ids found
# in the messages of the visited commits.
sub visit_revs_pull(@)
{
    my %changeids;

    my $log = open_cmd_pipe(USE_STDIN, 'git', 'log', '-z', '--pretty=%H%x00%B', '--stdin',
                            @upstream_excludes);
    write_process($log, map { "$_\n" } @_);
    while (read_fields($log, my ($id, $message))) {
        # The subject (first line of the message) is needed only for
        # the debug output below.
        $message =~ /^(.*)$/m;
        my $subject = $1;

        # Gerrit uses the last Change-Id if multiple are present.
        my @cids = ($message =~ /^Change-Id: (.+)$/mg);
        next if (!@cids);
        my $changeid = $cids[-1];

        print "-- $id: $subject\n" if ($debug);

        $changeids{$changeid} = 1;
    }
    close_process($log);

    return \%changeids;
}

# The garbage-collecting workhorse.
# Note that we entirely ignore branches. A Change is assumed to be still
# relevant if it exists on any local branch or exists in a non-terminal
# state on Gerrit for any branch.
#
# The steps are:
#  1. Collect all refs under refs/heads/, refs/remotes/ and refs/gpush/.
#  2. Determine the upstreams of the local branches and collect the
#     Change-Ids of all commits on local branches beyond them.
#  3. Query Gerrit about the Changes which are not present locally, and
#     mark the ones which are not active there as garbage.
#  4. Decide which fetched PatchSet refs (refs/gpush/g<number>_<ps>)
#     are still needed.
#  5. Delete the refs of everything that is not kept.
sub perform_gc()
{
    print "Loading refs ...\n" if ($debug);
    my %pushed;  # { sequence-number => sha1 }
    my %zaps;  # { sequence-number => [ state-ref, ... ] }
    my %fzaps;  # { gerrit-number => [ patchset-number => fetched-ref, ... ] }
    my @fsha1s;  # [ fetched-sha1, ... ]
    my @local_refs;  # These define which Changes are still active.
    my %remote_refs;  # Used for quickly validating upstreams.
    my $branches = open_cmd_pipe(0, "git", "for-each-ref", "--format=%(objectname) %(refname)",
                                 "refs/heads/", "refs/remotes/", "refs/gpush/");
    while (read_process($branches)) {
        # Each substitution strips the SHA1 and the ref namespace, so
        # $_ subsequently holds only the name within that namespace.
        if (s,^.{40} refs/remotes/,,) {
            $remote_refs{$_} = 1;
        } elsif (s,^.{40} refs/heads/,,) {
            push @local_refs, $_;
        } elsif (s,^(.{40}) refs/gpush/,,) {
            my $sha1 = $1;
            if (m,^i(\d+)_(.*)$,) {
                # State ref of a Change: i<sequence-number>_<name>.
                push @{$zaps{$1}}, $_;
                $pushed{$1} = $sha1 if ($2 eq "pushed");
            } elsif (m,^g(\d+)_(\d+)$,) {
                # Fetched PatchSet: g<gerrit-number>_<patchset-number>.
                $fzaps{$1}[$2] = $_;
                push @fsha1s, $sha1;
            } elsif (!m,^state(-new)?$,) {
                print "Keeping unrecognized '$_'.\n" if ($debug);
            }
        }
    }
    close_process($branches);

    my %upstream_refs;
    foreach my $ref (@local_refs) {
        my ($uprm, $upbr) = (git_config("branch.$ref.remote"), git_config("branch.$ref.merge"));
        # Local WIP branches typically have no upstream.
        next if (!$uprm || !$upbr);
        $upbr =~ s,^refs/heads/,,;
        my $upref = $uprm.'/'.$upbr;
        # The upstream branch may have been pruned. Don't error out when this happens.
        next if (!defined($remote_refs{$upref}));
        # Note that multiple branches may have the same upstream.
        $upstream_refs{$upref} = 1;
        # Record which remotes are used as upstreams.
        $upstream_remotes{$uprm} = 1;
    }
    @upstream_excludes = map { '^refs/remotes/'.$_ } keys %upstream_refs;

    # Changes which are on local branches are excluded from pruning. Obviously.
    print "Visiting local branches ...\n" if ($debug);
    my $local_changes = visit_revs_pull('HEAD', (map { 'refs/heads/'.$_ } @local_refs));

    print "Collecting locally present Changes ...\n" if ($debug);
    my %zap_ids;  # { change-id => 1 }
    foreach my $key (sort keys %change_by_key) {
        my $change = $change_by_key{$key};
        my $changeid = $$change{id};
        if (defined($$local_changes{$changeid})) {
            print "Keeping $key ($changeid): exists locally.\n" if ($verbose);
            delete $zaps{$key};
        } else {
            $zap_ids{$changeid} = 1;
        }
    }

    my %change2active;  # { change-id => true if active on any branch }
    if (%zap_ids || %fzaps) {
        print "Querying Gerrit for prunable Changes ...\n" if ($debug);
        get_gerrit_config();
        my $ginfos = query_gerrit_only([ map { "change:".$_ } (keys %zap_ids, keys %fzaps) ]);
        foreach my $ginfo (@$ginfos) {
            my ($key, $changeid, $status, $branch) =
                    ($$ginfo{key}, $$ginfo{id}, $$ginfo{status}, $$ginfo{branch});
            my $active = (($status ne 'MERGED') && ($status ne 'ABANDONED'));
            print "$changeid is ".($active ? "" : "NOT ")."active on $branch.\n" if ($debug);
            # A Change-Id may appear once per branch; being active on
            # any one of them is sufficient.
            $change2active{$changeid} ||= $active;
        }
    }

    print "Pruning stale Changes ...\n" if ($debug);
    foreach my $key (sort keys %change_by_key) {
        my $change = $change_by_key{$key};
        # Even Changes which are absent from the local branch are pruned
        # only if they are in a terminal state. Otherwise, there is reason
        # to believe that they might be used again at a later point.
        my $changeid = $$change{id};
        if (!defined($zap_ids{$changeid})) {
            # Exists locally; already handled above.
        } elsif ($change2active{$changeid}) {
            print "Keeping $key ($changeid): active on Gerrit.\n" if ($verbose);
            # Treat it like a locally present Change from here on, so
            # the handling of fetched PatchSets below keeps its refs.
            $$local_changes{$changeid} = 1;
            delete $zaps{$key};
        } else {
            print "Pruning $key ($changeid).\n" if ($verbose);
            $$change{garbage} = 1;
        }
    }

    if ($verbose) {
        foreach my $key (sort keys %zaps) {
            # This is unlikely to ever trigger, as the regular load_refs()
            # cleans up anyway.
            print "Pruning $key (unrecognized Change).\n" if (!$change_by_key{$key});
        }
    }

    print "Visiting fetched PatchSets ...\n" if ($debug);
    visit_local_commits(\@fsha1s);

    # Keep fetched PatchSets that correspond with kept Changes, but only the ones
    # that are not older than the pushed revision of the Change. Older PatchSets
    # are accessed only when explicitly picking an old state of the series, which
    # is very rare, so it is not worth keeping these refs cached.
    # %chop_gs maps a gerrit-number to the lowest PatchSet number to keep,
    # or to -1 if Gerrit does not know the Change (anymore).
    my (@visit_sha1s, %seen_sha1s, %chop_gs);
    foreach my $key (sort keys %fzaps) {
        my $ginfo = $gerrit_info_by_key{$key};
        if (!defined($ginfo)) {
            $chop_gs{$key} = -1;
            next;
        }
        my $changeid = $$ginfo{id};
        if (!defined($$local_changes{$changeid})) {
            # Might be still referenced recursively.
            print "Might prune fetched $key ($changeid): corresponds with no kept Change.\n"
                if ($debug);
            next;
        }
        my $chgs = $changes_by_id{$changeid};
        if (!$chgs) {
            # Corresponding commits exist locally, but we never assigned them to Changes.
            print "Might prune fetched $key ($changeid): corresponds with no tracked Change.\n"
                if ($debug);
            next;
        }
        my $revs = $$ginfo{revs};
        my $mps = $$revs[-1]{ps};
        # Subsequent sync-up attempts will fetch the current (and previous,
        # for MERGED Changes) PatchSet, so keep that in every case.
        my $kps = ($mps > 1 && ($$ginfo{status} eq "MERGED")) ? $mps - 1 : $mps;
        foreach my $change (@$chgs) {
            next if ($$change{garbage});
            my $sha1 = $pushed{$$change{key}};
            # This Change was not gpushed/gpicked, and a possible sync-up failed.
            next if (!defined($sha1));
            my $rev = $$ginfo{rev_by_id}{$sha1};
            # A Change on a different branch owns this commit.
            next if (!$rev);
            my $ps = $$rev{ps};
            $kps = $ps if ($ps < $kps);
        }
        # Note that this may include SHA1s that were not fetched.
        push @visit_sha1s, map { $$_{id} } grep { $$_{ps} >= $kps } @$revs;
        $chop_gs{$key} = $kps;
    }

    # Keep all fetched PatchSets that are ancestors of other kept
    # PatchSets, recursively.
    while (my $sha1 = shift @visit_sha1s) {
        while (!defined($seen_sha1s{$sha1})) {
            $seen_sha1s{$sha1} = 1;
            my $commit = $commit_by_id{$sha1};
            last if (!$commit);  # Hit bottom of series, or initial commit was not fetched.
            # If the Change disappeared, then its fetched refs will be nuked anyway,
            # and ancestors which are not in the initial set won't be accessed anymore.
            my $ginfo = $gerrit_info_by_sha1{$sha1};
            last if (!$ginfo);
            # Ancestry traversal may lead us to older PatchSets than those in the
            # initial set, but assemble_series()' callbacks would upgrade from these
            # anyway, so we can just stop here.
            last if (defined($$local_changes{$$ginfo{id}}));
            # rev_by_id yields the revision record (as in the loop above),
            # not the PatchSet number itself; using the record directly
            # would compare PatchSet numbers against a reference.
            my $rev = $$ginfo{rev_by_id}{$sha1};
            last if (!$rev);  # Shouldn't happen.
            my $ps = $$rev{ps};
            push @visit_sha1s, map { $$_{id} } grep { $$_{ps} >= $ps } @{$$ginfo{revs}};
            my $key = $$ginfo{key};
            my $ops = $chop_gs{$key};
            $chop_gs{$key} = $ps if (!defined($ops) || $ops > $ps);
            $sha1 = get_1st_parent($commit);
        }
    }

    foreach my $key (sort keys %fzaps) {
        my $fzap = $fzaps{$key};
        my $kps = $chop_gs{$key};
        if (!defined($kps)) {
            print "Pruning fetched $key (unused).\n" if ($verbose);
        } elsif ($kps < 0) {
            print "Pruning fetched $key (disappeared from Gerrit).\n" if ($verbose);
        } else {
            my $mps = $#$fzap;
            print "Keeping fetched $key starting at $kps/$mps.\n" if ($verbose);
            # The array is indexed by PatchSet number, so this removes
            # PatchSets $kps and up from the list of refs to delete.
            splice @$fzap, $kps;
        }
        # The array is sparse; drop the holes.
        $fzaps{$key} = [ grep { defined($_) } @$fzap ];
    }

    # We completely bypass the regular state handling when it comes
    # to refs, as we want to also prune refs in our namespace that
    # don't correspond with known state keys.
    update_refs(DRY_RUN, [
        map { "delete refs/gpush/$_\n" } map { @$_ } (values %zaps, values %fzaps)
    ]);
}

parse_arguments(@ARGV);
goto_gitdir();
load_config();
load_state_file();
# No load_state(), and thus no load_refs().
perform_gc();
$last_gc = time();
save_state($dry_run);