# Copyright (C) 2017 The Qt Company Ltd.
# Copyright (C) 2019 Oswald Buddenhagen
# Contact: http://www.qt.io/licensing/
#
# You may use this file under the terms of the 3-clause BSD license.
# See the file LICENSE from this package for details.
#

package git_gpush;

use v5.14;
use strict;
use warnings;
no warnings qw(io);

use Carp;
$SIG{__WARN__} = \&Carp::cluck;
$SIG{__DIE__} = \&Carp::confess;

use List::Util qw(min max);
use File::Spec;
use File::Temp qw(mktemp);
use IPC::Open3 qw(open3);
use Symbol qw(gensym);
use Term::ReadKey;
use Text::Wrap;
use JSON;

our @_imported;
BEGIN {
    no strict 'refs';
    @_imported = keys %{__PACKAGE__.'::'};
}

##################
# shared options #
##################

our $debug = 0;
our $verbose = 0;
our $quiet = 0;
our $dry_run = 0;

##################
# message output #
##################

$| = 1;  # OUTPUT_AUTOFLUSH; in case we're redirecting debug output.

my ($tty_width, undef, undef, undef) = (-t STDOUT) ? GetTerminalSize() : (80);

# This is for messages which even after wrapping will rarely/barely
# exceed a single line, so it does not matter that the output possibly
# violates the "no more than 80 columns for flowed text" guideline.
sub _wrap_wide($)
{
    $Text::Wrap::columns = $tty_width + 1;
    return wrap("", "", $_[0]);
}

sub wout($)
{
    print _wrap_wide($_[0]);
}

sub werr($)
{
    print STDERR _wrap_wide($_[0]);
}

sub wfail($)
{
    werr($_[0]);
    exit(1);
}

# This is for bigger amounts of text.
sub _wrap_narrow($)
{
    $Text::Wrap::columns = min($tty_width, 80) + 1;
    return wrap("", "", $_[0]);
}

sub nwout($)
{
    print _wrap_narrow($_[0]);
}

sub nwerr($)
{
    print STDERR _wrap_narrow($_[0]);
}

sub nwfail($)
{
    nwerr($_[0]);
    exit(1);
}

sub fail($)
{
    print STDERR $_[0];
    exit(1);
}

#######################
# subprocess handling #
#######################

use constant {
    NUL_STDIN => 0,
    USE_STDIN => 1,
    FWD_STDIN => 2,
    NUL_STDOUT => 0,
    USE_STDOUT => 4,
    FWD_STDOUT => 8,
    NUL_STDERR => 0,
    USE_STDERR => 16,
    FWD_STDERR => 32,
    FWD_OUTPUT => 40,
    SILENT_STDIN => 64,  # Suppress debug output for stdin
    SOFT_FAIL => 256,    # A non-zero exit from the process is not fatal
    DRY_RUN => 512       # Don't actually run the command if $dry_run is true
};

sub _format_cmd(@)
{
    return join(' ', map { /\s/ ? '"' . $_ . '"' : $_ } @_);
}

sub open_process($@)
{
    my ($flags, @cmd) = @_;
    my %process;

    $flags &= ~DRY_RUN if (!$dry_run);
    $process{flags} = $flags;
    if ($flags & DRY_RUN) {
        print "+ "._format_cmd(@cmd)." [DRY]\n" if ($debug);
        return \%process;
    }
    my $cmd = _format_cmd(@cmd);
    $process{cmd} = $cmd;
    my ($in, $out, $err);
    if ($flags & USE_STDIN) {
        $in = \$process{stdin};
    } elsif ($flags & FWD_STDIN) {
        # open3() closes the handle input is redirected from, which is
        # most definitely not what we want to happen to stdin.
        no warnings 'once';  # The parser doesn't see the string reference.
        open INPUT, '<&STDIN' or wfail("Failed to dup() stdin: $!\n");
        $in = \'<&INPUT';
    } else {
        $in = \'<&NUL';
    }
    if ($flags & USE_STDOUT) {
        $out = \$process{stdout};
    } elsif ($flags & FWD_STDOUT) {
        $out = \'>&STDOUT';
    } else {
        $out = \'>&NUL';
    }
    if ($flags & USE_STDERR) {
        $err = \$process{stderr};
        $$err = gensym();
    } elsif ($flags & FWD_STDERR) {
        $err = \'>&STDERR';
    } else {
        $err = \'>&NUL';
    }
    print "+ $cmd\n" if ($debug);
    open(NUL, '>'.File::Spec->devnull()) or wfail("Failed to open bitbucket: $!\n");
    eval { $process{pid} = open3($$in, $$out, $$err, @cmd); };
    wfail("Failed to run \"$cmd[0]\": $!\n") if ($@);
    close(NUL);
    return \%process;
}

sub close_process($)
{
    my ($process) = @_;

    if ($$process{flags} & DRY_RUN) {
        $? = 0;
        return;
    }
    my $cmd = $$process{cmd};
    if ($$process{stdout}) {
        close($$process{stdout}) or wfail("Failed to close read pipe of '$cmd': $!\n");
    }
    if ($$process{stderr}) {
        close($$process{stderr}) or wfail("Failed to close error read pipe of '$cmd': $!\n");
    }
    waitpid($$process{pid}, 0) or wfail("Failed to wait for '$cmd': $!\n");
    if ($? & 128) {
        wfail("'$cmd' crashed with signal ".($? & 127).".\n") if ($? != 141); # allow SIGPIPE
        $? = 0;
    } elsif ($? && !($$process{flags} & SOFT_FAIL)) {
        exit($? >> 8);
    }
}

# Write any number of lines to the process' stdin.
# The input is expected to already contain trailing newlines.
# This function must be called exactly once iff USE_STDIN is used.
# Note that this will deadlock with USE_STDOUT if the process outputs
# too much before all input is written.
sub write_process($@)
{
    my ($process, @input) = @_;

    my $stdin = $$process{stdin};
    my $silent = ($$process{flags} & SILENT_STDIN);
    my $dry = ($$process{flags} & DRY_RUN);
    local $SIG{PIPE} = "IGNORE";
    foreach (@input) {
        print "> $_" if ($debug && !$silent);
        print $stdin $_ if (!$dry);
    }
    $dry or close($stdin) or wfail("Failed to close write pipe of '$$process{cmd}': $!\n");
}

# Read a line from the process' stdout.
sub read_process($)
{
    my ($process) = @_;

    my $fh = $$process{stdout};
    $_ = <$fh>;
    if (defined($_)) {
        chomp;
        print "- $_\n" if ($debug);
    }
    return $_;
}

# Read all lines from the process' stdout.
sub read_process_all($)
{
    my ($process) = @_;

    my $fh = $$process{stdout};
    my @lines = <$fh>;
    chomp @lines;
    printf("Read %d lines.\n", int(@lines)) if ($debug);
    return \@lines;
}

# Read any number of null-terminated fields from the process' stdout.
sub read_fields($@)
{
    my $process = shift;
    my $fh = $$process{stdout};
    return 0 if (eof($fh));
    local $/ = "\0";
    for (@_) { chop($_ = <$fh>); }
    return 1;
}

# Read all of a process' output channel as a single string.
sub get_process_output($$)
{
    my ($process, $which) = @_;

    local $/;
    my $fh = $$process{$which};
    return <$fh>;
}

# The equivalent of system().
sub run_process($@)
{
    my ($flags, @cmd) = @_;

    close_process(open_process($flags, @cmd));
}

# The equivalent of popen("r").
sub open_cmd_pipe($@)
{
    my ($flags, @cmd) = @_;

    return open_process(USE_STDOUT | FWD_STDERR | $flags, @cmd);
}

# Run the specified command and try to read exactly one line from its stdout.
sub read_cmd_line($@)
{
    my ($flags, @cmd) = @_;

    my $proc = open_cmd_pipe($flags, @cmd);
    read_process($proc);
    close_process($proc);
    return $_;
}

##############
# git basics #
##############

our $gitdir;  # $GIT_DIR
our $gitcommondir;  # $GIT_COMMON_DIR

sub goto_gitdir()
{
    my $cdup = read_cmd_line(0, 'git', 'rev-parse', '--show-cdup');
    fail("fatal: This operation must be run in a work tree\n") if (!defined($cdup));
    chdir($cdup) unless ($cdup eq "");
    $gitdir = read_cmd_line(0, 'git', 'rev-parse', '--git-dir');
    $gitcommondir = read_cmd_line(0, 'git', 'rev-parse', '--no-flags', '--git-common-dir');
}

# `git config --list` output, plus contents of .git-gpush-aliases' [config]
our %gitconfig;  # { key => [ value, ... ] }
# Pre-processed url.*.insteadOf mappings.
my @url_rewrites;
my @url_rewrites_push;

sub _load_git_config()
{
    # Read all git configuration at once, as that's faster than repeated
    # git invocations, especially under Windows.
    my $cfg = open_cmd_pipe(0, 'git', 'config', '-l', '-z');
    while (read_fields($cfg, my $entry)) {
        $entry =~ /^([^\n]+)\n(.*)$/;
        my ($key, $value) = ($1, $2);
        push @{$gitconfig{$key}}, $value;

        if ($key =~ /^url\.(.*)\.insteadof$/) {
            push @url_rewrites, [ $value, $1 ];
        } elsif ($key =~ /^url\.(.*)\.pushinsteadof$/) {
            push @url_rewrites_push, [ $value, $1 ];
        }
    }
    close_process($cfg);

    # Sort backwards, so longest match is hit first.
    @url_rewrites = sort { $$b[0] cmp $$a[0] } @url_rewrites;
    @url_rewrites_push = sort { $$b[0] cmp $$a[0] } @url_rewrites_push;
}

sub git_configs($)
{
    my ($key) = @_;
    my $ref = $gitconfig{$key};
    return defined($ref) ? @$ref : ();
}

sub git_config($;$)
{
    my ($key, $dflt) = @_;
    my @cfg = git_configs($key);
    return scalar(@cfg) ? $cfg[-1] : $dflt;
}

sub _rewrite_git_push_url($)
{
    my ($url) = @_;

    foreach my $ent (@url_rewrites_push, @url_rewrites) {
        my ($pfx, $sub) = @$ent;
        return $url if ($url =~ s/^\Q$pfx\E/$sub/);
    }
    return $url;
}

our $_indexfile;
END { unlink($_indexfile) if ($_indexfile); }

sub with_local_git_index($@)
{
    my ($callback, @args) = @_;

    $_indexfile = mktemp(($ENV{TMPDIR} or "/tmp") . "/git-gpush.XXXXXX");
    local $ENV{GIT_INDEX_FILE} = $_indexfile;

    local ($SIG{HUP}, $SIG{INT}, $SIG{QUIT}, $SIG{TERM});
    $SIG{HUP} = $SIG{INT} = $SIG{QUIT} = $SIG{TERM} = sub { exit; };

    my $ret = $callback->(@args);

    unlink($_indexfile);
    $_indexfile = undef;

    return $ret;
}

#################
# configuration #
#################

sub _file_contents($)
{
    my ($filename) = @_;

    my @contents = "";
    my $fh;
    if (-e $filename && open($fh, "< $filename")) {
        @contents = <$fh>;
        close $fh;
    }
    return @contents;
}

our %aliases;  # { alias => login }

sub load_config()
{
    # Read config from .git-gpush-aliases file
    my $in_aliases = 1;
    foreach my $line (_file_contents($::script_path."/.git-gpush-aliases")) {
        chomp $line;
        $line =~ s,(#|//).*$,,;  # Remove any comments
        if ($line =~ /^\[([^]]+)\]/) {
            if ($1 eq "aliases") {
                $in_aliases = 1;
            } elsif ($1 eq "config") {
                $in_aliases = 0;
            } else {
                fail("Unrecognized section '$1' in alias file.\n");
            }
        } elsif ($line =~ /^\s*([^ =]+)\s*=\s*(.*?)\s*$/) {  # Capture the value
            if ($in_aliases) {
                for my $alias (split(/,/, $1)) {
                    $aliases{$alias} = $2;
                }
            } else {
                push @{$gitconfig{"gpush.$1"}}, $2;
            }
        }
    }

    _load_git_config();

    foreach (keys %gitconfig) {
        if (/^gpush\.alias\.(.*)$/) {
            $aliases{$1} = git_config($_);
        }
    }
}

######################
# branches & remotes #
######################

# The name of the local branch we're working with (not necessarily the
# current branch). May be undef.
our $local_branch;
# The tips of the local and remote branches.
our %local_refs;  # { branch => sha1 }
our %remote_refs;  # { remote => { branch => sha1 } }

# The name of the local branch's upstream remote, or a configurable fallback.
our $upstream_remote;
# The name of the Gerrit remote, possibly identical to the upstream remote.
our $remote;

# The tips of the remote branches which are excluded from commit listings.
# The commits are already prepared for git-rev-list (prefixed with '^').
our @upstream_excludes;

sub update_excludes()
{
    my %heads;
    my $urefs = $remote_refs{$upstream_remote};
    if ($urefs) {
        $heads{$_} = 1 foreach (values %$urefs);
    }
    my $grefs = $remote_refs{$remote};
    if ($grefs) {
        $heads{$_} = 1 foreach (values %$grefs);
    }
    @upstream_excludes = map { "^$_" } keys %heads;
}

sub find_gerrit_remote($)
{
    my ($remotes) = @_;

    if (defined($local_branch)) {
        # If a branch-specific Gerrit remote is configured, use exactly that one.
        $remote = git_config("branch.$local_branch.gpushremote");
        if (defined($remote)) {
            fail("Remote '$remote' configured in branch.$local_branch.gpushremote is invalid.\n")
                if (!defined($$remotes{$remote}));
            return;
        }
    }
    # Otherwise, if a global Gerrit remote is configured, use exactly that one.
    $remote = git_config("gpush.remote");
    if (defined($remote)) {
        fail("Remote '$remote' configured in gpush.remote is invalid.\n")
            if (!defined($$remotes{$remote}));
        return;
    }
    # Otherwise use 'gerrit' if present.
    if (defined($$remotes{'gerrit'})) {
        $remote = 'gerrit';
        return;
    }
    if (defined($local_branch)) {
        # Otherwise, if a branch-specific push remote is configured, use that one.
        $remote = git_config("branch.$local_branch.pushremote");
        if (defined($remote)) {
            fail("Remote '$remote' configured in branch.$local_branch.pushremote is invalid.\n")
                if (!defined($$remotes{$remote}));
            return;
        }
    }
    # Otherwise, if a global push remote is configured, use that one.
    $remote = git_config("remote.pushdefault");
    if (defined($remote)) {
        fail("Remote '$remote' configured in remote.pushdefault is invalid.\n")
            if (!defined($$remotes{$remote}));
        return;
    }
    # Otherwise fall back to the upstream remote.
    $remote = $upstream_remote;
}

sub setup_remotes($)
{
    my ($source) = @_;

    my %remotes;
    for my $ky (keys %gitconfig) {
        if ($ky =~ /^remote\.(.*)\.url$/) {
            $remotes{$1} = 1;
        }
    }
    fail("No remotes configured.\n")
        if (!%remotes);
    if (defined($local_branch)) {
        $upstream_remote = git_config("branch.$local_branch.remote");
        fail("$source has invalid upstream remote '$upstream_remote'.\n")
            if (defined($upstream_remote) && !defined($remotes{$upstream_remote}));
    }
    if (!defined($upstream_remote)) {
        $upstream_remote = git_config('gpush.upstream');
        if (defined($upstream_remote)) {
            fail("Remote '$upstream_remote' configured in gpush.upstream is invalid.\n")
                if (!defined($remotes{$upstream_remote}));
        } else {
            $upstream_remote = 'origin';
            if (!defined($remotes{$upstream_remote})) {
                my @remotes_arr = keys %remotes;
                fail("$source has no upstream remote, and cannot guess one.\n")
                    if (@remotes_arr != 1);
                $upstream_remote = $remotes_arr[0];
            }
        }
        wout("Notice: $source has no upstream remote; defaulting to $upstream_remote.\n")
            if (!$quiet);
    }
    if (defined($remote)) {
        fail("Specified Gerrit remote '$remote' is invalid.\n")
            if (!defined($remotes{$remote}));
    } else {
        find_gerrit_remote(\%remotes);
    }
    update_excludes();
}

###################
# commit metadata #
###################

our %commit_by_id;

sub init_commit($)
{
    my ($commit) = @_;

    my $id = $$commit{id};
    # Duplicating is bad, because it would discard members which are not
    # necessarily re-instantiated. Also, it's an indicator of inefficiency.
    die("Commit $id already instantiated")
        if ($commit_by_id{$id});
    $commit_by_id{$id} = $commit;
}

sub changes_from_commits($)
{
    my ($commits) = @_;

    return [ map { $$_{change} } @$commits ];
}

sub get_1st_commit($)
{
    my ($parents) = @_;

    return @$parents ? $$parents[0] : 'ROOT';
}

sub get_1st_parent($)
{
    my ($commit) = @_;

    my $parents = $$commit{parents};
    return @$parents ? $$parents[0] : 'ROOT';
}

sub get_1st_parent_tree($)
{
    my ($commit) = @_;

    # Upstream commits are not visited, so we get no tree id. However,
    # as this code aims at series which were not rebased, using the base
    # commit itself will work just as well for the series' first commit.
    my $parents = $$commit{parents};
    my $parent_id = @$parents ? $$parents[0] : 'ROOT';
    my $parent = $commit_by_id{$parent_id};
    return $parent ? $$parent{tree} : $parent_id;
}

sub get_more_parents($)
{
    my ($commit) = @_;

    my $parents = $$commit{parents};
    return @$parents > 1 ? "@$parents[1..$#$parents]" : "";
}

# Enumerate commits from the specified tip down to the merge base
# with upstream. Merges are handled in --first-parent mode.
sub get_commits_free($)
{
    my ($tip) = @_;

    my @commits;
    while (1) {
        my $commit = $commit_by_id{$tip};
        last if (!$commit);
        unshift @commits, $commit;
        $tip = get_1st_parent($commit);
    }
    return \@commits;
}

# Enumerate the specified number of commits from the specified tip.
# Merges are treated in --first-parent mode.
sub get_commits_count($$$)
{
    my ($tip, $count, $tip_raw) = @_;

    my @commits;
    for (1 .. $count) {
        my $commit = $commit_by_id{$tip};
        wfail("Range $tip_raw:$count extends beyond the local branch.\n")
            if (!$commit);
        unshift @commits, $commit;
        $tip = get_1st_parent($commit);
    }
    return \@commits;
}

# Enumerate commits from the specified tip down to the specified base.
# Merges are treated in --first-parent mode.
sub get_commits_base($$$$)
{
    my ($base, $tip, $base_raw, $tip_raw) = @_;

    # An empty base is understood to mean the merge base with upstream.
    # This avoids the need to figure out the actual commit, which is
    # particularly useful in the presence of multiple bases due to merges.
    return get_commits_free($tip) if (!length($base));

    my @commits;
    while ($tip ne $base) {
        my $commit = $commit_by_id{$tip};
        wfail("$base_raw is not an ancestor of $tip_raw within the local branch.\n")
            if (!$commit);
        unshift @commits, $commit;
        $tip = get_1st_parent($commit);
    }
    return \@commits;
}

########################
# gerrit query results #
########################

use constant {
    RVRTYPE_NONE => 0,
    RVRTYPE_REV => 1,
    RVRTYPE_CC => 2,
    RVRTYPE_ANY => 3  # Because Gerrit won't tell via SSH.
};

our %gerrit_info_by_key;
our %gerrit_info_by_sha1;
our %gerrit_infos_by_id;

##################
# state handling #
##################

# This is built upon Change objects with these attributes:
# - key: Sequence number. This runs independently from Gerrit, so
#   we can enumerate Changes which were never pushed, and to make
#   it possible to re-associate local Changes with remote ones.
# - grp: Group (series) sequence number.
# - id: Gerrit Change-Id.
# - src: Local branch name, or "-" if Change is on a detached HEAD.
# - tgt: Target branch name.
# - topic: Gerrit topic. Persisted only as a cache.
# - pushed: SHA1 of the commit this Change was pushed as last time
#   from this repository.
# - base: SHA1 of commit on top of which the entire series which this
#   Change is part of was pushed.
# - orig: SHA1 of the _local_ commit 'pushed' was derived from.
# - nbase/ntgt/ntopic: Non-committed values of the respective
#   attributes, used by --group mode.
# - exclude: Flag indicating whether the Change is excluded from
#   push --all mode.
# - hide: Flag indicating whether the Change is excluded from all
#   pushes.

my $next_key = 10000;
# All known Gerrit Changes for the current repository.
our %change_by_key;  # { sequence-number => change-object }
# Same, indexed by Gerrit Change-Id. A Change can exist on multiple branches.
our %changes_by_id;  # { gerrit-id => [ change-object, ... ] }

our $next_group = 10000;

our $last_gc = 0;

my $state_lines;
my $state_updater = ($0 =~ s,^.*/,,r)." @ARGV";

# Perform a batch update of refs.
sub update_refs($$)
{
    my ($flags, $updates) = @_;

    if (!@$updates) {
        print "No refs to update.\n" if ($debug);
        return;
    }
    my $pipe = open_process(USE_STDIN | FWD_STDERR | $flags, "git", "update-ref", "--stdin");
    write_process($pipe, @$updates);
    close_process($pipe);
}

sub _commit_state($$)
{
    my ($blob, $new) = @_;

    run_process(0, 'git', 'update-index', '--add', '--cacheinfo', "100644,$blob,state");
    my $tree = read_cmd_line(0, 'git', 'write-tree');
    my $sha1 = read_cmd_line(0, 'git', 'commit-tree', '-m', 'Saving state', $tree);
    if ($new) {
        run_process(0, 'git', 'update-ref', 'refs/gpush/state-new', $sha1);
        return;
    }
    run_process(0, 'git', 'update-ref', '-m', $state_updater,
                   '--create-reflog', 'refs/gpush/state', $sha1);
}

sub save_state(;$$)
{
    my ($dry, $new) = @_;

    print "Saving ".($new ? "new " : "")."state".($dry ? " [DRY]" : "")." ...\n" if ($debug);
    my (@lines, @updates);
    my @fkeys = ('key', 'grp', 'id', 'src', 'tgt', 'topic', 'base',
                 'ntgt', 'ntopic', 'nbase', 'exclude', 'hide');
    my @rkeys = ('pushed', 'orig');
    if ($new) {
        push @lines, "verify $new", "updater $state_updater";
        push @fkeys, @rkeys;
        @rkeys = ();
    }
    push @lines,
        "next_key $next_key",
        "next_group $next_group",
        "last_gc $last_gc",
        "";
    foreach my $key (sort keys %change_by_key) {
        my $change = $change_by_key{$key};
        my $garbage = $$change{garbage};
        foreach my $ky (@rkeys) {
            my ($val, $oval) = ($garbage ? undef : $$change{$ky}, $$change{'_'.$ky});
            if (!defined($val)) {
                push @updates, "delete refs/gpush/i${key}_$ky\n"
                    if (defined($oval));
            } else {
                push @updates, "update refs/gpush/i${key}_$ky $val\n"
                    if (!defined($oval) || ($oval ne $val));
            }
            $$change{'_'.$ky} = $val;
        }
        next if ($garbage);
        foreach my $ky (@fkeys) {
            my $val = $$change{$ky};
            if (defined($val)) {
                # Support empty values without making the file look funny.
                # We assume that no property ever contains a literal "".
                $val = '""' if (!length($val));
                push @lines, "$ky $val";
            }
        }
        push @lines, "";
    }
    foreach my $ginfo (values %gerrit_info_by_key) {
        my $fetched = $$ginfo{fetched};
        next if (!defined($fetched));
        my $_fetched = \%{$$ginfo{_fetched}};
        for my $ky (keys %$fetched) {
            my ($val, $oval) = ($$fetched{$ky}, $$_fetched{$ky});
            if (!defined($val)) {
                push @updates, "delete refs/gpush/g$$ginfo{key}_$ky\n"
                    if (defined($oval));
            } else {
                push @updates, "update refs/gpush/g$$ginfo{key}_$ky $val\n"
                    if (!defined($oval) || ($oval ne $val));
            }
            $$_fetched{$ky} = $val;
        }
    }
    update_refs($dry ? DRY_RUN : 0, \@updates);

    # We save the state file in a git ref as well, so the entire state
    # can be synced between hosts with git operations.
    if ($new || ("@lines" ne "@$state_lines")) {
        my $sts = open_process(USE_STDIN | SILENT_STDIN | USE_STDOUT | FWD_STDERR,
                               'git', 'hash-object', '-w', '--stdin');
        write_process($sts, map { "$_\n" } @lines);
        my $blob = read_process($sts);
        close_process($sts);
        with_local_git_index(\&_commit_state, $blob, $new) if (!$dry);
        $state_lines = \@lines;
    } else {
        print "State file unmodified.\n" if ($debug);
    }
}

# Constructor for the Change object.
sub _init_change($$)
{
    my ($change, $changeid) = @_;

    print "Creating Change $next_key ($changeid).\n" if ($debug);
    $$change{key} = $next_key;
    $$change{id} = $changeid;
    push @{$changes_by_id{$changeid}}, $change;
    $change_by_key{$next_key} = $change;
    $next_key++;
}

sub load_state_file(;$)
{
    my ($new) = @_;

    my $ref = $new ? "state-new" : "state";
    my $sts = open_process(SOFT_FAIL | USE_STDOUT | NUL_STDERR,
                           'git', 'cat-file', '-p', "refs/gpush/$ref:state");
    $state_lines = read_process_all($sts);
    close_process($sts);
    return if (!@$state_lines);

    my $state_verify;
    my $line = 0;
    my $inhdr = 1;
    my $change;
    my @changes;
    foreach (@$state_lines) {
        $line++;
        if (!length($_)) {
            $inhdr = 0;
            $change = undef;
        } elsif (!/^(\w+) (.*)/) {
            fail("Bad state file: Malformed entry at line $line.\n");
        } elsif ($inhdr) {
            if ($1 eq "next_key") {
                $next_key = int($2);
            } elsif ($1 eq "next_group") {
                $next_group = int($2);
            } elsif ($1 eq "last_gc") {
                $last_gc = int($2);
            } elsif ($new && ($1 eq "verify")) {
                $state_verify = $2;
            } elsif ($new && ($1 eq "updater")) {
                $state_updater = $2;
            } else {
                fail("Bad state file: Unknown header keyword '$1' at line $line.\n");
            }
        } else {
            if (!$change) {
                $change = {};
                $$change{line} = $line;
                push @changes, $change;
            }
            $$change{$1} = ($2 eq '""') ? "" : $2;
        }
    }

    foreach my $change (@changes) {
        my $line = $$change{line};
        my ($key, $id) = ($$change{key}, $$change{id});
        fail("Bad state file: Change with no key at line $line.\n") if (!$key);
        fail("Bad state file: Change with no id at line $line.\n") if (!$id);
        fail("Bad state file: Change with duplicate id at line $line.\n")
            if (defined($change_by_key{$key}));
        $change_by_key{$key} = $change;
        push @{$changes_by_id{$id}}, $change;
    }

    return $state_verify;
}

sub load_refs(@)
{
    my (@refs) = @_;

    my @updates;
    my $info = open_cmd_pipe(0, 'git', 'for-each-ref', '--format=%(objectname) %(refname)', @refs);
    while (read_process($info)) {
        if (m,^(.{40}) refs/heads/(.*)$,) {
            $local_refs{$2} = $1;
        } elsif (m,^(.{40}) refs/remotes/([^/]+)/(.*)$,) {
            $remote_refs{$2}{$3} = $1;
        } elsif (m,^(.{40}) refs/gpush/i(\d+)_(.*)$,) {
            my $change = $change_by_key{$2};
            if (!$change) {
                my $ref = substr($_, 41);
                werr("Warning: Unrecognized Change key in state ref $ref - dropping.\n");
                # It would cause trouble once the key is re-used.
                push @updates, "delete $ref\n";
                next;
            }
            $$change{$3} //= $1;  # Don't overwrite value from new file.
            $$change{'_'.$3} = $1;
        } elsif (m,^(.{40}) refs/gpush/g(\d+)_(\d+)$,) {
            my $ginfo = \%{$gerrit_info_by_key{$2}};
            $$ginfo{key} = $2;
            $$ginfo{fetched}{$3} = $1;
            $$ginfo{_fetched}{$3} = $1;
        }
    }
    close_process($info);
    update_refs(0, \@updates);
}

sub load_state($)
{
    my ($all) = @_;

    print "Loading state ...\n" if ($debug);
    load_state_file();
    load_refs($all ? "refs/gpush/" : "refs/gpush/i*", "refs/heads/", "refs/remotes/");
}

##########################
# commit metadata output #
##########################

# Don't let lists get arbitrarily wide, as this makes them hard to follow.
use constant _LIST_WIDTH => 120;
# The _minimal_ width assumed for annotations, even if absent. This
# ensures that annotations always "stick out" on the right, even if only
# short subjects have annotations.
use constant _ANNOTATION_WIDTH => 6;
# Elide over-long Change subjects, as they add nothing but noise.
use constant _SUBJECT_WIDTH => 70;
# Truncation width for Change-Ids and SHA1s; empirically determined to be
# "sufficiently unambiguous".
use constant _ID_WIDTH => 10;

sub format_id($)
{
    my ($id) = @_;

    return substr($id, 0, _ID_WIDTH);
}

sub format_subject($$;$)
{
    my ($id, $subject, $max) = @_;

    $max = _SUBJECT_WIDTH if (!defined($max));
    $max += $tty_width if ($max < 0);
    $max -= _ID_WIDTH + 3 if (defined($id));
    $max = max(25, min(_SUBJECT_WIDTH, $max)) - 5;
    # Right-elide if subject is longer than $max + ellipsis:
    $subject =~ s/^(.{$max}).{6,}$/$1\[...]/;
    return $subject if (!defined($id));
    return format_id($id)." ($subject)";
}

sub format_commit($;$)
{
    my ($commit, $max) = @_;
    return format_subject($$commit{changeid}, $$commit{subject}, $max);
}

sub format_commits($;$)
{
    my ($commits, $prefix) = @_;

    $prefix = "  " if (!defined($prefix));
    my $output = "";
    foreach my $commit (@$commits) {
        $output .= $prefix.format_commit($commit, -length($prefix))."\n";
    }
    return $output;
}

sub _unpack_report($@)
{
    my $report = shift @_;
    my @a = ($$report{id}, $$report{subject},
             $$report{prefix} // "", $$report{suffix} // "", $$report{annotation} // "");
    push @a, length($a[2]) + length($a[3]) + max(length($a[4]), _ANNOTATION_WIDTH);
    for (@_) { $_ = shift @a; }
}

sub format_reports($)
{
    my ($reports) = @_;

    my $width = 0;
    foreach my $report (@$reports) {
        next if ($$report{type} ne "change");
        _unpack_report($report, my ($id, $subject, $pfx_, $sfx_, $ann_, $fixlen));
        my $w = length($subject);
        $w += _ID_WIDTH + 3 if (defined($id));
        $width = max($width, min($w, _SUBJECT_WIDTH) + $fixlen);
    }
    $width = min($width, $tty_width, _LIST_WIDTH);
    my $output = "";
    foreach my $report (@$reports) {
        my $type = $$report{type} // "";
        if ($type eq "flowed") {
            $output .= wrap("", "", $_)."\n" foreach (@{$$report{texts}});
        } elsif ($type eq "fixed") {
            $output .= join("", @{$$report{texts}});
        } elsif ($type eq "change") {
            _unpack_report($report, my ($id, $subject, $prefix, $suffix, $annot, $fixlen));
            my $w = $width - $fixlen;
            my $str = format_subject($id, $subject, min($w, _SUBJECT_WIDTH));
            my $spacing = length($annot) ? (" " x max($w - length($str), 0)) : "";
            $output .= $prefix.$str.$suffix.$spacing.$annot."\n";
        } else {
            die("Unknown report type '$type'.\n");
        }
    }
    return $output;
}

sub fail_formatted($)
{
    my ($reports) = @_;

    fail(format_reports($reports));
}

sub report_text($$@)
{
    my ($reports, $type, @texts) = @_;

    push @$reports, {
        type => $type,
        texts => \@texts
    };
}

sub report_flowed($@)
{
    my ($reports, @texts) = @_;

    push @$reports, {
        type => "flowed",
        texts => \@texts
    };
}

sub report_fixed($@)
{
    my ($reports, @texts) = @_;

    push @$reports, {
        type => "fixed",
        texts => \@texts
    };
}

sub set_change_error($$$)
{
    my ($change, $style, $error) = @_;

    ($$change{error_style}, $$change{error}) = ($style, $error);
}

sub report_local_changes($$;$$)
{
    my ($reports, $changes, $prefix, $suffix) = @_;

    foreach my $change (@$changes) {
        my $commit = $$change{local};
        push @$reports, {
            type => "change",
            id => $$commit{changeid},
            subject => $$commit{subject},
            prefix => $prefix // "  ",
            suffix => $suffix,
            annotation => $$change{annotation}
        };
        my $error = $$change{error};
        if (defined($error)) {
            $error = (" " x (3 + _ID_WIDTH)).$error."\n"
                if (($$change{error_style} // 'oneline') eq 'oneline');
            report_fixed($reports, $error);
        }
    }
}

#############################
# commit metadata retrieval #
#############################

use constant _GIT_LOG_ARGS =>
        ('-z', '--pretty=%H%x00%P%x00%T%x00%B%x00%an%x00%ae%x00%ad%x00%cn%x00%ce%x00%cd');

# Retrieve metadata for commits reachable from the specified tips.
sub visit_commits_raw($$;$)
{
    my ($tips, $args, $cid_opt) = @_;

    return if (!@$tips);

    my @commits;
    my $log = open_process(USE_STDIN | USE_STDOUT | FWD_STDERR,
                           'git', 'log', _GIT_LOG_ARGS, @$args, '--stdin');
    write_process($log, map { "$_\n" } @$tips);
    while (1) {
        my @author = (undef, undef, undef);
        my @committer = (undef, undef, undef);
        last if (!read_fields($log, my ($id, $parents, $tree, $message), @author, @committer));

        # We truncate the subject anyway, so using just the first line is OK.
        $message =~ /^(.*)$/m;
        my $subject = $1;

        my @cids = ($message =~ /^Change-Id: (.+)$/mg);
        my $changeid;
        if (!@cids) {
            fail(format_subject($id, $subject, -18)." has no Change-Id.\n")
                if (!$cid_opt);
        } else {
            # Gerrit uses the last Change-Id if multiple are present.
            $changeid = $cids[-1];
        }

        print "-- $id: ".format_subject($changeid, $subject, -45)."\n" if ($debug);

        unshift @commits, {
            id => $id,
            parents => [ split(/ /, $parents) ],
            changeid => $changeid,
            subject => $subject,
            message => $message,
            tree => $tree,
            author => \@author,
            committer => \@committer
        };
    }
    close_process($log);
    return \@commits;
}

# Note: The return value is reliable only for the first call.
sub visit_commits($$;$)
{
    my ($tips, $args, $cid_opt) = @_;

    my @revs = grep {
        if ($commit_by_id{$_}) {
            print "Already visited $_.\n" if ($debug);
            0;
        } else {
            1;
        }
    } @$tips;
    return if (!@revs);

    # The grep above excludes tips which match previous tips or their
    # ancestors. A tip that is a descendant of a previous tip still
    # needs to be visited, but the traversal needs to be cut off, so
    # pass all previous tips as exclusions to git.
    state %visited;
    my @excl = map { "^$_" } keys %visited;
    $visited{$_} = 1 foreach (@revs);
    push @revs, @excl;

    my $commits = visit_commits_raw(\@revs, $args, $cid_opt);
    foreach my $commit (@$commits) {
        init_commit($commit);
        # This excludes tips that were in fact ancestors of other tips,
        # thereby cutting down the noise in the exclusion list.
        delete $visited{$_} foreach (@{$$commit{parents}});
    }
    return $commits;
}

sub visit_local_commits($;$)
{
    my ($tips, $cid_opt) = @_;

    # We exclude all upstream heads instead of only that of the current branch,
    # because Gerrit will ignore all known commits (reviewed or not).
    # This matters for cross-branch pushes and re-targeted Changes, where a commit
    # can have known ancestors which aren't on its upstream branch.
    # When traversing PatchSets from Gerrit, we need to exclude the heads from the
    # gerrit remote as well, as they may be actually ahead of our merge-base with
    # upstream.
    return visit_commits($tips, \@upstream_excludes, $cid_opt);
}

my $analyzed_local_branch = 0;
# SHA1 of the local branch's merge base with upstream.
our $local_base;
# SHA1 of the local branch's tip.
our $local_tip;
# Mapping of Change-Ids to commits on the local branch.
our %changeid2local;  # { change-id => SHA1 }

sub source_map_validate();
sub _source_map_prepare_local();
sub source_map_assign($$);
sub source_map_traverse();
sub _source_map_finish_initial();

sub analyze_local_branch($)
{
    my ($tip) = @_;

    $analyzed_local_branch = 1;

    # Get the revs ...
    print "Enumerating local Changes ...\n" if ($debug);
    my $raw_commits = visit_local_commits([ $tip ]);
    return 0 if (!@$raw_commits);

    # ... then sanity-check a bit ...
    my %seen;
    foreach my $commit (@$raw_commits) {
        my $subject = $$commit{subject};
        fail("Commit on ".($local_branch // "<detached HEAD>")." was meant to be squashed:\n  "
                .format_subject($$commit{id}, $subject, -2)."\n")
            if ($subject =~ /^(squash|fixup)! /);
        my $changeid = $$commit{changeid};
        my $excommit = $seen{$changeid};
        fail("Duplicate Change-Id $changeid on ".($local_branch // "<detached HEAD>").":\n  "
                .format_subject($$excommit{id}, $$excommit{subject}, -2)."\n  "
                .format_subject($$commit{id}, $subject, -2)."\n")
            if (defined($excommit));
        $seen{$changeid} = $commit;
    }

    # Iff we have a detached HEAD, we don't know the tip yet, because
    # we resolve only named branches. In other cases, this is a no-op.
    $local_tip = $$raw_commits[-1]{id};

    my $commits = get_commits_free($local_tip);

    $local_base = $$commits[0]{parents}[0];

    # This needs to happen early, for parse_local_rev(), which
    # _source_map_prepare_local() calls.
    $changeid2local{$$_{changeid}} = $$_{id} foreach (@$commits);

    # ... then map them to Change objects ...
    _source_map_prepare_local();
    while (1) {
        foreach my $commit (@$commits) {
            source_map_assign($commit, undef);
        }
        last if (!source_map_traverse());
    }
    _source_map_finish_initial();

    # ... and then add them to the set of local Changes.
    my $idx = 0;
    my $prev;
    foreach my $commit (@$commits) {
        my $change = $$commit{change};
        $$change{local} = $commit;
        $$change{index} = $idx++;
        $$change{parent} = $prev;
        $$prev{child} = $change if ($prev);
        $prev = $change;
    }

    return 1;
}

##### ... and also for upstream commits.

# The requirements are quite different than for local commits:
# - we need only the Change-Id as meta data
# - we can just discard Changes without Id
# - we don't want to pollute the %commit_by_id namespace, as doing that
#   would violate the assumption that upstream commits are not visited

use constant _GIT_UPSTREAM_LOG_ARGS => ('-z', '--pretty=%H%x00%B');

sub visit_upstream_commits($$)
{
    my ($tips, $bases) = @_;

    my $base;
    if (@$bases == 1) {
        $base = $$bases[0];
    } else {
        # If we have multiple bases, we need to find the oldest one.
        $base = read_cmd_line(0, 'git', 'merge-base', '--octopus', @$bases);
    }

    my %changeids;
    my $log = open_process(USE_STDIN | USE_STDOUT | FWD_STDERR,
                           'git', 'log', _GIT_UPSTREAM_LOG_ARGS, @$tips, "^$base");
    while (read_fields($log, my ($commit, $message))) {
        my @ids = ($message =~ /^Change-Id: (.+)$/mg);
        $changeids{$ids[-1]} = 1 if (@ids);
        # We don't print debug messages here, as this would completely
        # flood the log.
    }
    close_process($log);
    print "Found ".int(keys %changeids)." Changes.\n" if ($debug);
    return \%changeids;
}

#####################
# change resolution #
#####################

# Resolve a possibly abbreviated Change-Id referring to a commit in
# the local branch.
# NB, the most likely source of abbreviated Ids is the gpush output.
sub _sha1_for_partial_local_id($)
{
    my ($id) = @_;

    state %changeid2local_cache;
    return $changeid2local_cache{$id} if (exists($changeid2local_cache{$id}));

    my $sha1 = $changeid2local{$id};
    if (!defined($sha1)) {
        my @sha1s;
        my $rx = qr/^\Q$id\E/;
        foreach my $changeid (keys %changeid2local) {
            push @sha1s, $changeid2local{$changeid} if ($changeid =~ $rx);
        }
        if (@sha1s) {
            fail("$id is ambiguous.\n") if (@sha1s != 1);
            $sha1 = $sha1s[0];
        }
    }

    $changeid2local_cache{$id} = $sha1;
    return $sha1;
}

use constant {
    SPEC_BASE => 0,
    SPEC_TIP => 1,
    SPEC_PARENT => 2
};

sub _finalize_parse_local_rev($$$)
{
    my ($rev, $out, $scope) = @_;

    # There is no point in restricting the base here - subsequent
    # use will find out soon enough if it is not reachable.
    return $out if ($scope == SPEC_BASE);
    return $out if (!$analyzed_local_branch || $commit_by_id{$out});
    fail("$rev is outside the local branch.\n");
}

sub _parse_local_rev_sym($$)
{
    my ($rev, $scope) = @_;

    my $out;
    if ($rev eq 'HEAD') {
        fail("HEAD is not valid for base revspecs.\n") if ($scope == SPEC_BASE);
        $out = $local_tip;
    } elsif ($rev eq 'ROOT') {
        fail("ROOT is not valid for tip revspecs.\n") if ($scope == SPEC_TIP);
        $out = $rev;
    } elsif (($rev eq '@{u}') || ($rev eq '@{upstream}')) {
        fail("\@{upstream} is not valid for tip revspecs.\n") if ($scope == SPEC_TIP);
        $out = "";
    } else {
        return (undef, undef);
    }
    return (_finalize_parse_local_rev($rev, $out, $scope), 1);
}

sub _parse_local_rev_id_only($$)
{
    my ($rev, $scope) = @_;

    fail("$rev is not a valid revspec.\n") if ($rev !~ /^(\w+)(.*)$/);

    # This looks like a valid revspec, but git failed to parse it.
    # Try to parse it as a Change-Id instead.
    my ($id, $rest) = ($1, $2);
    my $sha1 = _sha1_for_partial_local_id($id);
    wfail("$rev does not refer to a Change on the local branch.\n") if (!$sha1);
    return $sha1 if (!$rest);

    my $out = read_cmd_line(SOFT_FAIL, 'git', 'rev-parse', '--verify', '-q', $sha1.$rest);
    fail("$rev is not a valid revspec.\n") if (!$out);
    return _finalize_parse_local_rev($rev, $out, $scope);
}

sub parse_local_rev_id($$)
{
    my ($rev, $scope) = @_;

    my ($sout, $done) = _parse_local_rev_sym($rev, $scope);
    return $sout if ($done);
    return _parse_local_rev_id_only($rev, $scope);
}

# Parse a revision specification referring to a commit in the local branch.
# This will return undef for @{u}, HEAD, and Change-Ids if the local branch
# was not visited yet; call parse_local_rev_id() after doing so.
sub parse_local_rev($$)
{
    my ($rev, $scope) = @_;

    $rev = "HEAD".$rev if ($rev =~ /^[~^]/);
    my ($sout, $done) = _parse_local_rev_sym($rev, $scope);
    return $sout if ($done);
    my $out = read_cmd_line(SOFT_FAIL, 'git', 'rev-parse', '--verify', '-q', $rev);
    return _finalize_parse_local_rev($rev, $out, $scope) if ($out);
    return undef if (!$analyzed_local_branch);  # Come back later!
    return _parse_local_rev_id_only($rev, $scope);
}

################
# smart series #
################

sub assign_series($)
{
    my ($changes) = @_;

    my $gid = $next_group++;
    $$_{grp} = $gid foreach (@$changes);
}

# Deduce a series from a single commit.
# Merges are treated in --first-parent mode.
sub do_determine_series($;$$$)
{
    my ($change, $extend, $capture, $forward_only) = @_;

    print "Deducing series from $$change{id}\n" if ($debug);
    my (@prospects, @changes);
    my $group_key;
    my $rchange = $change;
    while (1) {
        my $gid = $$change{grp};
        if (!defined($gid)) {
            if ($capture && @changes) {
                print "Capturing loose $$change{id}\n" if ($debug);
                unshift @changes, $change;
            } else {
                print "Prospectively capturing loose $$change{id}\n" if ($debug);
                unshift @prospects, $change;
            }
        } else {
            if (@changes) {
                # We already have a proto-series.
                # Check whether the new candidate is part of it.
                if ($gid != $group_key) {
                    # Miss; end of series.
                    print "Breaking off at foreign bound $$change{id}\n" if ($debug);
                    last;
                }
                # Hit; add the Change to the series.
                print "Adding bound $$change{id} and ".int(@prospects)." prospect(s)\n"
                    if ($debug);
            } elsif (!@prospects || $extend) {
                # The specified tip Change is bound, or we are extending
                # and the proto-series so far consists of only loose Changes.
                print "Adding bound $$change{id} and ".int(@prospects)." prospect(s) at tip\n"
                    if ($debug);
                # This Change determines the series.
                $group_key = $gid;
            } else {
                # Stop when encountering a bound Change after only loose ones
                # (unless extending).
                print "Breaking off at bound $$change{id} after only loose\n" if ($debug);
                last;
            }
            unshift @changes, $change, @prospects;
            @prospects = ();
        }
        $change = $$change{parent};
        last if (!$change);
    }
    # We also do reverse traversal, so one can re-order the series including
    # the last Change, but continue to use the same push command from history.
    # But we don't reverse-traverse if the specified Change is loose (and we're
    # not extending), based on the assumption that it was meant to be the tip
    # - otherwise, the semantics get really unintuitive.
    return (\@prospects, undef, $change, []) if (!defined($group_key));
    return (\@changes, $group_key, @prospects ? $prospects[-1] : $change, [])
        if ($forward_only);
    my @rprospects;
    while (1) {
        $rchange = $$rchange{child};
        last if (!defined($rchange));
        my $gid = $$rchange{grp};
        if (!defined($gid)) {
            # We don't automatically capture loose Changes on top of the series,
            # again to keep the semantics sane.
            print "Prospectively capturing loose $$rchange{id} (reverse)\n" if ($debug);
            push @rprospects, $rchange;
        } else {
            if ($gid != $group_key) {
                print "Breaking off at foreign bound $$rchange{id} (reverse)\n" if ($debug);
                last;
            }
            print "Adding bound $$rchange{id} and ".int(@rprospects)." prospect(s) (reverse)\n"
                if ($debug);
            push @changes, @rprospects, $rchange;
            @rprospects = ();
        }
    }
    return (\@changes, $group_key, undef, \@prospects);
}

###################
# commit creation #
###################

# Cache of diffs. Global, so it can be flushed from the outside.
our %commit2diff;

# Apply one commit's diff on top of another commit; return the new tree.
sub apply_diff($$$)
{
    my ($commit, $base_id, $flags) = @_;

    my $sha1 = $$commit{id};
    my $diff = $commit2diff{$sha1};
    if (!defined($diff)) {
        my $show = open_cmd_pipe(0, 'git', 'diff-tree', '--binary', '--root', "$sha1^!");
        $diff = get_process_output($show, 'stdout');
        close_process($show);
        $commit2diff{$sha1} = $diff;
    }

    # We don't know the tree when the base is an upstream commit.
    # This is just fine, as this will be the first commit in the
    # series by definition, so we need to load it anyway. The
    # subsequent apply will change it in every case, so there
    # is no chance to recycle it, either.
    state $curr_tree = "";
    my $base = $commit_by_id{$base_id};
    run_process(FWD_STDERR, 'git', 'read-tree', ($base_id eq 'ROOT') ? '--empty' : $base_id)
        if (!$base || ($curr_tree ne $$base{tree}));
    $curr_tree = "";

    my $proc = open_process(SOFT_FAIL | USE_STDIN | SILENT_STDIN | NUL_STDOUT | $flags,
                            'git', 'apply', '--cached', '-C1', '--whitespace=nowarn');
    write_process($proc, $diff);
    my $errors = get_process_output($proc, 'stderr')
        if ($flags & USE_STDERR);
    close_process($proc);
    return (undef, $errors) if ($?);
    $curr_tree = read_cmd_line(0, 'git', 'write-tree');
    return ($curr_tree, undef);
}

# Create a commit object from the specified metadata.
sub create_commit($$$$$)
{
    my ($parents, $tree, $commit_msg, $author, $committer) = @_;

    ($ENV{GIT_AUTHOR_NAME}, $ENV{GIT_AUTHOR_EMAIL}, $ENV{GIT_AUTHOR_DATE}) = @$author;
    ($ENV{GIT_COMMITTER_NAME}, $ENV{GIT_COMMITTER_EMAIL}, $ENV{GIT_COMMITTER_DATE}) = @$committer;
    my @pargs = map { ('-p', $_) } @$parents;
    my $proc = open_process(USE_STDIN | SILENT_STDIN | USE_STDOUT | FWD_STDERR,
                            'git', 'commit-tree', $tree, @pargs);
    write_process($proc, $commit_msg);
    my $sha1 = read_process($proc);
    close_process($proc);

    my $commit = {
        id => $sha1,
        parents => $parents,
        tree => $tree
    };
    init_commit($commit);
    return $commit;
}

###################
# branch tracking #
###################

# This tracks the local branch each Change lives on, following cherry-picks
# and purges as much as possible.

# Format a single branch name for display.
sub _format_branch($)
{
    my ($branch) = @_;

    return ($branch ne '-') ? "'$branch'" : '<detached HEAD>';
}

# Format the source branch names from an array of Changes,
# using Oxford English grammar.
sub _format_branches_raw($)
{
    my ($changes) = @_;

    my @branches = sort map { _format_branch($$_{src}) } @$changes;
    my $str = "$branches[0]";
    if (@branches > 1) {
        if (@branches > 2) {
            $str .= ", $branches[$_]" for (1 .. @branches - 2);
            $str .= ",";
        }
        $str .= " and $branches[-1]";
    }
    return $str;
}

# As above, but with quantifiers to signify that we mean all branches.
sub _format_branches(@)
{
    my ($changes) = @_;

    my $str = _format_branches_raw($changes);
    return $str if (@$changes == 1);
    return "both $str" if (@$changes == 2);
    return "all of $str";
}

# Format the message about the outcome of a Change assignment attempt.
sub _format_result($$$@)
{
    my ($commit, $new, $fmt, @args) = @_;

    my $lbr = $local_branch // "-";
    return sprintf("%s\n    %son %s $fmt.\n", format_commit($commit),
                   $new ? "newly " : "", _format_branch($lbr), @args);
}

use constant {
    _SRC_NOOP => 0,
    _SRC_MOVE => 1,
    _SRC_COPY => 2,
    _SRC_HIDE => 3
};

my @sm_options;
my $sm_option_new;
my %sm_option_by_id;
my %sm_wanted;
my %sm_present;
my $sm_printed = 0;
my $sm_changed = 0;
my $sm_failed = 0;

# Parse a single command line option relating to source branch tracking.
# $arg is the currently processed argument, while $args are the remaining
# arguments on the command line. If $rmt_ok is true, plus-prefixed remote
# specifications are accepted as well; note that these are not removed
# from the command line, unlike local ones.
sub parse_source_option($$\@)
{
    my ($arg, $rmt_ok, $args) = @_;

    my $action;
    if ($arg eq "--move") {
        $action = _SRC_MOVE;
    } elsif ($arg eq "--copy") {
        $action = _SRC_COPY;
    } elsif ($arg eq "--hide") {
        $action = _SRC_HIDE;
    } else {
        return undef;
    }
    fail("$arg needs an argument.\n")
        if (!@$args || ($$args[0] =~ /^-/));
    my $orig = shift @$args;
    my $tip = $orig;
    my ($base, $count);
    my $branch = $1 if ($tip =~ s,/(.*)$,,);
    my $rmt_id;
    if ($rmt_ok && $tip =~ /^\+(\w+)/) {
        $rmt_id = $1;
        unshift @$args, $tip;
    } else {
        $base = $1 if ($tip =~ s,^(.*)\.\.,,);
        $count = $1 if ($tip =~ s,:(.*)$,,);
        $tip = undef if ($tip eq "new");
        fail("Specifying a commit count and a range base are mutually exclusive.\n")
            if (defined($base) && defined($count));
        if (defined($base) || defined($count)) {
            wfail("Specifying a commit count or range base is incompatible with range 'new'.\n")
                if (!defined($tip));
        } else {
            wfail("Automatic ranges are not supported with $arg."
                    ." Use $tip:1 if you actually meant a single Change.\n")
                if (defined($tip));
        }
    }
    fail("$arg does not support specifying a source branch.\n")
        if (defined($branch) && ($action != _SRC_MOVE));
    push @sm_options, {
        action => $action,
        orig => $orig,
        rmt_id => $rmt_id,
        base => defined($base) ? length($base) ? $base : '@{u}' : undef,
        tip => defined($tip) ? length($tip) ? $tip : 'HEAD' : undef,
        count => $count,
        branch => $branch
    };
    return 1;
}

# Do final sanity checking on the source branch tracking related commands.
sub source_map_validate()
{
    my $br = $local_branch // "-";
    foreach my $option (@sm_options) {
        my $sbr = $$option{branch};
        wfail("Source and target branch are both '$br' in attempt to move Changes.\n")
            if (defined($sbr) && ($sbr eq $br));

        my $rmt_id = $$option{rmt_id};
        if (defined($rmt_id)) {
            $sm_option_by_id{$rmt_id} = $option;
            next;
        }

        my $raw_tip = $$option{tip};
        next if (defined($raw_tip));

        wfail("Only one of --move, --copy, and --hide may be specified with 'new'.\n")
            if (defined($sm_option_new));
        $sm_option_new = $option;
    }
}

# Expand the supplied ranges into series of commits, and create a reverse
# mapping of commits to command objects.
sub _source_map_prepare_local()
{
    foreach my $option (@sm_options) {
        my $raw_tip = $$option{tip};
        next if (!defined($raw_tip));

        my $commits;
        my $tip = parse_local_rev($raw_tip, SPEC_TIP);
        my $raw_base = $$option{base};
        if (defined($raw_base)) {
            my $base = parse_local_rev($raw_base, SPEC_BASE);
            $commits = get_commits_base($base, $tip, $raw_base, $raw_tip);
        } else {
            my $count = $$option{count};
            $commits = get_commits_count($tip, $count, $raw_tip);
        }
        foreach my $commit (@$commits) {
            my $sha1 = $$commit{id};
            my $old_option = $sm_option_by_id{$sha1};
            wfail("Range $$option{orig} intersects $$old_option{orig} (at $sha1).\n")
                if ($old_option);
            $sm_option_by_id{$sha1} = $option;
        }
        $$option{commits} = $commits;
    }
}

# Schedule the removal a single Change object from the state database.
sub _obliterate_change($$)
{
    my ($change, $changes) = @_;

    print "Obliterating $$change{key} ($$change{id}) from $$change{src}.\n" if ($debug);
    @$changes = grep { $_ != $change } @$changes;  # From %changes_by_id
    $$change{garbage} = 1;  # Mark it for %change_by_key traversal
}

# Visit the requested non-current local branches with the purpose of
# determining which Change objects still correspond with actual commits.
sub source_map_traverse()
{
    # It would be tempting to always query all local branches in
    # advance, to save the extra git call. However, this would also
    # collect *really* old branches, and even though they are short,
    # calculating the exclusion takes hundreds of milliseconds,
    # which is slower than an extra git call even on Windows.
    # Additionally, we need to traverse down to the push base of the
    # previously pushed Changes (in case they were upstreamed already),
    # and this can also have a significant cost if the corresponding
    # local branch advanced a lot since a Change was pushed last time,
    # so it's better to do that selectively.

    return if (!%sm_wanted);

    # This loop is likely to run only once per call, so don't bother
    # coalescing the resulting visit_*() calls.
    foreach my $br (keys %sm_wanted) {
        print "Investigating other local branch $br ...\n" if ($debug);
        my %present;
        my @changes = grep { $$_{src} eq $br } values %change_by_key;
        my (@missing, $utips);
        my $tip = $local_refs{$br};
        if (defined($tip)) {
            visit_local_commits([ $tip ]);
            my $commits = get_commits_free($tip);
            print "Still present on the branch:\n".format_commits($commits) if ($debug);
            if (@$commits) {
                %present = map { $$_{changeid} => 1 } @$commits;

                # Prepare garbage collection.
                @missing = grep { !defined($present{$$_{id}}) } @changes;
                $utips = [ get_1st_parent($$commits[0]) ];
            } else {
                @missing = @changes;
                $utips = [ $tip ];
            }
        } else {
            print "Branch disappeared.\n" if ($debug);
            # The branch was deleted, so all its Changes are missing.
            @missing = @changes;
            # For the upstream check, we fall back to the branches the Changes
            # were targeting. This is worse than using the actual upstream, as
            # the target may be outdated and the Change actually went to another
            # branch. Also, it's potentially more work.
            my $urefs = $remote_refs{$upstream_remote};
            if ($urefs) {
                my %utiph = map { $_ => 1 } grep { $_ } map { $$urefs{$$_{tgt}} } @changes;
                $utips = [ keys %utiph ];
            }
        }
        $sm_present{$br} = \%present;

        # We may need to garbage-collect the branch.
        # Failure to do this would affect cherry-picks of Changes that were
        # upstreamed - neither do we want to need --copy for Changes that are
        # actually gone, nor do we want them to be detected as moves (and thus
        # inherit the now incorrect target branch).
        if (@missing) {
            # Record the push base of each Change - if a Change is actually
            # in our upstream, it will be so between its push base (which
            # cannot be younger than the branch's tip at that time) and the
            # branch's merge base with upstream.
            my %bases = map { $_ => 1 } grep { $_ } map { $$_{base} } @missing;
            if (%bases) {
                print "Visiting upstream of other local branch $br ...\n" if ($debug);
                my $urevs = visit_upstream_commits($utips, [ keys %bases ]);
                foreach my $change (@missing) {
                    my $changeid = $$change{id};
                    _obliterate_change($change, $changes_by_id{$changeid})
                        if (defined($$urevs{$changeid}));
                }
            }
        }
    }
    %sm_wanted = ();
    return 1;
}

# Determine which of the Change objects in $changes still refer to
# actual commits.
sub _find_candidate_sources($$$)
{
    my ($changes, $vanished, $persisting) = @_;

    return 0 if (!$changes);

    my $lbr = $local_branch // "-";
    my $retry = 0;
    foreach my $chg (@$changes) {
        # Hidden Changes are supposed to be skipped over, so it
        # would be backwards to use them as sources for moves.
        next if ($$chg{hide});

        my $obr = $$chg{src};
        # The current branch is obviously not a sensible source.
        next if ($obr eq $lbr);

        # Note that detached HEADs "vanish" after switching to an actual
        # branch, thereby freeing all Changes assigned to them, sans the
        # garbage-collected ones.
        my $present = $sm_present{$obr};
        if (!$present) {
            # Ensure that the branch is visited and garbage-collected.
            $sm_wanted{$obr} = 1;
            $retry = 1;
        } elsif (defined($$present{$$chg{id}})) {
            print "$$chg{id} persists on $obr.\n" if ($debug);
            push @$persisting, $chg;
        } else {
            print "$$chg{id} vanished from $obr.\n" if ($debug);
            $$chg{vanished} = 1;
            push @$vanished, $chg;
        }
    }
    return $retry;
}

sub source_map_assign($$)
{
    my ($commit, $reference) = @_;

    my $change = $$commit{change};
    return $change if ($change);

    my $lbr = $local_branch // "-";

    my $changeid = $$commit{changeid};
    my $changes = $changes_by_id{$changeid};
    my %change_by_branch;
    if ($changes) {
        foreach my $chg (@$changes) {
            my $br = $$chg{src};
            $change_by_branch{$br} = $chg;
        }
    }
    $change = $change_by_branch{$lbr};
    my $new = !$change;

    my $option = $sm_option_by_id{$reference // $$commit{id}} // $sm_option_new;
    my $new_only = $option && !defined($$option{tip});
    my $action = $option ? $$option{action} : _SRC_NOOP;
    if ($action == _SRC_COPY || $action == _SRC_HIDE) {
        # Note: We don't bother finding Changes which could be recycled -
        # gpull will clean up.
        # We don't complain about no-ops, as they don't hurt.
        my $label = ($action == _SRC_HIDE) ? "hidden" : "copied";
        if ($new || !$new_only) {
            if ($new) {
                $change = {};
                _init_change($change, $changeid);
            }
            $$change{hide} = ($action == _SRC_HIDE) ? 1 : undef;
            wout(_format_result($commit, $new, "marked as $label"))
                if (!$quiet);
            goto PRINTED;
        }
        print "Already have $$change{key} ($changeid) on $lbr; not $label.\n"
            if ($debug);
        goto FOUND;
    }
    if ($new || ($action == _SRC_MOVE && !$new_only)) {
        my $schange;
        my $sbr = ($action == _SRC_MOVE) ? $$option{branch} : undef;
        if (defined($sbr)) {
            $schange = $change_by_branch{$sbr};
            if (!$schange) {
                werr("Change $changeid does not exist on '$sbr'; cannot move.\n");
                goto FAIL;
            }
            if ($$schange{hide}) {
                werr("Change $changeid is hidden on '$sbr'; cannot move.\n");
                goto FAIL;
            }
        } else {
            my (@vanished, @persisting);
            if (_find_candidate_sources($changes, \@vanished, \@persisting)) {
                print "Need to come back for $changeid.\n" if ($debug);
                return undef;
            }
            if ($new) {
                if (@vanished) {
                    if (@vanished > 1) {
                        werr(_format_result(
                                $commit, 1, "was previously on %s.\n"
                                ."  Use --move with a source, or use --copy/--hide",
                                _format_branches(\@vanished)));
                        goto FAIL;
                    }
                    $change = pop @vanished;
                    # Note: this is a post-fact notice, so if the user failed to use
                    # --copy/--hide in advance, they'll need to fix it laboriously.
                    # This seems acceptable, as moving is the much more common case.
                    wout(_format_result(
                            $commit, 1, "was previously on %s. Inferring move",
                            _format_branch($$change{src})))
                        if (!$quiet);
                    goto PRINTED;
                }
                if (!@persisting) {
                    # This is the common case: an entirely new Change.
                    $change = {};
                    _init_change($change, $changeid);
                    goto CHANGED;
                }
                if ($action != _SRC_MOVE) {
                    werr(_format_result(
                            $commit, 1, "exists also on %s.\n"
                            ."  Prune unneeded copies, or use --move/--copy/--hide",
                            _format_branches_raw(\@persisting)));
                    goto FAIL;
                }
            } else { # implies $action == _SRC_MOVE
                if (!$$change{hide}) {
                    # Attempts to move over active Changes are most likely mistakes.
                    werr(_format_result(
                            $commit, 0, "is not hidden.\n"
                            ."  Pass a source to --move if you really mean it"));
                    goto FAIL;
                }
                if (@vanished) {
                    if (@vanished > 1) {
                        werr(_format_result(
                                $commit, 0, "was previously on %s.\n"
                                ."  Pass a source to --move to disambiguate",
                                _format_branches(\@vanished)));
                        goto FAIL;
                    }
                    $schange = pop @vanished;
                } elsif (!@persisting) {
                    werr(_format_result(
                            $commit, 0, "exists on no other branch. Use --copy to unhide it"));
                    goto FAIL;
                }
            }
            if (!$schange) {
                if (@persisting > 1) {
                    werr(_format_result(
                            $commit, $new, "exists also on %s.\n"
                            ."  Prune unneeded copies, pass a source to --move,"
                                ." or use --copy/--hide",
                            _format_branches_raw(\@persisting)));
                    goto FAIL;
                }
                $schange = pop @persisting;
            }
            $sbr = $$schange{src};
        }  # !defined($sbr)
        if ($change) {
            # If the slot on the current branch is busy, we need to free it first.
            # Note that this might be a hidden entry from a previous --move/--hide.
            _obliterate_change($change, $changes);
        }
        if (!$$schange{vanished}) {
            # If the Change still does or might exist on the source branch, we
            # need to create a hidden entry for it, so it does not appear to be
            # new next time around.
            my %chg = (src => $sbr, grp => $$schange{grp}, hide => 1);
            _init_change(\%chg, $changeid);
            print "... and hidden on $sbr.\n" if ($debug);
        }
        wout(_format_result($commit, $new, "moved from %s", _format_branch($sbr)))
            if (!$quiet);
        $change = $schange;
        goto PRINTED;
    }
    print "Already have $$change{key} ($changeid) on $lbr; leaving alone.\n"
        if ($debug);
    goto FOUND;

  PRINTED:
    $sm_printed = 1 if (!$quiet);
  CHANGED:
    $$change{src} = $lbr;
    $sm_changed = 1;
  FOUND:
    $$commit{change} = $change;
    return $change;

  FAIL:
    $sm_failed = 1;
    return undef;
}

sub source_map_finish()
{
    exit(1) if ($sm_failed);
    print "\n" if ($sm_printed);  # Delimit from remaining output.
    $sm_printed = 0;
}

sub _source_map_finish_initial()
{
    foreach my $option (@sm_options) {
        # Only --copy & --hide imply grouping.
        my $action = $$option{action};
        next if ($action != _SRC_COPY && $action != _SRC_HIDE);

        my $commits = $$option{commits};
        next if (!$commits);
        assign_series(changes_from_commits($commits));
        $sm_changed = 1;
    }

    source_map_finish();
    save_state() if ($sm_changed);
}

# Update the target branches of local Changes according to the data
# from a Gerrit query.
# Each SHA1 is assigned to exactly one PatchSet, which in turn is
# assigned to exactly one Change. That way we can identify each Change
# by the last SHA1 we pushed it with, irrespective of the branch it is
# targeting. If the latter changes on Gerrit, we just follow it, as
# this is what the user usually wants.
sub _update_target_branches($)
{
    my ($ginfos) = @_;

    state %trackable_changes;
    if (!%trackable_changes) {
        # Changes on multiple local branches may have the same _pushed commit,
        # so make sure to collect only the Changes on the current branch.
        foreach my $change (values %change_by_key) {
            next if ($$change{garbage});
            next if (!$$change{local});
            my $pushed = $$change{pushed};
            $trackable_changes{$pushed} = $change if (defined($pushed));
        }
    }

    my @changed;
    my $need_save;
    foreach my $ginfo (@$ginfos) {
        foreach my $rev (@{$$ginfo{revs}}) {
            my $change = $trackable_changes{$$rev{id}};
            next if (!$change);
            my $pbr = $$change{tgt} // "";
            my $abr = $$ginfo{branch};
            next if ($pbr eq $abr);
            # Old gpush versions don't store the target branch. Make no noise about that.
            if ((length($pbr) && !$quiet) || $debug) {
                $$change{annotation} = "  [$pbr => $abr]";
                push @changed, $change;
            }
            $$change{tgt} = $abr;
            $$change{ntgt} = undef
                if (($$change{ntgt} // "") eq $abr);
            $need_save = 1;
        }
    }
    if (@changed) {
        my @reports;
        report_flowed(\@reports,
                "Notice: Adjusting ".int(@changed)." Change(s) to server-side re-targeting:");
        report_local_changes(\@reports, \@changed);
        print format_reports(\@reports)."\n";  # Delimit from remaining output.
        # The Changes may be printed again later.
        delete $$_{annotation} foreach (@changed);
    }
    save_state() if ($need_save);
}

#######################
# gerrit ssh handling #
#######################

# URL for pushing/fetching from the Gerrit instance.
our $gerrit_url;
# SSH command with arguments for connecting the Gerrit instance.
our @gerrit_ssh;
# Target repository name on the Gerrit instance.
our $gerrit_project;

# Extract Gerrit configuration from the previously determined Gerrit remote.
sub set_gerrit_config($)
{
    my ($rmt) = @_;

    my $url = git_config('remote.'.$rmt.'.pushurl');
    $url = git_config('remote.'.$rmt.'.url') if (!$url);
    fail("Remote '$rmt' does not exist.\n") if (!$url);
    $url = _rewrite_git_push_url($url);
    $gerrit_url = $url;
    if ($url =~ m,^ssh://([^/:]+)(?::(\d+))?/(.*?)(?:\.git)?/?$,) {
        push @gerrit_ssh, '-p', $2 if (defined($2));
        push @gerrit_ssh, $1;
        $gerrit_project = $3;
    } elsif ($url =~ m,^([^/:]+):([^/].*?)(?:\.git)?/?$,) {
        push @gerrit_ssh, $1;
        $gerrit_project = $2;
    } else {
        fail("Remote '$rmt' does not use a supported protocol.\n")
    }
    if (@gerrit_ssh) {
        # Precedence according to git/connect.c:fill_ssh_args()
        my $gssh = $ENV{GIT_SSH_COMMAND} // git_config('core.sshcommand');
        if (defined($gssh)) {
            # Should use shell rules here, but that seems like overkill.
            unshift @gerrit_ssh, split(/ /, $gssh);
        } else {
            unshift @gerrit_ssh, $ENV{GIT_SSH} // 'ssh';
        }
    }
}

sub query_gerrit_only($;$)
{
    my ($ids, $extra) = @_;

    my @ginfos;
    my $info = open_cmd_pipe(0, @gerrit_ssh, 'gerrit', 'query', '--format', 'JSON',
                                '--no-limit', '--patch-sets', $extra ? @$extra : (),
                                "project:$gerrit_project", '\\('.join(' OR ', @$ids).'\\)');
    while (read_process($info)) {
        my $review = decode_json($_);
        defined($review) or fail("Cannot decode JSON string '".chomp($_)."'\n");
        my ($key, $changeid) = ($$review{'number'}, $$review{'id'});
        next if (!defined($key) || !defined($changeid));
        my $ginfo = \%{$gerrit_info_by_key{$key}};
        push @{$gerrit_infos_by_id{$changeid}}, $ginfo;
        my ($subject, $status) = ($$review{'subject'}, $$review{'status'});
        defined($subject) or fail("Huh?! $changeid has no subject?\n");
        defined($status) or fail("Huh?! $changeid has no status?\n");
        my ($branch, $topic) = ($$review{'branch'}, $$review{'topic'});
        defined($branch) or fail("Huh?! $changeid has no branch?\n");
        my $pss = $$review{'patchSets'};
        defined($pss) or fail("Huh?! $changeid has no PatchSets?\n");
        my (@revs, %rev_map);
        foreach my $cps (@{$pss}) {
            my ($number, $ts, $revision, $base, $ref) =
                    ($$cps{'number'}, $$cps{'createdOn'}, $$cps{'revision'},
                     $$cps{'gpush-base'}, $$cps{'ref'});
            defined($number) or fail("Huh?! PatchSet in $changeid has no number?\n");
            defined($ts) or fail("Huh?! PatchSet $number in $changeid has no timestamp?\n");
            defined($revision) or fail("Huh?! PatchSet $number in $changeid has no commit?\n");
            defined($ref) or fail("Huh?! PatchSet $number in $changeid has no ref?\n");
            my %rev = (
                id => $revision,
                ps => $number,
                ts => int($ts),
                base => $base,
                ref => $ref
            );
            $revs[$number] = \%rev;
            $rev_map{$revision} = \%rev;
            $gerrit_info_by_sha1{$revision} = $ginfo;
        }
        $$ginfo{key} = $key;
        $$ginfo{id} = $changeid;
        $$ginfo{subject} = $subject;
        $$ginfo{status} = $status;
        $$ginfo{branch} = $branch;
        $$ginfo{topic} = $topic;
        $$ginfo{revs} = [ grep { $_ } @revs ];  # Drop deleted ones.
        $$ginfo{rev_by_id} = \%rev_map;
        my $rvrs = $$review{'allReviewers'};
        if ($rvrs) {
            # Note: This does not differentiate between reviewers and CCs.
            # Reported upstream as https://crbug.com/gerrit/11709
            my %reviewers;
            foreach my $rvr (@$rvrs) {
                foreach my $ky ('name', 'email', 'username') {
                    my $val = $$rvr{$ky};
                    $reviewers{$val} = RVRTYPE_ANY if (defined($val));
                }
            }
            $$ginfo{reviewers} = \%reviewers;
        }
        push @ginfos, $ginfo;
    }
    close_process($info);
    return \@ginfos;
}

sub query_gerrit($;$)
{
    my ($ids, $extra) = @_;

    _update_target_branches(query_gerrit_only($ids, $extra));
}

#############################
# export all public symbols #
#############################

sub import()
{
    no strict 'refs';

    my %imported = map { $_ => 1 } @_imported;
    undef @_imported;
    while (my ($name, $symbol) = each %{__PACKAGE__.'::'}) {
        next if (defined($imported{$name}));
        next if ($name =~ /^(_.*|BEGIN|END|a|b|import)$/);
        # $symbol values referring to constants are resolved, but we want to alias the
        # inline function. Other values are typeglobs which can be aliased directly.
        *{caller.'::'.$name} = !length(ref($symbol)) ? *$symbol : \&{__PACKAGE__.'::'.$name};
    }
}

1;