From fc83a907ba97fe66132bd941ca2a34a040f269c8 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 21 Aug 2013 19:04:22 +0000 Subject: git-send-email: Cache generated message-ids, use them when prompting Allow the user to specify a file (sendemail.msgidcachefile) in which to store the message-ids generated by git-send-email, along with time and subject information. When prompting for a Message-ID to be used in In-Reply-To, that file can be used to generate a list of options. When composing v2 or v3 of a patch or patch series, this avoids the need to get one's MUA to display the Message-ID of the earlier email (which is cumbersome in some MUAs) and then copy-paste that. Listing all previously sent emails is useless, so currently only the 10 most "relevant" emails are shown. "Relevant" is based on a simple scoring, which might need to be revised: Count the words in the old subject which also appear in the subject of the first email to be sent; add a bonus if the old email was first in a batch (that is, [00/74] is more likely to be relevant than [43/74]). Resort to comparing timestamps (newer is more relevant) when the scores tie. To limit disk usage, the oldest half of the cached entries are expunged when the cache file exceeds sendemail.msgidcachemaxsize (default 100kB). This also ensures that we will never have to read, score, and sort 1000s of entries on each invocation of git-send-email. 
Signed-off-by: Rasmus Villemoes Signed-off-by: Junio C Hamano --- git-send-email.perl | 133 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 127 insertions(+), 6 deletions(-) diff --git a/git-send-email.perl b/git-send-email.perl index ac3b02da68..b36a925939 100755 --- a/git-send-email.perl +++ b/git-send-email.perl @@ -203,6 +203,7 @@ my ($validate, $confirm); my (@suppress_cc); my ($auto_8bit_encoding); my ($compose_encoding); +my ($msgid_cache_file, $msgid_cache_maxsize); my ($debug_net_smtp) = 0; # Net::SMTP, see send_message() @@ -237,6 +238,8 @@ my %config_settings = ( "from" => \$sender, "assume8bitencoding" => \$auto_8bit_encoding, "composeencoding" => \$compose_encoding, + "msgidcachefile" => \$msgid_cache_file, + "msgidcachemaxsize" => \$msgid_cache_maxsize, ); my %config_path_settings = ( @@ -796,11 +799,23 @@ sub expand_one_alias { @bcclist = expand_aliases(@bcclist); @bcclist = validate_address_list(sanitize_address_list(@bcclist)); +if ($compose && $compose > 0) { + @files = ($compose_filename . ".final", @files); +} + if ($thread && !defined $initial_reply_to && $prompting) { + my @choices = (); + if ($msgid_cache_file) { + my $first_subject = get_patch_subject($files[0]); + $first_subject =~ s/^GIT: //; + @choices = msgid_cache_getmatches($first_subject, 10); + @choices = map {[$_->{id}, sprintf "[%s] %s", format_2822_time($_->{epoch}), $_->{subject}]} @choices; + } $initial_reply_to = ask( "Message-ID to be used as In-Reply-To for the first email (if any)? ", default => "", - valid_re => qr/\@.*\./, confirm_only => 1); + valid_re => qr/\@.*\./, confirm_only => 1, + choices => \@choices); } if (defined $initial_reply_to) { $initial_reply_to =~ s/^\s*<?//; [...] -if ($compose && $compose > 0) { - @files = ($compose_filename . 
".final", @files); -} - # Variables we set as part of the loop over files our ($message_id, %mail, $subject, $reply_to, $references, $message, $needs_confirm, $message_num, $ask_default); @@ -1136,7 +1147,7 @@ sub send_message { my $to = join (",\n\t", @recipients); @recipients = unique_email_list(@recipients,@cc,@bcclist); @recipients = (map { extract_valid_address_or_die($_) } @recipients); - my $date = format_2822_time($time++); + my $date = format_2822_time($time); my $gitversion = '@@GIT_VERSION@@'; if ($gitversion =~ m/..GIT_VERSION../) { $gitversion = Git::version(); @@ -1477,6 +1488,11 @@ foreach my $t (@files) { my $message_was_sent = send_message(); + if ($message_was_sent && $msgid_cache_file && !$dry_run) { + msgid_cache_this($message_id, $message_num == 1 ? 1 : 0, , $time, $subject); + } + $time++; + # set up for the next message if ($thread && $message_was_sent && ($chain_reply_to || !defined $reply_to || length($reply_to) == 0 || @@ -1521,6 +1537,8 @@ sub cleanup_compose_files { $smtp->quit if $smtp; +msgid_cache_write() if $msgid_cache_file && !$dry_run; + sub unique_email_list { my %seen; my @emails; @@ -1569,3 +1587,106 @@ sub body_or_subject_has_nonascii { } return 0; } + +my @msgid_new_entries; +sub msgid_cache_this { + my $msgid = shift; + my $first = shift; + my $epoch = shift; + my $subject = shift; + # Make sure there are no tabs which will confuse us, and save + # some valuable horizontal real-estate by removing redundant + # whitespace. + if ($subject) { + $subject =~ s/^\s+|\s+$//g; + $subject =~ s/\s+/ /g; + } + # Replace undef or the empty string by an actual string. 
+ $subject = '(none)' if (!defined $subject || $subject eq ''); + + push @msgid_new_entries, {id => $msgid, first => $first, subject => $subject, epoch => $epoch}; +} + + +# For now, use a simple tab-separated format: +# +# $id\t$wasfirst\t$unixtime\t$subject\n +sub msgid_cache_read { + my $fh; + my $line; + my @entries; + if (not open ($fh, '<', $msgid_cache_file)) { + # A non-existing cache file is ok, but should we warn if errno != ENOENT? + return (); + } + while ($line = <$fh>) { + chomp($line); + my ($id, $first, $epoch, $subject) = split /\t/, $line; + push @entries, {id=>$id, first=>$first, epoch=>$epoch, subject=>$subject}; + } + close($fh); + return @entries; +} + +sub msgid_cache_getmatches { + my ($first_subject, $maxentries) = @_; + my @list = msgid_cache_read(); + + # We need to find the message-ids which are most likely to be + # useful. There are probably better ways to do this, but for + # now we simply count how many words in the old subject also + # appear in $first_subject. + my %words = map {$_ => 1} msgid_subject_words($first_subject); + for my $item (@list) { + # Emails which were first in a batch are more likely + # to be used for followups (cf. the example in "man + # git-send-email"), so give those a head start. + my $score = $item->{first} ? 3 : 0; + for (msgid_subject_words($item->{subject})) { + $score++ if exists $words{$_}; + } + $item->{score} = $score; + } + @list = sort {$b->{score} <=> $a->{score} || + $b->{epoch} <=> $a->{epoch}} @list; + @list = @list[0 .. $maxentries-1] if (@list > $maxentries); + return @list; +} + +sub msgid_subject_words { + my $subject = shift; + # Ignore initial "[PATCH 02/47]" + $subject =~ s/^\s*\[.*?\]//; + my @words = split /\s+/, $subject; + # Ignore short words. 
+ @words = grep { length > 3 } @words; + return @words; +} + +sub msgid_cache_write { + msgid_cache_do_write(1, \@msgid_new_entries); + + if (defined $msgid_cache_maxsize && $msgid_cache_maxsize =~ m/^\s*([0-9]+)\s*([kKmMgG]?)$/) { + my %SI = ('' => 1, 'k' => 1e3, 'm' => 1e6, 'g' => 1e9); + $msgid_cache_maxsize = $1 * $SI{lc($2)}; + } + else { + $msgid_cache_maxsize = 100000; + } + if (-s $msgid_cache_file > $msgid_cache_maxsize) { + my @entries = msgid_cache_read(); + splice @entries, 0, int(@entries/2); + msgid_cache_do_write(0, \@entries); + } +} + +sub msgid_cache_do_write { + my $append = shift; + my $entries = shift; + my $fh; + if (not open($fh, $append ? '>>' : '>', $msgid_cache_file)) { + die "cannot open $msgid_cache_file for writing: $!"; + } + printf $fh "%s\t%d\t%s\t%s\n", $_->{id}, $_->{first}, $_->{epoch}, $_->{subject} for (@$entries); + close($fh); +} -- cgit v1.2.1