summaryrefslogtreecommitdiff
path: root/Porting/updateAUTHORS.pm
diff options
context:
space:
mode:
authorYves Orton <demerphq@gmail.com>2022-08-09 17:21:35 +0200
committerYves Orton <demerphq@gmail.com>2022-08-21 12:09:05 +0200
commit65e5fd895adae09ebb011480c4c0347060db8330 (patch)
tree82e4aeebe160033162d70c93aa24d8bd9dcf71f7 /Porting/updateAUTHORS.pm
parent69055d50eddbdf148566931792772e672a6ecbcf (diff)
downloadperl-65e5fd895adae09ebb011480c4c0347060db8330.tar.gz
updateAUTHORS.pm - remove redundant data from .mailmap
A number of devs have noted and raised concerns about having .mailmap entries where the LHS and RHS of the entry are the same, which makes the entry redundant. A few have also expressed the view that this unnecessarily exposes their email address in an easily harvestable form. On the technical level, as far as git specifically is concerned, it is true that this data is redundant, as git uses .mailmap to transform user data that matches the RHS of an entry into the value on the LHS, and when they are the same it is obviously a no-op. However, on the technical level for our infra these entries are not redundant. We can use them to identify and correctly respond to many cases of manual update of the AUTHORS file, for instance changing one's preferred name. With the .mailmap entries we have the data to identify the old preferred name, join it together with the unchanged email for the user, and then automatically update their .mailmap entries. This is why these entries were created originally. However, I believe that this functionality is not useful enough to require us to have discussions with contributors on this subject on a regular basis. We can add command line options that allow people to change the AUTHORS file and the .mailmap file properly, so we can drop the "redundant" data and avoid the need to talk about why it is there. The required functionality for changing names will come in a follow-up patch in this PR.
Diffstat (limited to 'Porting/updateAUTHORS.pm')
-rw-r--r--Porting/updateAUTHORS.pm61
1 files changed, 41 insertions, 20 deletions
diff --git a/Porting/updateAUTHORS.pm b/Porting/updateAUTHORS.pm
index 4e062ec0f4..1c323f856b 100644
--- a/Porting/updateAUTHORS.pm
+++ b/Porting/updateAUTHORS.pm
@@ -342,6 +342,7 @@ sub read_authors_file {
and $email ne "unknown";
$author_info{"name2email"}{$name}= $email
if $name and $name ne "unknown";
+ $author_info{"clean_full"}{ __fold_trim_ws($line) }= $line;
}
close $in_fh
or die "Failed to close '$authors_file': $!";
@@ -489,6 +490,7 @@ sub update_mailmap_file {
foreach
my $line (@$mailmap_preamble, __sorted_hash_keys($mailmap_hash),)
{
+ next if $line =~ m!\A(.*) \1\z!;
print $out encode_utf8($line), "\n"
or die "Failed to print to scalar buffer handle: $!";
}
@@ -535,7 +537,8 @@ sub parse_orig_mailmap_hash {
my $line_num= $mailmap_hash->{$line};
$line =~ /^ \s* (?: ( [^<>]*? ) \s+ )? <([^<>]*)>
(?: \s+ (?: ( [^<>]*? ) \s+ )? <([^<>]*)> )? \s* \z /x
- or die encode_utf8 "Failed to parse line num $line_num: '$line'";
+ or die encode_utf8
+ "Failed to parse '$self->{mailmap_file}' line num $line_num: '$line'\n";
if (!$1 or !$2) {
die encode_utf8 "Both preferred name and email are mandatory ",
"in line num $line_num: '$line'";
@@ -620,14 +623,21 @@ sub _author_to_mailmap {
sub check_fix_mailmap_hash {
my ($self)= @_;
- my $mailmap_hash= $self->{orig_mailmap_hash};
+ my $orig_mailmap_hash= $self->{orig_mailmap_hash};
my $author_info= $self->{author_info};
-
+ foreach my $key (keys %{ $author_info->{clean_full} }) {
+ $key .= " <unknown>"
+ unless $key =~ /\s+(?:<[^>]+>|\@\w+)\z/;
+ $key =~ s/\s+(\@\w+)\z/ <$1>/;
+ $orig_mailmap_hash->{"$key $key"} //= -1;
+ }
my $parsed= $self->parse_orig_mailmap_hash();
my @fixed;
my %seen_map;
my %pref_groups;
+ my $remove_no_names_with_overlaps= 0;
+
# first pass through the data, do any conversions, eg, LC
# the email address, decode any MIME-Header style email addresses.
# We also correct any preferred name entries so they match what
@@ -683,25 +693,34 @@ sub check_fix_mailmap_hash {
}
}
- # Build an index of "preferred name/email" to other-email, other name
- # we use this later to remove redundant entries missing a name.
- $pref_groups{"$pname $pemail"}{$oemail}{ $oname || "" }=
- [ $pname, $pemail, $oname, $oemail, $line_num ];
+ my $rec= [ $pname, $pemail, $oname, $oemail, $line_num ];
+ if ($remove_no_names_with_overlaps) {
+
+ # Build an index of "preferred name/email" to other-email, other name
+ # we use this later to remove redundant entries missing a name.
+ $pref_groups{"$pname $pemail"}{$oemail}{ $oname || "" }= $rec;
+ }
+ else {
+ push @fixed, $rec;
+ }
}
- # this removes entries like
- # Joe <blogs> <whatever>
- # where there is a corresponding
- # Joe <blogs> Joe X <blogs>
- foreach my $pref (__sorted_hash_keys(\%pref_groups)) {
- my $entries= $pref_groups{$pref};
- foreach my $email (__sorted_hash_keys($entries)) {
- my @names= __sorted_hash_keys($entries->{$email});
- if ($names[0] eq "" and @names > 1) {
- shift @names;
- }
- foreach my $name (@names) {
- push @fixed, $entries->{$email}{$name};
+ if ($remove_no_names_with_overlaps) {
+
+ # this removes entries like
+ # Joe <blogs> <whatever>
+ # where there is a corresponding
+ # Joe <blogs> Joe X <whatever>
+ foreach my $pref (__sorted_hash_keys(\%pref_groups)) {
+ my $entries= $pref_groups{$pref};
+ foreach my $email (__sorted_hash_keys($entries)) {
+ my @names= __sorted_hash_keys($entries->{$email});
+ if (0 and $names[0] eq "" and @names > 1) {
+ shift @names;
+ }
+ foreach my $name (@names) {
+ push @fixed, $entries->{$email}{$name};
+ }
}
}
}
@@ -902,6 +921,8 @@ sub new {
$self->read_exclude_file();
+ die Dumper(\%self) if $self{dump_opts};
+
return $self;
}