diff options
author | Yves Orton <demerphq@gmail.com> | 2022-08-09 17:21:35 +0200 |
---|---|---|
committer | Yves Orton <demerphq@gmail.com> | 2022-08-21 12:09:05 +0200 |
commit | 65e5fd895adae09ebb011480c4c0347060db8330 (patch) | |
tree | 82e4aeebe160033162d70c93aa24d8bd9dcf71f7 /Porting/updateAUTHORS.pm | |
parent | 69055d50eddbdf148566931792772e672a6ecbcf (diff) | |
download | perl-65e5fd895adae09ebb011480c4c0347060db8330.tar.gz |
updateAUTHORS.pm - remove redundant data from .mailmap
A number of devs have noted and raised concerns that having .mailmap
entries where the LHS and RHS of the entry are identical is redundant. A
few have also expressed the view that this unnecessarily exposes their email
address in an easily harvestable form.
On the technical level as far as git specifically is concerned it is
true this data is redundant, as git uses .mailmap to transform user data
that match the RHS of an entry into the value on the LHS, and when they are
the same obviously it is a no-op. However on the technical level for our
infra these entries are not redundant. We can use them to identify and
correctly respond to many cases of manual update of the AUTHORS file,
for instance changing one's preferred name. With the .mailmap entries we
have the data to identify the old preferred name, and join it together
with the unchanged email for the user and then automatically update
their .mailmap entries. This is why these entries were created
originally.
However, I believe that this functionality is not useful enough to
require us to have discussions with contributors on this subject on a
regular basis. We can add command line options that allow people to
change the AUTHORS file and the .mailmap file properly, so we can drop
the "redundant" data and avoid the need to talk about why it is there.
The required functionality for changing names will come in a follow-up
patch in this PR.
Diffstat (limited to 'Porting/updateAUTHORS.pm')
-rw-r--r-- | Porting/updateAUTHORS.pm | 61 |
1 files changed, 41 insertions, 20 deletions
diff --git a/Porting/updateAUTHORS.pm b/Porting/updateAUTHORS.pm index 4e062ec0f4..1c323f856b 100644 --- a/Porting/updateAUTHORS.pm +++ b/Porting/updateAUTHORS.pm @@ -342,6 +342,7 @@ sub read_authors_file { and $email ne "unknown"; $author_info{"name2email"}{$name}= $email if $name and $name ne "unknown"; + $author_info{"clean_full"}{ __fold_trim_ws($line) }= $line; } close $in_fh or die "Failed to close '$authors_file': $!"; @@ -489,6 +490,7 @@ sub update_mailmap_file { foreach my $line (@$mailmap_preamble, __sorted_hash_keys($mailmap_hash),) { + next if $line =~ m!\A(.*) \1\z!; print $out encode_utf8($line), "\n" or die "Failed to print to scalar buffer handle: $!"; } @@ -535,7 +537,8 @@ sub parse_orig_mailmap_hash { my $line_num= $mailmap_hash->{$line}; $line =~ /^ \s* (?: ( [^<>]*? ) \s+ )? <([^<>]*)> (?: \s+ (?: ( [^<>]*? ) \s+ )? <([^<>]*)> )? \s* \z /x - or die encode_utf8 "Failed to parse line num $line_num: '$line'"; + or die encode_utf8 + "Failed to parse '$self->{mailmap_file}' line num $line_num: '$line'\n"; if (!$1 or !$2) { die encode_utf8 "Both preferred name and email are mandatory ", "in line num $line_num: '$line'"; @@ -620,14 +623,21 @@ sub _author_to_mailmap { sub check_fix_mailmap_hash { my ($self)= @_; - my $mailmap_hash= $self->{orig_mailmap_hash}; + my $orig_mailmap_hash= $self->{orig_mailmap_hash}; my $author_info= $self->{author_info}; - + foreach my $key (keys %{ $author_info->{clean_full} }) { + $key .= " <unknown>" + unless $key =~ /\s+(?:<[^>]+>|\@\w+)\z/; + $key =~ s/\s+(\@\w+)\z/ <$1>/; + $orig_mailmap_hash->{"$key $key"} //= -1; + } my $parsed= $self->parse_orig_mailmap_hash(); my @fixed; my %seen_map; my %pref_groups; + my $remove_no_names_with_overlaps= 0; + # first pass through the data, do any conversions, eg, LC # the email address, decode any MIME-Header style email addresses. 
# We also correct any preferred name entries so they match what @@ -683,25 +693,34 @@ sub check_fix_mailmap_hash { } } - # Build an index of "preferred name/email" to other-email, other name - # we use this later to remove redundant entries missing a name. - $pref_groups{"$pname $pemail"}{$oemail}{ $oname || "" }= - [ $pname, $pemail, $oname, $oemail, $line_num ]; + my $rec= [ $pname, $pemail, $oname, $oemail, $line_num ]; + if ($remove_no_names_with_overlaps) { + + # Build an index of "preferred name/email" to other-email, other name + # we use this later to remove redundant entries missing a name. + $pref_groups{"$pname $pemail"}{$oemail}{ $oname || "" }= $rec; + } + else { + push @fixed, $rec; + } } - # this removes entries like - # Joe <blogs> <whatever> - # where there is a corresponding - # Joe <blogs> Joe X <blogs> - foreach my $pref (__sorted_hash_keys(\%pref_groups)) { - my $entries= $pref_groups{$pref}; - foreach my $email (__sorted_hash_keys($entries)) { - my @names= __sorted_hash_keys($entries->{$email}); - if ($names[0] eq "" and @names > 1) { - shift @names; - } - foreach my $name (@names) { - push @fixed, $entries->{$email}{$name}; + if ($remove_no_names_with_overlaps) { + + # this removes entries like + # Joe <blogs> <whatever> + # where there is a corresponding + # Joe <blogs> Joe X <whatever> + foreach my $pref (__sorted_hash_keys(\%pref_groups)) { + my $entries= $pref_groups{$pref}; + foreach my $email (__sorted_hash_keys($entries)) { + my @names= __sorted_hash_keys($entries->{$email}); + if (0 and $names[0] eq "" and @names > 1) { + shift @names; + } + foreach my $name (@names) { + push @fixed, $entries->{$email}{$name}; + } } } } @@ -902,6 +921,8 @@ sub new { $self->read_exclude_file(); + die Dumper(\%self) if $self{dump_opts}; + return $self; } |