diff options
author | Yves Orton <demerphq@gmail.com> | 2022-04-14 05:55:15 +0200 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2022-04-19 05:41:19 -0600 |
commit | a050bf18828e23a630cb58453b2b43c167204282 (patch) | |
tree | a3a5737b7a349b2998f106f3621d4c28370a4dce | |
parent | 692b8075436fb40aab020faef4c021e9d70e040b (diff) | |
download | perl-a050bf18828e23a630cb58453b2b43c167204282.tar.gz |
regen/mph.pl - perltidy file for style consistency
and document the perltidy options used so any future maintainers
can follow the style of the file more easily.
-rw-r--r-- | regen/mph.pl | 291 |
1 files changed, 171 insertions, 120 deletions
diff --git a/regen/mph.pl b/regen/mph.pl index 7b097da66a..3fcd2046f0 100644 --- a/regen/mph.pl +++ b/regen/mph.pl @@ -4,19 +4,25 @@ use warnings; use Data::Dumper; use Carp; use Text::Wrap; + +# The style of this file is determined by: +# +# perltidy -w -ple -bbb -bbc -bbs -nolq -l=80 -noll -nola -nwls='=' \ +# -isbc -nolc -otr -kis -ci=4 -se -sot -sct -nsbl -pt=2 -fs \ +# -fsb='##!' -fse='##.' + use constant { FNV32_PRIME => 16777619, - U8_MAX => 0xFF, - U16_MAX => 0xFFFF, - U32_MAX => 0xFFFFFFFF, + U8_MAX => 0xFF, + U16_MAX => 0xFFFF, + U32_MAX => 0xFFFFFFFF, }; our $DEBUG= 0; my $RSHIFT= 8; my $MASK= U32_MAX; -my $MAX_SEED2= U16_MAX; # currently the same, but it isn't required. -my $IS_32BIT= !eval { pack "Q", 1}; - +my $MAX_SEED2= U16_MAX; # currently the same, but it isn't required. +my $IS_32BIT= !eval { pack "Q", 1 }; # The basic idea is that you have a two level structure, and effectively # hash the key twice. @@ -44,11 +50,12 @@ sub fnv1a_32 { my ($key, $seed)= @_; use integer; - my $hash = 0+$seed; + my $hash= 0 + $seed; foreach my $char (split //, $key) { - $hash = $hash ^ ord($char); + $hash= $hash ^ ord($char); + # the & U32_MAX is to simulate 32 bit ints on a 64 bit integer Perl. - $hash = ($hash * FNV32_PRIME) & U32_MAX; + $hash= ($hash * FNV32_PRIME) & U32_MAX; } # The hash can end up negative on 32 bit Perls due to use integer being @@ -62,24 +69,24 @@ sub fnv1a_32 { sub build_perfect_hash { my ($hash)= @_; - my $n= 0+keys %$hash; + my $n= 0 + keys %$hash; my $seed1= unpack("N", "Perl") - 1; my $hash_to_key; my $key_to_hash; my $key_buckets; SEED1: - for ($seed1++;1;$seed1++) { + for ($seed1++ ; 1 ; $seed1++) { my %hash_to_key; my %key_to_hash; my %key_buckets; my %high; foreach my $key (sort keys %$hash) { - my $h= fnv1a_32($key,$seed1); + my $h= fnv1a_32($key, $seed1); next SEED1 if exists $hash_to_key{$h}; - next SEED1 if $high{$h >> $RSHIFT}++; + next SEED1 if $high{ $h >> $RSHIFT }++; $hash_to_key{$h}= $key; $key_to_hash{$key}= $h; - push @{$key_buckets{$h % $n}}, $key; + push @{ $key_buckets{ $h % $n } }, $key; } $hash_to_key= \%hash_to_key; $key_to_hash= \%key_to_hash; @@ -90,15 +97,22 @@ sub build_perfect_hash { my %token; my @first_level; my @second_level; - foreach my $first_idx (sort { @{$key_buckets->{$b}} <=> @{$key_buckets->{$a}} || $a <=> $b } keys %$key_buckets) { + foreach my $first_idx ( + sort { + @{ $key_buckets->{$b} } <=> @{ $key_buckets->{$a} } + || $a <=> $b + } keys %$key_buckets + ) + { my $keys= $key_buckets->{$first_idx}; + #printf "got %d keys in bucket %d\n", 0+@$keys, $first_idx; my $seed2; SEED2: - for ($seed2=1;1;$seed2++) { + for ($seed2= 1 ; 1 ; $seed2++) { goto FIND_SEED if $seed2 > $MAX_SEED2; my @idx= map { - ( ( ( $key_to_hash->{$_} >> $RSHIFT ) ^ $seed2 ) & $MASK ) % $n + ((($key_to_hash->{$_} >> $RSHIFT) ^ $seed2) & $MASK) % $n } @$keys; my %seen; next SEED2 if grep { $second_level[$_] || $seen{$_}++ } @idx; @@ -106,27 +120,25 @@ sub build_perfect_hash { @second_level[@idx]= map { +{ - key => $_, - hash => $key_to_hash->{$_}, - value => $hash->{$_}, - seed2 => 0, + key => $_, + hash => $key_to_hash->{$_}, + value => $hash->{$_}, + seed2 => 0, } } @$keys; last; } } - $second_level[$_]{seed2}= $first_level[$_]||0, $second_level[$_]{idx}= $_ for 0 .. $#second_level; + $second_level[$_]{seed2}= $first_level[$_] || 0, $second_level[$_]{idx}= $_ + for 0 .. $#second_level; return $seed1, \@second_level; } sub _sort_keys_longest_first { my ($hash)= shift; - my @keys= sort { - length($b) <=> length ($a) || - $a cmp $b - } keys %$hash; + my @keys= sort { length($b) <=> length($a) || $a cmp $b } keys %$hash; return \@keys; } @@ -154,24 +166,27 @@ sub build_split_words { my $blob= ""; if ($preprocess) { my %parts; - foreach my $key (@{_sort_keys_longest_first($hash)}) { - my ($prefix,$suffix); - if ($key=~/^([^=]+=)([^=]+)\z/) { - ($prefix,$suffix)= ($1, $2); + foreach my $key (@{ _sort_keys_longest_first($hash) }) { + my ($prefix, $suffix); + if ($key =~ /^([^=]+=)([^=]+)\z/) { + ($prefix, $suffix)= ($1, $2); $parts{$suffix}++; + #$parts{$prefix}++; - } else { + } + else { $prefix= $key; $parts{$prefix}++; } } - foreach my $part (@{_sort_keys_longest_first(\%parts)}) { + foreach my $part (@{ _sort_keys_longest_first(\%parts) }) { $blob .= $part; } printf "Using preprocessing, initial blob size is %d chars.\n", length($blob); - } else { + } + else { print "No preprocessing, starting with an empty blob.\n"; } my ($res, $old_res, $added, $passes); @@ -182,9 +197,9 @@ sub build_split_words { $passes++; KEY: - foreach my $key (@{_sort_keys_longest_first($hash)}) { + foreach my $key (@{ _sort_keys_longest_first($hash) }) { next if exists $res->{$key}; - if (index($blob,$key) >= 0 ) { + if (index($blob, $key) >= 0) { my $idx= length($key); if ($DEBUG and $old_res and $old_res->{$key} != $idx) { print "changing: $key => $old_res->{$key} : $idx\n"; @@ -197,11 +212,11 @@ sub build_split_words { my $best_prefix= $key; my $best_suffix= ""; my $min= 1; - foreach my $idx (reverse $min .. length($key)-1) { - my $prefix= substr($key,0,$idx); - my $suffix= substr($key,$idx); - my $i1= index($blob,$prefix)>=0; - my $i2= index($blob,$suffix)>=0; + foreach my $idx (reverse $min .. length($key) - 1) { + my $prefix= substr($key, 0, $idx); + my $suffix= substr($key, $idx); + my $i1= index($blob, $prefix) >= 0; + my $i2= index($blob, $suffix) >= 0; if ($i1 and $i2) { if ($DEBUG and $old_res and $old_res->{$key} != $idx) { print "changing: $key => $old_res->{$key} : $idx\n"; @@ -210,14 +225,16 @@ sub build_split_words { $appended{$prefix}++; $appended{$suffix}++; next KEY; - } elsif ($i1) { + } + elsif ($i1) { if (length $suffix <= length $append) { $best= $idx; $append= $suffix; $best_prefix= $prefix; $best_suffix= $suffix; } - } elsif ($i2) { + } + elsif ($i2) { if (length $prefix <= length $append) { $best= $idx; $append= $prefix; @@ -229,6 +246,7 @@ sub build_split_words { if ($DEBUG and $old_res and $old_res->{$key} != $best) { print "changing: $key => $old_res->{$key} : $best\n"; } + #print "$best_prefix|$best_suffix => $best => $append\n"; $res->{$key}= $best; $blob .= $append; @@ -240,51 +258,54 @@ sub build_split_words { if ($added < length $blob) { printf "Appended %d chars. Blob is %d chars long.\n", $added, length($blob); - } else { + } + else { printf "Blob is %d chars long.\n", $added; } - } elsif ($passes>1) { + } + elsif ($passes > 1) { print "Blob needed no changes.\n"; } my $new_blob= ""; - foreach my $part (@{_sort_keys_longest_first(\%appended)}) { - $new_blob .= $part unless index($new_blob,$part)>=0; + foreach my $part (@{ _sort_keys_longest_first(\%appended) }) { + $new_blob .= $part unless index($new_blob, $part) >= 0; } if (length($new_blob) < length($blob)) { printf "Uncorrected new blob length of %d chars is smaller.\n" - . " Correcting new blob...%s", + . " Correcting new blob...%s", length($new_blob), $DEBUG ? "\n" : " "; $blob= $new_blob; $old_res= $res; %appended= (); goto REDO; - } else { + } + else { printf "After %d passes final blob length is %d chars.\n" - . "This is %.2f%% of the raw key length of %d chars.\n\n", - $passes, length($blob), 100*length($blob)/$length_all_keys, + . "This is %.2f%% of the raw key length of %d chars.\n\n", + $passes, length($blob), 100 * length($blob) / $length_all_keys, $length_all_keys; } + # sanity check - die sprintf "not same size? %d != %d", 0+keys %$res, 0+keys %$hash + die sprintf "not same size? %d != %d", 0 + keys %$res, 0 + keys %$hash unless keys %$res == keys %$hash; return ($blob, $res, $length_all_keys); } - sub blob_as_code { - my ($blob,$blob_name)= @_; + my ($blob, $blob_name)= @_; $blob_name ||= "mph_blob"; # output the blob as C code. - my @code= (sprintf "STATIC const unsigned char %s[] =\n",$blob_name); + my @code= (sprintf "STATIC const unsigned char %s[] =\n", $blob_name); my $blob_len= length $blob; while (length($blob)) { - push @code, sprintf qq( "%s"), substr($blob,0,65,""); + push @code, sprintf qq( "%s"), substr($blob, 0, 65, ""); push @code, length $blob ? "\n" : ";\n"; } push @code, "/* $blob_name length: $blob_len */\n"; - return join "",@code; + return join "", @code; } sub print_includes { @@ -296,7 +317,7 @@ sub print_includes { } sub print_defines { - my ($ofh,$defines)= @_; + my ($ofh, $defines)= @_; my $key_len; foreach my $def (keys %$defines) { @@ -309,55 +330,60 @@ sub print_defines { print $ofh "\n"; } - sub build_array_of_struct { - my ($second_level,$blob)= @_; + my ($second_level, $blob)= @_; my %defines; my %tests; my @rows; foreach my $row (@$second_level) { - $defines{$row->{value}}= $row->{idx}+1; - $tests{$row->{key}}= $defines{$row->{value}}; + $defines{ $row->{value} }= $row->{idx} + 1; + $tests{ $row->{key} }= $defines{ $row->{value} }; + ##! my @u16= ( $row->{seed2}, - index($blob,$row->{prefix}//0), - index($blob,$row->{suffix}//0), + index($blob, $row->{prefix} // 0), + index($blob, $row->{suffix} // 0), ); - $_ > U16_MAX and die "panic: value exceeds range of U16" + $_ > U16_MAX and die "panic: value exceeds range of U16" for @u16; my @u8= ( - length($row->{prefix}), + length($row->{prefix}), length($row->{suffix}), ); - $_ > U8_MAX and die "panic: value exceeds range of U8" + $_ > U8_MAX and die "panic: value exceeds range of U8" for @u8; - push @rows, sprintf(" { %5d, %5d, %5d, %3d, %3d, %s } /* %s%s */", - @u16, @u8, $row->{value}, $row->{prefix}, $row->{suffix}); + push @rows, sprintf " { %5d, %5d, %5d, %3d, %3d, %s } /* %s%s */", + @u16, @u8, $row->{value}, $row->{prefix}, $row->{suffix}; + ##. } - return \@rows,\%defines,\%tests; + return \@rows, \%defines, \%tests; } sub make_algo { - my ($second_level, $seed1, $length_all_keys, $smart_blob, $rows, - $blob_name, $struct_name, $table_name, $match_name, $prefix) = @_; + my ( + $second_level, $seed1, $length_all_keys, $smart_blob, + $rows, $blob_name, $struct_name, $table_name, + $match_name, $prefix + )= @_; - $blob_name ||= "mph_blob"; + $blob_name ||= "mph_blob"; $struct_name ||= "mph_struct"; - $table_name ||= "mph_table"; - $prefix ||= "MPH"; + $table_name ||= "mph_table"; + $prefix ||= "MPH"; - my $n= 0+@$second_level; - my $data_size= 0+@$second_level * 8 + length $smart_blob; + my $n= 0 + @$second_level; + my $data_size= 0 + @$second_level * 8 + length $smart_blob; - my @code = "#define ${prefix}_VALt I16\n\n"; + my @code= "#define ${prefix}_VALt I16\n\n"; push @code, "/*\n"; - push @code, sprintf "rows: %s\n", $n; - push @code, sprintf "seed: %s\n", $seed1; + push @code, sprintf "rows: %s\n", $n; + push @code, sprintf "seed: %s\n", $seed1; push @code, sprintf "full length of keys: %d\n", $length_all_keys; - push @code, sprintf "blob length: %d\n", length $smart_blob; - push @code, sprintf "ref length: %d\n", 0+@$second_level * 8; - push @code, sprintf "data size: %d (%%%.2f)\n", $data_size, ($data_size / $length_all_keys) * 100; + push @code, sprintf "blob length: %d\n", length $smart_blob; + push @code, sprintf "ref length: %d\n", 0 + @$second_level * 8; + push @code, sprintf "data size: %d (%%%.2f)\n", $data_size, + ($data_size / $length_all_keys) * 100; push @code, "*/\n\n"; push @code, blob_as_code($smart_blob, $blob_name); @@ -377,10 +403,13 @@ EOF_CODE push @code, "#define ${prefix}_RSHIFT $RSHIFT\n"; push @code, "#define ${prefix}_BUCKETS $n\n\n"; push @code, sprintf "STATIC const U32 ${prefix}_SEED1 = 0x%08x;\n", $seed1; - push @code, sprintf "STATIC const U32 ${prefix}_FNV32_PRIME = 0x%08x;\n\n", FNV32_PRIME; + push @code, sprintf "STATIC const U32 ${prefix}_FNV32_PRIME = 0x%08x;\n\n", + FNV32_PRIME; push @code, "/* The comments give the input key for the row it is in */\n"; - push @code, "STATIC const struct $struct_name $table_name\[${prefix}_BUCKETS] = {\n", join(",\n", @$rows)."\n};\n\n"; + push @code, + "STATIC const struct $struct_name $table_name\[${prefix}_BUCKETS] = {\n", + join(",\n", @$rows) . "\n};\n\n"; push @code, <<"EOF_CODE"; ${prefix}_VALt $match_name( const unsigned char * const key, const U16 key_len ) { const unsigned char * ptr= key; @@ -415,8 +444,10 @@ EOF_CODE } sub print_algo { - my ($ofh, $second_level, $seed1, $long_blob, $smart_blob, $rows, - $blob_name, $struct_name, $table_name, $match_name ) = @_; + my ( + $ofh, $second_level, $seed1, $long_blob, $smart_blob, + $rows, $blob_name, $struct_name, $table_name, $match_name + )= @_; if (!ref $ofh) { my $file= $ofh; @@ -425,14 +456,15 @@ sub print_algo { or die "Failed to open '$file': $!"; } - my $code = make_algo( - $second_level, $seed1, $long_blob, $smart_blob, $rows, - $blob_name, $struct_name, $table_name, $match_name ); + my $code= make_algo( + $second_level, $seed1, $long_blob, $smart_blob, $rows, + $blob_name, $struct_name, $table_name, $match_name + ); print $ofh $code; } sub print_main { - my ($ofh,$h_file,$match_name,$prefix)=@_; + my ($ofh, $h_file, $match_name, $prefix)= @_; print $ofh <<"EOF_CODE"; #include "$h_file" @@ -454,33 +486,40 @@ sub print_tests { open my $ofh, ">", $file or die "Failed to open '$file' for writing: $!"; my $num_tests= 2 + keys %$tests_hash; - print $ofh "use strict;\nuse warnings;\nuse Test::More tests => $num_tests;\nmy \@res;"; + print $ofh + "use strict;\nuse warnings;\nuse Test::More tests => $num_tests;\nmy \@res;"; my $bytes= 0; my @tests= sort keys %$tests_hash; - print $ofh "\@res=`./mph_test '$tests[0]/should-not-match' 'should-not-match/$tests[0]'`;\n"; + print $ofh + "\@res=`./mph_test '$tests[0]/should-not-match' 'should-not-match/$tests[0]'`;\n"; print $ofh "ok( \$res[0] =~ /got: 0/,'proper prefix does not match');\n"; print $ofh "ok( \$res[1] =~ /got: 0/,'proper suffix does not match');\n"; + while (@tests) { - my @batch= splice @tests,0,10; + my @batch= splice @tests, 0, 10; my $batch_args= join " ", map { "'$_'" } @batch; print $ofh "\@res=`./mph_test $batch_args`;\n"; - foreach my $i (0..$#batch) { + foreach my $i (0 .. $#batch) { my $key= $batch[$i]; my $want= $tests_hash->{$key}; - print $ofh "ok(\$res[$i]=~/got: (\\d+)/ && \$1 == $want, '$key');\n"; + print $ofh + "ok(\$res[$i]=~/got: (\\d+)/ && \$1 == $want, '$key');\n"; } } close $ofh; } sub print_test_binary { - my ($file,$h_file, $second_level, $seed1, $length_all_keys, - $smart_blob, $rows, $defines, $match_name, $prefix)= @_; + my ( + $file, $h_file, $second_level, $seed1, + $length_all_keys, $smart_blob, $rows, $defines, + $match_name, $prefix + )= @_; open my $ofh, ">", $file or die "Failed to open '$file': $!"; print_includes($ofh); print_defines($ofh, $defines); - print_main($ofh,$h_file,$match_name,$prefix); + print_main($ofh, $h_file, $match_name, $prefix); close $ofh; } @@ -491,15 +530,20 @@ sub make_mph_from_hash { $length_all_keys += length($_) for keys %$hash; # we do this twice because often we can find longer prefixes on the second pass. - my ($smart_blob, $res_to_split)= build_split_words($hash,0,$length_all_keys); + my ($smart_blob, $res_to_split)= + build_split_words($hash, 0, $length_all_keys); { - my ($smart_blob2, $res_to_split2)= build_split_words($hash,1,$length_all_keys); + my ($smart_blob2, $res_to_split2)= + build_split_words($hash, 1, $length_all_keys); if (length($smart_blob) > length($smart_blob2)) { - printf "Using preprocess-smart blob, length: %d (vs %d)\n", length $smart_blob2, length $smart_blob; + printf "Using preprocess-smart blob, length: %d (vs %d)\n", + length $smart_blob2, length $smart_blob; $smart_blob= $smart_blob2; $res_to_split= $res_to_split2; - } else { - printf "Using greedy-smart blob, length: %d (vs %d)\n", length $smart_blob, length $smart_blob2; + } + else { + printf "Using greedy-smart blob, length: %d (vs %d)\n", + length $smart_blob, length $smart_blob2; } } my ($seed1, $second_level)= build_perfect_hash($hash); @@ -507,19 +551,20 @@ sub make_mph_from_hash { # add prefix/suffix data into the bucket info in @$second_level foreach my $bucket_info (@$second_level) { my $key= $bucket_info->{key}; - my $sp= $res_to_split->{$key} - // die "no split pos for '$key'\n"; + my $sp= $res_to_split->{$key} // die "no split pos for '$key'\n"; my ($prefix, $suffix)= unpack "A${sp}A*", $key; $bucket_info->{prefix}= $prefix; $bucket_info->{suffix}= $suffix; } - my ($rows, $defines, $tests)= build_array_of_struct($second_level, $smart_blob); - return ($second_level, $seed1, $length_all_keys, $smart_blob, $rows, $defines, $tests); + my ($rows, $defines, $tests)= + build_array_of_struct($second_level, $smart_blob); + return ($second_level, $seed1, $length_all_keys, $smart_blob, $rows, + $defines, $tests); } sub make_files { - my ($hash,$base_name)= @_; + my ($hash, $base_name)= @_; my $h_name= $base_name . "_algo.h"; my $c_name= $base_name . "_test.c"; @@ -531,14 +576,19 @@ sub make_files { my $prefix= uc($base_name); my ($second_level, $seed1, $length_all_keys, - $smart_blob, $rows, $defines, $tests)= make_mph_from_hash( $hash ); - print_algo( $h_name, - $second_level, $seed1, $length_all_keys, $smart_blob, $rows, - $blob_name, $struct_name, $table_name, $match_name, $prefix ); - print_test_binary( $c_name, $h_name, $second_level, $seed1, $length_all_keys, - $smart_blob, $rows, $defines, - $match_name, $prefix ); - print_tests( $p_name, $tests ); + $smart_blob, $rows, $defines, $tests) + = make_mph_from_hash($hash); + print_algo( + $h_name, $second_level, $seed1, $length_all_keys, + $smart_blob, $rows, $blob_name, $struct_name, + $table_name, $match_name, $prefix + ); + print_test_binary( + $c_name, $h_name, $second_level, $seed1, + $length_all_keys, $smart_blob, $rows, $defines, + $match_name, $prefix + ); + print_tests($p_name, $tests); } unless (caller) { @@ -555,7 +605,8 @@ unless (caller) { next if $to eq $loose; foreach my $key (@keys) { my $copy= $key; - if ($copy=~s/^\Q$to\E(=|\z)/$loose$1/) { + if ($copy =~ s/^\Q$to\E(=|\z)/$loose$1/) { + #print "$key => $copy\n"; $hash{$copy}= $key; } @@ -564,13 +615,13 @@ unless (caller) { } foreach my $key (keys %hash) { my $munged= uc($key); - $munged=~s/\W/__/g; - $hash{$key} = $munged; + $munged =~ s/\W/__/g; + $hash{$key}= $munged; } my $name= shift @ARGV; $name ||= "mph"; - make_files(\%hash,$name); + make_files(\%hash, $name); } 1; |