From 54c821ccdc582332e5fb908fce9a08aeddfdf6bd Mon Sep 17 00:00:00 2001
From: dmg
Date: Sun, 6 Apr 2014 21:30:59 -0700
Subject: added scripts, not sure if they work

---
 scripts/license_matcher_modified.pl  |  85 ++++++++++++++++++
 scripts/parseLicense.pl              |  37 ++++++++
 scripts/sort_package_license_list.pl |  24 +++++
 scripts/unify.pl                     | 151 +++++++++++++++++++++++++++++++++
 4 files changed, 297 insertions(+)
 create mode 100644 scripts/license_matcher_modified.pl
 create mode 100644 scripts/parseLicense.pl
 create mode 100644 scripts/sort_package_license_list.pl
 create mode 100644 scripts/unify.pl

diff --git a/scripts/license_matcher_modified.pl b/scripts/license_matcher_modified.pl
new file mode 100644
index 0000000..53b1732
--- /dev/null
+++ b/scripts/license_matcher_modified.pl
@@ -0,0 +1,85 @@
+#!/usr/bin/perl
+
+# Match per-file license lists against a dictionary of per-directory licenses.
+#
+# Usage: license_matcher_modified.pl -p DICTIONARY -l FILE_LIST
+#
+#   -p  dictionary file, one "<path>;<license>" entry per line
+#   -l  file list, one "<prefix><file path>;<license>[,<license>...]" per line
+#
+# For each line of the -l file, the first dictionary entry whose path equals
+# the leading components of the file path is printed as
+#   <prefix><file path>;<sorted unique file licenses>;<dictionary licenses>
+# Lines of the -l file with no matching dictionary entry print nothing.
+
+use strict;
+use warnings;
+use Getopt::Std;
+use Set::Object;
+use Tie::IxHash;
+use Tie::Autotie 'Tie::IxHash';
+use Data::Dumper;
+
+my %opts                 = ();
+my %licensedictionary    = ();
+my %numofslashdictionary = ();
+
+# Tie::IxHash keeps keys in insertion order, which decides which entry wins
+# below. NOTE(review): Tie::Autotie is presumably what keeps the nested
+# per-bucket hashes ordered as well -- confirm against its documentation.
+tie %licensedictionary, 'Tie::IxHash';
+
+my $prefix = "/big2/y-manabe/licensereration/fedora/srcrpm_sandbox/";
+my $usage  = "usage: $0 -p dictionary -l file_list\n";
+
+getopts("p:l:", \%opts) or die $usage;
+die $usage unless defined $opts{"p"} and defined $opts{"l"};
+
+open(my $pfh, '<', $opts{"p"}) or die "cannot open $opts{p}: $!\n";
+
+while (my $line = <$pfh>) {
+    # Skip lines without a ';' rather than reuse captures from an earlier line.
+    next unless $line =~ /(.*);(.*)/;
+    my $key     = $1;
+    my $license = $2;
+    # Entries are bucketed by the first two characters of their path.
+    my $bucket = substr($key, 0, 2);
+    if (defined $licensedictionary{$bucket}{$key}) {
+        push(@{ $licensedictionary{$bucket}{$key} }, $license);
+    }
+    else {
+        $licensedictionary{$bucket}{$key} = [$license];
+        # tr counts the '/' characters without modifying $key.
+        $numofslashdictionary{$key} = ($key =~ tr{/}{});
+    }
+}
+
+close $pfh;
+
+open(my $lfh, '<', $opts{"l"}) or die "cannot open $opts{l}: $!\n";
+
+while (my $line = <$lfh>) {
+    # \Q...\E keeps the prefix literal; unparsable lines are skipped.
+    next unless $line =~ /^\Q$prefix\E(.*?);(.*?)$/;
+    my $filepath = $1;
+    my $slicense = $2;
+    # Set::Object drops duplicate license names before they are sorted.
+    my $licenseset       = Set::Object->new(split(/,/, $slicense));
+    my @slicenselist     = sort($licenseset->members);
+    my $uniqlicensenames = join(',', @slicenselist);
+    my $bucket           = substr($filepath, 0, 2);
+    # The path components do not depend on $key, so split once per file.
+    my @dirnames = split /\//, $filepath;
+    foreach my $key (keys %{ $licensedictionary{$bucket} }) {
+        my $depth = $numofslashdictionary{$key};
+        # The file path must be at least as deep as the dictionary entry.
+        next if $#dirnames < $depth;
+        next unless join('/', @dirnames[0 .. $depth]) eq $key;
+        print "${prefix}$filepath;$uniqlicensenames;";
+        print join(',', @{ $licensedictionary{$bucket}{$key} });
+        print "\n";
+        last;
+    }
+}
+
+close $lfh;
diff --git a/scripts/parseLicense.pl b/scripts/parseLicense.pl
new file mode 100644
index 0000000..2dc61ef
--- /dev/null
+++ b/scripts/parseLicense.pl
@@ -0,0 +1,37 @@
+#!/usr/bin/perl
+
+# this parses a line
+
+#/big2/y-manabe/licensereration/fedora/srcrpm_sandbox/lua-lgi-0.7.1-1.fc19/lgi-0.7.1/lgi/marshal.c;UNKNOWN
+
+#into
+# pkg name: lua-lgi-0.7.1-1.fc19
+# file: lgi-0.7.1/lgi/marshal.c
+# license: UNKNOWN
+#
+# and prints it as "pkg name;file;license".
+
+use strict;
+use warnings;
+
+# Splitting the absolute path on '/' gives an empty first field, so with the
+# five-directory prefix shown above the package name is field 6 and the file
+# path is made of fields 7 onwards.
+my $PKG_INDEX = 6;
+
+while (my $line = <>) {
+    chomp $line;
+    my ($path, $license) = split(/;/, $line);
+    # A missing license field is kept as an empty license, as before.
+    $license = '' unless defined $license;
+    my @parts = defined $path ? split('/', $path) : ();
+    # Without a package component there is nothing meaningful to print.
+    if ($#parts < $PKG_INDEX) {
+        warn "skipping malformed line $.: $line\n";
+        next;
+    }
+    my $pkg  = $parts[$PKG_INDEX];
+    my $file = join('/', @parts[$PKG_INDEX + 1 .. $#parts]);
+
+    print "$pkg;$file;$license\n";
+}
diff --git a/scripts/sort_package_license_list.pl b/scripts/sort_package_license_list.pl
new file mode 100644
index 0000000..cd20b50
--- /dev/null
+++ b/scripts/sort_package_license_list.pl
@@ -0,0 +1,24 @@
+#!/usr/bin/perl
+
+# Print the lines of the file given as first argument, ordered by the number
+# of '/' characters they contain (most first); lines with the same number of
+# slashes are printed in descending string order.
+
+use strict;
+use warnings;
+
+die "usage: $0 list_file\n" unless defined $ARGV[0];
+
+open(my $fh, '<', $ARGV[0]) or die "cannot open $ARGV[0]: $!\n";
+chomp(my @list = <$fh>);
+close $fh;
+
+# Count the slashes once per line rather than on every comparison.
+my %slashes_in;
+$slashes_in{$_} = tr{/}{} for @list;
+
+@list = sort { $slashes_in{$b} <=> $slashes_in{$a} || $b cmp $a } @list;
+
+foreach my $line (@list) {
+    print "$line\n";
+}
diff --git a/scripts/unify.pl b/scripts/unify.pl
new file mode 100644
index 0000000..f518fbb
--- /dev/null
+++ b/scripts/unify.pl
@@ -0,0 +1,151 @@
+#!/usr/bin/perl
+
+# first pass, unify names of licenses and remove duplicates.
+#
+# Reads "<path>;<license>[,<license>...]" lines and prints each of them back
+# with its license list normalized, de-duplicated and sorted.
+
+# we trick regarding gpl related licenses so they are "clustered" together..
+#
+# replace GPL with __GPL
+# replace exception in the text with ___exception
+#
+# NOTE(review): neither replacement is performed by the code in this file;
+# the two lines above presumably describe a later step -- confirm.
+
+use strict;
+use warnings;
+
+# Maps a license name variant to the canonical name it is reported under.
+my %equiv = (
+    "boostV1Ref" => "boostV1",
+    "X11" => "X11mit",
+    "X11Festival" => "X11mit",
+    "X11mitNoSellNoDocDocBSDvar" => "X11mit",
+    "X11mitwithoutSell" => 'X11mit',
+    "X11mitBSDvar" => "X11mit",
+    "X11mitwithoutSellCMUVariant" => "X11mit",
+    "X11mitwithoutSellandNoDocumentationRequi" => "X11mit",
+    "MITvar3" => "X11mit",
+    "MITvar2" => "X11mit",
+    "MIT" => "X11mit",
+    "ZLIBref" => "ZLIB",
+    "BSD3NoWarranty" => "BSD3",
+    "BSD2EndorseInsteadOfBinary" => "BSD2",
+    "BSD2var2" => "BSD2",
+    "LesserGPLv2" => "LibraryGPLv2",
+    "LesserGPLv2+" => "LibraryGPLv2+",
+    "orLGPLVer2.1" => "LesserGPLVer2.1",
+    "postgresqlRef" => "postgresql",
+);
+
+# Each rule is [name, name, combined name]: when both names are present they
+# are replaced by the combined name. The rules are tried in this order and a
+# rule that fires removes its two names, so the order is significant.
+my @exception_rules = (
+    ['digiaQTExceptionNoticeVer1.1', 'Qt', 'Qt-qtExcep'],
+    ['BisonException', 'GPLv3+', 'GPLv3+-bisonExcep'],
+    ['BisonException', 'GPLv2+', 'GPLv2+-bisonExcep'],
+    ['BisonException', 'GPLv2', 'GPLv2-bisonExcep'],
+    ['ClassPathException', 'GPLv2', 'GPLv2-classPathExcep'],
+    ['CDDLorGPLv2', 'ClassPathExceptionGPLv2', 'CDDLorGPLv2-classPathExcep'],
+    ['LinkException', 'GPLv3+', 'GPLv3+-linkExcep'],
+    ['LinkException', 'GPLv2+', 'GPLv2+-linkExcep'],
+    ['LinkException', 'GPLv3', 'GPLv3-linkExcep'],
+    ['LinkException', 'GPLv2', 'GPLv2-linkExcep'],
+);
+
+while (my $line = <>) {
+    chomp $line;
+    # A blank line carries no record; do not print a bogus ";UNKNOWN" for it.
+    next if $line eq '';
+    my ($path, $licenses) = split(/;/, $line);
+    $path     = '' unless defined $path;
+    $licenses = '' unless defined $licenses;
+
+    # do a simple rewriting of this exception which is an incomplete license
+    $licenses =~ s/^Exception$/UNKNOWN/;
+
+    # first remove duplicates; empty names come from stray commas
+    my %lics = map { $_ => 1 } grep { $_ ne '' } split(/,/, $licenses);
+
+    %lics = Do_Equivalent(%lics);
+    %lics = Remove_Redundant(%lics);
+    %lics = Do_Exceptions(%lics);
+
+    # An empty license list is reported as UNKNOWN.
+    my $t = join(',', sort keys %lics);
+    $t = "UNKNOWN" if $t eq "";
+    print "$path;$t\n";
+}
+
+# Returns true when $name is a key of %$lics with a non-empty value.
+sub _has
+{
+    my ($lics, $name) = @_;
+    return defined($lics->{$name}) && $lics->{$name} ne '';
+}
+
+# Takes a license hash (as a list) and returns it with every exception/license
+# pair listed in @exception_rules replaced by its combined name.
+sub Do_Exceptions
+{
+    my (%lics) = @_;
+
+    foreach my $rule (@exception_rules) {
+        my ($first, $second, $combined) = @$rule;
+        next unless _has(\%lics, $first) and _has(\%lics, $second);
+        delete $lics{$first};
+        delete $lics{$second};
+        $lics{$combined} = $combined;
+    }
+
+    return %lics;
+}
+
+# Takes a license hash (as a list) and returns it without the entries that
+# another entry already covers.
+sub Remove_Redundant
+{
+    my (%lics) = @_;
+
+    # GPLnoVersion is dropped when any versioned GPL is present.
+    my @versioned = grep { _has(\%lics, $_) } qw(GPLv2 GPLv2+ GPLv3 GPLv3+);
+    if (_has(\%lics, "GPLnoVersion") and @versioned) {
+        delete $lics{"GPLnoVersion"};
+    }
+    # GPLv2+ is dropped when GPLv3+ is present.
+    if (_has(\%lics, "GPLv2+") and _has(\%lics, "GPLv3+")) {
+        delete $lics{"GPLv2+"};
+    }
+    # MPLv1_1 is dropped when MPL1_1andLGPLv2_1 is present.
+    if (_has(\%lics, 'MPL1_1andLGPLv2_1') and _has(\%lics, "MPLv1_1")) {
+        delete $lics{"MPLv1_1"};
+    }
+
+    return %lics;
+}
+
+# Takes a license hash (as a list) and returns a new hash in which every name
+# is replaced by its %equiv entry, if it has one; "SeeFile" is dropped. Each
+# value of the returned hash is the same as its key.
+sub Do_Equivalent
+{
+    my (%lics) = @_;
+    my %outA;
+
+    # then normalize licenses
+    foreach my $name (keys %lics) {
+        next if $name eq "SeeFile";
+        my $canonical = _has(\%equiv, $name) ? $equiv{$name} : $name;
+        $outA{$canonical} = $canonical;
+    }
+    return %outA;
+}
+
+# Returns the distinct elements of the argument list, in no particular order.
+# Not called anywhere in this script.
+sub uniq {
+    my %seen = map { $_ => 1 } @_;
+    return keys %seen;
+}
-- 
cgit v1.2.1