From b93ccb705798bd0e77f6e51e6304564d70cde8ce Mon Sep 17 00:00:00 2001 From: Seth Woodworth Date: Wed, 20 Aug 2014 14:37:54 -0400 Subject: fixes typo in readme --- README.TXT | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.TXT b/README.TXT index 0783cab..1e92a7a 100644 --- a/README.TXT +++ b/README.TXT @@ -185,7 +185,7 @@ So Ninka detects all the sentences, including the MIT variant, it finds the GPL bsd intention. But the license is not really BSD. The disclaimers are not what you expect. Now, in all fairness, maybe -this is another lience. +this is another license. Let me translate the output for you: -- cgit v1.2.1 From 39a64e3f5db4a8d023ee7a9ee0be4d04d0bf814e Mon Sep 17 00:00:00 2001 From: dmg Date: Sun, 6 Apr 2014 21:36:32 -0700 Subject: escaped aposthrophes --- extComments/extComments.pl | 33 +++++++++++++++++++---------- matcher/matcher.pl | 52 ++++++++++++++++++++++++---------------------- matcher/rules.dict | 31 +++++++++++++-------------- ninka.pl | 24 ++++++++++++--------- 4 files changed, 78 insertions(+), 62 deletions(-) diff --git a/extComments/extComments.pl b/extComments/extComments.pl index 8fbca63..d8643d6 100755 --- a/extComments/extComments.pl +++ b/extComments/extComments.pl @@ -42,6 +42,14 @@ print STDERR "Usage $0 -v my $f = $ARGV[0]; +my $original = $f; + +$f =~ s/'/\\'/g; +$f =~ s/\$/\\\$/g; +$f =~ s/;/\\;/g; +$f =~ s/ /\\ /g; + + #die "illegal file [$f]" if $f =~ m@/\.@; my $numberComments = 1; @@ -49,32 +57,35 @@ $numberComments = $opts{c} if exists $opts{c}; my $verbose = 1; $verbose = exists $opts{v}; - - -if (get_size($f) == 0) { +if (get_size($original) == 0) { print STDERR "Empty file, just exit\n" if $verbose; exit 0; # nothing to report, just end } + + + + my $commentsCmd = Determine_Comments_Extractor($f); execute("$commentsCmd"); if ($commentsCmd =~ /^comments/ and get_size("${f}.comments") == 0){ - `cat '$f' | head -700 > ${f}.comments`; + `cat $f | head -700 > ${f}.comments`; } exit 0; -sub Determine_Comments_Extractor +sub Determine_Comments_Extractor { my ($f) = @_; + if ($f =~ /\.([^\.]+)$/) { my $ext= $1; - if ($ext =~ /^(pl|pm|py)$/ + if ($ext =~ /^(pl|pm|py)$/ ) { ######################## # for the time being, let us just extract the top 400 lines @@ -84,21 +95,21 @@ sub Determine_Comments_Extractor } elsif ($ext eq 'jl' or $ext eq 'el' ) { - return "cat '$f' | head -400 > '${f}.comments'"; -# return "$path/hashComments.pl -p ';' '$f'";; + return "cat $f | head -400 > ${f}.comments"; +# return "$path/hashComments.pl -p ';' $f";; } elsif ($ext =~ /^(java|c|cpp|h|cxx|c\+\+|cc)$/ ) { my $comm = `which comments`; if ($comm ne '') { return "comments -c1 '$f' 2> /dev/null"; } else { - return "cat '$f' | head -400 > '${f}.comments'"; + return "cat $f | head -400 > ${f}.comments"; } } else { - return "cat '$f' | head -700 > '${f}.comments'"; + return "cat $f | head -700 > ${f}.comments"; } } else { print "\n>>>>>>>>>>>>>>>>>>>>>\n"; - return "cat '$f' | head -700 > '${f}.comments'"; + return "cat $f | head -700 > ${f}.comments"; } } diff --git a/matcher/matcher.pl b/matcher/matcher.pl index aa01f1a..5e4422e 100755 --- a/matcher/matcher.pl +++ b/matcher/matcher.pl @@ -20,7 +20,7 @@ # # matchter.pl # -# This script use a set of license sentence name as input +# This script use a set of license sentence name as input # and output license name corresponds to a rule which match the set. # # author: Yuki Manabe @@ -64,6 +64,9 @@ $NonCriticalRules{'LesserGPLv2.1'} = [@gplNonCritical]; $NonCriticalRules{'LGPLv2orv3'}= [@gplNonCritical]; $NonCriticalRules{'LesserGPLv2'} = [@gplNonCritical]; $NonCriticalRules{'LesserGPLv2+'} = [@gplNonCritical]; +$NonCriticalRules{'GPLVer2.1or3KDE+'} = [@gplNonCritical]; +$NonCriticalRules{'LGPLVer2.1or3KDE+'} = [@gplNonCritical]; + $NonCriticalRules{'GPLv2+'} = [@gplNonCritical]; $NonCriticalRules{'GPLv2'} = [@gplNonCritical]; @@ -75,8 +78,8 @@ $NonCriticalRules{'AGPLv3'} = [@gplNonCritical, 'AGPLreceivedVer0','AGPLseeVer0' $NonCriticalRules{'AGPLv3+'} = [@gplNonCritical, 'AGPLreceivedVer0','AGPLseeVer0']; $NonCriticalRules{'GPLnoVersion'} = [@gplNonCritical]; -$NonCriticalRules{'Apachev1.1'} = ['ApacheLic1_1']; -$NonCriticalRules{'Apachev2'} = ['ApachePre','ApacheSee']; +$NonCriticalRules{'Apache-1.1'} = ['ApacheLic1_1']; +$NonCriticalRules{'Apache-2'} = ['ApachePre','ApacheSee']; $NonCriticalRules{'LibGCJLic'} = ['LibGCJSee']; $NonCriticalRules{'CDDLicV1'} = ['Compliance','CDDLicWhere','ApachesPermLim','CDDLicIncludeFile','UseSubjectToTerm', 'useOnlyInCompliance']; @@ -106,9 +109,8 @@ $NonCriticalRules{'MPLv1_1'} = ['licenseBlockBegin','MPLsee','Copyright','licens $NonCriticalRules{'MPL1_1andLGPLv2_1'} = ['MPLoptionIfNotDelete2licsVer0','MPL_LGPLseeVer0']; $NonCriticalRules{'FreeType'} = ['FreeTypeNotice']; +$NonCriticalRules{'boostV1'} = ['boostSeev1', 'SeeFile']; -$NonCriticalRules{'GPLVer2.1or3KDE+'} = [@gplNonCritical]; -$NonCriticalRules{'LGPLVer2.1or3KDE+'} = [@gplNonCritical]; # initialize @@ -178,7 +180,7 @@ Match_License(); my $match = 0; for (my $i=0;$i<=$#licSentNames ;$i++) { - if ($licSentNames[$i] == 0 and + if ($licSentNames[$i] == 0 and ($licSentNames[$i] ne 'UNKNOWN' and $licSentNames[$i] ne '')) { # print "[$licSentNames[$i]]\n"; @@ -199,9 +201,9 @@ if ($match > 0) { #print $interRuleList[$i][0]; @licSentNames = map { $_ eq $interRuleList[$i][0] ? $interRuleList[$i][1] : $_ } @licSentNames; } - + $senttok= join(',',@licSentNames) . ','; - + Match_License(); } @@ -284,7 +286,7 @@ sub Read_Original my ($inputF, $tokens, $originals) = @_; open (INPUTFILE, $inputF) or die ("Error: $inputF is not found."); - + my $sentence; my @original; while ($sentence = ){ @@ -298,19 +300,19 @@ sub Read_Original print "NONE\n"; exit 0; } - + #print join(';',@licSentNames)."\n"; - + close INPUTFILE; } sub Match_License { - + # create a string with the sentences - + for (my $j=0;$j<=$#rulelist;$j++){ - + my $rule=$rulelist[$j][1]; my $rulename=$rulelist[$j][0]; my $lenRule = scalar(split(',', $rule)); @@ -324,7 +326,7 @@ sub Match_License # print "\n"; } } - + # print ">>>>[$senttok]\n"; my $onlyAllRight = 0; @@ -333,7 +335,7 @@ sub Match_License #print STDERR "Ending>>>>>>>$senttok\n"; #print STDERR 'Size>>' , scalar(@result), "\n"; #print STDERR 'Result>>', join(',', @result), "\n"; - + # let us remove allrights # my $onlyAllRight = 1; # for my $i (0.. scalar(@licSentNames)-1){ @@ -347,9 +349,9 @@ sub Match_License # output result if (scalar(@result) > 0){ # at this point we have matched - - - # let us clean up the rules... let us print the matched rules, and the + + + # let us clean up the rules... let us print the matched rules, and the # if (grep(/GPL/, @result)) { # print "GPL...\n"; # foreach my $r ($NonCriticalRules{GPL}) { @@ -357,7 +359,7 @@ sub Match_License # } # } # general removal of rules - + foreach my $r (@generalNonCritical) { while ($senttok =~ s/,$r,/,-1,/) { @@ -365,7 +367,7 @@ sub Match_License } } # print "[$senttok]\n"; - + foreach my $res (@result) { my $temp = $NonCriticalRules{$res}; foreach my $r (@$temp) { @@ -375,7 +377,7 @@ sub Match_License } } } -# print "[$senttok]\n"; +# print "[$senttok]\n"; } } @@ -392,7 +394,7 @@ sub Print_Result my @sections = split(',', $senttok); die 'assertion 1' if $sections[0] ne ''; die 'assertion 2' if $sections[scalar(@sections)] ne ''; - + my $ignoredLines = 0; my $licenseLines = 0; my $unknownLines = 0; @@ -410,7 +412,7 @@ sub Print_Result } } $senttok =~ s/^,(.*),$/$1/; - + # print "$ignoredLines > $licenseLines > $unknownLines > $unmatchedLines\n"; if (scalar (@result) == 0) { print 'UNKNOWN'; @@ -419,5 +421,5 @@ sub Print_Result } print ";$countMatches;$licenseLines;$ignoredLines;$unmatchedLines;$unknownLines;$senttok\n"; $senttok = $save; - + } diff --git a/matcher/rules.dict b/matcher/rules.dict index 1055eca..28d274a 100644 --- a/matcher/rules.dict +++ b/matcher/rules.dict @@ -19,8 +19,6 @@ GPL2orBSD3:BSDpre,BSDcondSourceExtrict,BSDcondBinaryExtrict,BSDcondEndorse,Altern,GPLGenVer2,BSDasIsExtrict,BSDWarrExtrict - - GPLv2:GPLv2 GPLv2+:GPLv2\+ GPLv3+:GPLv3\+ @@ -134,24 +132,25 @@ openSSL:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdv,OpenSSLendorse,OpenSSLwrit openSSLvar1:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdvPart1,BSDcondAdvPart2,SSLeayWindows,BSDcondAdvPart2,BSDasIs,BSDWarr,NoLicenseChangeAllowed openSSLvar3:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdvPart1,BSDcondAdvPart2,OpenSSLendorse,OpenSSLwritCond,BSDasIs,BSDWarr -Apachev1.0:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdvPart1,BSDcondAdvPart2,OpenSSLendorse,OpenSSLwritCond,OpenSSLName,OpenSSLAckPart1,BSDcondAdvPart2,BSDasIs,BSDWarr -Apachev1.1:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdv,OpenSSLendorse,OpenSSLwritCond,OpenSSLName,OpenSSLAck,BSDasIs,BSDWarr -Apachev1.1:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdvPart1,BSDcondAdvPart2,OpenSSLendorse,OpenSSLwritCond,OpenSSLName,BSDasIs,BSDWarr -Apachev1.1:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdvPart2,OpenSSLendorse,OpenSSLwritCond,OpenSSLName,BSDasIs,BSDWarr -Apachev1.1:BSDpre,BSDcondSourceExtrict,BSDcondBinary,BSDcondAdvPart2,OpenSSLendorse,OpenSSLwritCond,OpenSSLName,BSDasIs,BSDWarr +Apache-1.0:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdvPart1,BSDcondAdvPart2,OpenSSLendorse,OpenSSLwritCond,OpenSSLName,OpenSSLAckPart1,BSDcondAdvPart2,BSDasIs,BSDWarr +Apache-1.1:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdv,OpenSSLendorse,OpenSSLwritCond,OpenSSLName,OpenSSLAck,BSDasIs,BSDWarr +Apache-1.1:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdvPart1,BSDcondAdvPart2,OpenSSLendorse,OpenSSLwritCond,OpenSSLName,BSDasIs,BSDWarr +Apache-1.1:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdvPart2,OpenSSLendorse,OpenSSLwritCond,OpenSSLName,BSDasIs,BSDWarr +Apache-1.1:BSDpre,BSDcondSourceExtrict,BSDcondBinary,BSDcondAdvPart2,OpenSSLendorse,OpenSSLwritCond,OpenSSLName,BSDasIs,BSDWarr # this is dangerous. The v2 applies to apache (ninka-ism) -ApacheV2orLGPLgeneric:apacheAndLGPLgenVer2.0 +Apache-2orLGPLgeneric:apacheAndLGPLgenVer2.0 SleepyCat:BSDpre,BSDcondSource,BSDcondBinary,SleepyCatObtain,SleepyCatSourceIncluded,SleepyCatSourceComplete,SleepyCatDoesNotInclude,SleepyCatAsIs,BSDWarr -boost:boostPermission,boostPreserve,boostAsIs,boostWarr -boostV1:boostRefv1 -boostV1Ref:boostSeev1 +boost-1:boostPermission,boostPreserve,boostAsIs,boostWarr +boost-1:boostRefv1 +boost-1ref:boostSeev1 + SSLeay:SSLCopy,SSLeayAttrib,SSLeayAdType,BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdvRULE,SSLeayCrypto,SSLeayWindows,BSDasIs,BSDWarr,SSLeayCantChangeLic # qt -Qt:qtCommercialuseVer0 +QPLt:qtCommercialuseVer0 orLGPLVer2.1:qtLGPLVer2.1 orLGPLVer2:qtLGPLv2 orGPLv3:qtGPLVer3.0 @@ -171,7 +170,7 @@ Cecill:CecillEn1,CecillEn2,CecillEn3,CecillEn4,CecillEn5,CecillEn6,CecillEn7,Cec SimpleOnlyKeepCopyright:SimpleOnlyKeepCopyright #------------ -QTv1:QTv1 +QPLv1:QTv1 CDDLv1orGPLv2:CDDLorGPLv2ifYouWish,CDDLorGPLv2IfYouAdd CDDLorGPLv2:CDDLorGPLVer2 @@ -233,8 +232,8 @@ EPLv1:EPLv1 CDDLic:CDDLic CDDLicV1:CDDLicV1Only #---------------------------------------------------------------------- -Apachev2:Apachev2,ApacheLicWherePart1,ApacheLicWherePart2v2,ApachesAsIs,ApachesPermLim -Apachev2:Apachev2,ApacheLicWherePart1,ApacheLicWherePart2v2 +Apache-2:Apachev2,ApacheLicWherePart1,ApacheLicWherePart2v2,ApachesAsIs,ApachesPermLim +Apache-2:Apachev2,ApacheLicWherePart1,ApacheLicWherePart2v2 # publid comain publicDomain:publicDomain @@ -312,7 +311,7 @@ SameAsPerl:SameAsPerl ArtisticLicensev1:ArtisticLicensev1 # QT triple license, outdated :) -QtGPLv2or3:qtGPLv2or3 +QplGPLv2or3:qtGPLv2or3 FreeType:FreeType ## diff --git a/ninka.pl b/ninka.pl index a74dc83..5117630 100755 --- a/ninka.pl +++ b/ninka.pl @@ -74,9 +74,13 @@ my $forceLicense = exists $opts{L}; my $f = $ARGV[0]; -my $original = $f; +my $original = $f; +$f =~ s/'/\\'/g; +$f =~ s/\$/\\\$/g; +$f =~ s/;/\\;/g; +$f =~ s/ /\\ /g; print "Starting: $original;\n" if ($verbose); @@ -87,38 +91,38 @@ my $sentencesFile = "${f}.sentences"; my $goodsentFile = "${f}.goodsent"; my $sentokFile = "${f}.senttok"; -if (not (-f "$f")) { - print "ERROR;[${f}] is not a file\n" ; +if (not (-f $original)) { + print "ERROR;[${original}] is not a file\n" ; exit 0; } Do_File_Process($original, $commentsFile, ($force or $forceComments), - "$path/extComments/extComments.pl -c1 '${original}'", + "$path/extComments/extComments.pl -c1 ${f}", "Creating comments file", exists $opts{c}); Do_File_Process($commentsFile, $sentencesFile, ($force or $forceSentences), - "$path/splitter/splitter.pl '${commentsFile}'", + "$path/splitter/splitter.pl ${commentsFile}", "Splitting sentences", exists $opts{s} ); Do_File_Process( $sentencesFile, $goodsentFile, ($force or $forceGood), - "$path/filter/filter.pl '${sentencesFile}'", + "$path/filter/filter.pl ${sentencesFile}", "Filtering good sentences", exists $opts{s} ); Do_File_Process($goodsentFile, $sentokFile, ($force or $forceSentok), - "$path/senttok/senttok.pl '${goodsentFile}' > '${sentokFile}'", + "$path/senttok/senttok.pl ${goodsentFile} > ${sentokFile}", "Matching sentences against rules", exists $opts{t} ); print "Matching ${f}.senttok against rules" if ($verbose); -execute("$path/matcher/matcher.pl '${f}.senttok' > '${f}.license'"); +execute("$path/matcher/matcher.pl ${f}.senttok > ${f}.license"); -print `cat '${f}.license'`; +print `cat ${f}.license`; unlink("${f}.code"); @@ -126,7 +130,7 @@ if ($delete) { unlink("${f}.badsent"); unlink("${f}.comments"); unlink("${f}.goodsent"); -# unlink("${f}.sentences"); + unlink("${f}.sentences"); unlink("${f}.senttok"); } -- cgit v1.2.1 From 54c821ccdc582332e5fb908fce9a08aeddfdf6bd Mon Sep 17 00:00:00 2001 From: dmg Date: Sun, 6 Apr 2014 21:30:59 -0700 Subject: added scripts, not sure if they work --- scripts/license_matcher_modified.pl | 92 ++++++++++++++++++++ scripts/parseLicense.pl | 23 +++++ scripts/sort_package_license_list.pl | 18 ++++ scripts/unify.pl | 161 +++++++++++++++++++++++++++++++++++ 4 files changed, 294 insertions(+) create mode 100644 scripts/license_matcher_modified.pl create mode 100644 scripts/parseLicense.pl create mode 100644 scripts/sort_package_license_list.pl create mode 100644 scripts/unify.pl diff --git a/scripts/license_matcher_modified.pl b/scripts/license_matcher_modified.pl new file mode 100644 index 0000000..53b1732 --- /dev/null +++ b/scripts/license_matcher_modified.pl @@ -0,0 +1,92 @@ +#!/usr/bin/perl + +use strict; +use warnings; +use Getopt::Std; +use Set::Object; +use Tie::IxHash; +use Tie::Autotie 'Tie::IxHash'; +use Data::Dumper; + +my %opts =(); +my %licensedictionary=(); +my %numofslashdictionary=(); + +tie %licensedictionary, 'Tie::IxHash'; + +my $prefix="/big2/y-manabe/licensereration/fedora/srcrpm_sandbox/"; + +getopts ("p:l:",\%opts); + +open (PFH,$opts{"p"}); + +while (my $line=){ + $line =~ /(.*);(.*)/; + my $key = $1; + my $license = $2; + #if ($key =~ /\//){ +# $key =~ s/\//\\\//g; + # } + #print "$key\n"; + if (defined $licensedictionary{substr($key,0,2)}{$key}){ + push (@{$licensedictionary{substr($key,0,2)}{$key}},$license); +# print join(',',@{$licensedictionary{$key}}); + }else{ + my @tmp=($license); + if (!defined $licensedictionary{substr($key,0,2)}){ + #tie my %keylicensepairs,'Tie::IxHash'; + #$licensedictionary{substr($key,0,2)}=\%keylicensepairs; + } + $licensedictionary{substr($key,0,2)}{$key}=\@tmp; + my $numofslash = ($key =~ tr /\//\//); + $numofslashdictionary{$key} = $numofslash; + } +} + +close PFH; + +#print Dumper(\%licensedictionary); +#return; + +#foreach my $key(keys %licensedictionary){ +# print $key.";"; +# print join(',',@{$licensedictionary{$key}}); +# print "\n"; +#} + +open (LFH,$opts{"l"}); + +while(my $line=){ + #print $line; + $line =~ /^${prefix}(.*?);(.*?)$/; + my $filepath=$1; + my $slicense=$2; + my @slicenselist=split (/\,/,$slicense); + my $licenseset = Set::Object->new(@slicenselist); + #$licenseset= Set::Object->unique($licenseset); + @slicenselist= sort($licenseset->members); + my $uniqlicensenames = join (',',@slicenselist); + #print "$license\n"; + foreach my $key(keys %{$licensedictionary{substr($filepath,0,2)}}){ + my @tmpdirnames; + my @dirnames = split /\//,$filepath; + if ($#dirnames >= ${numofslashdictionary}{$key}){ + @tmpdirnames=@dirnames[0..$numofslashdictionary{$key}]; + }else{ + next; + } +# print "$filepath,$licensedictionary{substr($filepath,0,2)}{$key},$key\n"; + #print "@{dirnames}[0..$numofslash]\n"; + my $tmpfilepath; + $tmpfilepath = join('/',@tmpdirnames); + #print "$key\n"; + if ($tmpfilepath eq $key){ + print "${prefix}$filepath;$uniqlicensenames;"; + print join(',',@{$licensedictionary{substr($key,0,2)}{$key}}); + print "\n"; + last; + } + } +} + +close LFH; diff --git a/scripts/parseLicense.pl b/scripts/parseLicense.pl new file mode 100644 index 0000000..2dc61ef --- /dev/null +++ b/scripts/parseLicense.pl @@ -0,0 +1,23 @@ +#!/usr/bin/perl + +# this parses a line + +#/big2/y-manabe/licensereration/fedora/srcrpm_sandbox/lua-lgi-0.7.1-1.fc19/lgi-0.7.1/lgi/marshal.c;UNKNOWN + +#into +# pkg name: lua-lgi-0.7.1-1.fc19 +# file: lgi-0.7.1/lgi/marshal.c +# license: UNKNOWN + +use strict; + +while (<>) { + chomp; + my @fields = split(';'); + my $license = $fields[1]; + my @f = split('/', $fields[0]); + my $pkg = $f[6]; + my $file = join('/', splice(@f, 7, scalar(@f) -1)); + + print "$pkg;$file;$license\n"; +} diff --git a/scripts/sort_package_license_list.pl b/scripts/sort_package_license_list.pl new file mode 100644 index 0000000..cd20b50 --- /dev/null +++ b/scripts/sort_package_license_list.pl @@ -0,0 +1,18 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +open (FH, $ARGV[0]); +my @list; + +while (my $line = ){ + chomp $line; + push (@list,$line); +} + +@list = sort { ($b =~ tr /\//\//) <=> ($a =~ tr /\//\//) || $b cmp $a } @list; + +foreach my $line(@list){ + print "$line\n"; +} diff --git a/scripts/unify.pl b/scripts/unify.pl new file mode 100644 index 0000000..f518fbb --- /dev/null +++ b/scripts/unify.pl @@ -0,0 +1,161 @@ +#!/usr/bin/perl + +# first pass, unify names of licenses and remove duplicates. + +# we trick regarding gpl related licenses so they are "clustered" together.. +# +# replace GPL with __GPL +# replace exception in the text with ___exception + +use strict; + +my %equiv = ( + "boostV1Ref" => "boostV1", + "X11" => "X11mit", + "X11Festival" => "X11mit", + "X11mitNoSellNoDocDocBSDvar" => "X11mit", + "X11mitwithoutSell" => 'X11mit', + "X11mitBSDvar" => "X11mit", + "X11mitwithoutSellCMUVariant" => "X11mit", + "X11mitwithoutSellCMUVariant" => "X11mit", + "X11mitwithoutSellandNoDocumentationRequi" => "X11mit", + "MITvar3" => "X11mit", + "MITvar2" => "X11mit", + "MIT" => "X11mit", + "ZLIBref" => "ZLIB", + "BSD3NoWarranty" => "BSD3", + "BSD2EndorseInsteadOfBinary" => "BSD2", + "BSD2var2" => "BSD2", + "LesserGPLv2" => "LibraryGPLv2", + "LesserGPLv2+" => "LibraryGPLv2+", + "orLGPLVer2.1" => "LesserGPLVer2.1", + "postgresqlRef" => "postgresql", + ); + +while (<>) { + chomp; + my @f = split(/;/); + # first remove duplicates + + my $l = $f[1]; + + # do a simple rewriting of this exception which is an incomplete license + + $l =~ s/^Exception$/UNKNOWN/; + + my @l = split(/,/,$l); + my %lics = %{{ map { $_ => 1 } @l }}; + + %lics = Do_Equivalent(%lics); + %lics = Remove_Redundant(%lics); + %lics = Do_Exceptions(%lics); + + my @out = sort keys %lics; + + my $t = join(',', @out); + if ($t eq "") { + $t = "UNKNOWN"; + } + print $f[0], ";$t\n"; +} + +sub Do_Exceptions +{ + my (%lics) = @_; + + if ($lics{'digiaQTExceptionNoticeVer1.1'} ne '' and $lics{'Qt'}) { + delete $lics{'digiaQTExceptionNoticeVer1.1'}; + delete $lics{'Qt'}; + $lics{'Qt-qtExcep'} = 'Qt-qtExcep'; + } + if ($lics{'BisonException'} ne "" and $lics{"GPLv3+"} ne "") { + delete $lics{'BisonException'}; + delete $lics{"GPLv3+"}; + $lics{'GPLv3+-bisonExcep'} = 'GPLv3+-bisonExcep'; + } + if ($lics{'BisonException'} ne "" and $lics{"GPLv2+"} ne "") { + delete $lics{'BisonException'}; + delete $lics{"GPLv2+"}; + $lics{'GPLv2+-bisonExcep'} = 'GPLv2+-bisonExcep'; + } + if ($lics{'BisonException'} ne "" and $lics{"GPLv2"} ne "") { + delete $lics{'BisonException'}; + delete $lics{"GPLv2"}; + $lics{'GPLv2-bisonExcep'} = 'GPLv2-bisonExcep'; + } + if ($lics{'ClassPathException'} ne "" and $lics{"GPLv2"} ne "") { + delete $lics{'ClassPathException'}; + delete $lics{"GPLv2"}; + $lics{"GPLv2-classPathExcep"} = "GPLv2-classPathExcep"; + } + if ($lics{'CDDLorGPLv2'} ne "" and $lics{"ClassPathExceptionGPLv2"} ne "") { + delete $lics{'CDDLorGPLv2'}; + delete $lics{"ClassPathExceptionGPLv2"}; + $lics{'CDDLorGPLv2-classPathExcep'} = 'CDDLorGPLv2-classPathExcep'; + } + if ($lics{'LinkException'} ne "" and $lics{"GPLv3+"} ne "") { + delete $lics{'LinkException'}; + delete $lics{"GPLv3+"}; + $lics{'GPLv3+-linkExcep'} = 'GPLv3+-linkExcep'; + } + if ($lics{'LinkException'} ne "" and $lics{"GPLv2+"} ne "") { + delete $lics{'LinkException'}; + delete $lics{"GPLv2+"}; + $lics{'GPLv2+-linkExcep'} = 'GPLv2+-linkExcep'; + } + if ($lics{'LinkException'} ne "" and $lics{"GPLv3"} ne "") { + delete $lics{'LinkException'}; + delete $lics{"GPLv3"}; + $lics{'GPLv3-linkExcep'} = 'GPLv3-linkExcep'; + } + if ($lics{'LinkException'} ne "" and $lics{"GPLv2"} ne "") { + delete $lics{'LinkException'}; + delete $lics{"GPLv2"}; + $lics{'GPLv2-linkExcep'} = 'GPLv2-linkExcep'; + } + + return %lics; + +} + +sub Remove_Redundant +{ + my (%lics) = @_; + + if ($lics{"GPLnoVersion"} ne "" and $lics{"GPLv2"} . $lics{"GPLv2+"} .$lics{"GPLv3"} . $lics{"GPLv3+"} ne "") { + delete $lics{"GPLnoVersion"}; + } + if ($lics{"GPLv2+"} ne "" and $lics{"GPLv3+"} ne "") { + delete $lics{"GPLv2+"}; + } + if ($lics{'MPL1_1andLGPLv2_1'} ne "" and $lics{"MPLv1_1"} ne "") { + delete $lics{"MPLv1_1"}; + } + + + return %lics; + +} + +sub Do_Equivalent +{ + my (%lics) = @_; + my %outA; + + # then normalize licenses + foreach my $a (keys %lics) { + next if $a eq "SeeFile"; + if ($equiv{$a} ne "") { + $outA{$equiv{$a}} = $equiv{$a}; + } else { + $outA{$a} = $a; + } + } + return %outA; + +} + + +sub uniq { + return keys %{{ map { $_ => 1 } @_ }}; +} -- cgit v1.2.1 From d74ffddb46f42c4f15468b21b765046bab1d9831 Mon Sep 17 00:00:00 2001 From: dmg Date: Mon, 9 Jun 2014 17:56:42 +0900 Subject: added variations of the LGPLv2 and the copyright --- matcher/rules.dict | 1 + senttok/licensesentence.dict | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/matcher/rules.dict b/matcher/rules.dict index 28d274a..f417b66 100644 --- a/matcher/rules.dict +++ b/matcher/rules.dict @@ -60,6 +60,7 @@ AGPLv3+:AGPLGenVer3\+ LesserGPLv3+:LesserGPLGenVer3\+ LesserGPLv3:LesserGPLGenVer3 LesserGPLv2.1:LesserGPLGenVer2\.1 +LesserGPLv2.1:LGPLV2_1Ver0 LesserGPLv2.1+:LesserGPLGenVer2\.1\+ LibraryGPLv2:LibraryGPLGenVer2\.0\+|LibraryGPLGenVer2 LibraryGPLv2+:LibraryGPLGenVer2\.0\+|LibraryGPLGenVer2\+ diff --git a/senttok/licensesentence.dict b/senttok/licensesentence.dict index 8b13f09..89c949f 100644 --- a/senttok/licensesentence.dict +++ b/senttok/licensesentence.dict @@ -129,6 +129,7 @@ GNUurl:10:0:^If not, see $ # Special cases GPL # GPLv2orv3:10:0:^This file may be used under the terms of the GPL versions 2\.0 or 3\.0 as published by the Free Software Foundation and appearing in the files LICENSE\.GPL2 and LICENSE\.GPL3 included in the packaging of this file$ +LGPLV2_1:11:0:^Lesser GPL 2\.1 applies: LGPLv2_1orv3:10:0:^([^,;]+) is free software; you can redistribute it and\/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2\.1 or 3 of the License$ LGPLv2orv3:10:0:^([^,;]+) is free software; you can redistribute it and\/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2 of the License, or \(at your option\) version 3$ ###XXXGPLCopy:50:2:^You should have received a copy of the GPL along with (.*); see the file (.*); see the file (.*)$: @@ -219,8 +220,6 @@ BSDlikeRef:10:1:^Use of this source code is governed by a BSD\-style license tha MITname:10:0:MIT License -Copyright:10:1:Copyright \(c\)(.+) -Copyright:20:1:^Copyright (.+)$: MITpermissionExtrict:10:0:Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files \(the Software \), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: X11noticeExtrict:10:0:The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software X11asIsExtrict:10:0:THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT: @@ -712,3 +711,5 @@ FSFUnlimited:10:0:^This configure script is free software; the Free Software Fou # these should be the last one Exception:52:0:As a special exception,: +Copyright:10:1:Copyright \(c\)(.+) +Copyright:20:1:^Copyright (.+)$: -- cgit v1.2.1 From 2e1f0f6fc95887b5a6847b16fd87276dbcad4481 Mon Sep 17 00:00:00 2001 From: dmg Date: Mon, 9 Jun 2014 18:20:46 +0900 Subject: changed license to GPLv2+ --- ChangeLog | 6 ++++++ README.TXT | 10 +++++----- extComments/extComments.pl | 17 +++++++++-------- extComments/hashComments.pl | 18 +++++++++--------- filter/criticalword.dict | 10 +++++----- filter/filter.pl | 13 +++++++------ matcher/interrules.dict | 10 +++++----- matcher/matcher.pl | 17 +++++++++-------- matcher/rules.dict | 10 +++++----- ninka.pl | 14 +++++++------- senttok/licensesentence.dict | 10 +++++----- senttok/senttok.pl | 10 +++++----- splitter/splitter.pl | 8 ++++---- 13 files changed, 81 insertions(+), 72 deletions(-) diff --git a/ChangeLog b/ChangeLog index c31fe26..9e3a70e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2014-06-09 dmg + + * Version 1.2 + + * Changed license to GPLv2+ + 2013-07-07 dmg * Version 1.1 diff --git a/README.TXT b/README.TXT index 1e92a7a..565465c 100644 --- a/README.TXT +++ b/README.TXT @@ -28,21 +28,21 @@ the above paper. * License Except for the directories comments and splitter, Ninka is licensed - under the AGPLv3+ + under the GPLv2+ Copyright (C) 2009-2010 Yuki Manabe and Daniel M. German This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as - published by the Free Software Foundation, either version 3 of the + it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. + GNU General Public License for more details. - You should have received a copy of the GNU Affero General Public License + You should have received a copy of the GNU General Public License along with this program. If not, see . - splitter.pl is a derivative work of the Rule-based sentence diff --git a/extComments/extComments.pl b/extComments/extComments.pl index d8643d6..0509012 100755 --- a/extComments/extComments.pl +++ b/extComments/extComments.pl @@ -1,20 +1,21 @@ #!/usr/bin/perl # -# Copyright (C) 2009-2010 Yuki Manabe and Daniel M. German +# Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. +# GNU General Public License for more details. # -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . # use Getopt::Std; diff --git a/extComments/hashComments.pl b/extComments/hashComments.pl index a4834d4..377fe91 100755 --- a/extComments/hashComments.pl +++ b/extComments/hashComments.pl @@ -1,23 +1,23 @@ #!/usr/bin/perl # -# Copyright (C) 2009-2010 Yuki Manabe and Daniel M. German +# Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. +# GNU General Public License for more details. # -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . # - # this is to extract the first comments from any language that # uses the same prefix diff --git a/filter/criticalword.dict b/filter/criticalword.dict index b58a586..477c32b 100755 --- a/filter/criticalword.dict +++ b/filter/criticalword.dict @@ -1,17 +1,17 @@ # -# Copyright (C) 2009-2010 Yuki Manabe and Daniel M. German +# Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the +# it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the # License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# GNU Affero General Public License for more details. +# GNU General Public License for more details. # -# You should have received a copy of the GNU Affero General Public License +# You should have received a copy of the GNU General Public License # along with this program. If not, see . # as is diff --git a/filter/filter.pl b/filter/filter.pl index 626904f..b2fcce1 100755 --- a/filter/filter.pl +++ b/filter/filter.pl @@ -1,19 +1,20 @@ #!/usr/bin/perl # -# Copyright (C) 2009-2010 Yuki Manabe and Daniel M. German +# Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the +# it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the # License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. +# GNU General Public License for more details. # -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . # diff --git a/matcher/interrules.dict b/matcher/interrules.dict index d395805..9ef2b76 100644 --- a/matcher/interrules.dict +++ b/matcher/interrules.dict @@ -1,17 +1,17 @@ # -# Copyright (C) 2009-2010 Yuki Manabe and Daniel M. German +# Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the +# it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the # License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. +# GNU General Public License for more details. # -# You should have received a copy of the GNU Affero General Public License +# You should have received a copy of the GNU General Public License # along with this program. If not, see . # # intermediate rules diff --git a/matcher/matcher.pl b/matcher/matcher.pl index 5e4422e..366cf6b 100755 --- a/matcher/matcher.pl +++ b/matcher/matcher.pl @@ -1,20 +1,21 @@ #!/usr/bin/perl # -# Copyright (C) 2009-2010 Yuki Manabe and Daniel M. German +# Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. +# GNU General Public License for more details. # -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . # # diff --git a/matcher/rules.dict b/matcher/rules.dict index f417b66..531df5f 100644 --- a/matcher/rules.dict +++ b/matcher/rules.dict @@ -1,17 +1,17 @@ # -# Copyright (C) 2009-2010 Yuki Manabe and Daniel M. German +# Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the +# it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the # License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. +# GNU General Public License for more details. # -# You should have received a copy of the GNU Affero General Public License +# You should have received a copy of the GNU General Public License # along with this program. If not, see . # ########## This ones are deprecated, I think diff --git a/ninka.pl b/ninka.pl index 5117630..765ec01 100755 --- a/ninka.pl +++ b/ninka.pl @@ -2,17 +2,17 @@ # # Copyright (C) 2009-2010 Yuki Manabe and Daniel M. German # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. # -# You should have received a copy of the GNU Affero General Public License +# You should have received a copy of the GNU General Public License # along with this program. If not, see . # diff --git a/senttok/licensesentence.dict b/senttok/licensesentence.dict index 89c949f..4216275 100644 --- a/senttok/licensesentence.dict +++ b/senttok/licensesentence.dict @@ -1,17 +1,17 @@ # -# Copyright (C) 2009-2010 Yuki Manabe and Daniel M. German +# Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the +# it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the # License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. +# GNU General Public License for more details. # -# You should have received a copy of the GNU Affero General Public License +# You should have received a copy of the GNU General Public License # along with this program. If not, see . # #Format diff --git a/senttok/senttok.pl b/senttok/senttok.pl index 543f818..a5eff10 100755 --- a/senttok/senttok.pl +++ b/senttok/senttok.pl @@ -1,18 +1,18 @@ #!/usr/bin/perl # -# Copyright (C) 2009-2010 Yuki Manabe and Daniel M. German +# Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German # # This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the +# it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the # License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. +# GNU General Public License for more details. # -# You should have received a copy of the GNU Affero General Public License +# You should have received a copy of the GNU General Public License # along with this program. If not, see . # use strict; diff --git a/splitter/splitter.pl b/splitter/splitter.pl index 4ff1723..e00c54f 100755 --- a/splitter/splitter.pl +++ b/splitter/splitter.pl @@ -14,16 +14,16 @@ # which are under the following license: # # This patch is free software; you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the +# it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the # License, or (at your option) any later version. # # This patch is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. +# GNU General Public License for more details. # -# You should have received a copy of the GNU Affero General Public License +# You should have received a copy of the GNU General Public License # along with this patch. If not, see . -- cgit v1.2.1 From 5023e3b856b07dd66bbbd7cf8b7327170ed3d81f Mon Sep 17 00:00:00 2001 From: dmg Date: Mon, 9 Jun 2014 18:49:39 +0900 Subject: forgot to upgrade version of ninka.pl --- ninka.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ninka.pl b/ninka.pl index 765ec01..b2fcd9a 100755 --- a/ninka.pl +++ b/ninka.pl @@ -1,6 +1,6 @@ #!/usr/bin/perl # -# Copyright (C) 2009-2010 Yuki Manabe and Daniel M. German +# Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License as @@ -21,7 +21,7 @@ use Getopt::Std; my %opts = (); if (!getopts ("vfCcSsGgTtLd",\%opts) or scalar(@ARGV) == 0) { -print STDERR "Ninka version 1.1 +print STDERR "Ninka version 1.2 Usage $0 -fCtTvcgsGd -- cgit v1.2.1 From fa81ddcfa925acc52632fd72de419abe29796e2d Mon Sep 17 00:00:00 2001 From: dmg Date: Tue, 29 Jul 2014 22:22:58 -0700 Subject: added experimental code --- ninka-sqlite.pl | 174 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ unify.pl | 161 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 335 insertions(+) create mode 100644 ninka-sqlite.pl create mode 100644 unify.pl diff --git a/ninka-sqlite.pl b/ninka-sqlite.pl new file mode 100644 index 0000000..eee0c0f --- /dev/null +++ b/ninka-sqlite.pl @@ -0,0 +1,174 @@ +#!/usr/bin/perl + +use strict; +use Switch; +use DBI; +use File::Temp; +use File::Find; +use File::Basename; +use Scalar::Util qw(looks_like_number); + +if(scalar(@ARGV) != 2){ + print STDERR "Incorrect number of arguments\n"; + print STDERR "Correct usage is: perl ninka-wrapper \n"; + exit 1; +} + +my $path = $0; + +$path =~ s/\/+[^\/]+$//; +if ($path eq "") { + $path = "./"; +} + +my ($pack, $db) = @ARGV; + +my $dbh = DBI->connect("DBI:SQLite:dbname=$db", "", "", {RaiseError => 1}) + or die $DBI::errstr; +$dbh->do("CREATE TABLE IF NOT EXISTS + comments (filename TEXT, path TEXT, container TEXT, content TEXT, + PRIMARY KEY(filename, path, container))"); +$dbh->do("CREATE TABLE IF NOT EXISTS + sentences (filename TEXT, path TEXT, container TEXT, content TEXT, + PRIMARY KEY(filename, path, container))"); +$dbh->do("CREATE TABLE IF NOT EXISTS + goodsents (filename TEXT, path TEXT, container TEXT, content TEXT, + PRIMARY KEY(filename, path, container))"); +$dbh->do("CREATE TABLE IF NOT EXISTS + badsents (filename TEXT, path TEXT, container TEXT, content TEXT, + PRIMARY KEY(filename, path, container))"); +$dbh->do("CREATE TABLE IF NOT EXISTS + senttoks (filename TEXT, path TEXT, container TEXT, content TEXT, + PRIMARY KEY(filename, path, container))"); +$dbh->do("CREATE TABLE IF NOT EXISTS + licenses (filename TEXT, path TEXT, container TEXT, licenses TEXT, + num_found INT, lines INT, toks_ignored INT, toks_unmatched INT, + toks_unknown INT, tokens TEXT, + PRIMARY KEY(filename, path, container))"); + +my $tempdir = File::Temp->newdir(); +my $dirname = $tempdir->dirname; + +print "***** Extracting file [$pack] to temporary directory [$dirname] *****\n"; +my $packext = getExtension($pack); +if ($packext eq ".bz2" || $packext eq ".gz") { + execute("tar -xvf '$pack' --directory '$dirname'"); +} elsif ($packext eq ".jar" || $packext eq ".zip") { + execute("unzip -d $dirname $pack"); +} else { + print "ninka-wrapper does not support packages with extension [$packext]\n"; +} + +my @files; +find( + sub { push @files, $File::Find::name unless -d; }, + $dirname +); + +print "***** Beginning Execution of Ninka *****\n"; +foreach my $file (@files) { + print "Running ninka on file [$file]\n"; + execute("perl ${path}/ninka.pl '$file'"); +} + +my @ninkafiles; +find( + sub { + my $ext = getExtension($File::Find::name); + if($ext =~ m/(comments|sentences|goodsent|badsent|senttok|license)$/){ + push @ninkafiles, $File::Find::name; + } + }, + $dirname +); + +print "***** Entering Ninka Data into Database [$db] *****\n"; +foreach my $file (@ninkafiles) { + + my $filepath = dirname($file); + $filepath =~ s/$dirname//; + my $basefile = basename($file); + my $rootfile = removeExtension($basefile); + my $packname = basename($pack); + + #Read entire file into a string + open (my $fh, '<', $file) or die "Can't open file $!"; + my $filedata = do { local $/; <$fh> }; + + my $sth; + switch (getExtension($basefile)){ + case ".comments" { + print "Inserting [$basefile] into table comments\n"; + $sth = $dbh->prepare("INSERT INTO comments VALUES + ('$rootfile', '$filepath', '$packname', ?)"); + } + case ".sentences" { + print "Inserting [$basefile] into table sentences\n"; + $sth = $dbh->prepare("INSERT INTO sentences VALUES + ('$rootfile', '$filepath', '$packname', ?)"); + } + case ".goodsent" { + print "Inserting [$basefile] into table goodsents\n"; + $sth = $dbh->prepare("INSERT INTO goodsents VALUES + ('$rootfile', '$filepath', '$packname', ?)"); + } + case ".badsent" { + print "Inserting [$basefile] into table goodsents\n"; + $sth = $dbh->prepare("INSERT INTO badsents VALUES + ('$rootfile', '$filepath', '$packname', ?)"); + } + case ".senttok" { + print "Inserting [$basefile] into table senttoks\n"; + $sth = $dbh->prepare("INSERT INTO senttoks VALUES + ('$rootfile', '$filepath', '$packname', ?)"); + } + case ".license" { + print "Inserting [$basefile] into table licenses\n"; + my @columns = parseLicenseData($filedata); + $sth = $dbh->prepare("INSERT INTO licenses VALUES + ('$rootfile', '$filepath', '$packname', '$columns[0]', '$columns[1]', + '$columns[2]', '$columns[3]', '$columns[4]', '$columns[5]', '$columns[6]')"); + } + } + + $sth->bind_param(1, $filedata); + $sth->execute; + close($fh); +} + +$dbh->disconnect; + +sub parseLicenseData { + my ($data) = @_; + + my @columns; + my @fields = split(';', $data); + if($fields[0] eq "NONE\n"){ + @columns = '' x 7; + @columns[0] = 'NONE'; + } else { + @columns = @fields; + } + return @columns; +} + +sub getExtension { + my ($file) = @_; + my $filename = basename($file); + my ($ext) = $filename =~ /(\.[^.]+)$/; + return $ext; +} + +sub removeExtension { + my ($file) = @_; + (my $filename = $file) =~ s/\.[^.]+$//; + return $filename; +} + +sub execute { + my ($command) = @_; + my $output = `$command`; + my $status = ($? >> 8); + die "execution of [$command] failed: status [$status]\n" if ($status != 0); + return $output; +} diff --git a/unify.pl b/unify.pl new file mode 100644 index 0000000..f518fbb --- /dev/null +++ b/unify.pl @@ -0,0 +1,161 @@ +#!/usr/bin/perl + +# first pass, unify names of licenses and remove duplicates. + +# we trick regarding gpl related licenses so they are "clustered" together.. +# +# replace GPL with __GPL +# replace exception in the text with ___exception + +use strict; + +my %equiv = ( + "boostV1Ref" => "boostV1", + "X11" => "X11mit", + "X11Festival" => "X11mit", + "X11mitNoSellNoDocDocBSDvar" => "X11mit", + "X11mitwithoutSell" => 'X11mit', + "X11mitBSDvar" => "X11mit", + "X11mitwithoutSellCMUVariant" => "X11mit", + "X11mitwithoutSellCMUVariant" => "X11mit", + "X11mitwithoutSellandNoDocumentationRequi" => "X11mit", + "MITvar3" => "X11mit", + "MITvar2" => "X11mit", + "MIT" => "X11mit", + "ZLIBref" => "ZLIB", + "BSD3NoWarranty" => "BSD3", + "BSD2EndorseInsteadOfBinary" => "BSD2", + "BSD2var2" => "BSD2", + "LesserGPLv2" => "LibraryGPLv2", + "LesserGPLv2+" => "LibraryGPLv2+", + "orLGPLVer2.1" => "LesserGPLVer2.1", + "postgresqlRef" => "postgresql", + ); + +while (<>) { + chomp; + my @f = split(/;/); + # first remove duplicates + + my $l = $f[1]; + + # do a simple rewriting of this exception which is an incomplete license + + $l =~ s/^Exception$/UNKNOWN/; + + my @l = split(/,/,$l); + my %lics = %{{ map { $_ => 1 } @l }}; + + %lics = Do_Equivalent(%lics); + %lics = Remove_Redundant(%lics); + %lics = Do_Exceptions(%lics); + + my @out = sort keys %lics; + + my $t = join(',', @out); + if ($t eq "") { + $t = "UNKNOWN"; + } + print $f[0], ";$t\n"; +} + +sub Do_Exceptions +{ + my (%lics) = @_; + + if ($lics{'digiaQTExceptionNoticeVer1.1'} ne '' and $lics{'Qt'}) { + delete $lics{'digiaQTExceptionNoticeVer1.1'}; + delete $lics{'Qt'}; + $lics{'Qt-qtExcep'} = 'Qt-qtExcep'; + } + if ($lics{'BisonException'} ne "" and $lics{"GPLv3+"} ne "") { + delete $lics{'BisonException'}; + delete $lics{"GPLv3+"}; + $lics{'GPLv3+-bisonExcep'} = 'GPLv3+-bisonExcep'; + } + if ($lics{'BisonException'} ne "" and $lics{"GPLv2+"} ne "") { + delete $lics{'BisonException'}; + delete $lics{"GPLv2+"}; + $lics{'GPLv2+-bisonExcep'} = 'GPLv2+-bisonExcep'; + } + if ($lics{'BisonException'} ne "" and $lics{"GPLv2"} ne "") { + delete $lics{'BisonException'}; + delete $lics{"GPLv2"}; + $lics{'GPLv2-bisonExcep'} = 'GPLv2-bisonExcep'; + } + if ($lics{'ClassPathException'} ne "" and $lics{"GPLv2"} ne "") { + delete $lics{'ClassPathException'}; + delete $lics{"GPLv2"}; + $lics{"GPLv2-classPathExcep"} = "GPLv2-classPathExcep"; + } + if ($lics{'CDDLorGPLv2'} ne "" and $lics{"ClassPathExceptionGPLv2"} ne "") { + delete $lics{'CDDLorGPLv2'}; + delete $lics{"ClassPathExceptionGPLv2"}; + $lics{'CDDLorGPLv2-classPathExcep'} = 'CDDLorGPLv2-classPathExcep'; + } + if ($lics{'LinkException'} ne "" and $lics{"GPLv3+"} ne "") { + delete $lics{'LinkException'}; + delete $lics{"GPLv3+"}; + $lics{'GPLv3+-linkExcep'} = 'GPLv3+-linkExcep'; + } + if ($lics{'LinkException'} ne "" and $lics{"GPLv2+"} ne "") { + delete $lics{'LinkException'}; + delete $lics{"GPLv2+"}; + $lics{'GPLv2+-linkExcep'} = 'GPLv2+-linkExcep'; + } + if ($lics{'LinkException'} ne "" and $lics{"GPLv3"} ne "") { + delete $lics{'LinkException'}; + delete $lics{"GPLv3"}; + $lics{'GPLv3-linkExcep'} = 'GPLv3-linkExcep'; + } + if ($lics{'LinkException'} ne "" and $lics{"GPLv2"} ne "") { + delete $lics{'LinkException'}; + delete $lics{"GPLv2"}; + $lics{'GPLv2-linkExcep'} = 'GPLv2-linkExcep'; + } + + return %lics; + +} + +sub Remove_Redundant +{ + my (%lics) = @_; + + if ($lics{"GPLnoVersion"} ne "" and $lics{"GPLv2"} . $lics{"GPLv2+"} .$lics{"GPLv3"} . $lics{"GPLv3+"} ne "") { + delete $lics{"GPLnoVersion"}; + } + if ($lics{"GPLv2+"} ne "" and $lics{"GPLv3+"} ne "") { + delete $lics{"GPLv2+"}; + } + if ($lics{'MPL1_1andLGPLv2_1'} ne "" and $lics{"MPLv1_1"} ne "") { + delete $lics{"MPLv1_1"}; + } + + + return %lics; + +} + +sub Do_Equivalent +{ + my (%lics) = @_; + my %outA; + + # then normalize licenses + foreach my $a (keys %lics) { + next if $a eq "SeeFile"; + if ($equiv{$a} ne "") { + $outA{$equiv{$a}} = $equiv{$a}; + } else { + $outA{$a} = $a; + } + } + return %outA; + +} + + +sub uniq { + return keys %{{ map { $_ => 1 } @_ }}; +} -- cgit v1.2.1 From 3493612d6b453df4fe7154843ee877f5812a5d2c Mon Sep 17 00:00:00 2001 From: dmg Date: Tue, 29 Jul 2014 23:26:28 -0700 Subject: added excel processor --- ninka-excel.pl | 175 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ninka-sqlite.pl | 5 +- 2 files changed, 178 insertions(+), 2 deletions(-) create mode 100644 ninka-excel.pl diff --git a/ninka-excel.pl b/ninka-excel.pl new file mode 100644 index 0000000..bfa59c6 --- /dev/null +++ b/ninka-excel.pl @@ -0,0 +1,175 @@ +#!/usr/bin/perl + +use strict; +use Switch; +use File::Temp; +use File::Find; +use File::Basename; +use Scalar::Util qw(looks_like_number); +use Spreadsheet::WriteExcel; + + + +if(scalar(@ARGV) != 2){ + print STDERR "Incorrect number of arguments\n"; + print STDERR "Correct usage is: perl ninka-wrapper \n"; + exit 1; +} + +my $path = $0; + +$path =~ s/\/+[^\/]+$//; +if ($path eq "") { + $path = "./"; +} + +my ($pack, $excelFile) = @ARGV; + +my $workbook = Spreadsheet::WriteExcel->new($excelFile); +my $worksheet = $workbook->add_worksheet(); +my $format = $workbook->add_format(); # Add a format +$format->set_bold(); +$format->set_color('blue'); +$format->set_align('center'); + +$worksheet->set_column(0, 9, 30); +$worksheet->write(0, 0, 'Container File', $format); +$worksheet->write(0, 1, 'Path', $format); +$worksheet->write(0, 2, 'Filename', $format); +$worksheet->write(0, 3, 'Licenses', $format); +$worksheet->write(0, 4, 'Num found', $format); +$worksheet->write(0, 5, 'Lines', $format); +$worksheet->write(0, 6, 'TokensIgnored', $format); +$worksheet->write(0, 7, 'TokensUnmatched', $format); +$worksheet->write(0, 8, 'TokensUnknown', $format); +$worksheet->write(0, 9, 'Tokens', $format); + +my $tempdir = File::Temp->newdir(); +my $dirname = $tempdir->dirname; + +print "***** Extracting file [$pack] to temporary directory [$dirname] *****\n"; +my $packext = getExtension($pack); +if ($packext eq ".bz2" || $packext eq ".gz") { + execute("tar -xvf '$pack' --directory '$dirname'"); +} elsif ($packext eq ".jar" || $packext eq ".zip") { + execute("unzip -d $dirname $pack"); +} else { + print "ninka-wrapper does not support packages with extension [$packext]\n"; +} + +my @files; +find( + sub { push @files, $File::Find::name unless -d; }, + $dirname +); + +print "***** Beginning Execution of Ninka *****\n"; +foreach my $file (@files) { + print "Running ninka on file [$file]\n"; + execute("perl ${path}/ninka.pl '$file'"); +} + +my @ninkafiles; +find( + sub { + my $ext = getExtension($File::Find::name); + if($ext =~ m/(comments|sentences|goodsent|badsent|senttok|license)$/){ + push @ninkafiles, $File::Find::name; + } + }, + $dirname +); + +print "***** Entering Ninka Data into excell file [$excelFile] *****\n"; +my $row = 1; + +foreach my $file (@ninkafiles) { + + my $filepath = dirname($file); + $filepath =~ s/$dirname//; + my $basefile = basename($file); + my $rootfile = removeExtension($basefile); + my $packname = basename($pack); + + #Read entire file into a string + open (my $fh, '<', $file) or die "Can't open file $!"; + my $filedata = do { local $/; <$fh> }; + + my $sth; + switch (getExtension($basefile)){ + case ".comments" { + ; + } + case ".sentences" { + ; + } + case ".goodsent" { + ; + } + case ".badsent" { + ; + } + case ".senttok" { + ; + } + case ".license" { + print "Inserting [$basefile] into table licenses\n"; + my @columns = parseLicenseData($filedata); + + $worksheet->write($row, 0, $packname); + $worksheet->write($row, 1, $filepath); + $worksheet->write($row, 2, $rootfile); + + my $originalFile = $file; + $originalFile =~ s/\.license$//; + + if (-T $originalFile) { + foreach my $i (0..7) { + $worksheet->write($row, $i+3, $columns[$i]); + } + } else { + $worksheet->write($row, 3, "Binary File"); + } + + $row++; + } + } + close($fh); +} + +$workbook->close(); + +sub parseLicenseData { + my ($data) = @_; + chomp($data); + my @columns; + my @fields = split(';', $data); + if($fields[0] eq "NONE\n"){ + @columns = '' x 7; + @columns[0] = 'NONE'; + } else { + @columns = @fields; + } + return @columns; +} + +sub getExtension { + my ($file) = @_; + my $filename = basename($file); + my ($ext) = $filename =~ /(\.[^.]+)$/; + return $ext; +} + +sub removeExtension { + my ($file) = @_; + (my $filename = $file) =~ s/\.[^.]+$//; + return $filename; +} + +sub execute { + my ($command) = @_; + my $output = `$command`; + my $status = ($? >> 8); + die "execution of [$command] failed: status [$status]\n" if ($status != 0); + return $output; +} diff --git a/ninka-sqlite.pl b/ninka-sqlite.pl index eee0c0f..49b7576 100644 --- a/ninka-sqlite.pl +++ b/ninka-sqlite.pl @@ -23,7 +23,7 @@ if ($path eq "") { my ($pack, $db) = @ARGV; -my $dbh = DBI->connect("DBI:SQLite:dbname=$db", "", "", {RaiseError => 1}) +my $dbh = DBI->connect("DBI:SQLite:dbname=$db", "", "", {RaiseError => 1, AutoCommit => 0}) or die $DBI::errstr; $dbh->do("CREATE TABLE IF NOT EXISTS comments (filename TEXT, path TEXT, container TEXT, content TEXT, @@ -136,7 +136,8 @@ foreach my $file (@ninkafiles) { close($fh); } -$dbh->disconnect; +$dbh->commit(); +$dbh->disconnect(); sub parseLicenseData { my ($data) = @_; -- cgit v1.2.1 From e4882adf2adf0e8e899a4efda5f48a26ed982dcf Mon Sep 17 00:00:00 2001 From: dmg Date: Sat, 2 Aug 2014 00:49:15 -0700 Subject: updated the excel extractor to skip binary files, added license and author --- ninka-excel.pl | 102 ++++++++++++++++++++++++++------------------------------- 1 file changed, 47 insertions(+), 55 deletions(-) diff --git a/ninka-excel.pl b/ninka-excel.pl index bfa59c6..c06c830 100644 --- a/ninka-excel.pl +++ b/ninka-excel.pl @@ -1,4 +1,20 @@ #!/usr/bin/perl +# +# Copyright (C) 2014 Anthony Kohan and Daniel M. German +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# use strict; use Switch; @@ -65,76 +81,52 @@ find( print "***** Beginning Execution of Ninka *****\n"; foreach my $file (@files) { - print "Running ninka on file [$file]\n"; - execute("perl ${path}/ninka.pl '$file'"); + if (-T $file) { + print "Running ninka on file [$file]\n"; + execute("perl ${path}/ninka.pl '$file'"); + } } -my @ninkafiles; -find( - sub { - my $ext = getExtension($File::Find::name); - if($ext =~ m/(comments|sentences|goodsent|badsent|senttok|license)$/){ - push @ninkafiles, $File::Find::name; - } - }, - $dirname -); print "***** Entering Ninka Data into excell file [$excelFile] *****\n"; my $row = 1; -foreach my $file (@ninkafiles) { +foreach my $file (@files) { my $filepath = dirname($file); $filepath =~ s/$dirname//; - my $basefile = basename($file); - my $rootfile = removeExtension($basefile); + my $basefile = fileparse($file, ()); my $packname = basename($pack); #Read entire file into a string - open (my $fh, '<', $file) or die "Can't open file $!"; - my $filedata = do { local $/; <$fh> }; + my $filename = "${file}.license"; - my $sth; - switch (getExtension($basefile)){ - case ".comments" { - ; - } - case ".sentences" { - ; - } - case ".goodsent" { - ; - } - case ".badsent" { - ; - } - case ".senttok" { - ; - } - case ".license" { - print "Inserting [$basefile] into table licenses\n"; - my @columns = parseLicenseData($filedata); - - $worksheet->write($row, 0, $packname); - $worksheet->write($row, 1, $filepath); - $worksheet->write($row, 2, $rootfile); - - my $originalFile = $file; - $originalFile =~ s/\.license$//; - - if (-T $originalFile) { - foreach my $i (0..7) { - $worksheet->write($row, $i+3, $columns[$i]); - } - } else { - $worksheet->write($row, 3, "Binary File"); - } - - $row++; + $worksheet->write($row, 0, $packname); + $worksheet->write($row, 1, $filepath); + $worksheet->write($row, 2, $basefile); + + print "Inserting [$basefile] into table spreedsheet\n"; + + if (-T $filename) { + + open (my $fh, '<', $filename) or die "Can't open file $!"; + my $filedata = do { local $/; <$fh> }; + + my @columns = parseLicenseData($filedata); + + + my $originalFile = $file; + $originalFile =~ s/\.license$//; + + foreach my $i (0..7) { + $worksheet->write($row, $i+3, $columns[$i]); } + close($fh); + + } else { + $worksheet->write($row, 3, "Binary File"); } - close($fh); + $row++; } $workbook->close(); -- cgit v1.2.1 From f548eaf81b402994622ac6488a5868f2727bc065 Mon Sep 17 00:00:00 2001 From: Daniel M German Date: Sat, 10 Jan 2015 01:21:38 -0800 Subject: updated some variants of licenses... MIT variants need to be improved --- ChangeLog | 23 +++++++++++++---------- filter/criticalword.dict | 1 + senttok/licensesentence.dict | 2 +- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/ChangeLog b/ChangeLog index 9e3a70e..67b9191 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2015-01-05 dmg + + * filter/criticalword.dict: MIT+no-false-attribs license missing one sentence + + * senttok/licensesentence.dict (MITpermissionExtrict): there was a space where no space should have been + 2014-06-09 dmg * Version 1.2 @@ -13,15 +19,15 @@ 2013-07-03 dmg * Version to 1.1 RC2 - - * Added qt variants and fixed more GPL related unknowns. + + * Added qt variants and fixed more GPL related unknowns. * When the license is unknown "UNKNOWN" is printed. 2013-07-02 dmg * Added version to ninka.pl, set to 1.1 RC1 - + * matcher/rules.dict: Added CDDLorGPLv2, CPLv1orGPLv2+orLGPLv2, QtorGPLv2orv3 * senttok/licensesentence.dict: the boost license sometimes contains just the URL refernece, fixed several regressions. @@ -56,7 +62,7 @@ * matcher/rules.dict: Fixed a bug in the sunRPM license. It wasn't detected. - * filter/criticalword.dict: strenghten some rules to avoid some + * filter/criticalword.dict: strenghten some rules to avoid some false positives in good sentences. 2011-01-28 @@ -76,11 +82,11 @@ 2010-11-10 dmg * Released pre-release version 1.0-pre2 - + * Added path to the files in the tar file. - + * README: Fixed errors (patch by Armijn Hemel) - + * extComments/extComments.pl (Determine_Comments_Extractor): Added the option to not use the comments extractor. @@ -91,6 +97,3 @@ 2010-08-07 dmg * Prepared the release to the public. - - - diff --git a/filter/criticalword.dict b/filter/criticalword.dict index 477c32b..7e51f45 100755 --- a/filter/criticalword.dict +++ b/filter/criticalword.dict @@ -131,3 +131,4 @@ computer program whose purpose disclaims copyright software is covered Copyright +shall be modified such diff --git a/senttok/licensesentence.dict b/senttok/licensesentence.dict index 4216275..0d7d287 100644 --- a/senttok/licensesentence.dict +++ b/senttok/licensesentence.dict @@ -220,7 +220,7 @@ BSDlikeRef:10:1:^Use of this source code is governed by a BSD\-style license tha MITname:10:0:MIT License -MITpermissionExtrict:10:0:Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files \(the Software \), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: +MITpermissionExtrict:10:0:Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files \(the Software\), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: X11noticeExtrict:10:0:The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software X11asIsExtrict:10:0:THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT: X11asLiableExtrict:10:0:IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE -- cgit v1.2.1 From 9262f0100f567548a67e2c5fbdb36ef5c59002ba Mon Sep 17 00:00:00 2001 From: Daniel M German Date: Sat, 10 Jan 2015 02:27:55 -0800 Subject: added support for excel and sqlite. bumped version to 1.3 RC1 --- ChangeLog | 10 ++++++++++ README.TXT | 27 +++++++++++++++++---------- ninka-excel.pl | 6 ++++-- ninka-sqlite.pl | 22 +++++++++++++++++++++- ninka.pl | 20 +++++++++----------- 5 files changed, 61 insertions(+), 24 deletions(-) mode change 100644 => 100755 ninka-excel.pl mode change 100644 => 100755 ninka-sqlite.pl diff --git a/ChangeLog b/ChangeLog index 67b9191..f04ac65 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +2015-01-10 dmg + + * Bumped to version 1.3 RC 1 + + * ninka-sqlite.pl, ninka-excel.pl: prepared them for release + + * ninka.pl: fixed bug in finding the path of where ninka was being executed from (reported by Ryan Biesemeyer) + + * Fixed quotes in perl (René bScheibe) + 2015-01-05 dmg * filter/criticalword.dict: MIT+no-false-attribs license missing one sentence diff --git a/README.TXT b/README.TXT index 565465c..6dc14d9 100644 --- a/README.TXT +++ b/README.TXT @@ -1,4 +1,6 @@ -* Contact information. +-*-org-*- + +* Contact information. Any feedback will be appreciated. You can email us at Daniel M. German and Yuki Manabe @@ -25,11 +27,16 @@ http://turingmachine.org/~dmg/papers/dmg2010ninka.pdf If you use Ninka for research purposes, we would appreciate you cite the above paper. +* Contributors + +- Anthony Kohan for writing the excel and sqlite backends. +- Armijn Hemel from Tjaldur Software Governance Solutions for multiple bug reports and suggestions + * License - + Except for the directories comments and splitter, Ninka is licensed under the GPLv2+ - + Copyright (C) 2009-2010 Yuki Manabe and Daniel M. German This program is free software: you can redistribute it and/or modify @@ -69,7 +76,7 @@ Perl version 5 Ninka uses a pipe model (see below). Each step of the "pipe" creates a file, but -ninka.pl [options] [filename] +ninka.pl [options] [filename] Available options -v verbose @@ -107,26 +114,26 @@ It will create five files: - Licenses - Unmatched sentences in *.senttok that were not matched - + * Ninka model Ninka uses a pipe-model. Each stage of the pipe does something very specific: - 1. Comment extractor. + 1. Comment extractor. - directory: extComments - command: extComments.pl, might use comments (included in distribution) - + - Purpose: Extracts top comments of source code. If no comment extractor is known for the language, then extracts top lines from source (currently 700) - Creates .comments file 2. Split sentences in comments - + - directory: splitter - command: splitter.pl @@ -151,7 +158,7 @@ Ninka uses a pipe-model. Each stage of the pipe does something very specific: 4. Tokenizes sentences - Directory senttok - + - command: senttok.pl - Purpose: It creates a file that corresponds to the recognized @@ -168,7 +175,7 @@ Ninka uses a pipe-model. Each stage of the pipe does something very specific: - Purpose: looks at the sequence of sentence tokens and outputs the licenses found - Output: .license - + The script ninka.pl takes care of all these steps, and optionally removes intermediary files, and writes to the stdout the licenses found. diff --git a/ninka-excel.pl b/ninka-excel.pl old mode 100644 new mode 100755 index c06c830..71adddf --- a/ninka-excel.pl +++ b/ninka-excel.pl @@ -1,6 +1,6 @@ #!/usr/bin/perl # -# Copyright (C) 2014 Anthony Kohan and Daniel M. German +# Copyright (C) 2014,2015 Anthony Kohan and Daniel M. German # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License as @@ -27,8 +27,10 @@ use Spreadsheet::WriteExcel; if(scalar(@ARGV) != 2){ + print STDERR "Ninka 1.3. sqlite wrapper\n"; + print STDERR "Processes package file (.tar.gz, zip, jar. etc) and outputs to excel file\n"; print STDERR "Incorrect number of arguments\n"; - print STDERR "Correct usage is: perl ninka-wrapper \n"; + print STDERR "Correct usage is: $0 \n"; exit 1; } diff --git a/ninka-sqlite.pl b/ninka-sqlite.pl old mode 100644 new mode 100755 index 49b7576..65d746a --- a/ninka-sqlite.pl +++ b/ninka-sqlite.pl @@ -1,4 +1,20 @@ #!/usr/bin/perl +# +# Copyright (C) 2014,2015 Anthony Kohan and Daniel M. German +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# use strict; use Switch; @@ -8,9 +24,13 @@ use File::Find; use File::Basename; use Scalar::Util qw(looks_like_number); + + if(scalar(@ARGV) != 2){ + print STDERR "Ninka 1.3. sqlite wrapper\n"; + print STDERR "Processes package file (.tar.gz, zip, jar. etc) and outputs to sqlite file\n"; print STDERR "Incorrect number of arguments\n"; - print STDERR "Correct usage is: perl ninka-wrapper \n"; + print STDERR "Correct usage is: $0 \n"; exit 1; } diff --git a/ninka.pl b/ninka.pl index b2fcd9a..3987a41 100755 --- a/ninka.pl +++ b/ninka.pl @@ -18,10 +18,11 @@ use strict; use Getopt::Std; +use File::Basename; my %opts = (); if (!getopts ("vfCcSsGgTtLd",\%opts) or scalar(@ARGV) == 0) { -print STDERR "Ninka version 1.2 +print STDERR "Ninka version 1.3 Usage $0 -fCtTvcgsGd @@ -42,7 +43,7 @@ Usage $0 -fCtTvcgsGd -L force creation of matching - -d delete intermediate files + -d delete intermediate files \n"; @@ -55,14 +56,12 @@ my $verbose = exists $opts{v}; my $delete = exists $opts{d}; #$delete = 1; -my $path = $0; +my $path = dirname($0); -$path =~ s/\/+[^\/]+$//; if ($path eq "") { $path = "./"; } - my $force = exists $opts{f}; my $forceGood = exists $opts{G}; my $forceSentences = exists $opts{S}; @@ -97,23 +96,23 @@ if (not (-f $original)) { } -Do_File_Process($original, $commentsFile, ($force or $forceComments), +Do_File_Process($original, $commentsFile, ($force or $forceComments), "$path/extComments/extComments.pl -c1 ${f}", "Creating comments file", exists $opts{c}); -Do_File_Process($commentsFile, $sentencesFile, ($force or $forceSentences), +Do_File_Process($commentsFile, $sentencesFile, ($force or $forceSentences), "$path/splitter/splitter.pl ${commentsFile}", "Splitting sentences", exists $opts{s} ); -Do_File_Process( $sentencesFile, $goodsentFile, ($force or $forceGood), +Do_File_Process( $sentencesFile, $goodsentFile, ($force or $forceGood), "$path/filter/filter.pl ${sentencesFile}", "Filtering good sentences", exists $opts{s} ); -Do_File_Process($goodsentFile, $sentokFile, ($force or $forceSentok), +Do_File_Process($goodsentFile, $sentokFile, ($force or $forceSentok), "$path/senttok/senttok.pl ${goodsentFile} > ${sentokFile}", "Matching sentences against rules", exists $opts{t} ); @@ -147,7 +146,7 @@ sub Do_File_Process print "Running ${cmd}:" if ($verbose); execute($cmd); } else { - print "File [$output] newer than input [$input], not creating:" if ($verbose); + print "File [$output] newer than input [$input], not creating:" if ($verbose); } if ($end) { print "Existing after $message" if $verbose; @@ -180,4 +179,3 @@ sub newer return 1; } } - -- cgit v1.2.1 From b7472a8179ca8e280f97aae34da5640c2aa1d239 Mon Sep 17 00:00:00 2001 From: Stefano Zacchiroli Date: Sat, 10 Jan 2015 20:34:48 +0100 Subject: README.TXT: document requirements for excel and sqlite wrappers --- README.TXT | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.TXT b/README.TXT index 6dc14d9..bbea335 100644 --- a/README.TXT +++ b/README.TXT @@ -62,7 +62,11 @@ the above paper. * Requirements -Perl version 5 +- Perl version 5 or above +- for ninka-excel.pl: Perl module Spreadsheet::WriteExcel + https://metacpan.org/release/Spreadsheet-WriteExcel/ +- for ninka-sqlite.pl: Perl module DBD::SQLite + https://metacpan.org/release/DBD-SQLite * How to install -- cgit v1.2.1