summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordmg <dmg@uvic.ca>2014-04-06 21:36:32 -0700
committerDaniel M German <dmg@uvic.ca>2015-01-10 02:24:27 -0800
commit39a64e3f5db4a8d023ee7a9ee0be4d04d0bf814e (patch)
treec4fa837a75260b7f058a1364e17718ab4a6ab05b
parent6a536fe1daee1847a8666b620ac59743253d2ccd (diff)
downloadninka-39a64e3f5db4a8d023ee7a9ee0be4d04d0bf814e.tar.gz
escaped apostrophes
-rwxr-xr-xextComments/extComments.pl33
-rwxr-xr-xmatcher/matcher.pl52
-rw-r--r--matcher/rules.dict31
-rwxr-xr-xninka.pl24
4 files changed, 78 insertions, 62 deletions
diff --git a/extComments/extComments.pl b/extComments/extComments.pl
index 8fbca63..d8643d6 100755
--- a/extComments/extComments.pl
+++ b/extComments/extComments.pl
@@ -42,6 +42,14 @@ print STDERR "Usage $0 -v
my $f = $ARGV[0];
+my $original = $f;
+
+$f =~ s/'/\\'/g;
+$f =~ s/\$/\\\$/g;
+$f =~ s/;/\\;/g;
+$f =~ s/ /\\ /g;
+
+
#die "illegal file [$f]" if $f =~ m@/\.@;
my $numberComments = 1;
@@ -49,32 +57,35 @@ $numberComments = $opts{c} if exists $opts{c};
my $verbose = 1;
$verbose = exists $opts{v};
-
-
-if (get_size($f) == 0) {
+if (get_size($original) == 0) {
print STDERR "Empty file, just exit\n" if $verbose;
exit 0; # nothing to report, just end
}
+
+
+
+
my $commentsCmd = Determine_Comments_Extractor($f);
execute("$commentsCmd");
if ($commentsCmd =~ /^comments/ and
get_size("${f}.comments") == 0){
- `cat '$f' | head -700 > ${f}.comments`;
+ `cat $f | head -700 > ${f}.comments`;
}
exit 0;
-sub Determine_Comments_Extractor
+sub Determine_Comments_Extractor
{
my ($f) = @_;
+
if ($f =~ /\.([^\.]+)$/) {
my $ext= $1;
- if ($ext =~ /^(pl|pm|py)$/
+ if ($ext =~ /^(pl|pm|py)$/
) {
########################
# for the time being, let us just extract the top 400 lines
@@ -84,21 +95,21 @@ sub Determine_Comments_Extractor
} elsif ($ext eq 'jl' or
$ext eq 'el'
) {
- return "cat '$f' | head -400 > '${f}.comments'";
-# return "$path/hashComments.pl -p ';' '$f'";;
+ return "cat $f | head -400 > ${f}.comments";
+# return "$path/hashComments.pl -p ';' $f";;
} elsif ($ext =~ /^(java|c|cpp|h|cxx|c\+\+|cc)$/ ) {
my $comm = `which comments`;
if ($comm ne '') {
return "comments -c1 '$f' 2> /dev/null";
} else {
- return "cat '$f' | head -400 > '${f}.comments'";
+ return "cat $f | head -400 > ${f}.comments";
}
} else {
- return "cat '$f' | head -700 > '${f}.comments'";
+ return "cat $f | head -700 > ${f}.comments";
}
} else {
print "\n>>>>>>>>>>>>>>>>>>>>>\n";
- return "cat '$f' | head -700 > '${f}.comments'";
+ return "cat $f | head -700 > ${f}.comments";
}
}
diff --git a/matcher/matcher.pl b/matcher/matcher.pl
index aa01f1a..5e4422e 100755
--- a/matcher/matcher.pl
+++ b/matcher/matcher.pl
@@ -20,7 +20,7 @@
#
# matchter.pl
#
-# This script use a set of license sentence name as input
+# This script use a set of license sentence name as input
# and output license name corresponds to a rule which match the set.
#
# author: Yuki Manabe
@@ -64,6 +64,9 @@ $NonCriticalRules{'LesserGPLv2.1'} = [@gplNonCritical];
$NonCriticalRules{'LGPLv2orv3'}= [@gplNonCritical];
$NonCriticalRules{'LesserGPLv2'} = [@gplNonCritical];
$NonCriticalRules{'LesserGPLv2+'} = [@gplNonCritical];
+$NonCriticalRules{'GPLVer2.1or3KDE+'} = [@gplNonCritical];
+$NonCriticalRules{'LGPLVer2.1or3KDE+'} = [@gplNonCritical];
+
$NonCriticalRules{'GPLv2+'} = [@gplNonCritical];
$NonCriticalRules{'GPLv2'} = [@gplNonCritical];
@@ -75,8 +78,8 @@ $NonCriticalRules{'AGPLv3'} = [@gplNonCritical, 'AGPLreceivedVer0','AGPLseeVer0'
$NonCriticalRules{'AGPLv3+'} = [@gplNonCritical, 'AGPLreceivedVer0','AGPLseeVer0'];
$NonCriticalRules{'GPLnoVersion'} = [@gplNonCritical];
-$NonCriticalRules{'Apachev1.1'} = ['ApacheLic1_1'];
-$NonCriticalRules{'Apachev2'} = ['ApachePre','ApacheSee'];
+$NonCriticalRules{'Apache-1.1'} = ['ApacheLic1_1'];
+$NonCriticalRules{'Apache-2'} = ['ApachePre','ApacheSee'];
$NonCriticalRules{'LibGCJLic'} = ['LibGCJSee'];
$NonCriticalRules{'CDDLicV1'} = ['Compliance','CDDLicWhere','ApachesPermLim','CDDLicIncludeFile','UseSubjectToTerm', 'useOnlyInCompliance'];
@@ -106,9 +109,8 @@ $NonCriticalRules{'MPLv1_1'} = ['licenseBlockBegin','MPLsee','Copyright','licens
$NonCriticalRules{'MPL1_1andLGPLv2_1'} = ['MPLoptionIfNotDelete2licsVer0','MPL_LGPLseeVer0'];
$NonCriticalRules{'FreeType'} = ['FreeTypeNotice'];
+$NonCriticalRules{'boostV1'} = ['boostSeev1', 'SeeFile'];
-$NonCriticalRules{'GPLVer2.1or3KDE+'} = [@gplNonCritical];
-$NonCriticalRules{'LGPLVer2.1or3KDE+'} = [@gplNonCritical];
# initialize
@@ -178,7 +180,7 @@ Match_License();
my $match = 0;
for (my $i=0;$i<=$#licSentNames ;$i++) {
- if ($licSentNames[$i] == 0 and
+ if ($licSentNames[$i] == 0 and
($licSentNames[$i] ne 'UNKNOWN' and
$licSentNames[$i] ne '')) {
# print "[$licSentNames[$i]]\n";
@@ -199,9 +201,9 @@ if ($match > 0) {
#print $interRuleList[$i][0];
@licSentNames = map { $_ eq $interRuleList[$i][0] ? $interRuleList[$i][1] : $_ } @licSentNames;
}
-
+
$senttok= join(',',@licSentNames) . ',';
-
+
Match_License();
}
@@ -284,7 +286,7 @@ sub Read_Original
my ($inputF, $tokens, $originals) = @_;
open (INPUTFILE, $inputF) or die ("Error: $inputF is not found.");
-
+
my $sentence;
my @original;
while ($sentence = <INPUTFILE>){
@@ -298,19 +300,19 @@ sub Read_Original
print "NONE\n";
exit 0;
}
-
+
#print join(';',@licSentNames)."\n";
-
+
close INPUTFILE;
}
sub Match_License
{
-
+
# create a string with the sentences
-
+
for (my $j=0;$j<=$#rulelist;$j++){
-
+
my $rule=$rulelist[$j][1];
my $rulename=$rulelist[$j][0];
my $lenRule = scalar(split(',', $rule));
@@ -324,7 +326,7 @@ sub Match_License
# print "\n";
}
}
-
+
# print ">>>>[$senttok]\n";
my $onlyAllRight = 0;
@@ -333,7 +335,7 @@ sub Match_License
#print STDERR "Ending>>>>>>>$senttok\n";
#print STDERR 'Size>>' , scalar(@result), "\n";
#print STDERR 'Result>>', join(',', @result), "\n";
-
+
# let us remove allrights
# my $onlyAllRight = 1;
# for my $i (0.. scalar(@licSentNames)-1){
@@ -347,9 +349,9 @@ sub Match_License
# output result
if (scalar(@result) > 0){
# at this point we have matched
-
-
- # let us clean up the rules... let us print the matched rules, and the
+
+
+ # let us clean up the rules... let us print the matched rules, and the
# if (grep(/GPL/, @result)) {
# print "GPL...\n";
# foreach my $r ($NonCriticalRules{GPL}) {
@@ -357,7 +359,7 @@ sub Match_License
# }
# }
# general removal of rules
-
+
foreach my $r (@generalNonCritical) {
while ($senttok =~ s/,$r,/,-1,/) {
@@ -365,7 +367,7 @@ sub Match_License
}
}
# print "[$senttok]\n";
-
+
foreach my $res (@result) {
my $temp = $NonCriticalRules{$res};
foreach my $r (@$temp) {
@@ -375,7 +377,7 @@ sub Match_License
}
}
}
-# print "[$senttok]\n";
+# print "[$senttok]\n";
}
}
@@ -392,7 +394,7 @@ sub Print_Result
my @sections = split(',', $senttok);
die 'assertion 1' if $sections[0] ne '';
die 'assertion 2' if $sections[scalar(@sections)] ne '';
-
+
my $ignoredLines = 0;
my $licenseLines = 0;
my $unknownLines = 0;
@@ -410,7 +412,7 @@ sub Print_Result
}
}
$senttok =~ s/^,(.*),$/$1/;
-
+
# print "$ignoredLines > $licenseLines > $unknownLines > $unmatchedLines\n";
if (scalar (@result) == 0) {
print 'UNKNOWN';
@@ -419,5 +421,5 @@ sub Print_Result
}
print ";$countMatches;$licenseLines;$ignoredLines;$unmatchedLines;$unknownLines;$senttok\n";
$senttok = $save;
-
+
}
diff --git a/matcher/rules.dict b/matcher/rules.dict
index 1055eca..28d274a 100644
--- a/matcher/rules.dict
+++ b/matcher/rules.dict
@@ -19,8 +19,6 @@
GPL2orBSD3:BSDpre,BSDcondSourceExtrict,BSDcondBinaryExtrict,BSDcondEndorse,Altern,GPLGenVer2,BSDasIsExtrict,BSDWarrExtrict
-
-
GPLv2:GPLv2
GPLv2+:GPLv2\+
GPLv3+:GPLv3\+
@@ -134,24 +132,25 @@ openSSL:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdv,OpenSSLendorse,OpenSSLwrit
openSSLvar1:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdvPart1,BSDcondAdvPart2,SSLeayWindows,BSDcondAdvPart2,BSDasIs,BSDWarr,NoLicenseChangeAllowed
openSSLvar3:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdvPart1,BSDcondAdvPart2,OpenSSLendorse,OpenSSLwritCond,BSDasIs,BSDWarr
-Apachev1.0:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdvPart1,BSDcondAdvPart2,OpenSSLendorse,OpenSSLwritCond,OpenSSLName,OpenSSLAckPart1,BSDcondAdvPart2,BSDasIs,BSDWarr
-Apachev1.1:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdv,OpenSSLendorse,OpenSSLwritCond,OpenSSLName,OpenSSLAck,BSDasIs,BSDWarr
-Apachev1.1:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdvPart1,BSDcondAdvPart2,OpenSSLendorse,OpenSSLwritCond,OpenSSLName,BSDasIs,BSDWarr
-Apachev1.1:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdvPart2,OpenSSLendorse,OpenSSLwritCond,OpenSSLName,BSDasIs,BSDWarr
-Apachev1.1:BSDpre,BSDcondSourceExtrict,BSDcondBinary,BSDcondAdvPart2,OpenSSLendorse,OpenSSLwritCond,OpenSSLName,BSDasIs,BSDWarr
+Apache-1.0:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdvPart1,BSDcondAdvPart2,OpenSSLendorse,OpenSSLwritCond,OpenSSLName,OpenSSLAckPart1,BSDcondAdvPart2,BSDasIs,BSDWarr
+Apache-1.1:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdv,OpenSSLendorse,OpenSSLwritCond,OpenSSLName,OpenSSLAck,BSDasIs,BSDWarr
+Apache-1.1:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdvPart1,BSDcondAdvPart2,OpenSSLendorse,OpenSSLwritCond,OpenSSLName,BSDasIs,BSDWarr
+Apache-1.1:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdvPart2,OpenSSLendorse,OpenSSLwritCond,OpenSSLName,BSDasIs,BSDWarr
+Apache-1.1:BSDpre,BSDcondSourceExtrict,BSDcondBinary,BSDcondAdvPart2,OpenSSLendorse,OpenSSLwritCond,OpenSSLName,BSDasIs,BSDWarr
# this is dangerous. The v2 applies to apache (ninka-ism)
-ApacheV2orLGPLgeneric:apacheAndLGPLgenVer2.0
+Apache-2orLGPLgeneric:apacheAndLGPLgenVer2.0
SleepyCat:BSDpre,BSDcondSource,BSDcondBinary,SleepyCatObtain,SleepyCatSourceIncluded,SleepyCatSourceComplete,SleepyCatDoesNotInclude,SleepyCatAsIs,BSDWarr
-boost:boostPermission,boostPreserve,boostAsIs,boostWarr
-boostV1:boostRefv1
-boostV1Ref:boostSeev1
+boost-1:boostPermission,boostPreserve,boostAsIs,boostWarr
+boost-1:boostRefv1
+boost-1ref:boostSeev1
+
SSLeay:SSLCopy,SSLeayAttrib,SSLeayAdType,BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdvRULE,SSLeayCrypto,SSLeayWindows,BSDasIs,BSDWarr,SSLeayCantChangeLic
# qt
-Qt:qtCommercialuseVer0
+QPLt:qtCommercialuseVer0
orLGPLVer2.1:qtLGPLVer2.1
orLGPLVer2:qtLGPLv2
orGPLv3:qtGPLVer3.0
@@ -171,7 +170,7 @@ Cecill:CecillEn1,CecillEn2,CecillEn3,CecillEn4,CecillEn5,CecillEn6,CecillEn7,Cec
SimpleOnlyKeepCopyright:SimpleOnlyKeepCopyright
#------------
-QTv1:QTv1
+QPLv1:QTv1
CDDLv1orGPLv2:CDDLorGPLv2ifYouWish,CDDLorGPLv2IfYouAdd
CDDLorGPLv2:CDDLorGPLVer2
@@ -233,8 +232,8 @@ EPLv1:EPLv1
CDDLic:CDDLic
CDDLicV1:CDDLicV1Only
#----------------------------------------------------------------------
-Apachev2:Apachev2,ApacheLicWherePart1,ApacheLicWherePart2v2,ApachesAsIs,ApachesPermLim
-Apachev2:Apachev2,ApacheLicWherePart1,ApacheLicWherePart2v2
+Apache-2:Apachev2,ApacheLicWherePart1,ApacheLicWherePart2v2,ApachesAsIs,ApachesPermLim
+Apache-2:Apachev2,ApacheLicWherePart1,ApacheLicWherePart2v2
# publid comain
publicDomain:publicDomain
@@ -312,7 +311,7 @@ SameAsPerl:SameAsPerl
ArtisticLicensev1:ArtisticLicensev1
# QT triple license, outdated :)
-QtGPLv2or3:qtGPLv2or3
+QplGPLv2or3:qtGPLv2or3
FreeType:FreeType
##
diff --git a/ninka.pl b/ninka.pl
index a74dc83..5117630 100755
--- a/ninka.pl
+++ b/ninka.pl
@@ -74,9 +74,13 @@ my $forceLicense = exists $opts{L};
my $f = $ARGV[0];
-my $original = $f;
+my $original = $f;
+$f =~ s/'/\\'/g;
+$f =~ s/\$/\\\$/g;
+$f =~ s/;/\\;/g;
+$f =~ s/ /\\ /g;
print "Starting: $original;\n" if ($verbose);
@@ -87,38 +91,38 @@ my $sentencesFile = "${f}.sentences";
my $goodsentFile = "${f}.goodsent";
my $sentokFile = "${f}.senttok";
-if (not (-f "$f")) {
- print "ERROR;[${f}] is not a file\n" ;
+if (not (-f $original)) {
+ print "ERROR;[${original}] is not a file\n" ;
exit 0;
}
Do_File_Process($original, $commentsFile, ($force or $forceComments),
- "$path/extComments/extComments.pl -c1 '${original}'",
+ "$path/extComments/extComments.pl -c1 ${f}",
"Creating comments file",
exists $opts{c});
Do_File_Process($commentsFile, $sentencesFile, ($force or $forceSentences),
- "$path/splitter/splitter.pl '${commentsFile}'",
+ "$path/splitter/splitter.pl ${commentsFile}",
"Splitting sentences", exists $opts{s}
);
Do_File_Process( $sentencesFile, $goodsentFile, ($force or $forceGood),
- "$path/filter/filter.pl '${sentencesFile}'",
+ "$path/filter/filter.pl ${sentencesFile}",
"Filtering good sentences", exists $opts{s}
);
Do_File_Process($goodsentFile, $sentokFile, ($force or $forceSentok),
- "$path/senttok/senttok.pl '${goodsentFile}' > '${sentokFile}'",
+ "$path/senttok/senttok.pl ${goodsentFile} > ${sentokFile}",
"Matching sentences against rules", exists $opts{t}
);
print "Matching ${f}.senttok against rules" if ($verbose);
-execute("$path/matcher/matcher.pl '${f}.senttok' > '${f}.license'");
+execute("$path/matcher/matcher.pl ${f}.senttok > ${f}.license");
-print `cat '${f}.license'`;
+print `cat ${f}.license`;
unlink("${f}.code");
@@ -126,7 +130,7 @@ if ($delete) {
unlink("${f}.badsent");
unlink("${f}.comments");
unlink("${f}.goodsent");
-# unlink("${f}.sentences");
+ unlink("${f}.sentences");
unlink("${f}.senttok");
}