diff options
author | dmg <dmg@uvic.ca> | 2014-04-06 21:36:32 -0700 |
---|---|---|
committer | Daniel M German <dmg@uvic.ca> | 2015-01-10 02:24:27 -0800 |
commit | 39a64e3f5db4a8d023ee7a9ee0be4d04d0bf814e (patch) | |
tree | c4fa837a75260b7f058a1364e17718ab4a6ab05b | |
parent | 6a536fe1daee1847a8666b620ac59743253d2ccd (diff) | |
download | ninka-39a64e3f5db4a8d023ee7a9ee0be4d04d0bf814e.tar.gz |
escaped apostrophes
-rwxr-xr-x | extComments/extComments.pl | 33 | ||||
-rwxr-xr-x | matcher/matcher.pl | 52 | ||||
-rw-r--r-- | matcher/rules.dict | 31 | ||||
-rwxr-xr-x | ninka.pl | 24 |
4 files changed, 78 insertions, 62 deletions
diff --git a/extComments/extComments.pl b/extComments/extComments.pl index 8fbca63..d8643d6 100755 --- a/extComments/extComments.pl +++ b/extComments/extComments.pl @@ -42,6 +42,14 @@ print STDERR "Usage $0 -v my $f = $ARGV[0]; +my $original = $f; + +$f =~ s/'/\\'/g; +$f =~ s/\$/\\\$/g; +$f =~ s/;/\\;/g; +$f =~ s/ /\\ /g; + + #die "illegal file [$f]" if $f =~ m@/\.@; my $numberComments = 1; @@ -49,32 +57,35 @@ $numberComments = $opts{c} if exists $opts{c}; my $verbose = 1; $verbose = exists $opts{v}; - - -if (get_size($f) == 0) { +if (get_size($original) == 0) { print STDERR "Empty file, just exit\n" if $verbose; exit 0; # nothing to report, just end } + + + + my $commentsCmd = Determine_Comments_Extractor($f); execute("$commentsCmd"); if ($commentsCmd =~ /^comments/ and get_size("${f}.comments") == 0){ - `cat '$f' | head -700 > ${f}.comments`; + `cat $f | head -700 > ${f}.comments`; } exit 0; -sub Determine_Comments_Extractor +sub Determine_Comments_Extractor { my ($f) = @_; + if ($f =~ /\.([^\.]+)$/) { my $ext= $1; - if ($ext =~ /^(pl|pm|py)$/ + if ($ext =~ /^(pl|pm|py)$/ ) { ######################## # for the time being, let us just extract the top 400 lines @@ -84,21 +95,21 @@ sub Determine_Comments_Extractor } elsif ($ext eq 'jl' or $ext eq 'el' ) { - return "cat '$f' | head -400 > '${f}.comments'"; -# return "$path/hashComments.pl -p ';' '$f'";; + return "cat $f | head -400 > ${f}.comments"; +# return "$path/hashComments.pl -p ';' $f";; } elsif ($ext =~ /^(java|c|cpp|h|cxx|c\+\+|cc)$/ ) { my $comm = `which comments`; if ($comm ne '') { return "comments -c1 '$f' 2> /dev/null"; } else { - return "cat '$f' | head -400 > '${f}.comments'"; + return "cat $f | head -400 > ${f}.comments"; } } else { - return "cat '$f' | head -700 > '${f}.comments'"; + return "cat $f | head -700 > ${f}.comments"; } } else { print "\n>>>>>>>>>>>>>>>>>>>>>\n"; - return "cat '$f' | head -700 > '${f}.comments'"; + return "cat $f | head -700 > ${f}.comments"; } } diff --git 
a/matcher/matcher.pl b/matcher/matcher.pl index aa01f1a..5e4422e 100755 --- a/matcher/matcher.pl +++ b/matcher/matcher.pl @@ -20,7 +20,7 @@ # # matchter.pl # -# This script use a set of license sentence name as input +# This script use a set of license sentence name as input # and output license name corresponds to a rule which match the set. # # author: Yuki Manabe @@ -64,6 +64,9 @@ $NonCriticalRules{'LesserGPLv2.1'} = [@gplNonCritical]; $NonCriticalRules{'LGPLv2orv3'}= [@gplNonCritical]; $NonCriticalRules{'LesserGPLv2'} = [@gplNonCritical]; $NonCriticalRules{'LesserGPLv2+'} = [@gplNonCritical]; +$NonCriticalRules{'GPLVer2.1or3KDE+'} = [@gplNonCritical]; +$NonCriticalRules{'LGPLVer2.1or3KDE+'} = [@gplNonCritical]; + $NonCriticalRules{'GPLv2+'} = [@gplNonCritical]; $NonCriticalRules{'GPLv2'} = [@gplNonCritical]; @@ -75,8 +78,8 @@ $NonCriticalRules{'AGPLv3'} = [@gplNonCritical, 'AGPLreceivedVer0','AGPLseeVer0' $NonCriticalRules{'AGPLv3+'} = [@gplNonCritical, 'AGPLreceivedVer0','AGPLseeVer0']; $NonCriticalRules{'GPLnoVersion'} = [@gplNonCritical]; -$NonCriticalRules{'Apachev1.1'} = ['ApacheLic1_1']; -$NonCriticalRules{'Apachev2'} = ['ApachePre','ApacheSee']; +$NonCriticalRules{'Apache-1.1'} = ['ApacheLic1_1']; +$NonCriticalRules{'Apache-2'} = ['ApachePre','ApacheSee']; $NonCriticalRules{'LibGCJLic'} = ['LibGCJSee']; $NonCriticalRules{'CDDLicV1'} = ['Compliance','CDDLicWhere','ApachesPermLim','CDDLicIncludeFile','UseSubjectToTerm', 'useOnlyInCompliance']; @@ -106,9 +109,8 @@ $NonCriticalRules{'MPLv1_1'} = ['licenseBlockBegin','MPLsee','Copyright','licens $NonCriticalRules{'MPL1_1andLGPLv2_1'} = ['MPLoptionIfNotDelete2licsVer0','MPL_LGPLseeVer0']; $NonCriticalRules{'FreeType'} = ['FreeTypeNotice']; +$NonCriticalRules{'boostV1'} = ['boostSeev1', 'SeeFile']; -$NonCriticalRules{'GPLVer2.1or3KDE+'} = [@gplNonCritical]; -$NonCriticalRules{'LGPLVer2.1or3KDE+'} = [@gplNonCritical]; # initialize @@ -178,7 +180,7 @@ Match_License(); my $match = 0; for (my 
$i=0;$i<=$#licSentNames ;$i++) { - if ($licSentNames[$i] == 0 and + if ($licSentNames[$i] == 0 and ($licSentNames[$i] ne 'UNKNOWN' and $licSentNames[$i] ne '')) { # print "[$licSentNames[$i]]\n"; @@ -199,9 +201,9 @@ if ($match > 0) { #print $interRuleList[$i][0]; @licSentNames = map { $_ eq $interRuleList[$i][0] ? $interRuleList[$i][1] : $_ } @licSentNames; } - + $senttok= join(',',@licSentNames) . ','; - + Match_License(); } @@ -284,7 +286,7 @@ sub Read_Original my ($inputF, $tokens, $originals) = @_; open (INPUTFILE, $inputF) or die ("Error: $inputF is not found."); - + my $sentence; my @original; while ($sentence = <INPUTFILE>){ @@ -298,19 +300,19 @@ sub Read_Original print "NONE\n"; exit 0; } - + #print join(';',@licSentNames)."\n"; - + close INPUTFILE; } sub Match_License { - + # create a string with the sentences - + for (my $j=0;$j<=$#rulelist;$j++){ - + my $rule=$rulelist[$j][1]; my $rulename=$rulelist[$j][0]; my $lenRule = scalar(split(',', $rule)); @@ -324,7 +326,7 @@ sub Match_License # print "\n"; } } - + # print ">>>>[$senttok]\n"; my $onlyAllRight = 0; @@ -333,7 +335,7 @@ sub Match_License #print STDERR "Ending>>>>>>>$senttok\n"; #print STDERR 'Size>>' , scalar(@result), "\n"; #print STDERR 'Result>>', join(',', @result), "\n"; - + # let us remove allrights # my $onlyAllRight = 1; # for my $i (0.. scalar(@licSentNames)-1){ @@ -347,9 +349,9 @@ sub Match_License # output result if (scalar(@result) > 0){ # at this point we have matched - - - # let us clean up the rules... let us print the matched rules, and the + + + # let us clean up the rules... 
let us print the matched rules, and the # if (grep(/GPL/, @result)) { # print "GPL...\n"; # foreach my $r ($NonCriticalRules{GPL}) { @@ -357,7 +359,7 @@ sub Match_License # } # } # general removal of rules - + foreach my $r (@generalNonCritical) { while ($senttok =~ s/,$r,/,-1,/) { @@ -365,7 +367,7 @@ sub Match_License } } # print "[$senttok]\n"; - + foreach my $res (@result) { my $temp = $NonCriticalRules{$res}; foreach my $r (@$temp) { @@ -375,7 +377,7 @@ sub Match_License } } } -# print "[$senttok]\n"; +# print "[$senttok]\n"; } } @@ -392,7 +394,7 @@ sub Print_Result my @sections = split(',', $senttok); die 'assertion 1' if $sections[0] ne ''; die 'assertion 2' if $sections[scalar(@sections)] ne ''; - + my $ignoredLines = 0; my $licenseLines = 0; my $unknownLines = 0; @@ -410,7 +412,7 @@ sub Print_Result } } $senttok =~ s/^,(.*),$/$1/; - + # print "$ignoredLines > $licenseLines > $unknownLines > $unmatchedLines\n"; if (scalar (@result) == 0) { print 'UNKNOWN'; @@ -419,5 +421,5 @@ sub Print_Result } print ";$countMatches;$licenseLines;$ignoredLines;$unmatchedLines;$unknownLines;$senttok\n"; $senttok = $save; - + } diff --git a/matcher/rules.dict b/matcher/rules.dict index 1055eca..28d274a 100644 --- a/matcher/rules.dict +++ b/matcher/rules.dict @@ -19,8 +19,6 @@ GPL2orBSD3:BSDpre,BSDcondSourceExtrict,BSDcondBinaryExtrict,BSDcondEndorse,Altern,GPLGenVer2,BSDasIsExtrict,BSDWarrExtrict - - GPLv2:GPLv2 GPLv2+:GPLv2\+ GPLv3+:GPLv3\+ @@ -134,24 +132,25 @@ openSSL:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdv,OpenSSLendorse,OpenSSLwrit openSSLvar1:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdvPart1,BSDcondAdvPart2,SSLeayWindows,BSDcondAdvPart2,BSDasIs,BSDWarr,NoLicenseChangeAllowed openSSLvar3:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdvPart1,BSDcondAdvPart2,OpenSSLendorse,OpenSSLwritCond,BSDasIs,BSDWarr 
-Apachev1.0:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdvPart1,BSDcondAdvPart2,OpenSSLendorse,OpenSSLwritCond,OpenSSLName,OpenSSLAckPart1,BSDcondAdvPart2,BSDasIs,BSDWarr -Apachev1.1:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdv,OpenSSLendorse,OpenSSLwritCond,OpenSSLName,OpenSSLAck,BSDasIs,BSDWarr -Apachev1.1:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdvPart1,BSDcondAdvPart2,OpenSSLendorse,OpenSSLwritCond,OpenSSLName,BSDasIs,BSDWarr -Apachev1.1:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdvPart2,OpenSSLendorse,OpenSSLwritCond,OpenSSLName,BSDasIs,BSDWarr -Apachev1.1:BSDpre,BSDcondSourceExtrict,BSDcondBinary,BSDcondAdvPart2,OpenSSLendorse,OpenSSLwritCond,OpenSSLName,BSDasIs,BSDWarr +Apache-1.0:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdvPart1,BSDcondAdvPart2,OpenSSLendorse,OpenSSLwritCond,OpenSSLName,OpenSSLAckPart1,BSDcondAdvPart2,BSDasIs,BSDWarr +Apache-1.1:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdv,OpenSSLendorse,OpenSSLwritCond,OpenSSLName,OpenSSLAck,BSDasIs,BSDWarr +Apache-1.1:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdvPart1,BSDcondAdvPart2,OpenSSLendorse,OpenSSLwritCond,OpenSSLName,BSDasIs,BSDWarr +Apache-1.1:BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdvPart2,OpenSSLendorse,OpenSSLwritCond,OpenSSLName,BSDasIs,BSDWarr +Apache-1.1:BSDpre,BSDcondSourceExtrict,BSDcondBinary,BSDcondAdvPart2,OpenSSLendorse,OpenSSLwritCond,OpenSSLName,BSDasIs,BSDWarr # this is dangerous. 
The v2 applies to apache (ninka-ism) -ApacheV2orLGPLgeneric:apacheAndLGPLgenVer2.0 +Apache-2orLGPLgeneric:apacheAndLGPLgenVer2.0 SleepyCat:BSDpre,BSDcondSource,BSDcondBinary,SleepyCatObtain,SleepyCatSourceIncluded,SleepyCatSourceComplete,SleepyCatDoesNotInclude,SleepyCatAsIs,BSDWarr -boost:boostPermission,boostPreserve,boostAsIs,boostWarr -boostV1:boostRefv1 -boostV1Ref:boostSeev1 +boost-1:boostPermission,boostPreserve,boostAsIs,boostWarr +boost-1:boostRefv1 +boost-1ref:boostSeev1 + SSLeay:SSLCopy,SSLeayAttrib,SSLeayAdType,BSDpre,BSDcondSource,BSDcondBinary,BSDcondAdvRULE,SSLeayCrypto,SSLeayWindows,BSDasIs,BSDWarr,SSLeayCantChangeLic # qt -Qt:qtCommercialuseVer0 +QPLt:qtCommercialuseVer0 orLGPLVer2.1:qtLGPLVer2.1 orLGPLVer2:qtLGPLv2 orGPLv3:qtGPLVer3.0 @@ -171,7 +170,7 @@ Cecill:CecillEn1,CecillEn2,CecillEn3,CecillEn4,CecillEn5,CecillEn6,CecillEn7,Cec SimpleOnlyKeepCopyright:SimpleOnlyKeepCopyright #------------ -QTv1:QTv1 +QPLv1:QTv1 CDDLv1orGPLv2:CDDLorGPLv2ifYouWish,CDDLorGPLv2IfYouAdd CDDLorGPLv2:CDDLorGPLVer2 @@ -233,8 +232,8 @@ EPLv1:EPLv1 CDDLic:CDDLic CDDLicV1:CDDLicV1Only #---------------------------------------------------------------------- -Apachev2:Apachev2,ApacheLicWherePart1,ApacheLicWherePart2v2,ApachesAsIs,ApachesPermLim -Apachev2:Apachev2,ApacheLicWherePart1,ApacheLicWherePart2v2 +Apache-2:Apachev2,ApacheLicWherePart1,ApacheLicWherePart2v2,ApachesAsIs,ApachesPermLim +Apache-2:Apachev2,ApacheLicWherePart1,ApacheLicWherePart2v2 # publid comain publicDomain:publicDomain @@ -312,7 +311,7 @@ SameAsPerl:SameAsPerl ArtisticLicensev1:ArtisticLicensev1 # QT triple license, outdated :) -QtGPLv2or3:qtGPLv2or3 +QplGPLv2or3:qtGPLv2or3 FreeType:FreeType ## @@ -74,9 +74,13 @@ my $forceLicense = exists $opts{L}; my $f = $ARGV[0]; -my $original = $f; +my $original = $f; +$f =~ s/'/\\'/g; +$f =~ s/\$/\\\$/g; +$f =~ s/;/\\;/g; +$f =~ s/ /\\ /g; print "Starting: $original;\n" if ($verbose); @@ -87,38 +91,38 @@ my $sentencesFile = "${f}.sentences"; my $goodsentFile = 
"${f}.goodsent"; my $sentokFile = "${f}.senttok"; -if (not (-f "$f")) { - print "ERROR;[${f}] is not a file\n" ; +if (not (-f $original)) { + print "ERROR;[${original}] is not a file\n" ; exit 0; } Do_File_Process($original, $commentsFile, ($force or $forceComments), - "$path/extComments/extComments.pl -c1 '${original}'", + "$path/extComments/extComments.pl -c1 ${f}", "Creating comments file", exists $opts{c}); Do_File_Process($commentsFile, $sentencesFile, ($force or $forceSentences), - "$path/splitter/splitter.pl '${commentsFile}'", + "$path/splitter/splitter.pl ${commentsFile}", "Splitting sentences", exists $opts{s} ); Do_File_Process( $sentencesFile, $goodsentFile, ($force or $forceGood), - "$path/filter/filter.pl '${sentencesFile}'", + "$path/filter/filter.pl ${sentencesFile}", "Filtering good sentences", exists $opts{s} ); Do_File_Process($goodsentFile, $sentokFile, ($force or $forceSentok), - "$path/senttok/senttok.pl '${goodsentFile}' > '${sentokFile}'", + "$path/senttok/senttok.pl ${goodsentFile} > ${sentokFile}", "Matching sentences against rules", exists $opts{t} ); print "Matching ${f}.senttok against rules" if ($verbose); -execute("$path/matcher/matcher.pl '${f}.senttok' > '${f}.license'"); +execute("$path/matcher/matcher.pl ${f}.senttok > ${f}.license"); -print `cat '${f}.license'`; +print `cat ${f}.license`; unlink("${f}.code"); @@ -126,7 +130,7 @@ if ($delete) { unlink("${f}.badsent"); unlink("${f}.comments"); unlink("${f}.goodsent"); -# unlink("${f}.sentences"); + unlink("${f}.sentences"); unlink("${f}.senttok"); } |