summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author dmgerman <dmg@uvic.ca> 2015-05-28 01:03:26 -0700
committer dmgerman <dmg@uvic.ca> 2015-05-28 01:03:26 -0700
commit d081a2bd8c06596356c4e2983461214fe0ad5003 (patch)
tree 4bf111be404e417ecffe0c466b0b2257c5087882
parent 6bee8cd1b174191b2c0f4b069725fe62160bad8d (diff)
parent c22ab1eff2b2d4bc1abd5e3677d24fd3843eccb5 (diff)
download ninka-d081a2bd8c06596356c4e2983461214fe0ad5003.tar.gz
Merge pull request #14 from joshovi/fossy
Now the unofficial testfiles yield the expected output
-rwxr-xr-x bin/ninka 6
-rw-r--r-- lib/Ninka.pm 23
-rw-r--r-- lib/Ninka/CommentExtractor.pm 2
-rw-r--r-- lib/Ninka/licensesentence.dict 4
-rw-r--r-- lib/Ninka/rules.dict 4
-rw-r--r-- t/data/expected_output/Apache-2.0 2
6 files changed, 31 insertions, 10 deletions
diff --git a/bin/ninka b/bin/ninka
index a09ba51..4732cbe 100755
--- a/bin/ninka
+++ b/bin/ninka
@@ -7,20 +7,22 @@ use Ninka;
my %opts = parse_cmdline_parameters();
my $input_file = $ARGV[0];
+my $create_intermediary_files = exists $opts{i};
my $verbose = exists $opts{v};
-my $license_result = Ninka::process_file($input_file, $verbose);
+my $license_result = Ninka::process_file($input_file, $create_intermediary_files, $verbose);
print "$input_file;$license_result\n";
exit 0;
sub parse_cmdline_parameters {
my %opts = ();
- if (!getopts('v', \%opts) || scalar(@ARGV) == 0) {
+ if (!getopts('iv', \%opts) || scalar(@ARGV) == 0) {
print STDERR "Ninka v${Ninka::VERSION}
Usage: $0 [options] <filename>
Options:
+ -i create intermediary files
-v verbose\n";
exit 1;
diff --git a/lib/Ninka.pm b/lib/Ninka.pm
index 1816549..dc9fbff 100644
--- a/lib/Ninka.pm
+++ b/lib/Ninka.pm
@@ -12,7 +12,7 @@ use Ninka::SentenceTokenizer;
our $VERSION = '1.3';
sub process_file {
- my ($input_file, $verbose) = @_;
+ my ($input_file, $create_intermediary_files, $verbose) = @_;
print STDERR "analysing file [$input_file]\n" if $verbose;
@@ -41,9 +41,27 @@ sub process_file {
my %parameters_step5 = (%common_parameters, license_tokens => $license_tokens_ref);
my $license_result = Ninka::LicenseMatcher->new(%parameters_step5)->execute();
+ if ($create_intermediary_files) {
+ create_intermediary_file($input_file, 'comments', $comments);
+ create_intermediary_file($input_file, 'sentences', join("\n", @$sentences_ref));
+ create_intermediary_file($input_file, 'goodsent', join("\n", @$good_sentences_ref));
+ create_intermediary_file($input_file, 'badsent', join("\n", @$bad_sentences_ref));
+ create_intermediary_file($input_file, 'senttok', join("\n", @$license_tokens_ref));
+ create_intermediary_file($input_file, 'license', $license_result);
+ }
+
return $license_result;
}
+sub create_intermediary_file {
+ my ($input_file, $output_extension, $content) = @_;
+
+ my $output_file = "$input_file.$output_extension";
+ open my $output_fh, '>', $output_file or die "can't create output file [$output_file]: $!";
+ print $output_fh $content;
+ close $output_fh;
+}
+
1;
__END__
@@ -57,9 +75,10 @@ Ninka - Find licenses in source files.
use Ninka;
my $input_file = 'some/path/file_of_interest';
+ my $create_intermediary_files = 0;
my $verbose = 0;
- my $license_result = Ninka::process_file($input_file, $verbose);
+ my $license_result = Ninka::process_file($input_file, $create_intermediary_files, $verbose);
=head1 DESCRIPTION
diff --git a/lib/Ninka/CommentExtractor.pm b/lib/Ninka/CommentExtractor.pm
index cd7e060..fd62c02 100644
--- a/lib/Ninka/CommentExtractor.pm
+++ b/lib/Ninka/CommentExtractor.pm
@@ -60,7 +60,7 @@ sub determine_comments_command {
sub create_head_cmd {
my ($input_file, $count_lines) = @_;
- return "head -$count_lines '$input_file'";
+ return "head -$count_lines $input_file";
}
sub execute_command {
diff --git a/lib/Ninka/licensesentence.dict b/lib/Ninka/licensesentence.dict
index 83d2f92..5ab826c 100644
--- a/lib/Ninka/licensesentence.dict
+++ b/lib/Ninka/licensesentence.dict
@@ -286,8 +286,8 @@ ApachePre:52:0:Licensed to the Apache Software Foundation \(ASF\) under one or m
ApacheSee:52:0:See the NOTICE file distributed with this work for additional information regarding copyright ownership:
ApachesAsIs:52:0:Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an <quotes>AS IS<quotes> BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied:
ApachesPermLim:52:0:See the License for the specific language governing permissions and limitations under the License:
-Apachev2:52:0:Licensed under the Apache License, Version 2.0 \(the <quotes>License<quotes>\); You may not use this file except in compliance with the License:
-Apachev2:52:0:The ASF licenses this file to You under the Apache License, Version 2\.0 \(the <quotes>License<quotes>\); you may not use this file except in compliance with the License:
+Apache-2:52:0:Licensed under the Apache License, Version 2.0 \(the <quotes>License<quotes>\); You may not use this file except in compliance with the License:
+Apache-2:52:0:The ASF licenses this file to You under the Apache License, Version 2\.0 \(the <quotes>License<quotes>\); you may not use this file except in compliance with the License:
apacheAndLGPLgen:10:0:^<LICENSED> under the terms of either the Apache License \(<VERSION>\) or the Lesser GPL, as specified in the COPYING file$
diff --git a/lib/Ninka/rules.dict b/lib/Ninka/rules.dict
index 6eaaad9..d405c12 100644
--- a/lib/Ninka/rules.dict
+++ b/lib/Ninka/rules.dict
@@ -219,8 +219,8 @@ EPLv1:EPLv1
CDDLic:CDDLic
CDDLicV1:CDDLicV1Only
-Apachev2:Apachev2,ApacheLicWherePart1,ApacheLicWherePart2v2,ApachesAsIs,ApachesPermLim
-Apachev2:Apachev2,ApacheLicWherePart1,ApacheLicWherePart2v2
+Apache-2:Apache-2,ApacheLicWherePart1,ApacheLicWherePart2v2,ApachesAsIs,ApachesPermLim
+Apache-2:Apache-2,ApacheLicWherePart1,ApacheLicWherePart2v2
# publid comain
publicDomain:publicDomain
diff --git a/t/data/expected_output/Apache-2.0 b/t/data/expected_output/Apache-2.0
index 470067b..d6867d6 100644
--- a/t/data/expected_output/Apache-2.0
+++ b/t/data/expected_output/Apache-2.0
@@ -1 +1 @@
-Apachev2;1;5;0;1;42;UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,Copyright,5
+Apache-2;1;5;0;1;42;UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,Copyright,5