summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordmgerman <dmg@uvic.ca>2015-06-09 00:24:08 -0700
committerdmgerman <dmg@uvic.ca>2015-06-09 00:24:08 -0700
commitea063ea2b8032383939deabfe55d183dbb35a930 (patch)
tree2577b617cab54595f7ce091df8133c0ba3ed9b7b
parent0147f4736bd1d6d509ee20bedfd79aa893da8d0b (diff)
parent794490b0bb279cd2f6d673aecdf68c653a7a9dfd (diff)
downloadninka-fossy.tar.gz
Merge pull request #15 from darxriggs/dmgerman/fossyfossy
documentation update & improved project structure
-rw-r--r--Changes2
-rw-r--r--Copyright.SCOWL (renamed from lib/Ninka/Copyright.SCOWL)0
-rw-r--r--MANIFEST9
-rw-r--r--Makefile.PL11
-rw-r--r--README124
-rwxr-xr-xbin/ninka70
-rwxr-xr-xbin/ninka-excel (renamed from ninka-excel.pl)15
-rwxr-xr-xbin/ninka-sqlite (renamed from ninka-sqlite.pl)87
-rw-r--r--lib/Ninka.pm26
-rw-r--r--lib/Ninka/CommentExtractor.pm24
-rw-r--r--lib/Ninka/FileCleaner.pm24
-rw-r--r--lib/Ninka/LicenseMatcher.pm22
-rw-r--r--lib/Ninka/LicenseRules.pm22
-rw-r--r--lib/Ninka/README.txt9
-rw-r--r--lib/Ninka/SentenceExtractor.pm15
-rw-r--r--man/ninka.183
-rwxr-xr-x[-rw-r--r--]scripts/license_matcher_modified.pl0
-rwxr-xr-x[-rw-r--r--]scripts/parseLicense.pl0
-rwxr-xr-x[-rw-r--r--]scripts/sort_package_license_list.pl0
-rwxr-xr-x[-rw-r--r--]scripts/unify.pl0
-rw-r--r--t/pod_ok.t5
-rw-r--r--unify.pl161
22 files changed, 252 insertions, 457 deletions
diff --git a/Changes b/Changes
index ad02822..04ec33e 100644
--- a/Changes
+++ b/Changes
@@ -22,7 +22,7 @@
* ninka.pl: fixed bug in finding the path of where ninka was being executed from (reported by Ryan Biesemeyer)
- * Fixed quotes in perl (René bScheibe)
+ * Fixed quotes in perl (René Scheibe)
2015-01-05 dmg <dmg@uvic.ca>
diff --git a/lib/Ninka/Copyright.SCOWL b/Copyright.SCOWL
index a4654a3..a4654a3 100644
--- a/lib/Ninka/Copyright.SCOWL
+++ b/Copyright.SCOWL
diff --git a/MANIFEST b/MANIFEST
index 3379706..1508f1e 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -1,9 +1,13 @@
bin/ninka
+bin/ninka-excel
+bin/ninka-sqlite
Changes
+Copyright.SCOWL
lib/Ninka.pm
lib/Ninka/abbreviations.dict
lib/Ninka/CommentExtractor.pm
lib/Ninka/criticalwords.dict
+lib/Ninka/FileCleaner.pm
lib/Ninka/interrules.dict
lib/Ninka/LicenseMatcher.pm
lib/Ninka/LicenseRules.pm
@@ -17,6 +21,10 @@ Makefile.PL
MANIFEST
MANIFEST.SKIP
README
+scripts/license_matcher_modified.pl
+scripts/parseLicense.pl
+scripts/sort_package_license_list.pl
+scripts/unify.pl
t/data/expected_output/AAL
t/data/expected_output/AFL-1.1
t/data/expected_output/AFL-1.2
@@ -209,5 +217,6 @@ t/data/licenses/OSL-2.1
t/data/licenses/OSL-3.0
t/data/licenses/PRESERVE_COPYRIGHT_NOTICE
t/data/licenses/Public-domain
+t/pod_ok.t
t/reference_licenses.t
t/syntax_ok_and_use_strict.t
diff --git a/Makefile.PL b/Makefile.PL
index af4730c..b29cf02 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -1,12 +1,13 @@
use strict;
use warnings;
+use utf8;
use ExtUtils::MakeMaker;
WriteMakefile(
NAME => 'Ninka',
VERSION_FROM => 'lib/Ninka.pm',
ABSTRACT_FROM => 'lib/Ninka.pm',
- LICENSE => 'agpl_3',
+ LICENSE => 'gpl_2',
AUTHOR => [
'Daniel M. German <dmg@uvic.ca>',
'Yuki Manabe <y-manabe@ist.osaka-u.ac.jp>',
@@ -17,21 +18,27 @@ WriteMakefile(
'ExtUtils::MakeMaker' => '6.52',
},
PREREQ_PM => {
+ 'DBI' => '0',
+ 'DBD::SQLite' => '0',
'File::Basename' => '0',
+ 'File::Find' => '0',
'File::Spec::Functions' => '0',
+ 'File::Temp' => '0',
'Getopt::Std' => '0',
'IPC::Open3' => '0',
+ 'Spreadsheet::WriteExcel' => '0',
},
TEST_REQUIRES => {
'File::Temp' => '0',
'Test::More' => '0.98',
+ 'Test::Pod' => '1.00',
'Test::Strict' => '0',
},
META_MERGE => {
resources => {
homepage => 'http://ninka.turingmachine.org/',
repository => 'https://github.com/dmgerman/ninka',
- license => 'http://www.gnu.org/licenses/agpl-3.0.html',
+ license => 'http://www.gnu.org/licenses/gpl-2.0.html',
},
},
);
diff --git a/README b/README
index b80a187..dbbe6f1 100644
--- a/README
+++ b/README
@@ -11,16 +11,13 @@ under which a source file is made available.
This tool uses a source file as input and outputs the licenses
identified within that file.
-If you need to know the detail of Ninka, please see the following
-paper:
+If you need to know the detail of Ninka, please see the following paper:
Daniel M. German, Yuki Manabe and Katsuro Inoue. A sentence-matching
method for automatic license identification of source code files. In
25nd IEEE/ACM International Conference on Automated Software
Engineering (ASE 2010). You can email me (dmg@uvic.ca) for a copy or
-download it from
-
-http://turingmachine.org/~dmg/papers/dmg2010ninka.pdf
+download it from http://turingmachine.org/~dmg/papers/dmg2010ninka.pdf.
If you use Ninka for research purposes, we would appreciate you cite
the above paper.
@@ -28,13 +25,13 @@ the above paper.
* Contributors
- Paul Clough for his code to split sentences
-- Anthony Kohan for writing the excel and sqlite backends.
-- Armijn Hemel from Tjaldur Software Governance Solutions for multiple bug reports and suggestions
+- Anthony Kohan for writing the excel and sqlite backends
+- Armijn Hemel from Tjaldur Software Governance Solutions for multiple bug reports and suggestions
+- René Scheibe for modularizing the code
* License
- Except for the directories comments and splitter, Ninka is licensed
- under the GPLv2+
+ Ninka is licensed under the GPLv2+:
Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German
@@ -51,59 +48,41 @@ the above paper.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
- - splitter.pl is a derivative work of the Rule-based sentence
- splitter script by Paul Paul Clough. Please see splitter/README
- for details.
+ Ninka::SentenceExtractor is a derivative work of the rule-based sentence
+ splitter script by Paul Clough.
- - comments is based on a program to remove comments by Jon Newman,
- it is released under the GNU General Public License Version 2 or
- (at your option) any later version.
+ comments is based on a program to remove comments by Jon Newman.
* Requirements
- Perl version 5 or above
-- for ninka-excel.pl: Perl module Spreadsheet::WriteExcel
- https://metacpan.org/release/Spreadsheet-WriteExcel/
-- for ninka-sqlite.pl: Perl module DBD::SQLite
+- for ninka-excel: Perl module Spreadsheet::WriteExcel
+ https://metacpan.org/release/Spreadsheet-WriteExcel
+- for ninka-sqlite: Perl module DBD::SQLite
https://metacpan.org/release/DBD-SQLite
* How to install
1. Unpack the distribution in a directory.
- 2. Optional: Build and install comments (make sure it is somwehere in the
- path) (see directory comments)
-
+ 2. Optional: Build and install comments (make sure it is somewhere in the path) (see directory comments)
-* Usage:
+* Usage
-Ninka uses a pipe model (see below). Each step of the "pipe" creates a
-file, but
+ninka [options] filename
-ninka.pl [options] [filename]
+Available options:
-Available options
+ -i create intermediary files
-v verbose
- -d delete intermediate files
- -C force creation of comments file
- -c stop after creation of comments
- -S force creation of sentences file
- -s stop after creation of sentences
- -G force creation of goodsent file
- -g stop after creation of goodsent
- -T force creation of senttok file
- -t stop after creation of senttok
- -L force creation of license file
- -f force all processing
-
Example:
- ninka.pl foo.c
+ ninka -i foo.c
It will create five files:
- 1. foo.c.comments: extracted the first two comments blocks, where
- the license is usually
+ 1. foo.c.comments: extracted the first comment blocks, where
+ the license is usually included
2. foo.c.sentences: creates the list of sentences in the license
statement
3. foo.c.goodsent: contains sentences that are likely to be part of
@@ -117,69 +96,60 @@ It will create five files:
- Licenses
- Unmatched sentences in *.senttok that were not matched
-
-
+The files are not required for Ninka's functionality. But they can help
+to debug license detection issues.
* Ninka model
Ninka uses a pipe-model. Each stage of the pipe does something very specific:
- 1. Comment extractor.
+1. Comment extractor
- - directory: extComments
+ - Module: Ninka::CommentExtractor
- - command: extComments.pl, might use comments (included in distribution)
+ - Purpose: Extracts top comments of source code.
+ If no comment extractor is known for the language,
+ then extracts top lines from source (currently 700)
- - Purpose: Extracts top comments of source code. If no
- comment extractor is known for the language, then extracts top lines from source (currently 700)
-
- - Creates <filename>.comments file
+ - Output: <filename>.comments
2. Split sentences in comments
- - directory: splitter
-
- - command: splitter.pl
-
- - Purpose: Ninka works by matching sentences of licenses, hence
- it needs to properly break text into sentences.
-
- - Outputs <filename>.sentences
-
-3. Filter "good" sentences.
+ - Module: Ninka::SentenceExtractor
- - directory filter
+ - Purpose: Ninka works by matching sentences of licenses,
+ hence it needs to properly break text into sentences.
- - command: filter.pl
+ - Output: <filename>.sentences
- - Purpose: some sentences are related to a license, some are
- not. It is valuable to know if a file contains lines that look
- like a license or not (e.g. to know that a file has no license)
+3. Filter "good" sentences
- - Outputs: <filename>.goodsent, and <filename>.badsent (not used)
+ - Module: Ninka::SentenceFilter
-4. Tokenizes sentences
+ - Purpose: Some sentences are related to a license, some are not.
+ It is valuable to know if a file contains lines that look like
+ a license or not (e.g. to know that a file has no license).
- - Directory senttok
+ - Output: <filename>.goodsent and <filename>.badsent
- - command: senttok.pl
+4. Tokenize sentences
- - Purpose: It creates a file that corresponds to the recognized
- sentence tokens. For each sentence, it outputs its sentence token, or unknown otherwise.
+ - Module: Ninka::SentenceTokenizer
- - Outputs: <filename>.senttok
+ - Purpose: It creates a file that corresponds to the recognized sentence tokens.
+ For each sentence, it outputs its sentence token, or unknown otherwise.
-5. Matches sentences to licenses
+ - Output: <filename>.senttok
- - Directory matcher
+5. Match sentences to licenses
- - Command: matcher.pl
+ - Module: Ninka::LicenseMatcher
- - Purpose: looks at the sequence of sentence tokens and outputs the licenses found
+ - Purpose: It looks at the sentence tokens and outputs the licenses found.
- Output: <filename>.license
-The script ninka.pl takes care of all these steps, and optionally removes
+The script ninka takes care of all these steps, and optionally creates
intermediary files, and writes to the stdout the licenses found.
------
diff --git a/bin/ninka b/bin/ninka
index 4732cbe..9cfd6aa 100755
--- a/bin/ninka
+++ b/bin/ninka
@@ -1,4 +1,4 @@
-#!/usr/bin/env perl
+#!/usr/bin/perl
use strict;
use warnings;
@@ -19,7 +19,7 @@ sub parse_cmdline_parameters {
if (!getopts('iv', \%opts) || scalar(@ARGV) == 0) {
print STDERR "Ninka v${Ninka::VERSION}
-Usage: $0 [options] <filename>
+Usage: ninka [options] <filename>
Options:
-i create intermediary files
@@ -32,29 +32,79 @@ Options:
__END__
+=encoding utf8
+
=head1 NAME
-ninka
+ninka - source file license identification tool
+
+=head1 SYNOPSIS
+
+B<ninka> [options] F<filename>
=head1 DESCRIPTION
-Scans a file and returns the found licenses.
+Scans a source file and returns the found licenses.
+
+=head1 OPTIONS
+
+=over
+
+=item B<-i>
+
+create intermediary files (for debugging)
+
+=item B<-v>
+
+verbose
+
+=back
+
+=head1 EXAMPLES
+
+=over
+
+=item B<ninka> F<foo.c>
+
+Determine the licenses in file F<foo.c>.
+
+=item B<ninka -i> F<foo.c>
+
+Determine the licenses in file F<foo.c> and create intermediary files (for debugging).
+
+=item find * | xargs -n1 -I@ B<ninka> '@'
+
+Determine the licenses of files in a directory.
+
+=back
+
+=head1 AUTHOR
+
+B<ninka> was written by Daniel M. German <dmg@uvic.ca> and Yuki Manabe <y-manabe@ist.osaka-u.ac.jp>.
+
+=head1 SEE ALSO
+
+Daniel M. German, Yuki Manabe and Katsuro Inoue. A sentence-matching method
+for automatic license identification of source code files. In 25th IEEE/ACM
+International Conference on Automated Software Engineering (ASE 2010).
+
+You can download it from http://turingmachine.org/~dmg/papers/dmg2010ninka.pdf.
=head1 COPYRIGHT AND LICENSE
-Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German
+Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German, 2015 René Scheibe
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as
-published by the Free Software Foundation, either version 3 of the
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
+GNU General Public License for more details.
-You should have received a copy of the GNU Affero General Public License
+You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
=cut
diff --git a/ninka-excel.pl b/bin/ninka-excel
index 71adddf..2e2d35d 100755
--- a/ninka-excel.pl
+++ b/bin/ninka-excel
@@ -17,20 +17,17 @@
#
use strict;
-use Switch;
use File::Temp;
use File::Find;
use File::Basename;
-use Scalar::Util qw(looks_like_number);
+use Ninka;
use Spreadsheet::WriteExcel;
-
-
-if(scalar(@ARGV) != 2){
- print STDERR "Ninka 1.3. sqlite wrapper\n";
+if (scalar(@ARGV) != 2) {
+ print STDERR "Ninka v${Ninka::VERSION}. sqlite wrapper\n";
print STDERR "Processes package file (.tar.gz, zip, jar. etc) and outputs to excel file\n";
print STDERR "Incorrect number of arguments\n";
- print STDERR "Correct usage is: $0 <path to package file> <excel-file>\n";
+ print STDERR "Usage: $0 <path to package file> <excel-file>\n";
exit 1;
}
@@ -85,11 +82,10 @@ print "***** Beginning Execution of Ninka *****\n";
foreach my $file (@files) {
if (-T $file) {
print "Running ninka on file [$file]\n";
- execute("perl ${path}/ninka.pl '$file'");
+ execute("perl ${path}/ninka -i '$file'");
}
}
-
print "***** Entering Ninka Data into excell file [$excelFile] *****\n";
my $row = 1;
@@ -116,7 +112,6 @@ foreach my $file (@files) {
my @columns = parseLicenseData($filedata);
-
my $originalFile = $file;
$originalFile =~ s/\.license$//;
diff --git a/ninka-sqlite.pl b/bin/ninka-sqlite
index d53f60f..6b27ea9 100755
--- a/ninka-sqlite.pl
+++ b/bin/ninka-sqlite
@@ -17,20 +17,17 @@
#
use strict;
-use Switch;
use DBI;
use File::Temp;
use File::Find;
use File::Basename;
-use Scalar::Util qw(looks_like_number);
+use Ninka;
-
-
-if(scalar(@ARGV) != 2){
- print STDERR "Ninka 1.3. sqlite wrapper\n";
+if (scalar(@ARGV) != 2) {
+ print STDERR "Ninka v${Ninka::VERSION}. sqlite wrapper\n";
print STDERR "Processes package file (.tar.gz, zip, jar. etc) and outputs to sqlite file\n";
print STDERR "Incorrect number of arguments\n";
- print STDERR "Correct usage is: $0 <path to package file> <database name>\n";
+ print STDERR "Usage: $0 <path to package file> <database name>\n";
exit 1;
}
@@ -88,7 +85,7 @@ find(
print "***** Beginning Execution of Ninka *****\n";
foreach my $file (@files) {
print "Running ninka on file [$file]\n";
- execute("perl ${path}/ninka.pl '$file'");
+ execute("perl ${path}/ninka -i '$file'");
}
my @ninkafiles;
@@ -116,44 +113,46 @@ foreach my $file (@ninkafiles) {
my $filedata = do { local $/; <$fh> };
my $sth;
- switch (getExtension($basefile)){
-
-# case ".comments" {
-# print "Inserting [$basefile] into table comments\n";
-# $sth = $dbh->prepare("INSERT INTO comments VALUES
-# ('$rootfile', '$filepath', '$packname', ?)");
-# }
- case ".sentences" {
- print "Inserting [$basefile] into table sentences\n";
- $sth = $dbh->prepare("INSERT INTO sentences VALUES
- ('$rootfile', '$filepath', '$packname', ?)");
- }
- case ".goodsent" {
- print "Inserting [$basefile] into table goodsents\n";
- $sth = $dbh->prepare("INSERT INTO goodsents VALUES
- ('$rootfile', '$filepath', '$packname', ?)");
- }
- case ".badsent" {
- print "Inserting [$basefile] into table goodsents\n";
- $sth = $dbh->prepare("INSERT INTO badsents VALUES
- ('$rootfile', '$filepath', '$packname', ?)");
- }
- case ".senttok" {
- print "Inserting [$basefile] into table senttoks\n";
- $sth = $dbh->prepare("INSERT INTO senttoks VALUES
- ('$rootfile', '$filepath', '$packname', ?)");
- }
- case ".license" {
- print "Inserting [$basefile] into table licenses\n";
- my @columns = parseLicenseData($filedata);
- $sth = $dbh->prepare("INSERT INTO licenses VALUES
- ('$rootfile', '$filepath', '$packname', '$columns[0]', '$columns[1]',
- '$columns[2]', '$columns[3]', '$columns[4]', '$columns[5]', '$columns[6]')");
- }
+ my $ext = getExtension($basefile);
+
+ if ($ext eq ".comments") {
+ print "Inserting [$basefile] into table comments\n";
+ $sth = $dbh->prepare("INSERT INTO comments VALUES
+ ('$rootfile', '$filepath', '$packname', ?)");
+ }
+ if ($ext eq ".sentences") {
+ print "Inserting [$basefile] into table sentences\n";
+ $sth = $dbh->prepare("INSERT INTO sentences VALUES
+ ('$rootfile', '$filepath', '$packname', ?)");
+ }
+ if ($ext eq ".goodsent") {
+ print "Inserting [$basefile] into table goodsents\n";
+ $sth = $dbh->prepare("INSERT INTO goodsents VALUES
+ ('$rootfile', '$filepath', '$packname', ?)");
+ }
+ if ($ext eq ".badsent") {
+ print "Inserting [$basefile] into table badsents\n";
+ $sth = $dbh->prepare("INSERT INTO badsents VALUES
+ ('$rootfile', '$filepath', '$packname', ?)");
+ }
+ if ($ext eq ".senttok") {
+ print "Inserting [$basefile] into table senttoks\n";
+ $sth = $dbh->prepare("INSERT INTO senttoks VALUES
+ ('$rootfile', '$filepath', '$packname', ?)");
+ }
+ if ($ext eq ".license") {
+ print "Inserting [$basefile] into table licenses\n";
+ my @columns = parseLicenseData($filedata);
+ $sth = $dbh->prepare("INSERT INTO licenses VALUES
+ ('$rootfile', '$filepath', '$packname', '$columns[0]', '$columns[1]',
+ '$columns[2]', '$columns[3]', '$columns[4]', '$columns[5]', '$columns[6]')");
+ }
+
+ if (defined $sth) {
+ $sth->bind_param(1, $filedata);
+ $sth->execute;
}
- $sth->bind_param(1, $filedata);
- $sth->execute;
close($fh);
}
diff --git a/lib/Ninka.pm b/lib/Ninka.pm
index dc9fbff..8f454cd 100644
--- a/lib/Ninka.pm
+++ b/lib/Ninka.pm
@@ -68,7 +68,7 @@ __END__
=head1 NAME
-Ninka - Find licenses in source files.
+Ninka - source file license identification tool
=head1 SYNOPSIS
@@ -82,23 +82,23 @@ Ninka - Find licenses in source files.
=head1 DESCRIPTION
-Scans a file and returns the found licenses.
+Scans a source file and returns the found licenses.
=head1 COPYRIGHT AND LICENSE
- Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German
+Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
=cut
diff --git a/lib/Ninka/CommentExtractor.pm b/lib/Ninka/CommentExtractor.pm
index fd62c02..8a2e66f 100644
--- a/lib/Ninka/CommentExtractor.pm
+++ b/lib/Ninka/CommentExtractor.pm
@@ -93,19 +93,19 @@ If no comment extractor is known for a language, then extracts top lines from so
=head1 COPYRIGHT AND LICENSE
- Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German
+Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
-=cut \ No newline at end of file
+=cut
diff --git a/lib/Ninka/FileCleaner.pm b/lib/Ninka/FileCleaner.pm
index c3dd912..825b1fb 100644
--- a/lib/Ninka/FileCleaner.pm
+++ b/lib/Ninka/FileCleaner.pm
@@ -49,19 +49,19 @@ Escapes apostrophes and other potentially disturbing characters
=head1 COPYRIGHT AND LICENSE
- Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German, 2015 Daniele Fognini and Johannes Najjar
+Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German, 2015 Daniele Fognini and Johannes Najjar
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
-=cut \ No newline at end of file
+=cut
diff --git a/lib/Ninka/LicenseMatcher.pm b/lib/Ninka/LicenseMatcher.pm
index 1cb402a..ba73b26 100644
--- a/lib/Ninka/LicenseMatcher.pm
+++ b/lib/Ninka/LicenseMatcher.pm
@@ -261,19 +261,19 @@ Uses a set of license sentence names as input and outputs license names correspo
=head1 COPYRIGHT AND LICENSE
- Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German
+Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
=cut
diff --git a/lib/Ninka/LicenseRules.pm b/lib/Ninka/LicenseRules.pm
index c7810c8..ee5af4b 100644
--- a/lib/Ninka/LicenseRules.pm
+++ b/lib/Ninka/LicenseRules.pm
@@ -96,19 +96,19 @@ Contains rules used by Ninka::LicenseMatcher.
=head1 COPYRIGHT AND LICENSE
- Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German
+Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
=cut
diff --git a/lib/Ninka/README.txt b/lib/Ninka/README.txt
deleted file mode 100644
index 2ca7f46..0000000
--- a/lib/Ninka/README.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-This list of abbreviations was extracted from SCOWL (Spell Checker Oriented Word Lists) by by Kevin Atkinson (kevina@gnu.org) version 2015.04.24.
-
-Specifically it was created from scowl-2015.04.24.tar.gz. by running:
-
-cat *abbrev* | sort -u > abbreviations.dict
-
-It also contains some additions by D.M German.
-
-See Copyright.scowl for license.
diff --git a/lib/Ninka/SentenceExtractor.pm b/lib/Ninka/SentenceExtractor.pm
index c27d199..6aeee4e 100644
--- a/lib/Ninka/SentenceExtractor.pm
+++ b/lib/Ninka/SentenceExtractor.pm
@@ -93,7 +93,7 @@ sub execute {
$count2++ if ($c ge 'A' && $c le 'z');
}
my $clean_sentence = clean_sentence($sentence);
- push @clean_sentences, $clean_sentence if $clean_sentence,
+ push @clean_sentences, $clean_sentence if $clean_sentence;
}
if ($count1 != $count2) {
@@ -251,6 +251,19 @@ Ninka::SentenceExtractor
Breaks comments into sentences.
+=head1 NOTES
+
+This list of abbreviations was extracted from SCOWL (Spell Checker Oriented Word Lists)
+by Kevin Atkinson (kevina@gnu.org) version 2015.04.24.
+
+Specifically it was created from scowl-2015.04.24.tar.gz by running:
+
+ cat *abbrev* | sort -u > abbreviations.dict
+
+It also contains some additions by D.M German.
+
+See Copyright.SCOWL for license.
+
=head1 COPYRIGHT AND LICENSE
Author: Paul Clough
diff --git a/man/ninka.1 b/man/ninka.1
deleted file mode 100644
index 9cd2d57..0000000
--- a/man/ninka.1
+++ /dev/null
@@ -1,83 +0,0 @@
-.TH NINKA 1.3 "May 2015" ninka
-.SH NAME
-ninka \- source file license identification tool
-.SH SYNOPSYS
-.SY ninka
-.OP \-vfCcSsGgTtLd
-.OP \-\-
-.RI [ file ]
-.YS
-
-.SH DESCRIPTION
-
-Analyses source files to determine the license they fall under. Takes a source
-file as input and outputs the file's license.
-
-.SH OPTIONS
-
-.IP \-v
-verbose
-
-.IP \-f
-force all processing
-
-.IP \-C
-force creation of comments
-.IP \-c
-stop after creation of comments
-
-.IP \-S
-force creation of sentences
-.IP \-s
-stop after creation of sentences
-
-.IP \-G
-force creation of goodsent
-.IP \-g
-stop after creation of goodsent
-
-.IP \-T
-force creation of senttok
-.IP \-t
-stop after creation of senttok
-
-.IP \-L
-force creation of matching
-
-.IP \-d
-delete intermediate files
-
-.IP \-\-
-Stop processing options
-
-.SH EXAMPLES
-
-.TP
-\fBninka\fR \fIfoo.c\fR
-Determine the licenses in file foo.c
-
-.TP
-.BI ninka\ \-d \ foo.c
-Determine the license in file foo.c and delete intermediary files
-
-.TP
-find * | xargs \-n1 \-I@ \fBninka\fR '@'
-Determine the licenses of files in a directory.
-
-
-.SH AUTHOR
-
-\fBninka\fR was written by Daniel M. German <dmg@uvic.ca> and Yuki Manabe
-<y-manabe@ist.osaka-u.ac.jp>. ninka itself is licensed under the AGPLv3+. This
-manpage was written by Ryan Kavanagh <ryanakca@kubuntu.org> for the Debian
-project and is also licensed under the AGPLv3+.
-
-.SH SEE ALSO
-
-Daniel M. German, Yuki Manabe and Katsuro Inoue. A sentence-matching method
-for automatic license identification of source code files. In 25nd IEEE/ACM
-International Conference on Automated Software Engineering (ASE 2010).
-
-You can email Daniel M. German <dmg@uvic.ca> for a copy or download it from
-.UR http://turingmachine.org/~dmg/papers/dmg2010ninka.pdf
-.UE
diff --git a/scripts/license_matcher_modified.pl b/scripts/license_matcher_modified.pl
index 53b1732..53b1732 100644..100755
--- a/scripts/license_matcher_modified.pl
+++ b/scripts/license_matcher_modified.pl
diff --git a/scripts/parseLicense.pl b/scripts/parseLicense.pl
index 2dc61ef..2dc61ef 100644..100755
--- a/scripts/parseLicense.pl
+++ b/scripts/parseLicense.pl
diff --git a/scripts/sort_package_license_list.pl b/scripts/sort_package_license_list.pl
index cd20b50..cd20b50 100644..100755
--- a/scripts/sort_package_license_list.pl
+++ b/scripts/sort_package_license_list.pl
diff --git a/scripts/unify.pl b/scripts/unify.pl
index f518fbb..f518fbb 100644..100755
--- a/scripts/unify.pl
+++ b/scripts/unify.pl
diff --git a/t/pod_ok.t b/t/pod_ok.t
new file mode 100644
index 0000000..57b423a
--- /dev/null
+++ b/t/pod_ok.t
@@ -0,0 +1,5 @@
+use strict;
+use warnings;
+use Test::Pod;
+
+all_pod_files_ok();
diff --git a/unify.pl b/unify.pl
deleted file mode 100644
index f518fbb..0000000
--- a/unify.pl
+++ /dev/null
@@ -1,161 +0,0 @@
-#!/usr/bin/perl
-
-# first pass, unify names of licenses and remove duplicates.
-
-# we trick regarding gpl related licenses so they are "clustered" together..
-#
-# replace GPL with __GPL
-# replace exception in the text with ___exception
-
-use strict;
-
-my %equiv = (
- "boostV1Ref" => "boostV1",
- "X11" => "X11mit",
- "X11Festival" => "X11mit",
- "X11mitNoSellNoDocDocBSDvar" => "X11mit",
- "X11mitwithoutSell" => 'X11mit',
- "X11mitBSDvar" => "X11mit",
- "X11mitwithoutSellCMUVariant" => "X11mit",
- "X11mitwithoutSellCMUVariant" => "X11mit",
- "X11mitwithoutSellandNoDocumentationRequi" => "X11mit",
- "MITvar3" => "X11mit",
- "MITvar2" => "X11mit",
- "MIT" => "X11mit",
- "ZLIBref" => "ZLIB",
- "BSD3NoWarranty" => "BSD3",
- "BSD2EndorseInsteadOfBinary" => "BSD2",
- "BSD2var2" => "BSD2",
- "LesserGPLv2" => "LibraryGPLv2",
- "LesserGPLv2+" => "LibraryGPLv2+",
- "orLGPLVer2.1" => "LesserGPLVer2.1",
- "postgresqlRef" => "postgresql",
- );
-
-while (<>) {
- chomp;
- my @f = split(/;/);
- # first remove duplicates
-
- my $l = $f[1];
-
- # do a simple rewriting of this exception which is an incomplete license
-
- $l =~ s/^Exception$/UNKNOWN/;
-
- my @l = split(/,/,$l);
- my %lics = %{{ map { $_ => 1 } @l }};
-
- %lics = Do_Equivalent(%lics);
- %lics = Remove_Redundant(%lics);
- %lics = Do_Exceptions(%lics);
-
- my @out = sort keys %lics;
-
- my $t = join(',', @out);
- if ($t eq "") {
- $t = "UNKNOWN";
- }
- print $f[0], ";$t\n";
-}
-
-sub Do_Exceptions
-{
- my (%lics) = @_;
-
- if ($lics{'digiaQTExceptionNoticeVer1.1'} ne '' and $lics{'Qt'}) {
- delete $lics{'digiaQTExceptionNoticeVer1.1'};
- delete $lics{'Qt'};
- $lics{'Qt-qtExcep'} = 'Qt-qtExcep';
- }
- if ($lics{'BisonException'} ne "" and $lics{"GPLv3+"} ne "") {
- delete $lics{'BisonException'};
- delete $lics{"GPLv3+"};
- $lics{'GPLv3+-bisonExcep'} = 'GPLv3+-bisonExcep';
- }
- if ($lics{'BisonException'} ne "" and $lics{"GPLv2+"} ne "") {
- delete $lics{'BisonException'};
- delete $lics{"GPLv2+"};
- $lics{'GPLv2+-bisonExcep'} = 'GPLv2+-bisonExcep';
- }
- if ($lics{'BisonException'} ne "" and $lics{"GPLv2"} ne "") {
- delete $lics{'BisonException'};
- delete $lics{"GPLv2"};
- $lics{'GPLv2-bisonExcep'} = 'GPLv2-bisonExcep';
- }
- if ($lics{'ClassPathException'} ne "" and $lics{"GPLv2"} ne "") {
- delete $lics{'ClassPathException'};
- delete $lics{"GPLv2"};
- $lics{"GPLv2-classPathExcep"} = "GPLv2-classPathExcep";
- }
- if ($lics{'CDDLorGPLv2'} ne "" and $lics{"ClassPathExceptionGPLv2"} ne "") {
- delete $lics{'CDDLorGPLv2'};
- delete $lics{"ClassPathExceptionGPLv2"};
- $lics{'CDDLorGPLv2-classPathExcep'} = 'CDDLorGPLv2-classPathExcep';
- }
- if ($lics{'LinkException'} ne "" and $lics{"GPLv3+"} ne "") {
- delete $lics{'LinkException'};
- delete $lics{"GPLv3+"};
- $lics{'GPLv3+-linkExcep'} = 'GPLv3+-linkExcep';
- }
- if ($lics{'LinkException'} ne "" and $lics{"GPLv2+"} ne "") {
- delete $lics{'LinkException'};
- delete $lics{"GPLv2+"};
- $lics{'GPLv2+-linkExcep'} = 'GPLv2+-linkExcep';
- }
- if ($lics{'LinkException'} ne "" and $lics{"GPLv3"} ne "") {
- delete $lics{'LinkException'};
- delete $lics{"GPLv3"};
- $lics{'GPLv3-linkExcep'} = 'GPLv3-linkExcep';
- }
- if ($lics{'LinkException'} ne "" and $lics{"GPLv2"} ne "") {
- delete $lics{'LinkException'};
- delete $lics{"GPLv2"};
- $lics{'GPLv2-linkExcep'} = 'GPLv2-linkExcep';
- }
-
- return %lics;
-
-}
-
-sub Remove_Redundant
-{
- my (%lics) = @_;
-
- if ($lics{"GPLnoVersion"} ne "" and $lics{"GPLv2"} . $lics{"GPLv2+"} .$lics{"GPLv3"} . $lics{"GPLv3+"} ne "") {
- delete $lics{"GPLnoVersion"};
- }
- if ($lics{"GPLv2+"} ne "" and $lics{"GPLv3+"} ne "") {
- delete $lics{"GPLv2+"};
- }
- if ($lics{'MPL1_1andLGPLv2_1'} ne "" and $lics{"MPLv1_1"} ne "") {
- delete $lics{"MPLv1_1"};
- }
-
-
- return %lics;
-
-}
-
-sub Do_Equivalent
-{
- my (%lics) = @_;
- my %outA;
-
- # then normalize licenses
- foreach my $a (keys %lics) {
- next if $a eq "SeeFile";
- if ($equiv{$a} ne "") {
- $outA{$equiv{$a}} = $equiv{$a};
- } else {
- $outA{$a} = $a;
- }
- }
- return %outA;
-
-}
-
-
-sub uniq {
- return keys %{{ map { $_ => 1 } @_ }};
-}