22 files changed, 252 insertions, 457 deletions
diff --git a/Changes b/Changes
index ad02822..04ec33e 100644
--- a/Changes
+++ b/Changes
@@ -22,7 +22,7 @@
 
 	* ninka.pl: fixed bug in finding the path of where ninka was being executed from (reported by Ryan Biesemeyer)
 
-	* Fixed quotes in perl (René bScheibe)
+	* Fixed quotes in perl (René Scheibe)
 
 2015-01-05  dmg  <dmg@uvic.ca>
 
diff --git a/lib/Ninka/Copyright.SCOWL b/Copyright.SCOWL
index a4654a3..a4654a3 100644
--- a/lib/Ninka/Copyright.SCOWL
+++ b/Copyright.SCOWL
diff --git a/MANIFEST b/MANIFEST
index 3379706..1508f1e 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -1,9 +1,13 @@
 bin/ninka
+bin/ninka-excel
+bin/ninka-sqlite
 Changes
+Copyright.SCOWL
 lib/Ninka.pm
 lib/Ninka/abbreviations.dict
 lib/Ninka/CommentExtractor.pm
 lib/Ninka/criticalwords.dict
+lib/Ninka/FileCleaner.pm
 lib/Ninka/interrules.dict
 lib/Ninka/LicenseMatcher.pm
 lib/Ninka/LicenseRules.pm
@@ -17,6 +21,10 @@ Makefile.PL
 MANIFEST
 MANIFEST.SKIP
 README
+scripts/license_matcher_modified.pl
+scripts/parseLicense.pl
+scripts/sort_package_license_list.pl
+scripts/unify.pl
 t/data/expected_output/AAL
 t/data/expected_output/AFL-1.1
 t/data/expected_output/AFL-1.2
@@ -209,5 +217,6 @@ t/data/licenses/OSL-2.1
 t/data/licenses/OSL-3.0
 t/data/licenses/PRESERVE_COPYRIGHT_NOTICE
 t/data/licenses/Public-domain
+t/pod_ok.t
 t/reference_licenses.t
 t/syntax_ok_and_use_strict.t
diff --git a/Makefile.PL b/Makefile.PL
index af4730c..b29cf02 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -1,12 +1,13 @@
 use strict;
 use warnings;
+use utf8;
 use ExtUtils::MakeMaker;
 
 WriteMakefile(
     NAME => 'Ninka',
     VERSION_FROM => 'lib/Ninka.pm',
     ABSTRACT_FROM => 'lib/Ninka.pm',
-    LICENSE => 'agpl_3',
+    LICENSE => 'gpl_2',
     AUTHOR => [
         'Daniel M. German <dmg@uvic.ca>',
         'Yuki Manabe <y-manabe@ist.osaka-u.ac.jp>',
@@ -17,21 +18,27 @@ WriteMakefile(
         'ExtUtils::MakeMaker' => '6.52',
     },
     PREREQ_PM => {
+        'DBI' => '0',
+        'DBD::SQLite' => '0',
         'File::Basename' => '0',
+        'File::Find' => '0',
         'File::Spec::Functions' => '0',
+        'File::Temp' => '0',
         'Getopt::Std' => '0',
         'IPC::Open3' => '0',
+        'Spreadsheet::WriteExcel' => '0',
     },
     TEST_REQUIRES => {
         'File::Temp' => '0',
         'Test::More' => '0.98',
+        'Test::Pod' => '1.00',
         'Test::Strict' => '0',
     },
     META_MERGE => {
         resources => {
             homepage => 'http://ninka.turingmachine.org/',
             repository => 'https://github.com/dmgerman/ninka',
-            license => 'http://www.gnu.org/licenses/agpl-3.0.html',
+            license => 'http://www.gnu.org/licenses/gpl-2.0.html',
        },
     },
 );
diff --git a/README b/README
index b80a187..dbbe6f1 100644
--- a/README
+++ b/README
@@ -11,16 +11,13 @@ under which a source file is made available.
 This tool uses a source file as input and outputs the licenses
 identified within that file.
 
-If you need to know the detail of Ninka, please see the following
-paper:
+If you need to know the details of Ninka, please see the following paper:
 
 Daniel M. German, Yuki Manabe and Katsuro Inoue. A sentence-matching
 method for automatic license identification of source code files. In
 25nd IEEE/ACM International Conference on Automated Software
 Engineering (ASE 2010). You can email me (dmg@uvic.ca) for a copy or
-download it from
-
-http://turingmachine.org/~dmg/papers/dmg2010ninka.pdf
+download it from http://turingmachine.org/~dmg/papers/dmg2010ninka.pdf.
 
 If you use Ninka for research purposes, we would appreciate you cite
 the above paper.
@@ -28,13 +25,13 @@ the above paper.
 * Contributors
 
 - Paul Clough for his code to split sentences
-- Anthony Kohan for writing the excel and sqlite backends.
-- Armijn Hemel from Tjaldur Software Governance Solutions  for multiple bug reports and suggestions
+- Anthony Kohan for writing the excel and sqlite backends
+- Armijn Hemel from Tjaldur Software Governance Solutions for multiple bug reports and suggestions
+- René Scheibe for modularizing the code
 
 * License
 
-  Except for the directories comments and splitter, Ninka is licensed
-  under the GPLv2+
+  Ninka is licensed under the GPLv2+:
 
     Copyright (C) 2009-2014  Yuki Manabe and Daniel M. German
 
@@ -51,59 +48,41 @@ the above paper.
     You should have received a copy of the GNU General Public License
     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-  - splitter.pl is a derivative work of the Rule-based sentence
-    splitter script by Paul Paul Clough. Please see splitter/README
-    for details.
+  Ninka::SentenceExtractor is a derivative work of the rule-based sentence
+  splitter script by Paul Clough.
 
-  - comments is based on a program to remove comments by Jon Newman,
-    it is released under the GNU General Public License Version 2 or
-    (at your option) any later version.
+  comments is based on a program to remove comments by Jon Newman.
 
 * Requirements
 
 - Perl version 5 or above
-- for ninka-excel.pl: Perl module Spreadsheet::WriteExcel
-  https://metacpan.org/release/Spreadsheet-WriteExcel/
-- for ninka-sqlite.pl: Perl module DBD::SQLite
+- for ninka-excel: Perl module Spreadsheet::WriteExcel
+  https://metacpan.org/release/Spreadsheet-WriteExcel
+- for ninka-sqlite: Perl module DBD::SQLite
   https://metacpan.org/release/DBD-SQLite
 
 * How to install
 
   1. Unpack the distribution in a directory.
-  2. Optional: Build and install comments (make sure it is somwehere in the
-     path) (see directory comments)
-
+  2. Optional: Build and install comments (make sure it is somewhere in the path) (see directory comments)
 
-* Usage:
+* Usage
 
-Ninka uses a pipe model (see below). Each step of the "pipe" creates a
-file, but
+ninka [options] filename
 
-ninka.pl [options] [filename]
+Available options:
 
-Available options
+  -i create intermediary files
   -v verbose
-  -d delete intermediate files
-  -C force creation of comments file
-  -c stop after creation of comments
-  -S force creation of sentences file
-  -s stop after creation of sentences
-  -G force creation of goodsent file
-  -g stop after creation of goodsent
-  -T force creation of senttok file
-  -t stop after creation of senttok
-  -L force creation of license file
-  -f force all processing
-
 
 Example:
 
-   ninka.pl foo.c
+  ninka -i foo.c
 
 It will create five files:
 
-  1. foo.c.comments: extracted the first two comments blocks, where
-     the license is usually
+  1. foo.c.comments: contains the first comment blocks, where
+     the license is usually included
   2. foo.c.sentences: creates the list of sentences in the license
      statement
   3. foo.c.goodsent: contains sentences that are likely to be part of
@@ -117,69 +96,60 @@ It will create five files:
      - Licenses
      - Unmatched sentences in *.senttok that were not matched
 
-
-
+The files are not required for Ninka's functionality. But they can help
+to debug license detection issues.
 
 * Ninka model
 
 Ninka uses a pipe-model. Each stage of the pipe does something very specific:
 
- 1. Comment extractor.
+1. Comment extractor
 
-    - directory: extComments
+    - Module: Ninka::CommentExtractor
 
-    - command: extComments.pl, might use comments (included in distribution)
+    - Purpose: Extracts top comments of source code.
+               If no comment extractor is known for the language,
+               then extracts top lines from source (currently 700)
 
-    - Purpose: Extracts top comments of source code. If no
-          comment extractor is known for the language, then extracts top lines from source (currently 700)
-
-    - Creates <filename>.comments file
+    - Output: <filename>.comments
 
 2. Split sentences in comments
 
-     - directory: splitter
-
-     - command: splitter.pl
-
-     - Purpose: Ninka works by matching sentences of licenses, hence
-       it needs to properly break text into sentences.
-
-     - Outputs <filename>.sentences
-
-3. Filter "good" sentences.
+     - Module: Ninka::SentenceExtractor
 
-     - directory filter
+     - Purpose: Ninka works by matching sentences of licenses,
+                hence it needs to properly break text into sentences.
 
-     - command: filter.pl
+     - Output: <filename>.sentences
 
-     - Purpose: some sentences are related to a license, some are
-       not. It is valuable to know if a file contains lines that look
-       like a license or not (e.g. to know that a file has no license)
+3. Filter "good" sentences
 
-     - Outputs: <filename>.goodsent, and <filename>.badsent (not used)
+     - Module: Ninka::SentenceFilter
 
-4. Tokenizes sentences
+     - Purpose: Some sentences are related to a license, some are not.
+                It is valuable to know if a file contains lines that look like
+                a license or not (e.g. to know that a file has no license).
 
-     - Directory senttok
+     - Output: <filename>.goodsent and <filename>.badsent
 
-     - command: senttok.pl
+4. Tokenize sentences
 
-     - Purpose: It creates a file that corresponds to the recognized
-       sentence tokens. For each sentence, it outputs its sentence token, or unknown otherwise.
+     - Module: Ninka::SentenceTokenizer
 
-     - Outputs: <filename>.senttok
+     - Purpose: It creates a file that corresponds to the recognized sentence tokens.
+                For each sentence, it outputs its sentence token, or unknown otherwise.
 
-5. Matches sentences to licenses
+     - Output: <filename>.senttok
 
-     - Directory matcher
+5. Match sentences to licenses
 
-     - Command: matcher.pl
+     - Module: Ninka::LicenseMatcher
 
-     - Purpose: looks at the sequence of sentence tokens and outputs the licenses found
+     - Purpose: It looks at the sentence tokens and outputs the licenses found.
 
      - Output: <filename>.license
 
-The script ninka.pl takes care of all these steps, and optionally removes
+The script ninka takes care of all these steps, and optionally creates
 intermediary files, and writes to the stdout the licenses found.
 
 ------
diff --git a/bin/ninka b/bin/ninka
index 4732cbe..9cfd6aa 100755
--- a/bin/ninka
+++ b/bin/ninka
@@ -1,4 +1,4 @@
-#!/usr/bin/env perl
+#!/usr/bin/perl
 
 use strict;
 use warnings;
@@ -19,7 +19,7 @@ sub parse_cmdline_parameters {
     if (!getopts('iv', \%opts) || scalar(@ARGV) == 0) {
         print STDERR "Ninka v${Ninka::VERSION}
 
-Usage: $0 [options] <filename>
+Usage: ninka [options] <filename>
 
 Options:
   -i create intermediary files
@@ -32,29 +32,79 @@ Options:
 
 __END__
 
+=encoding utf8
+
 =head1 NAME
 
-ninka
+ninka - source file license identification tool
+
+=head1 SYNOPSIS
+
+B<ninka> [options] F<filename>
 
 =head1 DESCRIPTION
 
-Scans a file and returns the found licenses.
+Scans a source file and returns the found licenses.
+
+=head1 OPTIONS
+
+=over
+
+=item B<-i>
+
+create intermediary files (for debugging)
+
+=item B<-v>
+
+verbose
+
+=back
+
+=head1 EXAMPLES
+
+=over
+
+=item B<ninka> F<foo.c>
+
+Determine the licenses in file F<foo.c>.
+
+=item B<ninka -i> F<foo.c>
+
+Determine the licenses in file F<foo.c> and create intermediary files (for debugging).
+
+=item find * | xargs -n1 -I@ B<ninka> '@'
+
+Determine the licenses of files in a directory.
+
+=back
+
+=head1 AUTHOR
+
+B<ninka> was written by Daniel M. German <dmg@uvic.ca> and Yuki Manabe <y-manabe@ist.osaka-u.ac.jp>.
+
+=head1 SEE ALSO
+
+Daniel M. German, Yuki Manabe and Katsuro Inoue. A sentence-matching method
+for automatic license identification of source code files. In 25th IEEE/ACM
+International Conference on Automated Software Engineering (ASE 2010).
+
+You can download it from http://turingmachine.org/~dmg/papers/dmg2010ninka.pdf.
 
 =head1 COPYRIGHT AND LICENSE
 
-Copyright (C) 2009-2014  Yuki Manabe and Daniel M. German
+Copyright (C) 2009-2014  Yuki Manabe and Daniel M. German, 2015 René Scheibe
 
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as
-published by the Free Software Foundation, either version 3 of the
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the
 License, or (at your option) any later version.
 
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU Affero General Public License for more details.
+GNU General Public License for more details.
 
-You should have received a copy of the GNU Affero General Public License
+You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 =cut
diff --git a/ninka-excel.pl b/bin/ninka-excel
index 71adddf..2e2d35d 100755
--- a/ninka-excel.pl
+++ b/bin/ninka-excel
@@ -17,20 +17,17 @@
 #
 
 use strict;
-use Switch;
 use File::Temp;
 use File::Find;
 use File::Basename;
-use Scalar::Util qw(looks_like_number);
+use Ninka;
 use Spreadsheet::WriteExcel;
 
-
-
-if(scalar(@ARGV) != 2){
-    print STDERR "Ninka 1.3. sqlite wrapper\n";
+if (scalar(@ARGV) != 2) {
+    print STDERR "Ninka v${Ninka::VERSION}. excel wrapper\n";
     print STDERR "Processes package file (.tar.gz, zip, jar. etc) and outputs to excel file\n";
     print STDERR "Incorrect number of arguments\n";
-    print STDERR "Correct usage is: $0 <path to package file> <excel-file>\n";
+    print STDERR "Usage: $0 <path to package file> <excel-file>\n";
     exit 1;
 }
 
@@ -85,11 +82,10 @@ print "***** Beginning Execution of Ninka *****\n";
 foreach my $file (@files) {
     if (-T $file) {
 	print "Running ninka on file [$file]\n";
-	execute("perl ${path}/ninka.pl '$file'");
+	execute("perl ${path}/ninka -i '$file'");
     }
 }
 
-
 print "***** Entering Ninka Data into excell file [$excelFile] *****\n";
 my $row = 1;
 
@@ -116,7 +112,6 @@ foreach my $file (@files) {
 
 	my @columns = parseLicenseData($filedata);
 
-
 	my $originalFile = $file;
 	$originalFile =~ s/\.license$//;
 
diff --git a/ninka-sqlite.pl b/bin/ninka-sqlite
index d53f60f..6b27ea9 100755
--- a/ninka-sqlite.pl
+++ b/bin/ninka-sqlite
@@ -17,20 +17,17 @@
 #
 
 use strict;
-use Switch;
 use DBI;
 use File::Temp;
 use File::Find;
 use File::Basename;
-use Scalar::Util qw(looks_like_number);
+use Ninka;
 
-
-
-if(scalar(@ARGV) != 2){
-    print STDERR "Ninka 1.3. sqlite wrapper\n";
+if (scalar(@ARGV) != 2) {
+    print STDERR "Ninka v${Ninka::VERSION}. sqlite wrapper\n";
     print STDERR "Processes package file (.tar.gz, zip, jar. etc) and outputs to sqlite file\n";
     print STDERR "Incorrect number of arguments\n";
-    print STDERR "Correct usage is: $0 <path to package file> <database name>\n";
+    print STDERR "Usage: $0 <path to package file> <database name>\n";
     exit 1;
 }
 
@@ -88,7 +85,7 @@ find(
 print "***** Beginning Execution of Ninka *****\n";
 foreach my $file (@files) {
     print "Running ninka on file [$file]\n";
-    execute("perl ${path}/ninka.pl '$file'");
+    execute("perl ${path}/ninka -i '$file'");
 }
 
 my @ninkafiles;
@@ -116,44 +113,46 @@ foreach my $file (@ninkafiles) {
     my $filedata = do { local $/; <$fh> };
 
     my $sth;
-    switch (getExtension($basefile)){
-
-#	case ".comments" {
-#	    print "Inserting [$basefile] into table comments\n";
-#	    $sth = $dbh->prepare("INSERT INTO comments VALUES
-#                                  ('$rootfile', '$filepath', '$packname', ?)");
-#	}
-	case ".sentences" {
-	    print "Inserting [$basefile] into table sentences\n";
-	    $sth = $dbh->prepare("INSERT INTO sentences VALUES
-                                  ('$rootfile', '$filepath', '$packname', ?)");
-	}
-	case ".goodsent" {
-	    print "Inserting [$basefile] into table goodsents\n";
-	    $sth = $dbh->prepare("INSERT INTO goodsents VALUES
-                                  ('$rootfile', '$filepath', '$packname', ?)");
-	}
-	case ".badsent" {
-	    print "Inserting [$basefile] into table goodsents\n";
-	    $sth = $dbh->prepare("INSERT INTO badsents VALUES
-                                  ('$rootfile', '$filepath', '$packname', ?)");
-	}
-	case ".senttok" {
-	    print "Inserting [$basefile] into table senttoks\n";
-	    $sth = $dbh->prepare("INSERT INTO senttoks VALUES
-                                  ('$rootfile', '$filepath', '$packname', ?)");
-	}
-	case ".license" {
-	    print "Inserting [$basefile] into table licenses\n";
-	    my @columns = parseLicenseData($filedata);
-	    $sth = $dbh->prepare("INSERT INTO licenses VALUES
-                                  ('$rootfile', '$filepath', '$packname', '$columns[0]', '$columns[1]',
-                                   '$columns[2]', '$columns[3]', '$columns[4]', '$columns[5]', '$columns[6]')");
-	}
+    my $ext = getExtension($basefile);
+
+    if ($ext eq ".comments") {
+        print "Inserting [$basefile] into table comments\n";
+        $sth = $dbh->prepare("INSERT INTO comments VALUES
+               ('$rootfile', '$filepath', '$packname', ?)");
+    }
+    if ($ext eq ".sentences") {
+        print "Inserting [$basefile] into table sentences\n";
+        $sth = $dbh->prepare("INSERT INTO sentences VALUES
+               ('$rootfile', '$filepath', '$packname', ?)");
+    }
+    if ($ext eq ".goodsent") {
+        print "Inserting [$basefile] into table goodsents\n";
+        $sth = $dbh->prepare("INSERT INTO goodsents VALUES
+               ('$rootfile', '$filepath', '$packname', ?)");
+    }
+    if ($ext eq ".badsent") {
+        print "Inserting [$basefile] into table badsents\n";
+        $sth = $dbh->prepare("INSERT INTO badsents VALUES
+               ('$rootfile', '$filepath', '$packname', ?)");
+    }
+    if ($ext eq ".senttok") {
+        print "Inserting [$basefile] into table senttoks\n";
+        $sth = $dbh->prepare("INSERT INTO senttoks VALUES
+               ('$rootfile', '$filepath', '$packname', ?)");
+    }
+    if ($ext eq ".license") {
+        print "Inserting [$basefile] into table licenses\n";
+        my @columns = parseLicenseData($filedata);
+        $sth = $dbh->prepare("INSERT INTO licenses VALUES
+               ('$rootfile', '$filepath', '$packname', '$columns[0]', '$columns[1]',
+                '$columns[2]', '$columns[3]', '$columns[4]', '$columns[5]', '$columns[6]')");
+    }
+
+    if (defined $sth) {
+        $sth->bind_param(1, $filedata);
+        $sth->execute;
     }
 
-    $sth->bind_param(1, $filedata);
-    $sth->execute;
     close($fh);
 }
 
diff --git a/lib/Ninka.pm b/lib/Ninka.pm
index dc9fbff..8f454cd 100644
--- a/lib/Ninka.pm
+++ b/lib/Ninka.pm
@@ -68,7 +68,7 @@ __END__
 
 =head1 NAME
 
-Ninka - Find licenses in source files.
+Ninka - source file license identification tool
 
 =head1 SYNOPSIS
 
@@ -82,23 +82,23 @@ Ninka - Find licenses in source files.
 
 =head1 DESCRIPTION
 
-Scans a file and returns the found licenses.
+Scans a source file and returns the found licenses.
 
 =head1 COPYRIGHT AND LICENSE
 
-    Copyright (C) 2009-2014  Yuki Manabe and Daniel M. German
+Copyright (C) 2009-2014  Yuki Manabe and Daniel M. German
 
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as
-    published by the Free Software Foundation; either version 2 of the
-    License, or (at your option) any later version.
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
 
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
 
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 =cut
diff --git a/lib/Ninka/CommentExtractor.pm b/lib/Ninka/CommentExtractor.pm
index fd62c02..8a2e66f 100644
--- a/lib/Ninka/CommentExtractor.pm
+++ b/lib/Ninka/CommentExtractor.pm
@@ -93,19 +93,19 @@ If no comment extractor is known for a language, then extracts top lines from so
 
 =head1 COPYRIGHT AND LICENSE
 
-    Copyright (C) 2009-2014  Yuki Manabe and Daniel M. German
+Copyright (C) 2009-2014  Yuki Manabe and Daniel M. German
 
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as
-    published by the Free Software Foundation; either version 2 of the
-    License, or (at your option) any later version.
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
 
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
 
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-=cut
-\ No newline at end of file
+=cut
diff --git a/lib/Ninka/FileCleaner.pm b/lib/Ninka/FileCleaner.pm
index c3dd912..825b1fb 100644
--- a/lib/Ninka/FileCleaner.pm
+++ b/lib/Ninka/FileCleaner.pm
@@ -49,19 +49,19 @@ Escapes apostrophes and other potentially disturbing characters
 
 =head1 COPYRIGHT AND LICENSE
 
-    Copyright (C) 2009-2014  Yuki Manabe and Daniel M. German, 2015 Daniele Fognini and Johannes Najjar
+Copyright (C) 2009-2014  Yuki Manabe and Daniel M. German, 2015 Daniele Fognini and Johannes Najjar
 
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as
-    published by the Free Software Foundation; either version 2 of the
-    License, or (at your option) any later version.
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
 
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
 
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-=cut
-\ No newline at end of file
+=cut
diff --git a/lib/Ninka/LicenseMatcher.pm b/lib/Ninka/LicenseMatcher.pm
index 1cb402a..ba73b26 100644
--- a/lib/Ninka/LicenseMatcher.pm
+++ b/lib/Ninka/LicenseMatcher.pm
@@ -261,19 +261,19 @@ Uses a set of license sentence names as input and outputs license names correspo
 
 =head1 COPYRIGHT AND LICENSE
 
-    Copyright (C) 2009-2014  Yuki Manabe and Daniel M. German
+Copyright (C) 2009-2014  Yuki Manabe and Daniel M. German
 
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as
-    published by the Free Software Foundation; either version 2 of the
-    License, or (at your option) any later version.
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
 
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
 
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 =cut
diff --git a/lib/Ninka/LicenseRules.pm b/lib/Ninka/LicenseRules.pm
index c7810c8..ee5af4b 100644
--- a/lib/Ninka/LicenseRules.pm
+++ b/lib/Ninka/LicenseRules.pm
@@ -96,19 +96,19 @@ Contains rules used by Ninka::LicenseMatcher.
 
 =head1 COPYRIGHT AND LICENSE
 
-    Copyright (C) 2009-2014  Yuki Manabe and Daniel M. German
+Copyright (C) 2009-2014  Yuki Manabe and Daniel M. German
 
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as
-    published by the Free Software Foundation; either version 2 of the
-    License, or (at your option) any later version.
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
 
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
 
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 =cut
diff --git a/lib/Ninka/README.txt b/lib/Ninka/README.txt
deleted file mode 100644
index 2ca7f46..0000000
--- a/lib/Ninka/README.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-This list of abbreviations was extracted from SCOWL (Spell Checker Oriented Word Lists) by by Kevin Atkinson (kevina@gnu.org) version 2015.04.24.
-
-Specifically it was created from scowl-2015.04.24.tar.gz. by running:
-
-cat *abbrev* | sort -u > abbreviations.dict
-
-It also contains some additions by D.M German.
-
-See Copyright.scowl for license.
diff --git a/lib/Ninka/SentenceExtractor.pm b/lib/Ninka/SentenceExtractor.pm
index c27d199..6aeee4e 100644
--- a/lib/Ninka/SentenceExtractor.pm
+++ b/lib/Ninka/SentenceExtractor.pm
@@ -93,7 +93,7 @@ sub execute {
                 $count2++ if ($c ge 'A' && $c le 'z');
             }
             my $clean_sentence = clean_sentence($sentence);
-            push @clean_sentences, $clean_sentence if $clean_sentence,
+            push @clean_sentences, $clean_sentence if $clean_sentence;
         }
 
         if ($count1 != $count2) {
@@ -251,6 +251,19 @@ Ninka::SentenceExtractor
 
 Breaks comments into sentences.
 
+=head1 NOTES
+
+This list of abbreviations was extracted from SCOWL (Spell Checker Oriented Word Lists)
+by Kevin Atkinson (kevina@gnu.org) version 2015.04.24.
+
+Specifically it was created from scowl-2015.04.24.tar.gz by running:
+
+    cat *abbrev* | sort -u > abbreviations.dict
+
+It also contains some additions by D.M German.
+
+See Copyright.SCOWL for license.
+
 =head1 COPYRIGHT AND LICENSE
 
 Author: Paul Clough
diff --git a/man/ninka.1 b/man/ninka.1
deleted file mode 100644
index 9cd2d57..0000000
--- a/man/ninka.1
+++ /dev/null
@@ -1,83 +0,0 @@
-.TH NINKA 1.3 "May 2015" ninka
-.SH NAME
-ninka \- source file license identification tool
-.SH SYNOPSYS
-.SY ninka
-.OP \-vfCcSsGgTtLd
-.OP \-\-
-.RI [ file ]
-.YS 
-
-.SH DESCRIPTION
-
-Analyses source files to determine the license they fall under. Takes a source
-file as input and outputs the file's license.
-
-.SH OPTIONS
-
-.IP \-v
-verbose
-
-.IP \-f
-force all processing
-
-.IP \-C
-force creation of comments
-.IP \-c
-stop after creation of comments
-
-.IP \-S
-force creation of sentences
-.IP \-s
-stop after creation of sentences
-
-.IP \-G
-force creation of goodsent
-.IP \-g
-stop after creation of goodsent
-
-.IP \-T
-force creation of senttok
-.IP \-t
-stop after creation of senttok
-
-.IP \-L
-force creation of matching
-
-.IP \-d
-delete intermediate files
-
-.IP \-\-
-Stop processing options
-
-.SH EXAMPLES
-
-.TP
-\fBninka\fR \fIfoo.c\fR
-Determine the licenses in file foo.c
-
-.TP
-.BI ninka\ \-d \ foo.c
-Determine the license in file foo.c and delete intermediary files
-
-.TP
-find * | xargs \-n1 \-I@ \fBninka\fR '@'
-Determine the licenses of files in a directory.
-
-
-.SH AUTHOR
-
-\fBninka\fR was written by Daniel M. German <dmg@uvic.ca> and Yuki Manabe
-<y-manabe@ist.osaka-u.ac.jp>. ninka itself is licensed under the AGPLv3+. This
-manpage was written by Ryan Kavanagh <ryanakca@kubuntu.org> for the Debian
-project and is also licensed under the AGPLv3+.
-
-.SH SEE ALSO
-
-Daniel M. German, Yuki Manabe and Katsuro Inoue. A sentence-matching method
-for automatic license identification of source code files. In 25nd IEEE/ACM
-International Conference on Automated Software Engineering (ASE 2010).
-
-You can email Daniel M. German <dmg@uvic.ca> for a copy or download it from
-.UR http://turingmachine.org/~dmg/papers/dmg2010ninka.pdf
-.UE
diff --git a/scripts/license_matcher_modified.pl b/scripts/license_matcher_modified.pl
index 53b1732..53b1732 100644..100755
--- a/scripts/license_matcher_modified.pl
+++ b/scripts/license_matcher_modified.pl
diff --git a/scripts/parseLicense.pl b/scripts/parseLicense.pl
index 2dc61ef..2dc61ef 100644..100755
--- a/scripts/parseLicense.pl
+++ b/scripts/parseLicense.pl
diff --git a/scripts/sort_package_license_list.pl b/scripts/sort_package_license_list.pl
index cd20b50..cd20b50 100644..100755
--- a/scripts/sort_package_license_list.pl
+++ b/scripts/sort_package_license_list.pl
diff --git a/scripts/unify.pl b/scripts/unify.pl
index f518fbb..f518fbb 100644..100755
--- a/scripts/unify.pl
+++ b/scripts/unify.pl
diff --git a/t/pod_ok.t b/t/pod_ok.t
new file mode 100644
index 0000000..57b423a
--- /dev/null
+++ b/t/pod_ok.t
@@ -0,0 +1,5 @@
+use strict;
+use warnings;
+use Test::Pod;
+
+all_pod_files_ok();
diff --git a/unify.pl b/unify.pl
deleted file mode 100644
index f518fbb..0000000
--- a/unify.pl
+++ /dev/null
@@ -1,161 +0,0 @@
-#!/usr/bin/perl
-
-# first pass, unify names of licenses and remove duplicates.
-
-# we trick regarding gpl related licenses so they are "clustered" together..
-#
-# replace GPL with __GPL
-# replace exception in the text with ___exception
-
-use strict;
-
-my %equiv = (
-    "boostV1Ref" => "boostV1",
-    "X11" => "X11mit",
-    "X11Festival" => "X11mit",
-    "X11mitNoSellNoDocDocBSDvar" => "X11mit",
-    "X11mitwithoutSell" => 'X11mit',
-    "X11mitBSDvar" => "X11mit",
-    "X11mitwithoutSellCMUVariant" => "X11mit",
-    "X11mitwithoutSellCMUVariant" => "X11mit",
-    "X11mitwithoutSellandNoDocumentationRequi" => "X11mit",
-    "MITvar3" => "X11mit",
-    "MITvar2" => "X11mit",
-    "MIT" => "X11mit",
-    "ZLIBref" => "ZLIB",
-    "BSD3NoWarranty" => "BSD3",
-    "BSD2EndorseInsteadOfBinary" => "BSD2",
-    "BSD2var2" => "BSD2",
-    "LesserGPLv2" => "LibraryGPLv2",
-    "LesserGPLv2+"  => "LibraryGPLv2+",
-    "orLGPLVer2.1" => "LesserGPLVer2.1",
-    "postgresqlRef" => "postgresql",
-    );
-
-while (<>) {
-    chomp;
-    my @f = split(/;/);
-    # first remove duplicates
-
-    my $l = $f[1];
-
-    # do a simple rewriting of this exception which is an incomplete license
-
-    $l =~ s/^Exception$/UNKNOWN/;
-
-    my @l = split(/,/,$l);
-    my %lics =  %{{ map { $_ => 1 } @l }};
-
-    %lics = Do_Equivalent(%lics);
-    %lics = Remove_Redundant(%lics);
-    %lics = Do_Exceptions(%lics);
-
-    my @out = sort keys %lics;
-
-    my $t = join(',', @out);
-    if ($t eq "") {
-	$t = "UNKNOWN";
-    }
-    print $f[0], ";$t\n";
-}
-
-sub Do_Exceptions
-{
-    my (%lics) = @_;
-
-    if ($lics{'digiaQTExceptionNoticeVer1.1'} ne '' and $lics{'Qt'}) {
-	delete $lics{'digiaQTExceptionNoticeVer1.1'};
-	delete $lics{'Qt'};
-	$lics{'Qt-qtExcep'} = 'Qt-qtExcep';
-    }
-    if ($lics{'BisonException'} ne "" and $lics{"GPLv3+"} ne "") {
-	delete $lics{'BisonException'};
-	delete $lics{"GPLv3+"};
-	$lics{'GPLv3+-bisonExcep'} = 'GPLv3+-bisonExcep';
-    }
-    if ($lics{'BisonException'} ne "" and $lics{"GPLv2+"} ne "") {
-	delete $lics{'BisonException'};
-	delete $lics{"GPLv2+"};
-	$lics{'GPLv2+-bisonExcep'} = 'GPLv2+-bisonExcep';
-    }
-    if ($lics{'BisonException'} ne "" and $lics{"GPLv2"} ne "") {
-	delete $lics{'BisonException'};
-	delete $lics{"GPLv2"};
-	$lics{'GPLv2-bisonExcep'} = 'GPLv2-bisonExcep';
-    }
-    if ($lics{'ClassPathException'} ne "" and $lics{"GPLv2"} ne "") {
-	delete $lics{'ClassPathException'};
-	delete $lics{"GPLv2"};
-	$lics{"GPLv2-classPathExcep"} = "GPLv2-classPathExcep";
-    }
-    if ($lics{'CDDLorGPLv2'} ne "" and $lics{"ClassPathExceptionGPLv2"} ne "") {
-	delete $lics{'CDDLorGPLv2'};
-	delete $lics{"ClassPathExceptionGPLv2"};
-	$lics{'CDDLorGPLv2-classPathExcep'} = 'CDDLorGPLv2-classPathExcep';
-    }
-    if ($lics{'LinkException'} ne "" and $lics{"GPLv3+"} ne "") {
-	delete $lics{'LinkException'};
-	delete $lics{"GPLv3+"};
-	$lics{'GPLv3+-linkExcep'} = 'GPLv3+-linkExcep';
-    }
-    if ($lics{'LinkException'} ne "" and $lics{"GPLv2+"} ne "") {
-	delete $lics{'LinkException'};
-	delete $lics{"GPLv2+"};
-	$lics{'GPLv2+-linkExcep'} = 'GPLv2+-linkExcep';
-    }
-    if ($lics{'LinkException'} ne "" and $lics{"GPLv3"} ne "") {
-	delete $lics{'LinkException'};
-	delete $lics{"GPLv3"};
-	$lics{'GPLv3-linkExcep'} = 'GPLv3-linkExcep';
-    }
-    if ($lics{'LinkException'} ne "" and $lics{"GPLv2"} ne "") {
-	delete $lics{'LinkException'};
-	delete $lics{"GPLv2"};
-	$lics{'GPLv2-linkExcep'} = 'GPLv2-linkExcep';
-    }
-
-    return %lics;
-
-}
-
-sub Remove_Redundant
-{
-    my (%lics) = @_;
-
-    if ($lics{"GPLnoVersion"} ne "" and $lics{"GPLv2"} . $lics{"GPLv2+"} .$lics{"GPLv3"} . $lics{"GPLv3+"} ne "") {
-	delete $lics{"GPLnoVersion"};
-    }
-    if ($lics{"GPLv2+"} ne "" and $lics{"GPLv3+"} ne "") {
-	delete $lics{"GPLv2+"};
-    }
-    if ($lics{'MPL1_1andLGPLv2_1'} ne "" and $lics{"MPLv1_1"} ne "") {
-	delete $lics{"MPLv1_1"};
-    }
-
-
-    return %lics;
-
-}
-
-sub Do_Equivalent
-{
-    my (%lics) = @_;
-    my %outA;
-
-    # then normalize licenses
-    foreach my $a (keys %lics) {
-	next if $a eq "SeeFile";
-	if ($equiv{$a} ne "") {
-	    $outA{$equiv{$a}} = $equiv{$a};
-	}  else {
-	    $outA{$a} = $a;
-	}
-    }
-    return %outA;
-
-}
-
-
-sub uniq {
-    return keys %{{ map { $_ => 1 } @_ }};
-}