diff options
-rw-r--r-- | Changes | 2 | ||||
-rw-r--r-- | Copyright.SCOWL (renamed from lib/Ninka/Copyright.SCOWL) | 0 | ||||
-rw-r--r-- | MANIFEST | 9 | ||||
-rw-r--r-- | Makefile.PL | 11 | ||||
-rw-r--r-- | README | 124 | ||||
-rwxr-xr-x | bin/ninka | 70 | ||||
-rwxr-xr-x | bin/ninka-excel (renamed from ninka-excel.pl) | 15 | ||||
-rwxr-xr-x | bin/ninka-sqlite (renamed from ninka-sqlite.pl) | 87 | ||||
-rw-r--r-- | lib/Ninka.pm | 26 | ||||
-rw-r--r-- | lib/Ninka/CommentExtractor.pm | 24 | ||||
-rw-r--r-- | lib/Ninka/FileCleaner.pm | 24 | ||||
-rw-r--r-- | lib/Ninka/LicenseMatcher.pm | 22 | ||||
-rw-r--r-- | lib/Ninka/LicenseRules.pm | 22 | ||||
-rw-r--r-- | lib/Ninka/README.txt | 9 | ||||
-rw-r--r-- | lib/Ninka/SentenceExtractor.pm | 15 | ||||
-rw-r--r-- | man/ninka.1 | 83 | ||||
-rwxr-xr-x[-rw-r--r--] | scripts/license_matcher_modified.pl | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | scripts/parseLicense.pl | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | scripts/sort_package_license_list.pl | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | scripts/unify.pl | 0 | ||||
-rw-r--r-- | t/pod_ok.t | 5 | ||||
-rw-r--r-- | unify.pl | 161 |
22 files changed, 252 insertions, 457 deletions
@@ -22,7 +22,7 @@ * ninka.pl: fixed bug in finding the path of where ninka was being executed from (reported by Ryan Biesemeyer) - * Fixed quotes in perl (René bScheibe) + * Fixed quotes in perl (René Scheibe) 2015-01-05 dmg <dmg@uvic.ca> diff --git a/lib/Ninka/Copyright.SCOWL b/Copyright.SCOWL index a4654a3..a4654a3 100644 --- a/lib/Ninka/Copyright.SCOWL +++ b/Copyright.SCOWL @@ -1,9 +1,13 @@ bin/ninka +bin/ninka-excel +bin/ninka-sqlite Changes +Copyright.SCOWL lib/Ninka.pm lib/Ninka/abbreviations.dict lib/Ninka/CommentExtractor.pm lib/Ninka/criticalwords.dict +lib/Ninka/FileCleaner.pm lib/Ninka/interrules.dict lib/Ninka/LicenseMatcher.pm lib/Ninka/LicenseRules.pm @@ -17,6 +21,10 @@ Makefile.PL MANIFEST MANIFEST.SKIP README +scripts/license_matcher_modified.pl +scripts/parseLicense.pl +scripts/sort_package_license_list.pl +scripts/unify.pl t/data/expected_output/AAL t/data/expected_output/AFL-1.1 t/data/expected_output/AFL-1.2 @@ -209,5 +217,6 @@ t/data/licenses/OSL-2.1 t/data/licenses/OSL-3.0 t/data/licenses/PRESERVE_COPYRIGHT_NOTICE t/data/licenses/Public-domain +t/pod_ok.t t/reference_licenses.t t/syntax_ok_and_use_strict.t diff --git a/Makefile.PL b/Makefile.PL index af4730c..b29cf02 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -1,12 +1,13 @@ use strict; use warnings; +use utf8; use ExtUtils::MakeMaker; WriteMakefile( NAME => 'Ninka', VERSION_FROM => 'lib/Ninka.pm', ABSTRACT_FROM => 'lib/Ninka.pm', - LICENSE => 'agpl_3', + LICENSE => 'gpl_2', AUTHOR => [ 'Daniel M. 
German <dmg@uvic.ca>', 'Yuki Manabe <y-manabe@ist.osaka-u.ac.jp>', @@ -17,21 +18,27 @@ WriteMakefile( 'ExtUtils::MakeMaker' => '6.52', }, PREREQ_PM => { + 'DBI' => '0', + 'DBD::SQLite' => '0', 'File::Basename' => '0', + 'File::Find' => '0', 'File::Spec::Functions' => '0', + 'File::Temp' => '0', 'Getopt::Std' => '0', 'IPC::Open3' => '0', + 'Spreadsheet::WriteExcel' => '0', }, TEST_REQUIRES => { 'File::Temp' => '0', 'Test::More' => '0.98', + 'Test::Pod' => '1.00', 'Test::Strict' => '0', }, META_MERGE => { resources => { homepage => 'http://ninka.turingmachine.org/', repository => 'https://github.com/dmgerman/ninka', - license => 'http://www.gnu.org/licenses/agpl-3.0.html', + license => 'http://www.gnu.org/licenses/gpl-2.0.html', }, }, ); @@ -11,16 +11,13 @@ under which a source file is made available. This tool uses a source file as input and outputs the licenses identified within that file. -If you need to know the detail of Ninka, please see the following -paper: +If you need to know the detail of Ninka, please see the following paper: Daniel M. German, Yuki Manabe and Katsuro Inoue. A sentence-matching method for automatic license identification of source code files. In 25nd IEEE/ACM International Conference on Automated Software Engineering (ASE 2010). You can email me (dmg@uvic.ca) for a copy or -download it from - -http://turingmachine.org/~dmg/papers/dmg2010ninka.pdf +download it from http://turingmachine.org/~dmg/papers/dmg2010ninka.pdf. If you use Ninka for research purposes, we would appreciate you cite the above paper. @@ -28,13 +25,13 @@ the above paper. * Contributors - Paul Clough for his code to split sentences -- Anthony Kohan for writing the excel and sqlite backends. 
-- Armijn Hemel from Tjaldur Software Governance Solutions for multiple bug reports and suggestions +- Anthony Kohan for writing the excel and sqlite backends +- Armijn Hemel from Tjaldur Software Governance Solutions for multiple bug reports and suggestions +- René Scheibe for modularizing the code * License - Except for the directories comments and splitter, Ninka is licensed - under the GPLv2+ + Ninka is licensed under the GPLv2+: Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German @@ -51,59 +48,41 @@ the above paper. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. - - splitter.pl is a derivative work of the Rule-based sentence - splitter script by Paul Paul Clough. Please see splitter/README - for details. + Ninka::SentenceExtractor is a derivative work of the rule-based sentence + splitter script by Paul Clough. - - comments is based on a program to remove comments by Jon Newman, - it is released under the GNU General Public License Version 2 or - (at your option) any later version. + comments is based on a program to remove comments by Jon Newman. * Requirements - Perl version 5 or above -- for ninka-excel.pl: Perl module Spreadsheet::WriteExcel - https://metacpan.org/release/Spreadsheet-WriteExcel/ -- for ninka-sqlite.pl: Perl module DBD::SQLite +- for ninka-excel: Perl module Spreadsheet::WriteExcel + https://metacpan.org/release/Spreadsheet-WriteExcel +- for ninka-sqlite: Perl module DBD::SQLite https://metacpan.org/release/DBD-SQLite * How to install 1. Unpack the distribution in a directory. - 2. Optional: Build and install comments (make sure it is somwehere in the - path) (see directory comments) - + 2. Optional: Build and install comments (make sure it is somewhere in the path) (see directory comments) -* Usage: +* Usage -Ninka uses a pipe model (see below). 
Each step of the "pipe" creates a -file, but +ninka [options] filename -ninka.pl [options] [filename] +Available options: -Available options + -i create intermediary files -v verbose - -d delete intermediate files - -C force creation of comments file - -c stop after creation of comments - -S force creation of sentences file - -s stop after creation of sentences - -G force creation of goodsent file - -g stop after creation of goodsent - -T force creation of senttok file - -t stop after creation of senttok - -L force creation of license file - -f force all processing - Example: - ninka.pl foo.c + ninka -i foo.c It will create five files: - 1. foo.c.comments: extracted the first two comments blocks, where - the license is usually + 1. foo.c.comments: extracted the first comments blocks, where + the license is usually included 2. foo.c.sentences: creates the list of sentences in the license statement 3. foo.c.goodsent: contains sentences that are likely to be part of @@ -117,69 +96,60 @@ It will create five files: - Licenses - Unmatched sentences in *.senttok that were not matched - - +The files are not required for Ninka's functionality. But they can help +to debug license detection issues. * Ninka model Ninka uses a pipe-model. Each stage of the pipe does something very specific: - 1. Comment extractor. +1. Comment extractor - - directory: extComments + - Module: Ninka::CommentExtractor - - command: extComments.pl, might use comments (included in distribution) + - Purpose: Extracts top comments of source code. + If no comment extractor is known for the language, + then extracts top lines from source (currently 700) - - Purpose: Extracts top comments of source code. If no - comment extractor is known for the language, then extracts top lines from source (currently 700) - - - Creates <filename>.comments file + - Output: <filename>.comments 2. 
Split sentences in comments - - directory: splitter - - - command: splitter.pl - - - Purpose: Ninka works by matching sentences of licenses, hence - it needs to properly break text into sentences. - - - Outputs <filename>.sentences - -3. Filter "good" sentences. + - Module: Ninka::SentenceExtractor - - directory filter + - Purpose: Ninka works by matching sentences of licenses, + hence it needs to properly break text into sentences. - - command: filter.pl + - Output: <filename>.sentences - - Purpose: some sentences are related to a license, some are - not. It is valuable to know if a file contains lines that look - like a license or not (e.g. to know that a file has no license) +3. Filter "good" sentences - - Outputs: <filename>.goodsent, and <filename>.badsent (not used) + - Module: Ninka::SentenceFilter -4. Tokenizes sentences + - Purpose: Some sentences are related to a license, some are not. + It is valuable to know if a file contains lines that look like + a license or not (e.g. to know that a file has no license). - - Directory senttok + - Output: <filename>.goodsent and <filename>.badsent - - command: senttok.pl +4. Tokenize sentences - - Purpose: It creates a file that corresponds to the recognized - sentence tokens. For each sentence, it outputs its sentence token, or unknown otherwise. + - Module: Ninka::SentenceTokenizer - - Outputs: <filename>.senttok + - Purpose: It creates a file that corresponds to the recognized sentence tokens. + For each sentence, it outputs its sentence token, or unknown otherwise. -5. Matches sentences to licenses + - Output: <filename>.senttok - - Directory matcher +5. Match sentences to licenses - - Command: matcher.pl + - Module: Ninka::LicenseMatcher - - Purpose: looks at the sequence of sentence tokens and outputs the licenses found + - Purpose: It looks at the sentence tokens and outputs the licenses found. 
- Output: <filename>.license -The script ninka.pl takes care of all these steps, and optionally removes +The script ninka takes care of all these steps, and optionally creates intermediary files, and writes to the stdout the licenses found. ------ @@ -1,4 +1,4 @@ -#!/usr/bin/env perl +#!/usr/bin/perl use strict; use warnings; @@ -19,7 +19,7 @@ sub parse_cmdline_parameters { if (!getopts('iv', \%opts) || scalar(@ARGV) == 0) { print STDERR "Ninka v${Ninka::VERSION} -Usage: $0 [options] <filename> +Usage: ninka [options] <filename> Options: -i create intermediary files @@ -32,29 +32,79 @@ Options: __END__ +=encoding utf8 + =head1 NAME -ninka +ninka - source file license identification tool + +=head1 SYNOPSIS + +B<ninka> [options] F<filename> =head1 DESCRIPTION -Scans a file and returns the found licenses. +Scans a source file and returns the found licenses. + +=head1 OPTIONS + +=over + +=item B<-i> + +create intermediary files (for debugging) + +=item B<-v> + +verbose + +=back + +=head1 EXAMPLES + +=over + +=item B<ninka> F<foo.c> + +Determine the licenses in file F<foo.c>. + +=item B<ninka -i> F<foo.c> + +Determine the licenses in file F<foo.c> and create intermediary files (for debugging). + +=item find * | xargs -n1 -I@ B<ninka> '@' + +Determine the licenses of files in a directory. + +=back + +=head1 AUTHOR + +B<ninka> was written by Daniel M. German <dmg@uvic.ca> and Yuki Manabe <y-manabe@ist.osaka-u.ac.jp>. + +=head1 SEE ALSO + +Daniel M. German, Yuki Manabe and Katsuro Inoue. A sentence-matching method +for automatic license identification of source code files. In 25th IEEE/ACM +International Conference on Automated Software Engineering (ASE 2010). + +You can download it from http://turingmachine.org/~dmg/papers/dmg2010ninka.pdf. =head1 COPYRIGHT AND LICENSE -Copyright (C) 2009-2014 Yuki Manabe and Daniel M. 
German, 2015 René Scheibe -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as -published by the Free Software Foundation, either version 3 of the +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. +GNU General Public License for more details. -You should have received a copy of the GNU Affero General Public License +You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. =cut diff --git a/ninka-excel.pl b/bin/ninka-excel index 71adddf..2e2d35d 100755 --- a/ninka-excel.pl +++ b/bin/ninka-excel @@ -17,20 +17,17 @@ # use strict; -use Switch; use File::Temp; use File::Find; use File::Basename; -use Scalar::Util qw(looks_like_number); +use Ninka; use Spreadsheet::WriteExcel; - - -if(scalar(@ARGV) != 2){ - print STDERR "Ninka 1.3. sqlite wrapper\n"; +if (scalar(@ARGV) != 2) { + print STDERR "Ninka v${Ninka::VERSION}. sqlite wrapper\n"; print STDERR "Processes package file (.tar.gz, zip, jar. 
etc) and outputs to excel file\n"; print STDERR "Incorrect number of arguments\n"; - print STDERR "Correct usage is: $0 <path to package file> <excel-file>\n"; + print STDERR "Usage: $0 <path to package file> <excel-file>\n"; exit 1; } @@ -85,11 +82,10 @@ print "***** Beginning Execution of Ninka *****\n"; foreach my $file (@files) { if (-T $file) { print "Running ninka on file [$file]\n"; - execute("perl ${path}/ninka.pl '$file'"); + execute("perl ${path}/ninka -i '$file'"); } } - print "***** Entering Ninka Data into excell file [$excelFile] *****\n"; my $row = 1; @@ -116,7 +112,6 @@ foreach my $file (@files) { my @columns = parseLicenseData($filedata); - my $originalFile = $file; $originalFile =~ s/\.license$//; diff --git a/ninka-sqlite.pl b/bin/ninka-sqlite index d53f60f..6b27ea9 100755 --- a/ninka-sqlite.pl +++ b/bin/ninka-sqlite @@ -17,20 +17,17 @@ # use strict; -use Switch; use DBI; use File::Temp; use File::Find; use File::Basename; -use Scalar::Util qw(looks_like_number); +use Ninka; - - -if(scalar(@ARGV) != 2){ - print STDERR "Ninka 1.3. sqlite wrapper\n"; +if (scalar(@ARGV) != 2) { + print STDERR "Ninka v${Ninka::VERSION}. sqlite wrapper\n"; print STDERR "Processes package file (.tar.gz, zip, jar. 
etc) and outputs to sqlite file\n"; print STDERR "Incorrect number of arguments\n"; - print STDERR "Correct usage is: $0 <path to package file> <database name>\n"; + print STDERR "Usage: $0 <path to package file> <database name>\n"; exit 1; } @@ -88,7 +85,7 @@ find( print "***** Beginning Execution of Ninka *****\n"; foreach my $file (@files) { print "Running ninka on file [$file]\n"; - execute("perl ${path}/ninka.pl '$file'"); + execute("perl ${path}/ninka -i '$file'"); } my @ninkafiles; @@ -116,44 +113,46 @@ foreach my $file (@ninkafiles) { my $filedata = do { local $/; <$fh> }; my $sth; - switch (getExtension($basefile)){ - -# case ".comments" { -# print "Inserting [$basefile] into table comments\n"; -# $sth = $dbh->prepare("INSERT INTO comments VALUES -# ('$rootfile', '$filepath', '$packname', ?)"); -# } - case ".sentences" { - print "Inserting [$basefile] into table sentences\n"; - $sth = $dbh->prepare("INSERT INTO sentences VALUES - ('$rootfile', '$filepath', '$packname', ?)"); - } - case ".goodsent" { - print "Inserting [$basefile] into table goodsents\n"; - $sth = $dbh->prepare("INSERT INTO goodsents VALUES - ('$rootfile', '$filepath', '$packname', ?)"); - } - case ".badsent" { - print "Inserting [$basefile] into table goodsents\n"; - $sth = $dbh->prepare("INSERT INTO badsents VALUES - ('$rootfile', '$filepath', '$packname', ?)"); - } - case ".senttok" { - print "Inserting [$basefile] into table senttoks\n"; - $sth = $dbh->prepare("INSERT INTO senttoks VALUES - ('$rootfile', '$filepath', '$packname', ?)"); - } - case ".license" { - print "Inserting [$basefile] into table licenses\n"; - my @columns = parseLicenseData($filedata); - $sth = $dbh->prepare("INSERT INTO licenses VALUES - ('$rootfile', '$filepath', '$packname', '$columns[0]', '$columns[1]', - '$columns[2]', '$columns[3]', '$columns[4]', '$columns[5]', '$columns[6]')"); - } + my $ext = getExtension($basefile); + + if ($ext eq ".comments") { + print "Inserting [$basefile] into table comments\n"; + 
$sth = $dbh->prepare("INSERT INTO comments VALUES + ('$rootfile', '$filepath', '$packname', ?)"); + } + if ($ext eq ".sentences") { + print "Inserting [$basefile] into table sentences\n"; + $sth = $dbh->prepare("INSERT INTO sentences VALUES + ('$rootfile', '$filepath', '$packname', ?)"); + } + if ($ext eq ".goodsent") { + print "Inserting [$basefile] into table goodsents\n"; + $sth = $dbh->prepare("INSERT INTO goodsents VALUES + ('$rootfile', '$filepath', '$packname', ?)"); + } + if ($ext eq ".badsent") { + print "Inserting [$basefile] into table badsents\n"; + $sth = $dbh->prepare("INSERT INTO badsents VALUES + ('$rootfile', '$filepath', '$packname', ?)"); + } + if ($ext eq ".senttok") { + print "Inserting [$basefile] into table senttoks\n"; + $sth = $dbh->prepare("INSERT INTO senttoks VALUES + ('$rootfile', '$filepath', '$packname', ?)"); + } + if ($ext eq ".license") { + print "Inserting [$basefile] into table licenses\n"; + my @columns = parseLicenseData($filedata); + $sth = $dbh->prepare("INSERT INTO licenses VALUES + ('$rootfile', '$filepath', '$packname', '$columns[0]', '$columns[1]', + '$columns[2]', '$columns[3]', '$columns[4]', '$columns[5]', '$columns[6]')"); + } + + if (defined $sth) { + $sth->bind_param(1, $filedata); + $sth->execute; } - $sth->bind_param(1, $filedata); - $sth->execute; close($fh); } diff --git a/lib/Ninka.pm b/lib/Ninka.pm index dc9fbff..8f454cd 100644 --- a/lib/Ninka.pm +++ b/lib/Ninka.pm @@ -68,7 +68,7 @@ __END__ =head1 NAME -Ninka - Find licenses in source files. +Ninka - source file license identification tool =head1 SYNOPSIS @@ -82,23 +82,23 @@ Ninka - Find licenses in source files. =head1 DESCRIPTION -Scans a file and returns the found licenses. +Scans a source file and returns the found licenses. =head1 COPYRIGHT AND LICENSE - Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German +Copyright (C) 2009-2014 Yuki Manabe and Daniel M. 
German - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. =cut diff --git a/lib/Ninka/CommentExtractor.pm b/lib/Ninka/CommentExtractor.pm index fd62c02..8a2e66f 100644 --- a/lib/Ninka/CommentExtractor.pm +++ b/lib/Ninka/CommentExtractor.pm @@ -93,19 +93,19 @@ If no comment extractor is known for a language, then extracts top lines from so =head1 COPYRIGHT AND LICENSE - Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German +Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. 
+This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. -=cut
\ No newline at end of file +=cut diff --git a/lib/Ninka/FileCleaner.pm b/lib/Ninka/FileCleaner.pm index c3dd912..825b1fb 100644 --- a/lib/Ninka/FileCleaner.pm +++ b/lib/Ninka/FileCleaner.pm @@ -49,19 +49,19 @@ Escapes apostrophes and other potentially disturbing characters =head1 COPYRIGHT AND LICENSE - Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German, 2015 Daniele Fognini and Johannes Najjar +Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German, 2015 Daniele Fognini and Johannes Najjar - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. -=cut
\ No newline at end of file +=cut diff --git a/lib/Ninka/LicenseMatcher.pm b/lib/Ninka/LicenseMatcher.pm index 1cb402a..ba73b26 100644 --- a/lib/Ninka/LicenseMatcher.pm +++ b/lib/Ninka/LicenseMatcher.pm @@ -261,19 +261,19 @@ Uses a set of license sentence names as input and outputs license names correspo =head1 COPYRIGHT AND LICENSE - Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German +Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. =cut diff --git a/lib/Ninka/LicenseRules.pm b/lib/Ninka/LicenseRules.pm index c7810c8..ee5af4b 100644 --- a/lib/Ninka/LicenseRules.pm +++ b/lib/Ninka/LicenseRules.pm @@ -96,19 +96,19 @@ Contains rules used by Ninka::LicenseMatcher. 
=head1 COPYRIGHT AND LICENSE - Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German +Copyright (C) 2009-2014 Yuki Manabe and Daniel M. German - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. =cut diff --git a/lib/Ninka/README.txt b/lib/Ninka/README.txt deleted file mode 100644 index 2ca7f46..0000000 --- a/lib/Ninka/README.txt +++ /dev/null @@ -1,9 +0,0 @@ -This list of abbreviations was extracted from SCOWL (Spell Checker Oriented Word Lists) by by Kevin Atkinson (kevina@gnu.org) version 2015.04.24. - -Specifically it was created from scowl-2015.04.24.tar.gz. by running: - -cat *abbrev* | sort -u > abbreviations.dict - -It also contains some additions by D.M German. - -See Copyright.scowl for license. 
diff --git a/lib/Ninka/SentenceExtractor.pm b/lib/Ninka/SentenceExtractor.pm index c27d199..6aeee4e 100644 --- a/lib/Ninka/SentenceExtractor.pm +++ b/lib/Ninka/SentenceExtractor.pm @@ -93,7 +93,7 @@ sub execute { $count2++ if ($c ge 'A' && $c le 'z'); } my $clean_sentence = clean_sentence($sentence); - push @clean_sentences, $clean_sentence if $clean_sentence, + push @clean_sentences, $clean_sentence if $clean_sentence; } if ($count1 != $count2) { @@ -251,6 +251,19 @@ Ninka::SentenceExtractor Breaks comments into sentences. +=head1 NOTES + +This list of abbreviations was extracted from SCOWL (Spell Checker Oriented Word Lists) +by Kevin Atkinson (kevina@gnu.org) version 2015.04.24. + +Specifically it was created from scowl-2015.04.24.tar.gz. by running: + + cat *abbrev* | sort -u > abbreviations.dict + +It also contains some additions by D.M German. + +See Copyright.SCOWL for license. + =head1 COPYRIGHT AND LICENSE Author: Paul Clough diff --git a/man/ninka.1 b/man/ninka.1 deleted file mode 100644 index 9cd2d57..0000000 --- a/man/ninka.1 +++ /dev/null @@ -1,83 +0,0 @@ -.TH NINKA 1.3 "May 2015" ninka -.SH NAME -ninka \- source file license identification tool -.SH SYNOPSYS -.SY ninka -.OP \-vfCcSsGgTtLd -.OP \-\- -.RI [ file ] -.YS - -.SH DESCRIPTION - -Analyses source files to determine the license they fall under. Takes a source -file as input and outputs the file's license. 
- -.SH OPTIONS - -.IP \-v -verbose - -.IP \-f -force all processing - -.IP \-C -force creation of comments -.IP \-c -stop after creation of comments - -.IP \-S -force creation of sentences -.IP \-s -stop after creation of sentences - -.IP \-G -force creation of goodsent -.IP \-g -stop after creation of goodsent - -.IP \-T -force creation of senttok -.IP \-t -stop after creation of senttok - -.IP \-L -force creation of matching - -.IP \-d -delete intermediate files - -.IP \-\- -Stop processing options - -.SH EXAMPLES - -.TP -\fBninka\fR \fIfoo.c\fR -Determine the licenses in file foo.c - -.TP -.BI ninka\ \-d \ foo.c -Determine the license in file foo.c and delete intermediary files - -.TP -find * | xargs \-n1 \-I@ \fBninka\fR '@' -Determine the licenses of files in a directory. - - -.SH AUTHOR - -\fBninka\fR was written by Daniel M. German <dmg@uvic.ca> and Yuki Manabe -<y-manabe@ist.osaka-u.ac.jp>. ninka itself is licensed under the AGPLv3+. This -manpage was written by Ryan Kavanagh <ryanakca@kubuntu.org> for the Debian -project and is also licensed under the AGPLv3+. - -.SH SEE ALSO - -Daniel M. German, Yuki Manabe and Katsuro Inoue. A sentence-matching method -for automatic license identification of source code files. In 25nd IEEE/ACM -International Conference on Automated Software Engineering (ASE 2010). - -You can email Daniel M. 
German <dmg@uvic.ca> for a copy or download it from -.UR http://turingmachine.org/~dmg/papers/dmg2010ninka.pdf -.UE diff --git a/scripts/license_matcher_modified.pl b/scripts/license_matcher_modified.pl index 53b1732..53b1732 100644..100755 --- a/scripts/license_matcher_modified.pl +++ b/scripts/license_matcher_modified.pl diff --git a/scripts/parseLicense.pl b/scripts/parseLicense.pl index 2dc61ef..2dc61ef 100644..100755 --- a/scripts/parseLicense.pl +++ b/scripts/parseLicense.pl diff --git a/scripts/sort_package_license_list.pl b/scripts/sort_package_license_list.pl index cd20b50..cd20b50 100644..100755 --- a/scripts/sort_package_license_list.pl +++ b/scripts/sort_package_license_list.pl diff --git a/scripts/unify.pl b/scripts/unify.pl index f518fbb..f518fbb 100644..100755 --- a/scripts/unify.pl +++ b/scripts/unify.pl diff --git a/t/pod_ok.t b/t/pod_ok.t new file mode 100644 index 0000000..57b423a --- /dev/null +++ b/t/pod_ok.t @@ -0,0 +1,5 @@ +use strict; +use warnings; +use Test::Pod; + +all_pod_files_ok(); diff --git a/unify.pl b/unify.pl deleted file mode 100644 index f518fbb..0000000 --- a/unify.pl +++ /dev/null @@ -1,161 +0,0 @@ -#!/usr/bin/perl - -# first pass, unify names of licenses and remove duplicates. - -# we trick regarding gpl related licenses so they are "clustered" together.. 
-# -# replace GPL with __GPL -# replace exception in the text with ___exception - -use strict; - -my %equiv = ( - "boostV1Ref" => "boostV1", - "X11" => "X11mit", - "X11Festival" => "X11mit", - "X11mitNoSellNoDocDocBSDvar" => "X11mit", - "X11mitwithoutSell" => 'X11mit', - "X11mitBSDvar" => "X11mit", - "X11mitwithoutSellCMUVariant" => "X11mit", - "X11mitwithoutSellCMUVariant" => "X11mit", - "X11mitwithoutSellandNoDocumentationRequi" => "X11mit", - "MITvar3" => "X11mit", - "MITvar2" => "X11mit", - "MIT" => "X11mit", - "ZLIBref" => "ZLIB", - "BSD3NoWarranty" => "BSD3", - "BSD2EndorseInsteadOfBinary" => "BSD2", - "BSD2var2" => "BSD2", - "LesserGPLv2" => "LibraryGPLv2", - "LesserGPLv2+" => "LibraryGPLv2+", - "orLGPLVer2.1" => "LesserGPLVer2.1", - "postgresqlRef" => "postgresql", - ); - -while (<>) { - chomp; - my @f = split(/;/); - # first remove duplicates - - my $l = $f[1]; - - # do a simple rewriting of this exception which is an incomplete license - - $l =~ s/^Exception$/UNKNOWN/; - - my @l = split(/,/,$l); - my %lics = %{{ map { $_ => 1 } @l }}; - - %lics = Do_Equivalent(%lics); - %lics = Remove_Redundant(%lics); - %lics = Do_Exceptions(%lics); - - my @out = sort keys %lics; - - my $t = join(',', @out); - if ($t eq "") { - $t = "UNKNOWN"; - } - print $f[0], ";$t\n"; -} - -sub Do_Exceptions -{ - my (%lics) = @_; - - if ($lics{'digiaQTExceptionNoticeVer1.1'} ne '' and $lics{'Qt'}) { - delete $lics{'digiaQTExceptionNoticeVer1.1'}; - delete $lics{'Qt'}; - $lics{'Qt-qtExcep'} = 'Qt-qtExcep'; - } - if ($lics{'BisonException'} ne "" and $lics{"GPLv3+"} ne "") { - delete $lics{'BisonException'}; - delete $lics{"GPLv3+"}; - $lics{'GPLv3+-bisonExcep'} = 'GPLv3+-bisonExcep'; - } - if ($lics{'BisonException'} ne "" and $lics{"GPLv2+"} ne "") { - delete $lics{'BisonException'}; - delete $lics{"GPLv2+"}; - $lics{'GPLv2+-bisonExcep'} = 'GPLv2+-bisonExcep'; - } - if ($lics{'BisonException'} ne "" and $lics{"GPLv2"} ne "") { - delete $lics{'BisonException'}; - delete $lics{"GPLv2"}; 
- $lics{'GPLv2-bisonExcep'} = 'GPLv2-bisonExcep'; - } - if ($lics{'ClassPathException'} ne "" and $lics{"GPLv2"} ne "") { - delete $lics{'ClassPathException'}; - delete $lics{"GPLv2"}; - $lics{"GPLv2-classPathExcep"} = "GPLv2-classPathExcep"; - } - if ($lics{'CDDLorGPLv2'} ne "" and $lics{"ClassPathExceptionGPLv2"} ne "") { - delete $lics{'CDDLorGPLv2'}; - delete $lics{"ClassPathExceptionGPLv2"}; - $lics{'CDDLorGPLv2-classPathExcep'} = 'CDDLorGPLv2-classPathExcep'; - } - if ($lics{'LinkException'} ne "" and $lics{"GPLv3+"} ne "") { - delete $lics{'LinkException'}; - delete $lics{"GPLv3+"}; - $lics{'GPLv3+-linkExcep'} = 'GPLv3+-linkExcep'; - } - if ($lics{'LinkException'} ne "" and $lics{"GPLv2+"} ne "") { - delete $lics{'LinkException'}; - delete $lics{"GPLv2+"}; - $lics{'GPLv2+-linkExcep'} = 'GPLv2+-linkExcep'; - } - if ($lics{'LinkException'} ne "" and $lics{"GPLv3"} ne "") { - delete $lics{'LinkException'}; - delete $lics{"GPLv3"}; - $lics{'GPLv3-linkExcep'} = 'GPLv3-linkExcep'; - } - if ($lics{'LinkException'} ne "" and $lics{"GPLv2"} ne "") { - delete $lics{'LinkException'}; - delete $lics{"GPLv2"}; - $lics{'GPLv2-linkExcep'} = 'GPLv2-linkExcep'; - } - - return %lics; - -} - -sub Remove_Redundant -{ - my (%lics) = @_; - - if ($lics{"GPLnoVersion"} ne "" and $lics{"GPLv2"} . $lics{"GPLv2+"} .$lics{"GPLv3"} . $lics{"GPLv3+"} ne "") { - delete $lics{"GPLnoVersion"}; - } - if ($lics{"GPLv2+"} ne "" and $lics{"GPLv3+"} ne "") { - delete $lics{"GPLv2+"}; - } - if ($lics{'MPL1_1andLGPLv2_1'} ne "" and $lics{"MPLv1_1"} ne "") { - delete $lics{"MPLv1_1"}; - } - - - return %lics; - -} - -sub Do_Equivalent -{ - my (%lics) = @_; - my %outA; - - # then normalize licenses - foreach my $a (keys %lics) { - next if $a eq "SeeFile"; - if ($equiv{$a} ne "") { - $outA{$equiv{$a}} = $equiv{$a}; - } else { - $outA{$a} = $a; - } - } - return %outA; - -} - - -sub uniq { - return keys %{{ map { $_ => 1 } @_ }}; -} |