#!/usr/bin/perl
#
#    Copyright (C) 2009-2014  Yuki Manabe and Daniel M. German
#
#    This program is free software; you can redistribute it and/or
#    modify it under the terms of the GNU General Public License as
#    published by the Free Software Foundation; either version 2 of
#    the License, or (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#    General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Driver script: runs the comment extractor, sentence splitter, sentence
# filter, tokenizer and matcher (all located relative to this script) on one
# input file, placing the intermediate and result files in an output
# directory, and prints "<input file>;<contents of the .license file>".

use strict;
use warnings;
use Getopt::Std;
use File::Basename;
use File::Path qw(make_path);

my %opts = ();
if (!getopts("vfCcSsGgTtLdDh", \%opts) or scalar(@ARGV) != 2) {
    print STDERR <<"END_USAGE";
Ninka version 1.3

Usage $0 -fCtTvcgsGd <filename> <output-dir>

  -v verbose
  -f force all processing

  -C force creation of comments
  -c stop after creation of comments
  -S force creation of sentences
  -s stop after creation of sentences
  -G force creation of goodsent
  -g stop after creation of goodsent
  -T force creation of senttok
  -t stop after creation of senttok
  -L force creation of matching

  -d delete intermediate files
  -D delete license output file

  -h re-create directory structure of original filename in output files
     under output dir. For the sake of security, no .. directory is
     allowed in the path name. Starting /s are removed.

END_USAGE
    exit 1;
}

my $verbose        = exists $opts{v};
my $delete         = exists $opts{d};
my $deleteLic      = exists $opts{D};
my $createDirsHier = exists $opts{h};

#$delete = 1;

# Directory holding this script; the helper programs live below it.
my $path = dirname($0);
if ($path eq "") {
    $path = "./";
}

my $force          = exists $opts{f};
my $forceGood      = exists $opts{G};
my $forceSentences = exists $opts{S};
my $forceSentok    = exists $opts{T};
my $forceComments  = exists $opts{C};
my $forceLicense   = exists $opts{L};

#die "Usage $0 <filename>" unless $ARGV[0] =~ /\.(c|cpp|java|cc|cxx|h|jl|py|pm|el|pl)$/;

# Raw (unescaped) names are kept for every Perl-level file operation
# (-f, -d, stat, unlink, open); shell-escaped copies are derived further
# down and used only inside command lines.
my $original    = $ARGV[0];
my $dirOriginal = $ARGV[1];
my $f           = basename($original);

print "Starting: $original;\n" if ($verbose);

print "$original;";

# Problems with the arguments are reported on stdout in the same
# ';'-separated record as a normal result, and the exit status stays 0.
if (not(-f $original)) {
    print "ERROR;[${original}] is not a file\n";
    exit 0;
}

if (not(-d $dirOriginal)) {
    print "ERROR;[${dirOriginal}] is not a directory\n";
    exit 0;
}

$dirOriginal =~ s@/$@@;

# Output files go straight into the output directory unless -h asks for
# the directory hierarchy of the input file to be recreated below it.
my $dir = $dirOriginal;
if ($createDirsHier) {
    my $hier = dirname($original);

    # make sure it does not start with /
    $hier =~ s@^/+@@;

    # abort if the path could climb out of the output directory
    if ($hier =~ m@/\.\./@ or $hier =~ m@^\.\.@ or $hier =~ m@\.\.$@) {
        die "directory name [$hier] of input file contains .. \naborting\n";
    }
    $dir = "$dirOriginal/$hier";
    make_path($dir) unless -d $dir;
}

my $commentsFile  = "$dir/${f}.comments";
my $sentencesFile = "$dir/${f}.sentences";
my $goodsentFile  = "$dir/${f}.goodsent";
my $badsentFile   = "$dir/${f}.badsent";
my $sentokFile    = "$dir/${f}.senttok";
my $licenseFile   = "$dir/${f}.license";
my $codeFile      = "$dir/${f}.code";

# Shell-safe spellings of everything that is interpolated into a command.
my $shPath      = escape_filename($path);
my $shOriginal  = escape_filename($original);
my $shComments  = escape_filename($commentsFile);
my $shSentences = escape_filename($sentencesFile);
my $shGoodsent  = escape_filename($goodsentFile);
my $shSentok    = escape_filename($sentokFile);
my $shLicense   = escape_filename($licenseFile);

Do_File_Process($original, $commentsFile, ($force or $forceComments),
                "$shPath/extComments/extComments.pl -c1 ${shOriginal} > ${shComments}",
                "Creating comments file",
                exists $opts{c});

# No redirection here: splitter.pl is presumably responsible for writing
# the .sentences file itself -- confirm against splitter.pl.
Do_File_Process($commentsFile, $sentencesFile, ($force or $forceSentences),
                "$shPath/splitter/splitter.pl ${shComments}",
                "Splitting sentences",
                exists $opts{s});

# Likewise filter.pl presumably writes the .goodsent (and .badsent) files.
# The stop flag for this stage is -g, as documented in the usage text.
Do_File_Process($sentencesFile, $goodsentFile, ($force or $forceGood),
                "$shPath/filter/filter.pl ${shSentences}",
                "Filtering good sentences",
                exists $opts{g});

Do_File_Process($goodsentFile, $sentokFile, ($force or $forceSentok),
                "$shPath/senttok/senttok.pl ${shGoodsent} > ${shSentok}",
                "Matching sentences against rules",
                exists $opts{t});

print "Matching ${f}.senttok against rules" if ($verbose);
execute("$shPath/matcher/matcher.pl ${shSentok} > ${shLicense}");

# Echo the matcher result; it completes the "<file>;" record started above.
if (open(my $licenseFh, '<', $licenseFile)) {
    while (my $line = <$licenseFh>) {
        print $line;
    }
    close($licenseFh);
}
else {
    warn "cannot read license file [$licenseFile]: $!\n";
}

if ($delete) {
    unlink($commentsFile);
    unlink($sentencesFile);
    unlink($goodsentFile);
    unlink($badsentFile);
    unlink($sentokFile);
    unlink($codeFile) if -f $codeFile;
}

if ($deleteLic) {
    unlink($licenseFile);
}

exit 0;

# Run one stage of the pipeline.
#
#   $input, $output  raw file names, used only for the freshness check
#   $force           true to run $cmd even when $output looks up to date
#   $cmd             shell command line that produces $output
#   $message         stage description, printed in verbose mode
#   $end             true to terminate the whole program after this stage
#
# Dies (through execute) when the command fails.
sub Do_File_Process {
    my ($input, $output, $force, $cmd, $message, $end) = @_;

    print "${message}:" if ($verbose);
    if ($force or newer($input, $output)) {
        print "Running ${cmd}:" if ($verbose);
        execute($cmd);
    }
    else {
        print "File [$output] newer than input [$input], not creating:" if ($verbose);
    }
    if ($end) {
        print "Existing after $message" if $verbose;
        print "\n";
        exit 0;
    }
}

# Run $c through the shell and return whatever it wrote to stdout.
# Dies when the command cannot be started, is killed by a signal, or
# exits with a non-zero status.
sub execute {
    my ($c) = @_;

    my $r = `$c`;
    if ($? == -1) {
        die "execution of program [$c] failed: could not run: $!";
    }
    if ($? & 127) {
        # The low 7 bits of $? carry the signal number; the exit status in
        # the high byte is 0 in that case, so it must be checked separately.
        my $signal = ($? & 127);
        die "execution of program [$c] failed: killed by signal [$signal]";
    }
    my $status = ($? >> 8);
    die "execution of program [$c] failed: status [$status]" if ($status != 0);
    return $r;
}

# True when $f1 was modified after $f2, or when the modification time of
# either file cannot be determined (e.g. $f2 does not exist yet).
sub newer {
    my ($f1, $f2) = @_;

    my ($f1write) = (stat($f1))[9];
    my ($f2write) = (stat($f2))[9];
    if (defined $f1write and defined $f2write) {
        return $f1write > $f2write;
    }
    else {
        return 1;
    }
}

# Return $f spelled so that an unquoted occurrence in a shell command line
# denotes exactly the original string.
#
# Every ASCII character outside a small set known to be inert in the shell
# is preceded by a backslash; bytes/characters above 0x7F are left alone so
# multi-byte encodings are not split.  A newline cannot be protected with a
# backslash (backslash-newline is a line continuation), so it is wrapped in
# single quotes instead.  The transformation is per character, hence
# escape(A . B) eq escape(A) . escape(B).
sub escape_filename {
    my ($f) = @_;

    $f =~ s{ (?= [\x00-\x7F] )            # ASCII only
             ( [^A-Za-z0-9_./,:+\@\n-] )  # anything not known to be inert
           }{\\$1}gx;
    $f =~ s{\n}{'\n'}g;
    return $f;
}