From 8b5edd0749fd42e6efef69911d4ee631ab4d6d23 Mon Sep 17 00:00:00 2001 From: Daniel M German Date: Fri, 22 May 2015 01:10:12 -0700 Subject: create temporary files in a different directory --- README.TXT | 7 ++++ extComments/extComments.pl | 26 +++++-------- ninka-excel.pl | 5 +-- ninka-sqlite.pl | 6 +-- ninka.pl | 93 ++++++++++++++++++++++++++++++++-------------- 5 files changed, 87 insertions(+), 50 deletions(-) diff --git a/README.TXT b/README.TXT index 71d8bd3..bdf5a6f 100644 --- a/README.TXT +++ b/README.TXT @@ -27,6 +27,13 @@ http://turingmachine.org/~dmg/papers/dmg2010ninka.pdf If you use Ninka for research purposes, we would appreciate you cite the above paper. +* Mailing list + +Ninka has a mailing list located at sourceforge: + +https://sourceforge.net/p/ninka/mailman/ + + * Contributors - Paul Clough for his code to split sentences diff --git a/extComments/extComments.pl b/extComments/extComments.pl index 0509012..39dbeb5 100755 --- a/extComments/extComments.pl +++ b/extComments/extComments.pl @@ -30,8 +30,8 @@ if ($path eq '') { # set parameters my %opts = (); -if (!getopts ('vc:p:',\%opts)) { -print STDERR "Usage $0 -v +if (!getopts ('vc:p:',\%opts) or scalar(@ARGV) != 1 ) { +print STDERR "Usage $0 -v -v verbose -c count of comment blocks @@ -50,7 +50,6 @@ $f =~ s/\$/\\\$/g; $f =~ s/;/\\;/g; $f =~ s/ /\\ /g; - #die "illegal file [$f]" if $f =~ m@/\.@; my $numberComments = 1; @@ -69,12 +68,7 @@ if (get_size($original) == 0) { my $commentsCmd = Determine_Comments_Extractor($f); -execute("$commentsCmd"); - -if ($commentsCmd =~ /^comments/ and - get_size("${f}.comments") == 0){ - `cat $f | head -700 > ${f}.comments`; -} +print execute("$commentsCmd"); exit 0; @@ -91,33 +85,31 @@ sub Determine_Comments_Extractor ######################## # for the time being, let us just extract the top 400 lines - return "cat '$f' | head -400 > '${f}.comments'"; + return "cat $f | head -400"; # return "$path/hashComments.pl -p '#' '$f'"; } elsif ($ext eq 'jl' or $ext eq 'el' ) { - return "cat $f | head -400 > ${f}.comments"; + return "cat $f | head -400"; # return "$path/hashComments.pl -p ';' $f";; } elsif ($ext =~ /^(java|c|cpp|h|cxx|c\+\+|cc)$/ ) { my $comm = `which comments`; if ($comm ne '') { - return "comments -c1 '$f' 2> /dev/null"; + return "comments -o -c1 $f 2> /dev/null"; } else { - return "cat $f | head -400 > ${f}.comments"; + return "head -400 $f"; } } else { - return "cat $f | head -700 > ${f}.comments"; + return "head -700 $f"; } } else { - print "\n>>>>>>>>>>>>>>>>>>>>>\n"; - return "cat $f | head -700 > ${f}.comments"; + return "head -700 $f"; } } sub execute { my ($c) = @_; -# print "\nTo execute [$c]\n"; my $r = `$c`; my $status = ($? >> 8); die "execution of program [$c] failed: status [$status]" if ($status != 0); diff --git a/ninka-excel.pl b/ninka-excel.pl index 71adddf..9f95dc9 100755 --- a/ninka-excel.pl +++ b/ninka-excel.pl @@ -62,8 +62,7 @@ $worksheet->write(0, 7, 'TokensUnmatched', $format); $worksheet->write(0, 8, 'TokensUnknown', $format); $worksheet->write(0, 9, 'Tokens', $format); -my $tempdir = File::Temp->newdir(); -my $dirname = $tempdir->dirname; +my $dirname = File::Temp->newdir()->dirname; print "***** Extracting file [$pack] to temporary directory [$dirname] *****\n"; my $packext = getExtension($pack); @@ -85,7 +84,7 @@ print "***** Beginning Execution of Ninka *****\n"; foreach my $file (@files) { if (-T $file) { print "Running ninka on file [$file]\n"; - execute("perl ${path}/ninka.pl '$file'"); + execute("perl ${path}/ninka.pl -h '$file' /"); } } diff --git a/ninka-sqlite.pl b/ninka-sqlite.pl index d53f60f..214d000 100755 --- a/ninka-sqlite.pl +++ b/ninka-sqlite.pl @@ -66,8 +66,7 @@ $dbh->do("CREATE TABLE IF NOT EXISTS toks_unknown INT, tokens TEXT, PRIMARY KEY(filename, path, container))"); -my $tempdir = File::Temp->newdir(); -my $dirname = $tempdir->dirname; +my $dirname = File::Temp->newdir()->dirname; print "***** Extracting file [$pack] to temporary directory [$dirname] *****\n"; my $packext = getExtension($pack); @@ -88,7 +87,7 @@ find( print "***** Beginning Execution of Ninka *****\n"; foreach my $file (@files) { print "Running ninka on file [$file]\n"; - execute("perl ${path}/ninka.pl '$file'"); + execute("perl ${path}/ninka.pl -h '$file' /"); } my @ninkafiles; @@ -116,6 +115,7 @@ foreach my $file (@ninkafiles) { my $filedata = do { local $/; <$fh> }; my $sth; + next if ($basefile =~ /comments$/); switch (getExtension($basefile)){ # case ".comments" { diff --git a/ninka.pl b/ninka.pl index 3987a41..4ce644e 100755 --- a/ninka.pl +++ b/ninka.pl @@ -19,12 +19,14 @@ use strict; use Getopt::Std; use File::Basename; +use File::Path qw(make_path); + my %opts = (); -if (!getopts ("vfCcSsGgTtLd",\%opts) or scalar(@ARGV) == 0) { +if (!getopts ("vfCcSsGgTtLdDh",\%opts) or scalar(@ARGV) != 2) { print STDERR "Ninka version 1.3 -Usage $0 -fCtTvcgsGd +Usage $0 -fCtTvcgsGd -v verbose -f force all processing @@ -44,6 +46,9 @@ Usage $0 -fCtTvcgsGd -L force creation of matching -d delete intermediate files + -D delete license output file + + -h re-create directory structure of original filename in output files under output dir. For the sake of security, no .. directory is allowed in the path name. Starting /s are removed. \n"; @@ -54,6 +59,8 @@ Usage $0 -fCtTvcgsGd my $verbose = exists $opts{v}; my $delete = exists $opts{d}; +my $deleteLic = exists $opts{D}; +my $createDirsHier = exists $opts{h}; #$delete = 1; my $path = dirname($0); @@ -71,35 +78,54 @@ my $forceLicense = exists $opts{L}; #die "Usage $0 " unless $ARGV[0] =~ /\.(c|cpp|java|cc|cxx|h|jl|py|pm|el|pl)$/; -my $f = $ARGV[0]; - - -my $original = $f; - -$f =~ s/'/\\'/g; -$f =~ s/\$/\\\$/g; -$f =~ s/;/\\;/g; -$f =~ s/ /\\ /g; +my $original = $ARGV[0]; +my $escapedOriginal = escape_filename($original); +my $dirOriginal = $ARGV[1]; +my $f = basename($escapedOriginal); print "Starting: $original;\n" if ($verbose); print "$original;"; -my $commentsFile = "${f}.comments"; -my $sentencesFile = "${f}.sentences"; -my $goodsentFile = "${f}.goodsent"; -my $sentokFile = "${f}.senttok"; - if (not (-f $original)) { print "ERROR;[${original}] is not a file\n" ; exit 0; } +if (not (-d $dirOriginal)) { + print "ERROR;[${dirOriginal}] is not a directory\n" ; + exit 0; +} +$dirOriginal =~ s@/$@@; + +my $dir; +my $hier = ""; +if ($createDirsHier) { + $hier = dirname($original); + # make sure it does not start with / + $hier =~ s@^/+@@; + # abort if relative... + if ($hier =~ m@/\.\./@ or $hier =~ m@^\.\.@ or $hier =~ m@\.\.$@) { + die "directory name [$hier] of input file contains .. aborting\n"; + } + $dir = "$dirOriginal/$hier"; + make_path($dir) unless -d $dir; + $dir = escape_filename($dir); +} + +my $commentsFile = "$dir/${f}.comments"; +my $sentencesFile = "$dir/${f}.sentences"; +my $goodsentFile = "$dir/${f}.goodsent"; +my $badsentFile = "$dir/${f}.badsent"; +my $sentokFile = "$dir/${f}.senttok"; +my $licenseFile = "$dir/${f}.license"; +my $codeFile = "$dir/${f}.code"; Do_File_Process($original, $commentsFile, ($force or $forceComments), - "$path/extComments/extComments.pl -c1 ${f}", + "$path/extComments/extComments.pl -c1 ${escapedOriginal} > $commentsFile", "Creating comments file", - exists $opts{c}); + exists $opts{c} + ); Do_File_Process($commentsFile, $sentencesFile, ($force or $forceSentences), @@ -119,18 +145,21 @@ Do_File_Process($goodsentFile, $sentokFile, ($force or $forceSentok), print "Matching ${f}.senttok against rules" if ($verbose); -execute("$path/matcher/matcher.pl ${f}.senttok > ${f}.license"); +execute("$path/matcher/matcher.pl ${sentokFile} > ${licenseFile}"); -print `cat ${f}.license`; - -unlink("${f}.code"); +print `cat ${licenseFile}`; if ($delete) { - unlink("${f}.badsent"); - unlink("${f}.comments"); - unlink("${f}.goodsent"); - unlink("${f}.sentences"); - unlink("${f}.senttok"); + unlink($commentsFile); + unlink($sentencesFile); + unlink($goodsentFile); + unlink($badsentFile); + unlink($sentokFile); + unlink($codeFile) if -f $codeFile; +} + +if ($deleteLic) { + unlink($licenseFile); } exit 0; @@ -179,3 +208,13 @@ sub newer return 1; } } + +sub escape_filename +{ + my ($f) = @_; + $f =~ s/'/\\'/g; + $f =~ s/\$/\\\$/g; + $f =~ s/;/\\;/g; + $f =~ s/ /\\ /g; + return $f; +} -- cgit v1.2.1