summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Daniel M German <dmg@uvic.ca> 2015-05-22 01:10:12 -0700
committer: Daniel M German <dmg@uvic.ca> 2015-05-22 01:10:12 -0700
commit: 8b5edd0749fd42e6efef69911d4ee631ab4d6d23 (patch)
tree: ca629ababcb6323f03ffcaed196a088d5747aefb
parent: 494598e636aba7971c2e188a24ed6cde202b2b65 (diff)
download: ninka-test2.tar.gz
create temporary files in a different directory (test2)
-rw-r--r--  README.TXT                  7
-rwxr-xr-x  extComments/extComments.pl  26
-rwxr-xr-x  ninka-excel.pl              5
-rwxr-xr-x  ninka-sqlite.pl             6
-rwxr-xr-x  ninka.pl                    93
5 files changed, 87 insertions, 50 deletions
diff --git a/README.TXT b/README.TXT
index 71d8bd3..bdf5a6f 100644
--- a/README.TXT
+++ b/README.TXT
@@ -27,6 +27,13 @@ http://turingmachine.org/~dmg/papers/dmg2010ninka.pdf
If you use Ninka for research purposes, we would appreciate you cite
the above paper.
+* Mailing list
+
+Ninka has a mailing list located at sourceforge:
+
+https://sourceforge.net/p/ninka/mailman/
+
+
* Contributors
- Paul Clough for his code to split sentences
diff --git a/extComments/extComments.pl b/extComments/extComments.pl
index 0509012..39dbeb5 100755
--- a/extComments/extComments.pl
+++ b/extComments/extComments.pl
@@ -30,8 +30,8 @@ if ($path eq '') {
# set parameters
my %opts = ();
-if (!getopts ('vc:p:',\%opts)) {
-print STDERR "Usage $0 -v
+if (!getopts ('vc:p:',\%opts) or scalar(@ARGV) != 1 ) {
+print STDERR "Usage $0 -v <filename>
-v verbose
-c count of comment blocks
@@ -50,7 +50,6 @@ $f =~ s/\$/\\\$/g;
$f =~ s/;/\\;/g;
$f =~ s/ /\\ /g;
-
#die "illegal file [$f]" if $f =~ m@/\.@;
my $numberComments = 1;
@@ -69,12 +68,7 @@ if (get_size($original) == 0) {
my $commentsCmd = Determine_Comments_Extractor($f);
-execute("$commentsCmd");
-
-if ($commentsCmd =~ /^comments/ and
- get_size("${f}.comments") == 0){
- `cat $f | head -700 > ${f}.comments`;
-}
+print execute("$commentsCmd");
exit 0;
@@ -91,33 +85,31 @@ sub Determine_Comments_Extractor
########################
# for the time being, let us just extract the top 400 lines
- return "cat '$f' | head -400 > '${f}.comments'";
+ return "cat $f | head -400";
# return "$path/hashComments.pl -p '#' '$f'";
} elsif ($ext eq 'jl' or
$ext eq 'el'
) {
- return "cat $f | head -400 > ${f}.comments";
+ return "cat $f | head -400";
# return "$path/hashComments.pl -p ';' $f";;
} elsif ($ext =~ /^(java|c|cpp|h|cxx|c\+\+|cc)$/ ) {
my $comm = `which comments`;
if ($comm ne '') {
- return "comments -c1 '$f' 2> /dev/null";
+ return "comments -o -c1 $f 2> /dev/null";
} else {
- return "cat $f | head -400 > ${f}.comments";
+ return "head -400 $f";
}
} else {
- return "cat $f | head -700 > ${f}.comments";
+ return "head -700 $f";
}
} else {
- print "\n>>>>>>>>>>>>>>>>>>>>>\n";
- return "cat $f | head -700 > ${f}.comments";
+ return "head -700 $f";
}
}
sub execute
{
my ($c) = @_;
-# print "\nTo execute [$c]\n";
my $r = `$c`;
my $status = ($? >> 8);
die "execution of program [$c] failed: status [$status]" if ($status != 0);
diff --git a/ninka-excel.pl b/ninka-excel.pl
index 71adddf..9f95dc9 100755
--- a/ninka-excel.pl
+++ b/ninka-excel.pl
@@ -62,8 +62,7 @@ $worksheet->write(0, 7, 'TokensUnmatched', $format);
$worksheet->write(0, 8, 'TokensUnknown', $format);
$worksheet->write(0, 9, 'Tokens', $format);
-my $tempdir = File::Temp->newdir();
-my $dirname = $tempdir->dirname;
+my $dirname = File::Temp->newdir()->dirname;
print "***** Extracting file [$pack] to temporary directory [$dirname] *****\n";
my $packext = getExtension($pack);
@@ -85,7 +84,7 @@ print "***** Beginning Execution of Ninka *****\n";
foreach my $file (@files) {
if (-T $file) {
print "Running ninka on file [$file]\n";
- execute("perl ${path}/ninka.pl '$file'");
+ execute("perl ${path}/ninka.pl -h '$file' /");
}
}
diff --git a/ninka-sqlite.pl b/ninka-sqlite.pl
index d53f60f..214d000 100755
--- a/ninka-sqlite.pl
+++ b/ninka-sqlite.pl
@@ -66,8 +66,7 @@ $dbh->do("CREATE TABLE IF NOT EXISTS
toks_unknown INT, tokens TEXT,
PRIMARY KEY(filename, path, container))");
-my $tempdir = File::Temp->newdir();
-my $dirname = $tempdir->dirname;
+my $dirname = File::Temp->newdir()->dirname;
print "***** Extracting file [$pack] to temporary directory [$dirname] *****\n";
my $packext = getExtension($pack);
@@ -88,7 +87,7 @@ find(
print "***** Beginning Execution of Ninka *****\n";
foreach my $file (@files) {
print "Running ninka on file [$file]\n";
- execute("perl ${path}/ninka.pl '$file'");
+ execute("perl ${path}/ninka.pl -h '$file' /");
}
my @ninkafiles;
@@ -116,6 +115,7 @@ foreach my $file (@ninkafiles) {
my $filedata = do { local $/; <$fh> };
my $sth;
+ next if ($basefile =~ /comments$/);
switch (getExtension($basefile)){
# case ".comments" {
diff --git a/ninka.pl b/ninka.pl
index 3987a41..4ce644e 100755
--- a/ninka.pl
+++ b/ninka.pl
@@ -19,12 +19,14 @@
use strict;
use Getopt::Std;
use File::Basename;
+use File::Path qw(make_path);
+
my %opts = ();
-if (!getopts ("vfCcSsGgTtLd",\%opts) or scalar(@ARGV) == 0) {
+if (!getopts ("vfCcSsGgTtLdDh",\%opts) or scalar(@ARGV) != 2) {
print STDERR "Ninka version 1.3
-Usage $0 -fCtTvcgsGd <filename>
+Usage $0 -fCtTvcgsGd <filename> <outputDir>
-v verbose
-f force all processing
@@ -44,6 +46,9 @@ Usage $0 -fCtTvcgsGd <filename>
-L force creation of matching
-d delete intermediate files
+ -D delete license output file
+
+ -h re-create directory structure of original filename in output files under output dir. For the sake of security, no .. directory is allowed in the path name. Starting /s are removed.
\n";
@@ -54,6 +59,8 @@ Usage $0 -fCtTvcgsGd <filename>
my $verbose = exists $opts{v};
my $delete = exists $opts{d};
+my $deleteLic = exists $opts{D};
+my $createDirsHier = exists $opts{h};
#$delete = 1;
my $path = dirname($0);
@@ -71,35 +78,54 @@ my $forceLicense = exists $opts{L};
#die "Usage $0 <filename>" unless $ARGV[0] =~ /\.(c|cpp|java|cc|cxx|h|jl|py|pm|el|pl)$/;
-my $f = $ARGV[0];
-
-
-my $original = $f;
-
-$f =~ s/'/\\'/g;
-$f =~ s/\$/\\\$/g;
-$f =~ s/;/\\;/g;
-$f =~ s/ /\\ /g;
+my $original = $ARGV[0];
+my $escapedOriginal = escape_filename($original);
+my $dirOriginal = $ARGV[1];
+my $f = basename($escapedOriginal);
print "Starting: $original;\n" if ($verbose);
print "$original;";
-my $commentsFile = "${f}.comments";
-my $sentencesFile = "${f}.sentences";
-my $goodsentFile = "${f}.goodsent";
-my $sentokFile = "${f}.senttok";
-
if (not (-f $original)) {
print "ERROR;[${original}] is not a file\n" ;
exit 0;
}
+if (not (-d $dirOriginal)) {
+ print "ERROR;[${dirOriginal}] is not a directory\n" ;
+ exit 0;
+}
+$dirOriginal =~ s@/$@@;
+
+my $dir;
+my $hier = "";
+if ($createDirsHier) {
+ $hier = dirname($original);
+ # make sure it does not start with /
+ $hier =~ s@^/+@@;
+ # abort if relative...
+ if ($hier =~ m@/\.\./@ or $hier =~ m@^\.\.@ or $hier =~ m@\.\.$@) {
+ die "directory name [$hier] of input file contains .. aborting\n";
+ }
+ $dir = "$dirOriginal/$hier";
+ make_path($dir) unless -d $dir;
+ $dir = escape_filename($dir);
+}
+
+my $commentsFile = "$dir/${f}.comments";
+my $sentencesFile = "$dir/${f}.sentences";
+my $goodsentFile = "$dir/${f}.goodsent";
+my $badsentFile = "$dir/${f}.badsent";
+my $sentokFile = "$dir/${f}.senttok";
+my $licenseFile = "$dir/${f}.license";
+my $codeFile = "$dir/${f}.code";
Do_File_Process($original, $commentsFile, ($force or $forceComments),
- "$path/extComments/extComments.pl -c1 ${f}",
+ "$path/extComments/extComments.pl -c1 ${escapedOriginal} > $commentsFile",
"Creating comments file",
- exists $opts{c});
+ exists $opts{c}
+ );
Do_File_Process($commentsFile, $sentencesFile, ($force or $forceSentences),
@@ -119,18 +145,21 @@ Do_File_Process($goodsentFile, $sentokFile, ($force or $forceSentok),
print "Matching ${f}.senttok against rules" if ($verbose);
-execute("$path/matcher/matcher.pl ${f}.senttok > ${f}.license");
+execute("$path/matcher/matcher.pl ${sentokFile} > ${licenseFile}");
-print `cat ${f}.license`;
-
-unlink("${f}.code");
+print `cat ${licenseFile}`;
if ($delete) {
- unlink("${f}.badsent");
- unlink("${f}.comments");
- unlink("${f}.goodsent");
- unlink("${f}.sentences");
- unlink("${f}.senttok");
+ unlink($commentsFile);
+ unlink($sentencesFile);
+ unlink($goodsentFile);
+ unlink($badsentFile);
+ unlink($sentokFile);
+ unlink($codeFile) if -f $codeFile;
+}
+
+if ($deleteLic) {
+ unlink($licenseFile);
}
exit 0;
@@ -179,3 +208,13 @@ sub newer
return 1;
}
}
+
+sub escape_filename
+{
+ my ($f) = @_;
+ $f =~ s/'/\\'/g;
+ $f =~ s/\$/\\\$/g;
+ $f =~ s/;/\\;/g;
+ $f =~ s/ /\\ /g;
+ return $f;
+}