#!/usr/bin/perl
#
#    Copyright (C) 2014,2015  Anthony Kohan and Daniel M. German
#
#    This program is free software; you can redistribute it and/or
#    modify it under the terms of the GNU General Public License as
#    published by the Free Software Foundation; either version 2 of
#    the License, or (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#    General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Ninka sqlite wrapper.
#
# Unpacks a package (tar.gz / tar.bz2 / zip / jar) into a temporary
# directory, runs ninka.pl on every extracted file, and stores the
# intermediate files ninka leaves next to each input in an SQLite database:
# one table per intermediate file type, keyed by
# (filename, path inside the package, package name).

use strict;
use warnings;
# NOTE(review): Switch is a deprecated source filter (not shipped with perl
# since 5.14).  Nothing below relies on it any more; the line is kept only so
# that the list of loaded modules is unchanged.  Consider dropping it.
use Switch;
use DBI;
use File::Temp;
use File::Find;
use File::Basename;
use Scalar::Util qw(looks_like_number);

if (scalar(@ARGV) != 2) {
    print STDERR "Ninka 1.3. sqlite wrapper\n";
    print STDERR "Processes package file (.tar.gz, zip, jar. etc) and outputs to sqlite file\n";
    print STDERR "Incorrect number of arguments\n";
    print STDERR "Correct usage is: $0 <path to package file> <database name>\n";
    exit 1;
}

# Directory holding this script; ninka.pl is expected to live beside it.
# dirname() yields "." when $0 has no directory part, whereas stripping the
# last path component by hand left the script name itself in that situation.
my $path = dirname($0);

my ($pack, $db) = @ARGV;

my $dbh = DBI->connect("DBI:SQLite:dbname=$db", "", "", {RaiseError => 1, AutoCommit => 0})
    or die $DBI::errstr;

# The five "content" tables share one schema: the raw text of an
# intermediate file plus the key identifying where it came from.
foreach my $table (qw(comments sentences goodsents badsents senttoks)) {
    $dbh->do("CREATE TABLE IF NOT EXISTS $table (filename TEXT, path TEXT, container TEXT, content TEXT, PRIMARY KEY(filename, path, container))");
}
$dbh->do("CREATE TABLE IF NOT EXISTS licenses (filename TEXT, path TEXT, container TEXT, licenses TEXT, num_found INT, lines INT, toks_ignored INT, toks_unmatched INT, toks_unknown INT, tokens TEXT, PRIMARY KEY(filename, path, container))");

# Keep the File::Temp::Dir object alive for the whole run: the directory is
# removed as soon as the object is destroyed, so taking ->dirname from an
# unnamed temporary would delete the directory before anything is extracted.
my $tmpdir  = File::Temp->newdir();
my $dirname = $tmpdir->dirname;

print "***** Extracting file [$pack] to temporary directory [$dirname] *****\n";

my $packext = getExtension($pack);
$packext = '' unless defined $packext;    # package name without any extension

# Commands are passed as argument lists so that file names containing
# spaces, quotes or shell metacharacters are handed over verbatim.
if ($packext eq ".bz2" || $packext eq ".gz") {
    execute('tar', '-xvf', $pack, '--directory', $dirname);
}
elsif ($packext eq ".jar" || $packext eq ".zip") {
    execute('unzip', '-d', $dirname, $pack);
}
else {
    print "ninka-wrapper does not support packages with extension [$packext]\n";
    # Nothing was extracted, so there is nothing to analyse: undo the
    # pending schema changes and report the failure through the exit status.
    $dbh->rollback();
    $dbh->disconnect();
    exit 1;
}

# Every non-directory entry below the extraction directory is analysed.
my @files;
find(
    sub {
        push @files, $File::Find::name unless -d;
    },
    $dirname
);

print "***** Beginning Execution of Ninka *****\n";

foreach my $file (@files) {
    print "Running ninka on file [$file]\n";
    execute('perl', "${path}/ninka.pl", '-h', $file, '/');
}

# Extension of a ninka intermediate file => table that stores it.
# ".comments" files are intentionally not loaded into the database.
my %table_for = (
    '.sentences' => 'sentences',
    '.goodsent'  => 'goodsents',
    '.badsent'   => 'badsents',
    '.senttok'   => 'senttoks',
    '.license'   => 'licenses',
);

# Collect the intermediate files.  Only regular files whose extension is
# exactly one of the known ones are taken, so every collected file is
# guaranteed to have a destination table.
my @ninkafiles;
find(
    sub {
        my $ext = getExtension($File::Find::name);
        return unless defined $ext && exists $table_for{$ext};
        return unless -f $_;
        push @ninkafiles, $File::Find::name;
    },
    $dirname
);

print "***** Entering Ninka Data into Database [$db] *****\n";

my $packname = basename($pack);

foreach my $file (@ninkafiles) {
    # Path of the file relative to the package root; the temporary directory
    # prefix is matched literally (\Q..\E) and only at the start.
    my $filepath = dirname($file);
    $filepath =~ s/\A\Q$dirname\E//;

    my $basefile = basename($file);
    my $rootfile = removeExtension($basefile);
    my $table    = $table_for{ getExtension($basefile) };

    # Read entire file into a string
    open(my $fh, '<', $file) or die "Can't open file [$file]: $!";
    my $filedata = do { local $/; <$fh> };
    close($fh);
    $filedata = '' unless defined $filedata;

    print "Inserting [$basefile] into table $table\n";

    # The licenses table stores the seven parsed fields; every other table
    # stores the file content as a single column.
    my @row = ($rootfile, $filepath, $packname);
    if ($table eq 'licenses') {
        push @row, parseLicenseData($filedata);
    }
    else {
        push @row, $filedata;
    }

    # All values go through placeholders: names taken from the package may
    # contain quotes, and the table name comes only from %table_for.
    my $placeholders = join(', ', ('?') x scalar(@row));
    my $sth = $dbh->prepare_cached("INSERT INTO $table VALUES ($placeholders)");
    $sth->execute(@row);
}

$dbh->commit();
$dbh->disconnect();

# parseLicenseData($data)
#
# Splits the content of a ".license" file on ';' into the seven values of
# the licenses table (licenses, num_found, lines, toks_ignored,
# toks_unmatched, toks_unknown, tokens).
#
# Always returns exactly seven elements: missing fields become empty
# strings and surplus fields are dropped.  When the first field is the
# literal "NONE" line, the result is 'NONE' followed by six empty strings.
sub parseLicenseData {
    my ($data) = @_;
    $data = '' unless defined $data;

    my @fields = split(';', $data);

    if (defined $fields[0] && $fields[0] eq "NONE\n") {
        return ('NONE', ('') x 6);
    }

    # An array slice yields undef for positions past the end of @fields.
    return map { defined $_ ? $_ : '' } @fields[0 .. 6];
}

# getExtension($file)
#
# Returns the last extension of the file's base name including the leading
# dot (".gz" for "a/b.tar.gz"), or undef when the base name has none.
sub getExtension {
    my ($file) = @_;

    my $filename = basename($file);
    my ($ext) = $filename =~ /(\.[^.]+)$/;

    return $ext;
}

# removeExtension($file)
#
# Returns $file with its last ".ext" suffix removed; the argument itself is
# not modified.  A name without an extension is returned unchanged.
sub removeExtension {
    my ($file) = @_;

    (my $filename = $file) =~ s/\.[^.]+$//;

    return $filename;
}

# execute($command, @args)
#
# Runs an external program and returns what it wrote to standard output.
#
# With @args the program is started directly, without a shell, and every
# argument reaches it verbatim.  With a single string the command line is
# interpreted by the shell, as before.
#
# Dies when the program cannot be started, is killed by a signal, or exits
# with a non-zero status.
sub execute {
    my ($command, @args) = @_;

    my $label = join(' ', $command, @args);
    my $output;

    if (@args) {
        open(my $pipe, '-|', $command, @args)
            or die "execution of [$label] failed: $!\n";
        $output = do { local $/; <$pipe> };
        close($pipe);    # waits for the child and sets $?
    }
    else {
        $output = `$command`;
    }

    if ($? == -1) {
        die "execution of [$label] failed: $!\n";
    }
    if ($? & 127) {
        # The exit status byte is zero when a signal ended the program, so
        # the signal number has to be checked separately.
        my $signal = $? & 127;
        die "execution of [$label] failed: killed by signal [$signal]\n";
    }

    my $status = ($? >> 8);
    die "execution of [$label] failed: status [$status]\n" if ($status != 0);

    return defined $output ? $output : '';
}