#!/usr/bin/env perl # # csv2sqlite - import CSV results into a Sqlite3 database # # $Id$ use warnings; use strict; use Getopt::Std; use DBI; my %opt; getopts('hvd:t:', \%opt); sub help { print STDERR <connect("dbi:CSV:csv_sep=$sep;csv_eol=\n") or die "cannot read CSV files\n"; my $sqlite3h = DBI->connect( "dbi:SQLite:$dbfile", '', '', { RaiseError => 1, AutoCommit => 0 } ) or die "cannot open the SQLite3 database\n"; while (@ARGV < 2) { push(@ARGV, '-'); } foreach my $argv (@ARGV) { my $infh; if (!open($infh, '<', $argv)) { warn "cannot read from $argv, skipped\n"; next; } local $_ = <$infh>; if (!defined $_) { warn "no first line in $argv, skipped\n"; next; } if (!/^Release,Category,/) { warn "unsupported type of content in $argv, skipped\n"; } elsif (/,Sloc\s*$/) { &import_csv_into('sloc',$argv, $infh, $_); } else { &import_csv_into('attr', $argv, $infh, $_); } close($infh); } sub import_csv_into { my ($tablename, $argv, $infh, $header) = @_; # determine the column names chomp($header); my @colname = split($sep, $header); &ehm("reading CSV file: $argv"); # read the rows $csvh->{'csv_tables'}->{'input'} = { 'file' => $argv }; my $rows = $csvh->selectall_arrayref('SELECT * FROM input') or die "cannot read CSV input: ", $sqlite3h->errstr // '?', "\n"; # detect which of them are to be NUMERIC my %col_is_numeric; my $first = 1; my $nrcols; foreach my $row (@$rows) { if ($first) { foreach my $i (0 .. $#{$row}) { if ($row->[$i] !~ /\D/) { $col_is_numeric{$i} = 1; } } $first = 0; $nrcols = $#{$row}+1; } else { foreach my $i (keys %col_is_numeric) { if ($row->[$i] =~ /\D/) { delete $col_is_numeric{$i}; } } last if !%col_is_numeric; } } &ehm(sprintf('%d rows, %d of %d columns are numeric, in %s', scalar(@$rows), scalar(keys %col_is_numeric), $nrcols, $argv )); &ehm("storing as table $tablename: $argv"); # (re)create the table, deleting any pre-existing data in it (!) $sqlite3h->do(sprintf( 'DROP TABLE IF EXISTS "%s"', $tablename )); # injection attack possible - are prepare and ?s supported here? $sqlite3h->do(sprintf( 'CREATE TABLE "%s" (%s)', $tablename, join(',', map { sprintf('"%s" %s', $colname[$_], $col_is_numeric{$_} ? 'NUMERIC' : 'TEXT' ) } 0 .. $#colname))); $sqlite3h->commit(); # prepare the row insertion # this would of course be a lot faster with an IMPORT statement my $inserter = $sqlite3h->prepare(sprintf( 'INSERT INTO "%s" (%s) VALUES (%s)', $tablename, join(',', map { sprintf('"%s"', $_) } @colname), join(',', map { '?' } @colname) )); if (!$inserter) { warn "failed to prepare INSERTing $argv into $tablename: ", $sqlite3h->errstr // '?', "\n"; return; } foreach my $row (@$rows) { if (!$inserter->execute(@$row)) { warn "cannot insert a row from $argv into $tablename: ", $sqlite3h->errstr // '?', "\n"; last; # TODO: warn about this } } $sqlite3h->commit(); }