#!/usr/bin/env perl # # csvtrans - transform CSV files, interpreted as directed graphs # # $Id$ use warnings; use strict; use Getopt::Std; use Text::CSV_XS; my %opt; getopts( 'h?i:mo:q:t:', \%opt ); HELP_MESSAGE() if grep { /^[h?]/ } keys %opt; my $sep = $opt{'t'} // ','; my $quote = $opt{'q'} // ''; my @valid_fmts = qw(paths matrix triples); sub valid_fmt { my ($fmt) = @_; if ( !defined $fmt ) { return $valid_fmts[0]; } foreach my $v_fmt (@valid_fmts) { if ( $fmt eq $v_fmt ) { return $fmt; } } undef; } my $valid_fmts = join( ', ', @valid_fmts ); my $infmt = valid_fmt( $opt{'i'} ) // die "Invalid value for -i, must be one of $valid_fmts\n"; my $outfmt = valid_fmt( $opt{'o'} ) // die "Invalid value for -o, must be one of $valid_fmts\n"; my $csv = new Text::CSV_XS( { sep_char => $sep, eol => $/, quote_char => $quote } ) or die "the CSV parser doesn't work (not installed?)\n"; @ARGV or @ARGV = ('-'); foreach my $f (@ARGV) { my $infh; my @nodes; if ( $f eq '-' ) { $infh = *STDIN; } elsif ( !open( $infh, '<', $f ) ) { warn "error: nonexistent or unreadable input, skipping: $f\n"; next; } if ( $infmt eq 'matrix' ) { my @nodes; if ( !( my $row = $csv->getline($infh) ) ) { next; } elsif ( !( @nodes = @$row ) ) { next; } # we have at least one column shift(@nodes); # the corner cell is not a node add_node($_) for @nodes; while ( my $row = $csv->getline($infh) ) { if ( my @row = @$row ) { add_node( $row[0] ); nr_arcs_for( $row[0], $nodes[ $_ - 1 ], $row[$_] ) for 1 .. $#row; } } } elsif ( $infmt eq 'triples' ) { while ( my $row = $csv->getline($infh) ) { if ( scalar(@$row) != 3 ) { warn "invalid row (not 3 columns), ignored\n"; next; } my ( $src, $tgt, $cnt ) = @$row; if ( $cnt !~ /^\d+$/ ) { warn "invalid count (not an integer), assuming 1\n"; $cnt = 1; } nr_arcs_for( $src, $tgt, $cnt ); } } else # $infmt eq 'paths' { while ( my $row = $csv->getline($infh) ) { add_node($_) for @$row; add_arc( $row->[$_], $row->[ $_ + 1 ] ) for 0 .. @$row - 2; } } } if ( $outfmt eq 'matrix' ) { my @sources = $opt{m} ? sources() : nodes(); my @targets = $opt{m} ? targets() : nodes(); $csv->print( *STDOUT, [ '', @targets ] ); foreach my $src (@sources) { $csv->print( *STDOUT, [ $src, map { nr_arcs_for( $src, $_ ) } @targets ] ); } } elsif ( $outfmt eq 'triples' ) { $csv->print( *STDOUT, [ $_, $_, 0 ] ) for isolated_nodes(); my @nodes = nodes(); foreach my $src (@nodes) { foreach my $tgt (@nodes) { my $cnt = nr_arcs_for( $src, $tgt ) or next; $csv->print( *STDOUT, [ $src, $tgt, $cnt ] ); } } } else # $outfmt eq 'paths' { my @nodes = nodes(); foreach my $src (@nodes) { if ( my @tgts = grep { nr_arcs_for( $src, $_ ) } @nodes ) { $csv->print( *STDOUT, [ $src, $_ ] ) for @tgts; } else { $csv->print( *STDOUT, [$src] ); } } } exit(0); sub HELP_MESSAGE { print STDERR <{$_} } nodes(); } sub isolated_nodes # in input order { my $tgt2cnt = tgt2cnt(); grep { !$src2tgt2cnt{$_} && !$tgt2cnt->{$_} } nodes(); } sub add_node { my ($node) = @_; if ( !defined $node2idx{$node} ) { #warn "adding node $node\n"; push( @nodes, $node ); $node2idx{$node} = $#nodes; } } sub add_arc { nr_arcs_for( @_, nr_arcs_for(@_) + 1 ); } sub nr_arcs_for { my ( $src, $tgt, $c ) = @_; if ( !defined $c ) { # nothing } elsif ($c) { #warn "m[$src,$tgt] := $c\n"; add_node($_) for ( $src, $tgt ); $src2tgt2cnt{$src}->{$tgt} = $c; } elsif ( defined $src2tgt2cnt{$src}->{$tgt} ) { warn "m[$src,$tgt] := 0\n"; delete $src2tgt2cnt{$src}->{$tgt}; } $src2tgt2cnt{$src}->{$tgt} // 0; }