#!/usr/bin/env perl # # csv2dot - what it says # # $Id$ use warnings; use strict; use Getopt::Std; use Text::CSV_XS; my %opt; getopts( 'h1bcrHN:E:t:', \%opt ); help() if $opt{'h'}; my $sep = $opt{'t'} // ','; my $recolormissing = defined $opt{'1'}; my $use_biflows = defined $opt{'b'}; my $use_clusters = defined $opt{'c'}; my $draw_hierarchy = defined $opt{'H'}; my $nodelabel = $opt{'N'}; # also applied to subgraphs! my $arclabel = $opt{'E'}; my $ranked_same = defined $opt{'r'}; my $missingcolor = 'red3'; my $csvh = new Text::CSV_XS( { sep_char => $sep } ) or die "the CSV parser doesn't work (not installed?)\n"; @ARGV or @ARGV = ('-'); my $posnat; sub next_posnat { ++$posnat; } doit(); exit(0); sub help { print STDERR <getline(*ARGV) ) { #warn scalar(@$row), " fields: ", join(', ', map { "'$_'" } @$row), "\n"; if ( row_is_empty($row) ) { # finish this group? if (@rows) { push( @rowss, [@rows] ); @rows = (); } } else { push( @rows, $row ); } # finish this group? if (eof) { push( @rowss, [@rows] ); @rows = (); } } if ( !@rowss ) { # no groups at all - we're done } elsif ( @rowss == 1 ) { # a single group - don't use the subgraph construct print_dot_header(); print_dot_graph( $rowss[0] ); print "}\n"; } else { # multiple groups - use the subgraph construct print_dot_header(); foreach my $rows (@rowss) { print_dot_subgraph_header(' '); print_dot_graph($rows); print " }\n"; } print "}\n"; } } sub row_is_empty { # on an empty input row, getline() inexplicably produces # a 1-field row that contains the empty string as a field my ($row) = @_; !$row || !@$row || ( @$row == 1 && !length( $row->[0] ) ); } sub print_dot_header { printf "digraph %s {\n", quoted( join( ' ', @ARGV ) ); print " node [ color=blue, fontcolor=navy ]\n"; print " edge [ color=darkgreen, fontcolor=darkslategray ]\n"; } sub print_dot_graph { foreach my $rows (@_) { #warn "a group of ", scalar(@$rows), " rows\n"; if ($draw_hierarchy) { print_nested_boxes($rows); } else { print_dot_nodes_and_arcs($rows); } } } sub print_dot_subgraph_header { my ($indent) = @_; my $cl_ = $use_clusters ? 'cluster_' : 'noncluster_'; print $indent, 'subgraph "', $cl_, next_posnat(), '" {', "\n"; print $indent, ' ranked="same"', "\n" if $ranked_same; } sub print_nested_boxes { my $rows = $_[0]; my ( $node2degree, $src2tgt21 ) = rows2graph($rows); my $indegree = graph2posindegree($src2tgt21); my @nodes = @{ rows2nodes($rows) }; # in input order my %node2idx = map { $nodes[$_] => $_ } 0 .. $#nodes; my %printed21; my @roots = grep { !defined( $indegree->{$_} ) } @nodes; my $print_subtrees_boxes; $print_subtrees_boxes = sub { my ( $level, @roots ) = @_; my $indent = ' ' x $level; foreach my $r (@roots) { if ( $printed21{$r} ) { # $r and its subtree have already been printed # print a 1-node subgraph with an arrow to the previously printed $r print_dot_subgraph_header($indent); # generate a name for the copy my $i; for ( $i = 2 ; $printed21{ $r . $i } ; ++$i ) { } # create an '=' arrow from an invisible node to the printed $r print $indent, ' ', quoted( $r . $i ), invisible_node_attr(), "\n"; print $indent, ' ', quoted( $r . $i ), ' -> ', quoted($r), ' [label = "=", dir="both"]', "\n"; print $indent, ' graph', dot_node_attr( $r, 0 ); print "\n", $indent, '}', "\n"; } else { # $r hasn't been printed yet ++$printed21{$r}; # print a subgraph for $r and its contents print_dot_subgraph_header($indent); # print an invisible node for arcs to point to print $indent, ' ', quoted($r), invisible_node_attr(), "\n"; # print the contents, if any if ( my @children = keys %{ $src2tgt21->{$r} } ) { &$print_subtrees_boxes( $level + 1, sort { $node2idx{$a} <=> $node2idx{$b} } @children ); } # add the name ($r) print $indent, ' graph', dot_node_attr( $r, 0 ); print "\n", $indent, "}\n"; } } }; &$print_subtrees_boxes( 1, @roots ); } sub invisible_node_attr { ' [ color="invis", label="", height=0, width=0 ]'; } sub print_dot_nodes_and_arcs { my $rows = $_[0]; my ( $node2degree, $src2tgt21 ) = rows2graph($rows); foreach my $n ( keys %$node2degree ) { printf " %s%s\n", quoted($n), dot_node_attr( $n, !$node2degree->{$n} ); } print "\n"; foreach my $row (@$rows) { foreach my $i ( 0 .. @$row - 2 ) { my ( $src, $tgt ) = ( $row->[$i], $row->[ $i + 1 ] ); next if !defined $src2tgt21->{$src}->{$tgt}; my %attr; if ( $recolormissing && grep { !$node2degree->{$_} } ( $src, $tgt ) ) { $attr{color} = $missingcolor; } if ( $use_biflows && defined $src2tgt21->{$tgt}->{$src} ) { $attr{dir} = 'both'; delete $src2tgt21->{$tgt}->{$src}; } if ( defined($arclabel) ) { $attr{label} = join( $tgt, split( '%r', join( $src, split( '%l', $arclabel ) ) ) ); } printf " %s -> %s%s\n", quoted($src), quoted($tgt), %attr ? ' [' . join( ',', map { "$_=$attr{$_}" } keys %attr ) . ']' : ''; } } } sub dot_node_attr { my ( $n, $missing ) = @_; my %attr; $attr{label} = quoted( defined($nodelabel) ? join( $n, split( '%n', $nodelabel ) ) : $n ); if ( $recolormissing && $missing ) { $attr{color} = $missingcolor; } ' [' . join( ',', map { "$_=$attr{$_}" } keys %attr ) . ']'; } sub rows2graph { my $rows = $_[0]; my %node2degree; my %src2tgt21; foreach my $row (@$rows) { my $l = @$row; if ( $l == 1 ) { $node2degree{ $row->[0] } = 0 if !defined $node2degree{ $row->[0] }; } else { foreach my $i ( 0 .. $l - 2 ) { my ( $src, $tgt ) = ( $row->[$i], $row->[ $i + 1 ] ); ++$node2degree{$_} for ( $src, $tgt ); ++$src2tgt21{$src}->{$tgt}; } } } ( \%node2degree, \%src2tgt21 ); } sub graph2posindegree # maps all nodes to their indegree if it is positive { my $src2tgt21 = $_[0]; my %indegree; foreach my $src ( keys %$src2tgt21 ) { foreach my $tgt ( keys %{ $src2tgt21->{$src} } ) { ++$indegree{$tgt}; } } \%indegree; } sub rows2nodes # returns the nodes in order of first appearance { my $rows = $_[0]; my @nodeswdups = map { @$_ } @$rows; my %node2idx; my @nodes = grep { !$node2idx{$_}++ } @nodeswdups; \@nodes; } sub quoted { my $name = shift(@_); $name =~ s/["\\]/\\$&/g; '"' . $name . '"'; }