#!/usr/bin/env perl
#
# diffcluster - compare files and output file names on same line iff equal
#
# $Id: diffcluster 663 2009-10-01 19:10:51Z rp $

# Note: it's both faster and more exact to just open the files
# and read them until a difference is found.
# But we may run out of file descriptors that way.
# The solution to that is divide and conquer (compare first half,
# then second half, then merge results).
#
# This implementation instead compares MD5 digests of the file contents,
# so "equal" means "same digest".

use 5.010;    # the defined-or operator (//) needs perl 5.10 or later
use warnings;
use strict;

use Getopt::Std;
use Digest::MD5 qw(md5);

my %opt;
getopts( 'hvt:', \%opt );

help() if $opt{'h'};

# Separator printed between names of files that share a digest.
my $sep = $opt{'t'} // ' ';

# help() - print a usage summary on STDERR and terminate the program.
#
# NOTE(review): the original usage text was lost when this file was
# mangled; the wording below and the exit status are a reconstruction
# based on the option string passed to getopts ('hvt:') - confirm.
sub help {
    print STDERR join(
        "\n",
        'usage: diffcluster [-h] [-v] [-t SEP] FILE...',
        '  -h      show this help and exit',
        '  -v      report progress on standard error',
        '  -t SEP  put SEP between names of equal files (default: a space)',
        'Prints the given file names; names of files with the same MD5',
        'digest are printed on the same line.',
        ''
    );
    exit 0;
}

# ehm(@words) - progress message on STDERR.
#
# NOTE(review): the original body was lost; presumably it reports only
# when -v is given and joins its arguments with spaces (it is called with
# both one string and a list of words) - confirm.
sub ehm {
    my @words = @_;
    print STDERR "@words\n" if $opt{'v'};
    return;
}

# Map each readable file name to the binary MD5 digest of its contents.
my %sum;

foreach my $f (@ARGV) {
    if ( open( my $in, '<', $f ) ) {

        # Compare raw bytes, and stream the file through the digest
        # instead of slurping it into memory.
        binmode($in);

        # addfile() croaks when reading fails (e.g. $f is a directory),
        # hence the eval.
        my $digest = eval { Digest::MD5->new->addfile($in)->digest };
        close($in);

        if ( defined $digest ) {
            $sum{$f} = $digest;
            ehm("taken MD5 hash of $f");
        }
        else {
            warn "cannot read '$f', skipped\n";
        }
    }
    else {
        warn "cannot open '$f', skipped\n";
    }
}

# Nothing could be hashed: produce no output at all.
%sum or exit;

ehm( 'comparing', scalar( keys %sum ), 'files' );

# Sort by digest so equal files become neighbours; the file name is the
# secondary key so the output order does not depend on hash ordering.
my $prevsum = undef;
foreach my $name ( sort { $sum{$a} cmp $sum{$b} or $a cmp $b } keys %sum ) {

    # Same digest as the previous name: stay on the line, else start a
    # new one. Nothing is printed before the very first name.
    if ( defined $prevsum ) {
        print( $prevsum eq $sum{$name} ? $sep : "\n" );
    }
    print $name;
    $prevsum = $sum{$name};
}
print "\n";