#!/usr/bin/env perl # # ln-r - in the given directory/ies, ln identical files to each other # # $Id: ln-r 738 2010-07-21 09:12:52Z rp $ # when are two files ln-able? this script thinks it knows use warnings; use strict; use Getopt::Std; use File::Find; use File::Compare; use Digest::MD5; $0 =~ m#[^/]+$#; my $me = $&; my %opt; getopts( 'hvdns0laxt:', \%opt ); my $verbose = $opt{v}; my $debug = $opt{d}; my $dontact = $opt{n}; my $symbolic = $opt{s}; my $xdev = $opt{x}; my $empty2 = $opt{0}; my $sym2 = $opt{l}; my $linked2 = $opt{a}; my $sep = $opt{t}; die "error: -t must be followed by a separator\n" if defined($sep) && ($sep =~ /^-/ || ($sep =~ /[^.]/ && -e $sep)); HELP_MESSAGE() if $opt{h} || !@ARGV || ( defined $opt{x} && !defined $opt{s} ); my $csvw; # the object that will write the CSV file, if -t is specified if ( defined $opt{t} ) { eval { require Text::CSV::Encoded } or die "cannot produce CSV, you may have to install Text::CSV::Encoded from CPAN:\n$@"; $csvw = new Text::CSV::Encoded( { sep_char => $sep, encoding => 'utf8', eol => $/ } ) or die sprintf( "cannot create CSV writer: %s\n", join( ', ', Text::CSV::Encoded->error_diag ) ); } sub HELP_MESSAGE { print STDERR < $fn2nrbyt{ $_[1] } } : sub { $fn2nrbyt{ $_[0] } <=> $fn2nrbyt{ $_[1] } || $fn2devno{ $_[0] } <=> $fn2devno{ $_[1] }; } ), keys %fn2devno ); my ( $nreq, $nrne ) = ( 0, 0 ); foreach my $same_dev_and_size (@fns_by_size) { next if @$same_dev_and_size < 2; # for debugging only if ( $debug && defined( my $fn = $same_dev_and_size->[0] ) ) { if ($xdev) { warn sprintf( "%d files on device %d have %d bytes, e.g. %s\n", scalar(@$same_dev_and_size), $fn2devno{$fn}, $fn2nrbyt{$fn}, $fn ); } else { warn sprintf( "%d files have %d bytes, e.g. %s\n", scalar(@$same_dev_and_size), $fn2nrbyt{$fn}, $fn ); } } # partition further by MD5 checksum my %fn2md5 = map { $_ => fn2md5($_) } @$same_dev_and_size; foreach my $same_md5 ( partition( sub { $fn2md5{ $_[0] } cmp $fn2md5{ $_[1] } }, @$same_dev_and_size ) ) { next if @$same_md5 < 2; # partition further by content equality foreach my $same_content ( partition( ( $debug ? \&counting_compare : \&compare ), @$same_md5 ) ) { next if @$same_content < 2; if ($debug) { warn sprintf( "%d files are compared, e.g. %s\n", scalar(@$same_content), $same_content->[0] ); } link_all(@$same_content); } } } if ($debug) { warn "$nreq file comparisons found equality\n"; warn "$nrne file comparisons found inequality\n"; } exit(0); sub fn2md5 { open( FILE, '<', $_[0] ) or return; binmode(FILE); my $md5 = Digest::MD5->new->addfile(*FILE)->digest; close(FILE); return $md5; } sub counting_compare { my $cmp = compare(@_); if ($cmp == 0) { ++$nreq; } else { ++$nrne; } return $cmp; } sub partition # partitions a list based on a comparison function, # returning a list of arrayrefs with the partitions; # the first argument is the comparison function, # which (for compatibility with sort) should return 0 # iff the arguments belong in the same partition { my $cmp = shift(@_) or die "BUG: partition function incorrectly called\n"; return () if !@_; my @result = ( [ shift(@_) ] ); ITEM: foreach my $item (@_) { foreach my $part (@result) { if ( !&$cmp( $item, $part->[0] ) ) { push( @$part, $item ); # TODO: use linked lists instead for efficiency next ITEM; # last isn't good enough } } push( @result, [$item] ); } @result; } sub link_all { my $fn = shift(@_); my $sopt = $symbolic ? ' -s' : ''; foreach my $lnable (@_) { # only include files already linked together if -= is specified next if !defined($linked2) && ( $fn2inode{$lnable} eq $fn2inode{$fn} ); if ($verbose) { warn "$me: ln$sopt $fn $lnable\n"; # ln [-s] OLD NEW } if ( defined $sep ) { # just output a table entry $csvw->print( *STDOUT, [ sort ($fn, $lnable) ] ); next; } # first mv it to a new, unique name, so we can undo if linking fails my $f2name = $lnable; $f2name =~ s/.$/0/; while ( -e $f2name ) { ++$f2name; } if ($dontact) { # nothing } elsif ( !rename $lnable, $f2name ) { warn "$me: failed to mv $lnable $f2name\n"; } elsif ( $symbolic ? !symlink( $fn, $lnable ) : !link( $fn, $lnable ) ) { warn "$me: failed to ln$sopt $fn $lnable\n"; # linking failed; undo rename $f2name, $lnable; } elsif ( !unlink $f2name ) { warn "$me: failed to rm $f2name, the mved $fn\n"; } } }