#!/usr/bin/env perl
#
# undup-mbox - removes duplicate items from a Unix mailbox
#
# $Id$
#
# I've tried to do this the proper way, with takemail
#   http://cpansearch.perl.org/src/MARKOV/Mail-Box-2.087/scripts/takemail
# but it is broken (doesn't emit From lines).
#
# Usage: undup-mbox [-h|-?] [-f] [-e regexp] [mailbox ...]
#   Reads the named mailboxes (or standard input when none is given, or
#   when the name is '-'), drops every item that duplicates an earlier one,
#   and writes the surviving items to standard output in their original
#   order.  A one-line summary is written to standard error.

use strict;
use warnings;
use Getopt::Std;

# Program name without its directory part, used as a prefix in messages.
my $me = $0;
$me =~ s#.*/##;

# An item starts at a "From " line that is immediately followed by something
# that looks like a header field ("Name:").  Requiring the header keeps body
# lines that merely begin with "From " from being taken as item boundaries.
my $ITEM_START = qr/^(?=From [^\n]*\n\S+:)/m;

# split_items($text) - split the full text of one mailbox into its items.
# Returns the list of items (each starting with its "From " line); returns
# an empty list for undefined or empty input.
sub split_items {
    my ($text) = @_;
    return () unless defined $text && length $text;
    return split $ITEM_START, $text;
}

# read_items($file) - slurp one mailbox and return its items.
# '-' means standard input.  An unreadable file produces a warning and an
# empty list, so the remaining mailboxes are still processed.
sub read_items {
    my ($file) = @_;

    my $fh;
    if ( $file eq '-' ) {
        $fh = \*STDIN;
    }
    elsif ( !open( $fh, '<', $file ) ) {
        warn "$me: cannot read $file, skipping it\n";
        return ();
    }

    # Slurp mode is confined to this one read; $/ is restored afterwards.
    my $text = do { local $/; <$fh> };
    close($fh) unless $file eq '-';

    # Items are split per file, so an item never spans a file boundary.
    return split_items($text);
}

# kept_indices(\@items, $by_from_line, $selector) - decide which items stay.
#   \@items        all items, in input order
#   $by_from_line  true: two items are duplicates when their "From " lines
#                  are equal; false: only when the whole items are equal
#   $selector      regular expression (string); an item is only kept when
#                  it matches (multi-line mode)
# Returns the ascending list of indices of the items to keep: for every
# key, the first item carrying that key that also matches the selector.
sub kept_indices {
    my ( $items, $by_from_line, $selector ) = @_;

    # An empty pattern in a match means "reuse the last successful regex",
    # not "match anything"; substitute an explicit empty group instead.
    my $select_re = length($selector) ? qr/$selector/m : qr/(?:)/m;

    my %index_of;    # maps From line (-f) or item (otherwise) to item index
    for my $idx ( 0 .. $#{$items} ) {
        my $item = $items->[$idx];

        my $key = $item;
        if ( $by_from_line && $item =~ /^(From [^\n]*)/ ) {
            $key = $1;
        }

        next if exists $index_of{$key};
        next unless $item =~ $select_re;
        $index_of{$key} = $idx;
    }

    return sort { $a <=> $b } values %index_of;
}

# main() - parse the command line, read all mailboxes, print the survivors.
sub main {
    my %opt;
    getopts( 'h?fe:', \%opt );

    if ( $opt{'h'} || $opt{'?'} ) {
        my $mailbox = defined( $ENV{'MAIL'} ) ? $ENV{'MAIL'} : 'incoming';

        # NOTE(review): the original help text was lost; this wording is
        # reconstructed from the option string and the code below - confirm.
        print STDERR <<"ZZ";
usage: $me [-h] [-f] [-e regexp] [mailbox ...]

Removes duplicate items from Unix mailboxes.  Reads standard input when
no mailbox (or '-') is given; writes the kept items to standard output.

  -f          items with identical "From " lines count as duplicates
              (default: the whole item must be identical)
  -e regexp   keep only items matching regexp
  -h, -?      show this help

example: $me $mailbox > inbox-undupped
ZZ
        exit(0);
    }

    my @items = map { read_items($_) } ( @ARGV ? @ARGV : ('-') );

    my $selector = defined( $opt{'e'} ) ? $opt{'e'} : '.';
    my @kept = kept_indices( \@items, $opt{'f'}, $selector );

    warn sprintf( "%s: %d of %d messages kept\n",
        $me, scalar(@kept), scalar(@items) );

    print STDOUT @items[@kept];
    return;
}

# Run only when executed as a program, so the helpers above can be loaded
# (e.g. by a test) without touching the command line or standard input.
main() unless caller;

1;