#!/usr/bin/env perl # getmailaddr # # get email addresses from all files in a given subdirectory # e.g. getmailaddr /usr/spool/news/comp/infosystems/www # gets email addresses from the comp.infosystems.www.* newsgroups # # $Id$ use strict; use warnings; use File::Find; sub next_email_address # extracts first email address from $_ and chops $_ to the unscanned remainder # NOT foolproof - can err on both sides { if ( /@/ && ( ( my $l, my $before, my $after, my $r, $_ ) = /(^|[^-%+!\w])(\w[-%+!.\w]+?)@(\w[-%+!.\w]+\w)($|\W)(.*)/i ) ) # parse { $after =~ /\./ # @ followed by . && ( $l ne '<' && $r ne '>' # not enclosed by <> - message-ids are! || $before !~ /\d/ ) # well, OK, no digits - let 'em pass ? "\L$before\@$after\E" : undef; } else # no email address or anything similar found { $_ = undef; } } @ARGV or @ARGV = '-'; my (%line); # maps email addresses to the last line in which they were found my (%cnt); # maps email addresses to the number of occurrences, just for fun warn("$0: scanning files ...\n"); undef($!); undef($/); sub slurp { my ($file) = @_; my $infh; if ( $file eq '-' ) { $infh = *STDIN; } else { if ( !-r || !-T ) { warn("not a text file or not readable: $file\n"); return; } if ( !open( $infh, '<', $file ) ) { warn("cannot open file '$file'\n"); return; } } warn("scanning $file\n"); foreach ( split( /\n/, <$infh> ) ) { while ( ( ( my $line ) = /(^.*)/ ) && ( my $addr = &next_email_address ) ) { $cnt{$addr}++ or print "$addr\n"; $line{$addr} = $line; } } close($infh); } my %is_arg = map { $_ => 1 } @ARGV; slurp('-') if delete $is_arg{'-'}; find( { wanted => sub { slurp($File::Find::name) }, follow => 1 }, keys %is_arg ) if %is_arg;