#!/usr/bin/env perl # # shortest - preserve only line of which no proper prefix is also a line # # $Id$ # # optimized to avoid unnecessary sorting # still sorts "too much" on nonsorted input; # using prefix trees or similar might be more efficient use warnings; use strict; use Getopt::Std; my %opt; getopts( 'hve:lr:', \%opt ) or die("Use -h for help\n"); HELP_MESSAGE() if $opt{h}; my $prefixfilter = $opt{e} ? qr/$opt{e}/ : undef; my @line = map { chomp; $_ } <>; my %firstline = map { $line[$_] => $_ } reverse 0 .. $#line; my @sorted = sort keys %firstline; # on sorted input, we know all prefixes of a string occur before that string my $prefix; # the current shortest prefix, if any my $latest; # the current longest string with that prefix, if any my %print; # the indexes of the strings to print foreach (@sorted) { if ( defined($prefix) && substr( $_, 0, length($prefix) ) ne $prefix ) { # a new shortest prefix undef($prefix); } if ( !defined($prefix) ) { # $_ is a new shortest prefix if ( defined($prefixfilter) && !/$prefixfilter/ ) { # it is not admissible -> we have no current prefix ehm("does not pass -p: $_"); } else { # it is admissible -> this is the new shortest prefix ehm("shortest: $_"); $print{$_} = 1 if !$opt{l}; $prefix = $_; $latest = $_; } } # $_ is another string with the same shortest prefix elsif ( !$opt{l} ) { # ignore it } elsif ( substr( $_, 0, length($latest) ) eq $latest ) { # it extends the longest string $latest = $_; $opt{v} and ehm("extends: $_"); } else { # it does not extend the longest string ehm("longest: $latest"); $print{$latest} = 1; $latest = $_; } } if ( defined($latest) ) { ehm("longest: $latest"); $print{$latest} = 1; } print $_, $/ for grep { $print{$_} } map { $line[$_] } 0 ... $#line; #--- auxiliaries ---# # sub ehm { $opt{v} and warn( join( ' ', @_ ), "\n" ); } sub HELP_MESSAGE { print STDERR <