#!/usr/bin/env perl
#
# shortest - preserve only line of which no proper prefix is also a line
#
# $Id$
#
# optimized to avoid unnecessary sorting
# still sorts "too much" on nonsorted input;
# using prefix trees or similar might be more efficient

use warnings;
use strict;

use Getopt::Std;

my %opt;
getopts( 'hve:lr:', \%opt )
 or die("Use -h for help\n");

HELP_MESSAGE() if $opt{h};

my $prefixfilter = $opt{e} ? qr/$opt{e}/ : undef;
my @line = map { chomp; $_ } <>;

my %firstline = map { $line[$_] => $_ } reverse 0 .. $#line;

my @sorted = sort keys %firstline;
# on sorted input, we know all prefixes of a string occur before that string

my $prefix;    # the current shortest prefix, if any
my $latest;    # the current longest string with that prefix, if any
my %print;     # the indexes of the strings to print

foreach (@sorted)
{
  if ( defined($prefix) && substr( $_, 0, length($prefix) ) ne $prefix )
  {
    # a new shortest prefix
    undef($prefix);
  }

  if ( !defined($prefix) )
  {
    # $_ is a new shortest prefix

    if ( defined($prefixfilter) && !/$prefixfilter/ )
    {
      # it is not admissible -> we have no current prefix
      ehm("does not pass -p: $_");
    }
    else
    {
      # it is admissible -> this is the new shortest prefix
      ehm("shortest: $_");
      $print{$_} = 1 if !$opt{l};
      $prefix    = $_;
      $latest    = $_;
    }
  }
  # $_ is another string with the same shortest prefix
  elsif ( !$opt{l} )
  {
    # ignore it
  }
  elsif ( substr( $_, 0, length($latest) ) eq $latest )
  {
    # it extends the longest string
    $latest = $_;
    $opt{v} and ehm("extends: $_");
  }
  else
  {
    # it does not extend the longest string
    ehm("longest: $latest");
    $print{$latest} = 1;
    $latest = $_;
  }
}

if ( defined($latest) )
{
  ehm("longest: $latest");
  $print{$latest} = 1;
}

print $_, $/ for grep { $print{$_} } map { $line[$_] } 0 ... $#line;

#--- auxiliaries ---#
#

sub ehm
{
  $opt{v} and warn( join( ' ', @_ ), "\n" );
}

sub HELP_MESSAGE
{
  print STDERR <<ZZ;

Usage: $0 [-e pat] [-r pat] [file1 [file2 [...]]]

to print only the line in the input for which no prefixes are also
line in the input.

Option:

  -e xpr
    Consider only prefixes that match this regular expression.

  -l
    Longest: do not print the shortest prefixes,
    but the strings without proper prefixes.
    THIS OPTION IS NOT IMPLEMENTED CORRECTLY YET.

  -v
    Print matches and rejections to standard error.

    Note: never removes duplicates of lines; for that, see ./uniqer.
ZZ
  exit(0);
}