#!/usr/bin/env perl # # substrings - determine which nonempty input lines are substrings of each other # # $Id$ # hold all input lines in memory, discarding duplicates use warnings; use strict; use Getopt::Std; my %opt; getopts( 'e:1pqt:xh', \%opt ); my $proper = $opt{'p'} ? 1 : 0; my $equal = $opt{'q'} ? 1 : 0; my $once = $opt{'1'} ? 1 : 0; if ( $proper && $equal ) { die "-p and -q are incompatible\n"; } my $sep = $opt{'t'} // ' '; if ( $opt{'h'} ) { print <) { chomp; next if !length($_); push( @lines, $_ ); if ( defined $opt{'e'} && !defined $line2xpr{$_} ) # the second condition should be documented in the help { my $l = $_; eval $opt{'e'}; $line2xpr{$l} = $_; } else { $line2xpr{$_} = $_; # sorry I'm lazy } } # invert %line2xpr - which is not usually injective my %xpr2lines; while ( my ( $l, $xpr ) = each %line2xpr ) { push( @{ $xpr2lines{$xpr} }, $l ); } my @xprs = sort keys %xpr2lines; # the naivest algorithm possible: my %substr2strings; if ( !$equal ) { for ( my $i = 0 ; $i < $#xprs ; ++$i ) { my $s = $xprs[$i]; my @ts = (); for ( my $j = $i + 1 ; $j <= $#xprs ; ++$j ) { my $t = $xprs[$j]; if ( index( $t, $s ) >= $[ ) { push( @ts, $t ); } } if (@ts) { $substr2strings{$s} = [@ts]; #warn "$s is substring of ", join($sep, @ts), "\n"; } } } foreach my $l (@lines) { my $xpr = $line2xpr{$l}; if ( $once && grep { ( $_ cmp $l ) < 0 } @{ $xpr2lines{$xpr} } ) { # some other line, lexicographically smaller than $l, has the same $xpr next; } my @suplines; if ( defined $substr2strings{$xpr} ) { my @supxprs = @{ $substr2strings{$xpr} }; @suplines = map { @{ $xpr2lines{$_} } } @supxprs; my %seen = (); @suplines = grep { !$seen{$_}++ } @suplines; } else { @suplines = (); } # @suplines is the set of filenames of which the $xpr is a # proper superstring of the $xpr of $l if ( $equal || !$proper ) { # prepend the filenames of which the $xpr is the same as that of $l unshift( @suplines, grep { $_ ne $l } @{ $xpr2lines{$xpr} } ); } unshift( @suplines, $l ); if ( $opt{'x'} ) { unshift( @suplines, $xpr ); } print join( $sep, @suplines ), $/; }