#!/usr/bin/env perl # # splitx - like split, but on regular expressions # # $Id$ use strict; use warnings; use Getopt::Std; my %opt; getopts( 'e:F:p:s:ahlnv', \%opt ) or HELP_MESSAGE("failed to parse command line options: $!"); my $sep = $opt{'F'} // '^\s*$[\r\n]*'; my $sel = $opt{'e'} // '\S'; my $pat = $opt{'p'} // '%d%b-%i%x'; my $repl = $opt{s}; $opt{'h'} and HELP_MESSAGE(); sub HELP_MESSAGE { print STDERR join( ' ', @_ ), "\n" if @_; print STDERR < ) { my ( $dir, $file ) = ( $ARGV =~ m#/# ) ? ( $ARGV =~ m#(.*/)(.*)# ) : ( '', $ARGV ); my ( $base, $ext ) = ( $file =~ m#([^.]*)(.*)# ); my %x = ( 's' => $ARGV, 'd' => $dir, 'b' => $base, 'x' => $ext, 'i' => 0 ); foreach my $frag ( split( /$sep/m, $chunk ) ) { #warn "frag split with $sep: {$frag}\n"; # only use $frag if $sel matches, and # if $sel contains capturing brackets, split $frag based on # whether the captured value is the same in consecutive matches my @pos = (0); # the starting pos of each $subfrag within $frag, # surrounded by the start and end+1 of $frag my @submatch = (''); # the submatches for each $subfrag (i.e. $1 // '') my $selmatches = 0; while ( $frag =~ /$sel/mg ) { $selmatches = 1; # $sel matches (once or once more) # do we need to split because the submatch value changed? my $submatch = $1 // ''; if ( $submatch ne $submatch[$#submatch] ) { # yes, we do #warn "split, submatch is now $submatch\n"; push( @pos, $-[0] ); push( @submatch, $submatch ); } else { #warn "no split, submatch is still $submatch\n"; } } next if !$selmatches; # $sel didn't match at all, so skip this $frag push( @pos, length($frag) ); #warn "subfragments: ", join( ", ", @pos ), " in $frag\n"; # split $frag on @pos foreach my $i ( 0 .. $#pos - 1 ) { my $subfrag = substr( $frag, $pos[$i], $pos[ $i + 1 ] - $pos[$i] ); #warn "substr($frag, $pos[$i], $pos[$i+1]-$pos[$i]) is $subfrag\n"; $x{'p'} = $submatch[$i]; ++$x{'i'}; my $outfn = $pat; while ( my ( $c, $x ) = each %x ) { # warn "replacing %$c with $x in $outfn\n"; $outfn = join( $x, split( /%$c/, $outfn, -1 ) ); } if ( $outfn eq $ARGV ) { warn "attempt to overwrite input file $ARGV, skipped\n"; } elsif ( !open( OUT, $opt{a} ? '>>' : '>', $outfn ) ) { warn "cannot write to output file $outfn ($!), skipped\n"; } else { print "$outfn\n" if $opt{l}; $subfrag =~ s/$sel/$repl/mg if defined $repl; my $l = length($subfrag); warn "$l bytes for $outfn\n" if $opt{v}; if ( !$opt{n} ) { print OUT $subfrag; close OUT; # TO DO: keep the fds open when -a is used! } } } } }