#!/usr/bin/env perl # # apache-virtual-sites - list info about Apache sites # # $Id$ # postprocesses httpd -S output, # opens config files, and tries to list some site info use warnings; use strict; use File::Spec::Functions qw(rel2abs splitpath); use Getopt::Std; use File::Find; use POSIX qw(strftime); my ($me) = $0 =~ m#([^/]*$)#; my %opt; getopts( 'cdhlpr:Sxv', \%opt ); my @oneofopt = qw(c d l p r x); my $oneofopt = join( ', ', map { "-$_" } @oneofopt ); if ( $opt{h} ) { HELP_MESSAGE(); } elsif ( ( $opt{S} ? $opt{S} : 0 ) + @ARGV != 1 ) { die "Supply -S and no argument, or a file or directory as argument, or -h for help.\n"; } sub HELP_MESSAGE { my $oneofopt2 = sprintf( '[%s]', join( '|', map { "-$_" } @oneofopt ) ); my $cmd = "$me [-r basedir] $oneofopt [-v]"; print STDERR <) { if (/^\s*ServerRoot\s*"(.+)"/) { $server_root = $1; next; } # example input line: # port 80 namevhost wwwooti.win.tue.nl (/etc/httpd/conf/virthosts.d/wwwooti.win.tue.nl.conf:1) my ( $port, $host, $conf, $line_nr ) = m#^\s*port\s+(\d+)\s+namevhost\s+(\S+)\s+\((\S+):(\d+)\)#; if ( !defined($line_nr) ) { ( $port, $host, $conf, $line_nr ) = m#^\s*\*:(\d+)\s+(\S+)\s+\((\S+):(\d+)\)#; } if ( !defined($line_nr) ) { next; } #print_site_info( $host, $port, $conf, $line_nr ); # restrictive and inefficient $conf2nr2host2port{$conf}->{$line_nr}->{$host} = $port; } if ( !$. ) { die "fatal error: the input was empty!\n"; } my $main_conf; if ( $opt{r} ) { $server_root = defined($server_root) ? $opt{r} . $server_root : $opt{r}; } if ($server_root) { # it is specified in the input -d $server_root or die "cannot find ServerRoot directory on file system: $server_root\n"; foreach my $base ( 'apache2', 'httpd' ) { foreach my $conf ( "$server_root/$base.conf", "$server_root/conf/$base.conf" ) { if ( -f $conf ) { $main_conf = $conf; last; } } } $main_conf or die "config directory does not containg Apache's apache2.conf or httpd.conf: $server_root\n"; } else { # try to derive it from the $conf values my %seen; foreach my $conf ( keys %conf2nr2host2port ) { for ( my ($dir) = $conf =~ m#^(.*?)/*$# ; !$seen{$dir}++ && $dir =~ m#[^/]# ; $dir =~ s#/*[^/]+$## ) { foreach my $base ( 'apache2', 'httpd' ) { if ( -f "$dir/$base.conf" ) { $main_conf = "$dir/$base.conf"; $server_root = $dir; $server_root =~ s#/conf$##; # heuristic last; } } } $main_conf and last; } $main_conf or die "cannot derive implicit ServerRoot\n"; } ( $server_root, $main_conf ); } sub find_server_root { my ($main_conf) = @_; my $server_root; # scan the config file for the actual ServerRoot # open( my $conf_fh, '<', $main_conf ) or die "config file cannot be read: $main_conf\n"; while (<$conf_fh>) { if (/^\s*ServerRoot\s*"(.+)"/) { $server_root = $1; next; } } close($conf_fh); ehm("main config file is '$main_conf'"); if ( defined $server_root ) { ehm("ServerRoot is '$server_root'"); } else { ehm("ServerRoot not found"); } $server_root; } my ( $server_root, $main_conf ); if ( $opt{S} ) { # no input arguments - read stdin ( $server_root, $main_conf ) = parse_apachectl_output(); my $found_server_root = find_server_root($main_conf); if ( !$server_root && $found_server_root ) { $server_root = $found_server_root; } } elsif ( @ARGV > 1 ) { # not supported die "supply at most one file or directory as an argument\n"; } elsif ( !-e $ARGV[0] ) { die "Does not exist: $ARGV[0]\n"; } elsif ( !-f $ARGV[0] ) { die "Not an Apache configuration file: $ARGV[0]\n"; } else { # the one argument is the configuration file $main_conf = $ARGV[0]; $server_root = $opt{r} ? $opt{r} : find_server_root($main_conf); if ( !defined $server_root ) { $server_root = dirname($main_conf); } } read_and_report($main_conf); exit(0); sub dirname { ( splitpath( $_[0] ) )[1]; } sub read_and_report { my ($conf) = @_; #warn "ServerRoot is $server_root\n"; my @lines = included_lines($conf); $opt{c} and print join( ',', @$_ ) foreach @lines; my @sections = sections(@lines); foreach (@sections) { $opt{p} and print_directives($_); ( $opt{d} || $opt{x} ) and print_content_directories($_); $opt{l} and print_log_info( $_, $sections[0] ); } } sub included_lines { my @files = map { -d $_ ? glob("$_/*") : ($_) } @_; map { included_lines_from_file($_) } @files; } sub included_lines_from_file { my ($included) = @_; my $abs_included = rel2abs( $included, $server_root ); if ( !-f $abs_included ) { warn "file not found: Include $included\n"; (); } elsif ( open( my $fh, '<', $abs_included ) ) { ehm("reading config file '$abs_included'"); my @lines; while (<$fh>) { if ( my ($included) = m#^\s*Include(?:Optional)?\s+(\S+)#i ) { my $abs_included = rel2abs( $included, $server_root ); push( @lines, included_lines($_) ) for glob($abs_included); } else { push( @lines, [ $abs_included, $., $_ ] ); } } close($fh); @lines; } else { warn "file will not open: Include $included\n"; (); } } sub sections # given the @lines yielded by included_lines(), # produces them for each host, prepended by the host URL { # first, scan the config to look up properties such as # ServerName, Port and Listen, SSLEngine # we must record the directives per section # we do this by recording them with its starting line number # (its opening , or 0 if outside of VirtualHost) my $snr = 0; my %startnr2prop2value; foreach my $nr ( 0 .. $#_ ) { my ( $conf, $nr2, $l ) = @{ $_[$nr] }; if ( $l =~ /^\s*#/ ) { # a comment } elsif ( my ( $vh, $hostport ) = $l =~ m#<(VirtualHost)\s+(\S+)\s*>#i ) { #warn "<$vh> at line $nr\n"; $snr and warn "nested <$vh> section, all bets are off!\n"; $snr = $nr; $startnr2prop2value{$snr}->{ lc $vh } = $hostport; $startnr2prop2value{$snr}->{Original}->{ lc $vh } = $vh; } elsif ( $l =~ m#{$lcname} = $property{$lcname}; $startnr2prop2value{$snr}->{Original}->{$lcname} = $property{Original}->{$lcname}; } } } # list all (different) section starting line numbers found my @startnr = sort { $a <=> $b } keys %startnr2prop2value; #warn join(' ', "section starting numbers:", @startnr), "\n"; # for each, try to construct a host URL # parse Listen statements # at least one Listen statement must be present, of the form # Listen [host:]portnr [protocol] # parse NameVirtualHost statements, of the form # NameVirtualHost host[:portnr] # both are forbidden in VirtualHost sections; # try parsing them there anyway; # we expect to only see values for $nr == 0 my %listen_startnr2hostport2prot; my %name_virtual_startnr2hostports; foreach my $nr (@startnr) { foreach ( keys %{ $startnr2prop2value{$nr} } ) { if ( my ( $hostport, $prot ) = /^Listen\s+(\S+)\s*(\S*)/i ) { $listen_startnr2hostport2prot{$nr}->{$hostport} = $prot; #warn "parsed Listen $hostport $prot in section starting at line $nr\n"; } elsif (/^Listen/i) { warn "ignoring unparsable Listen statement: $_\n"; } elsif ( ($hostport) = /^NameVirtualHost (\S+)/ ) { push( @{ $name_virtual_startnr2hostports{$nr} }, $hostport ); #warn "parsed NameVirtualHost $hostport in section starting at line $nr\n"; } } } my %startnr2urls; foreach my $nr (@startnr) { #warn scalar(keys %{$listen_startnr2hostport2prot{$nr}}), " Listen hostports for section starting at $nr\n"; my @hostports = exists( $startnr2prop2value{$nr}->{virtualhost} ) ? ( $startnr2prop2value{$nr}->{virtualhost} ) : sort keys %{ $listen_startnr2hostport2prot{$nr} }; if ( !@hostports ) { warn "BUG: no host:ports for section starting at line nr $nr\n"; next; } foreach my $hostport (@hostports) { my ( $host, $port ) = $hostport =~ /:/ ? ( $`, $' ) : ( '*', $hostport ); #warn "hostport for section starting at line $nr is $host:$port\n"; if ( $startnr2prop2value{$nr}->{servername} ) { $host = $startnr2prop2value{$nr}->{servername}; } my $prot = $listen_startnr2hostport2prot{$nr}->{$hostport}; if ( !$prot ) { $prot = ( $port eq 443 ) ? 'https' : 'http'; } $hostport = $prot eq 'https' && $port eq 443 ? $host : $prot eq 'http' && $port eq 80 ? $host : "$host:$port"; push( @{ $startnr2urls{$nr} }, "$prot://$hostport/" ); #warn "section starting at line $nr has URL: $prot://$hostport/\n"; } } map { { startnr => $_, properties => $startnr2prop2value{$_}, urls => $startnr2urls{$_} } } @startnr; } sub preferred_url # amongst a list of URLs, pick a 'preferred' one to use in reporting # relevant only for picking a URL for the outer section now, # may become more relevant when we use ServerAliases { return $_[0] if ( @_ < 2 ); if ( my @named = grep { m#://\D# } @_ ) { # prefer a named URL if ( my @qualified = grep { m#://[^/]+\.# } @named ) { # prefer a qualified URL ( sort @qualified )[0]; } else { ( sort @named )[0]; } } elsif ( my @localhost = grep { $_ eq '127.0.0.1' } @_ ) { # prefer localhost ( sort @localhost )[0]; } else { ( sort @_ )[0]; } } sub print_directives { my ( $urls, $property ) = ( $_[0]->{urls}, $_[0]->{properties} ); # just print them my $url = preferred_url(@$urls); foreach my $name ( sort grep { $_ ne 'Original' } keys %$property ) { print join( ',', $url, $property->{Original}->{$name}, $property->{$name} ), "\n"; } } my %dir2ext2nrtimesreported; sub print_content_directories { my ( $urls, $property ) = ( $_[0]->{urls}, $_[0]->{properties} ); my $url = preferred_url(@$urls); my @url_with_top; if ( defined( my $dir = $property->{ lc 'DocumentRoot' } ) ) { push( @url_with_top, [ $url, $dir ] ); } if ( defined( my $userdir = $property->{ lc 'UserDir' } ) ) { setpwent(); while ( my ( $name, $passwd, $uid, $gid, $quota, $comment, $gcos, $home, $shell ) = getpwent() ) { my $dir = "$home/$userdir"; push( @url_with_top, [ "$url~$name/", $dir ] ) if -d $dir; } } foreach my $aliasdef ( sort grep { /^(Alias|Redirect) /i } grep { $_ ne 'Original' } keys %$property ) { my ( $aorr, $alias ) = $aliasdef =~ m#^(Alias|Redirect)\s+/*(.*\S)#i; my $dir = $property->{$aliasdef}; push( @url_with_top, [ "$url$alias", $dir ] ); } my @dir; my %dir2urls; foreach (@url_with_top) { my ( $url, $dir ) = @$_; defined( $dir2urls{$dir} ) or push( @dir, $dir ); push( @{ $dir2urls{$dir} }, $url ); } my %is_subtop; my @subtop; my %dir2uid2ext2size; my %dir2uid2ext2mtime; my $update_size_and_mtime = sub { my ( $dir, $uid, $ext, $size, $mtime ) = @_; #ehm("$File::Find::name contributes to", $dir); $dir2uid2ext2size{$dir}->{$uid}->{$ext} += $size; if ( ( $dir2uid2ext2mtime{$dir}->{$uid}->{$ext} // 0 ) < $mtime ) { $dir2uid2ext2mtime{$dir}->{$uid}->{$ext} = $mtime; } }; #my %dir2ext2nrtimesreported; # no, it's global - across calls my $process = sub { #ehm("processing $File::Find::name"); my $ext = !$opt{x} ? '*' : m#^.*(\..*)$# ? $1 : '-'; my @stat = stat($File::Find::name); my $uid = defined( $stat[4] ) ? $stat[4] : -1; my $size = $stat[7]; my $mtime = $stat[9]; my $d_uid = defined( stat($File::Find::dir) ) ? ( stat($File::Find::dir) )[4] : -1; if ( $uid ne $d_uid ) { if ( -d $File::Find::name ) { # report separately for this subdir $is_subtop{$File::Find::name} = 1; if ( !$opt{x} ) { # report even when 0 $dir2uid2ext2size{$File::Find::name}->{$uid}->{$ext} = 0; $dir2uid2ext2mtime{$File::Find::name}->{$uid}->{$ext} = 0; } } elsif ( -f $File::Find::name ) { # belongs to my parent &$update_size_and_mtime( $File::Find::dir, $uid, $ext, $size, $mtime ); } } elsif ( -f $File::Find::name ) { # belongs to the current top $dir2uid2ext2size{ $subtop[0] ? $subtop[0] : $File::Find::topdir } ->{$uid}->{$ext} += $size; #ehm("adding size of $File::Find::name to ", $subtop[0] ? $subtop[0] : $File::Find::topdir); } }; my $preprocess = sub { #ehm("preprocessing $File::Find::dir"); if ( $is_subtop{$File::Find::dir} ) { unshift( @subtop, $File::Find::dir ); # if done in $process, we couldn't undo it when pre/postprocess aren't called ehm( 'entering differently owned directory', $subtop[0] ); #ehm('all are', @subtop); } grep { !$dir2urls{$_} } @_; }; my $postprocess = sub { #ehm("postprocessing $File::Find::dir"); if ( $is_subtop{$File::Find::dir} ) { ehm( 'leaving differently owned directory', $subtop[0] ); shift(@subtop); #ehm('left are', @subtop); } if ( @subtop && $File::Find::dir eq $File::Find::topdir ) { ehm('bug: subtop administration is broken'); exit(1); } my $uid2ext2size = $dir2uid2ext2size{$File::Find::dir} or return; my $uid2ext2mtime = $dir2uid2ext2mtime{$File::Find::dir} or ehm( 'BUG: size without mtime in', $File::Find::dir ); ehm("reporting for $File::Find::dir"); my ($reltop) = substr( $File::Find::dir, length($File::Find::topdir) ) =~ m#^/*(.*)#; foreach my $uid ( sort { $a <=> $b } keys %{$uid2ext2size} ) { my $owner = ( defined($uid) && getpwuid($uid) ) ? ( getpwuid($uid) )[0] : '?'; foreach my $ext ( sort keys %{ $uid2ext2size->{$uid} } ) { foreach my $url ( @{ $dir2urls{$File::Find::topdir} } ) { my $url2 = $url . $reltop; print join( ',', ++$dir2ext2nrtimesreported{$File::Find::dir}->{$ext}, $uid2ext2size->{$uid}->{$ext}, mtime2date( $uid2ext2mtime->{$uid}->{$ext} ), $owner, $opt{x} ? ($ext) : (), $url2, $File::Find::dir ), "\n"; } } } }; no warnings 'File::Find'; foreach my $top (@dir) { if ( !-d $top ) { ehm( "no content directory $top for", @{ $dir2urls{$top} } > 1 ? 'URLs' : 'URL', @{ $dir2urls{$top} } ); next; } ehm( "printing content directories in $top for", @{ $dir2urls{$top} } > 1 ? 'URLs' : 'URL', @{ $dir2urls{$top} } ); if ( !$opt{x} ) { # report even when 0 my $uid = stat($top) ? ( stat($top) )[4] : -1; $dir2uid2ext2size{$top}->{$uid}->{'*'} = 0; $dir2uid2ext2mtime{$top}->{$uid}->{'*'} = 0; } find( { preprocess => $preprocess, wanted => $process, postprocess => $postprocess }, $top ); } } sub mtime2date { my ($mtime) = @_; if ($mtime) { strftime( '%F', localtime($mtime) ); } else { '0000-00-00'; } } my $properties_rx; sub properties_rx { my $req_term_rx = '("(?:\\"|[^"])*"|[^#\s]+)'; my $opt_term_rx = '("[^"]*"|[^#\s]*)'; qr/^\s*$req_term_rx\s+$req_term_rx\s*$opt_term_rx/; } sub properties # returns the properties found in the lines in @_ as a hashref # property names are case-sensitives, so the keys are lowercase, # and the special key 'Original' maps to a hashref # mapping them to their first occurring form { my %property; $properties_rx = properties_rx() if !defined $properties_rx; foreach (@_) { if ( my ( $name, $value, $third ) = /$properties_rx/ ) { my $lcname = lc $name; if ( $name =~ /^]$//; } ( $name, $lcname, $value, $third ) = ( "$name $value_third", "$lcname $value_third", '', '' ); } elsif ( $lcname eq lc 'CustomLog' ) { ( $value, $third ) = ( $third, $value ); } if ( $third ne '' ) { ( $name, $lcname, $value ) = ( "$name $value", "$lcname $value", $third ); } if ( defined $property{$lcname} && $value ne $property{$lcname} ) { warn "multiple definition of $name as $property{$name} and $value\n"; } $property{$lcname} = $value; $property{Original}->{$lcname} = $name; } } \%property; } sub du { # TO DO: use open(my $fh, '-|', 'du', '-bs', '$_') instead map { ( !-d $_ ) ? '-' : ( `du -bs $_` =~ /^(\d+)/ ) ? $1 : '?' } @_; } sub print_log_info { my ( $urls, $section_property, $main_property ) = ( $_[0]->{urls}, $_[0]->{properties}, $_[1]->{properties} ); # $property are the section's properties # $main_property are the main section's properties # this implements a primitive way to inherit properties from the main section; # this is not 100% accurate, as the order in which properties are stated is not used my $url = preferred_url(@$urls); my %logformat; # merge the properties, main properties first my %property = ( %$main_property, %$section_property ); # also merge the Originals $property{Original} = { %{ $main_property->{Original} }, %{ $section_property->{Original} } }; foreach ( grep { /LogFormat\b/i } grep { $_ ne 'Original' } keys %property ) { if ( my ($format) = /^LogFormat (".*")/i ) { $logformat{ $property{$_} } = $format; ehm("found LogFormat $format $property{$_}"); } else { warn "ignoring unparseable LogFormat statement: $_\n"; } } foreach my $log ( grep { /\b(Transfer|Custom|Error)Log\b/i } grep { $_ ne 'Original' } keys %property ) { if ( my ( $type, $rest ) = $log =~ /^(Transfer|Custom|Error)Log\s*(.*\S|)/i ) { my ( $namedest, $envcond ) = $rest =~ /(.*\S)\s+(\S+=\S+)$/ ? ( $1, $2 ) : ( $rest, '' ); ehm("looking up the ${type}LogFormat for name [dest] $namedest"); my ( $name, $dest ) = $namedest =~ /^(\S+)\s*(.*)$/ ? ( $1, $2 ) : ( '', $namedest ); ehm("looking up the ${type}LogFormat for $name"); my $format = $logformat{ $type eq 'custom' ? $name : '' }; $format or $format = '?'; # e.g. for ErrorLog printf "%s\n", join( ',', $url, rel2abs( $property{$log}, $server_root ), $property{Original}->{$log}, $format ); } else { warn "ignoring unparseable *Log statement: $log $property{$log}\n"; } } }