#!/usr/bin/env perl
#
# sakai-resource-file-properties - apply this on the output of ./sakai-resource-files
#                                  to get more info about Sakai resource files on disk
#
# Input  (files named on the command line, or stdin): CSV with the header
#        FILE_SIZE,FILE_PATH,RESOURCE_ID,RESOURCE_TYPE_ID,SITE_NAME
# Output (stdout): CSV with the header
#        NR_LINKS,NR_BYTES,MODIF_TIME[,FILE_TYPE],FILE_PATH,RESOURCE_ID,RESOURCE_TYPE_ID,SITE_NAME
#
# $Id$

use strict;
use warnings;

use Getopt::Std;
use POSIX qw(strftime);
use File::Basename qw(dirname);

my %opt;
getopts( 'hmfs:1', \%opt );

$opt{h} and HELP_MESSAGE();

if ( $opt{f} ) {
    eval { require File::LibMagic }
        or die "Cannot use -f option: Perl module File::LibMagic is not installed\n";
}

# Prints a usage summary on STDERR and exits.
# NOTE(review): the original usage text was lost from the reviewed copy of this
# file; the text below was rebuilt from the option handling in this script.
# The exit status is an assumption as well -- confirm against the original.
sub HELP_MESSAGE {
    print STDERR <<'EOF';
usage: sakai-resource-file-properties [-h] [-m] [-f] [-1] [-s TIME] [FILE...]

Reads the output of sakai-resource-files and reports properties of the
corresponding files on disk.

  -h       show this help
  -m       report missing files only
  -f       add a FILE_TYPE column (requires the Perl module File::LibMagic)
  -1       report each file on disk (device and inode) only once
  -s TIME  skip files modified before TIME, in any format understood by
           'date -d'; missing files are skipped as well

environment:
  SAKAI_PROPERTIES  properties file to read the bodyPath setting from
  SAKAI_FILES       directory holding the resource files
EOF
    exit 0;
}

# Returns the value of the first "KEY...=VALUE" line in $file whose key starts
# with the pattern $rx; leading and trailing whitespace of the value is dropped.
# Returns nothing when no line matches.
# NOTE(review): the opening lines of this sub were lost from the reviewed copy
# and have been reconstructed.  Returning nothing for an unreadable file is an
# assumption (it lets the caller's '//' fallback take over) -- confirm.
sub grep_in_file {
    my ( $rx, $file ) = @_;

    open( my $fh, '<', $file ) or return;
    while ( my $line = <$fh> ) {
        if ( $line =~ /^$rx[^=]*=\s*(.*\S)/ ) {
            my $value = $1;
            close($fh);
            return $value;
        }
    }
    close($fh);
    return;
}

# Columns this script puts in front of the columns copied from the input.
my @my_columns = qw(NR_LINKS NR_BYTES MODIF_TIME);
push( @my_columns, 'FILE_TYPE' ) if $opt{f};

my $local_properties = $ENV{SAKAI_PROPERTIES} // '/var/lib/tomcat7/sakai/local.properties';
my $sakai_files = $ENV{SAKAI_FILES}
    // grep_in_file( 'bodyPath@org.sakaiproject.content.api.ContentHostingService',
    $local_properties )
    // '${sakai.home}/files';

# String replacement without regular expression matching or variable
# interpolation: returns $string with every $substring replaced by $replacement.
sub replace_substring {
    my ( $substring, $replacement, $string ) = @_;

    # LIMIT -1 keeps trailing empty fields; without it an occurrence at the
    # very end of $string would be dropped instead of replaced.
    return join( $replacement, split( /\Q$substring\E/, $string, -1 ) );
}

$sakai_files = replace_substring( '${sakai.home}', dirname($local_properties), $sakai_files );

my %sz2id;           # maps each file size to the list of files with that size
my %is_missing;      # maps each file id of a missing file to 1
my %id2inputinfo;    # maps each file id to an array ref with its input fields

# Aborts the script with a 'fatal error:' message built from the arguments.
sub yikes {
    die join( ' ', 'fatal error:', @_ ), "\n";
}

# Returns its argument with any leading slashes removed.
sub rel {
    my ($rel) = $_[0] =~ m#^/*(.*)#;
    return $rel;
}

# Converts a time specification to YYYYMMDDhhmmss by asking /bin/date.
# The list form of the pipe open bypasses the shell, so quotes or other shell
# metacharacters in the specification cannot alter the command.
sub since_stamp {
    my ($spec) = @_;

    open( my $date, '-|', '/bin/date', '+%Y%m%d%H%M%S', '-d', $spec )
        or yikes("cannot run /bin/date: $!");
    my $stamp = <$date>;
    close($date);    # a failing date shows up as empty output, checked below

    defined($stamp) && $stamp =~ /\S/
        or yikes("invalid time specification: $spec");
    chomp($stamp);
    return $stamp;
}

my @input_header = qw(FILE_SIZE FILE_PATH RESOURCE_ID RESOURCE_TYPE_ID SITE_NAME);

# The first input line must be exactly the header written by sakai-resource-files.
do {
    local $_ = <> // yikes('cannot read the first input line');
    chomp;
    $_ eq join( ',', @input_header )
        or yikes('the first line is not a valid header from sakai-resource-files');
};

my %column = map { $input_header[$_] => $_ } 0 .. $#input_header;
$column{SITE_NAME} or yikes('something impossible happened');

my $nr_columns = scalar(@input_header);

# Indexes of the input columns that are copied to the output unchanged;
# FILE_SIZE is reported as NR_BYTES instead.
my @copied = grep { $input_header[$_] ne 'FILE_SIZE' } 0 .. $#input_header;

# Pass 1: read all input records, group them by size, note which files are missing.
while (<>) {
    chomp;

    # only the last field, SITE_NAME, can contain commas
    my @srf = split( /,/, $_, $nr_columns );
    @srf == $nr_columns or yikes("wrong number of fields at $ARGV:$.");

    my $sz   = $srf[ $column{FILE_SIZE} ];
    my $file = rel( $srf[ $column{FILE_PATH} ] );
    my $id   = $srf[ $column{RESOURCE_ID} ];

    $id2inputinfo{$id} = [@srf];
    push( @{ $sz2id{$sz} }, $id );
    $is_missing{$id} = 1 if !-e "$sakai_files/$file";
}

# Lower bound on the modification time, as YYYYMMDDhhmmss; undef without -s.
my $since = defined( $opt{s} ) ? since_stamp( $opt{s} ) : undef;

my %devino2count;    # counts how often each "device:inode" has been seen

print join( ',', @my_columns, @input_header[@copied] ), "\n";

# Pass 2: report the files, ordered by database size, then by resource id.
for my $sz ( sort { $a <=> $b } keys %sz2id ) {
    my @id = @{ $sz2id{$sz} };
    @id = grep { $is_missing{$_} } @id if $opt{m};    # report missing files only

    for my $id ( sort @id ) {
        my @input   = @{ $id2inputinfo{$id} };
        my $file    = rel( $input[ $column{FILE_PATH} ] );
        my $absfile = "$sakai_files/$file";
        my $missing = $is_missing{$id};

        # device, inode, link count, size and mtime; all undef when stat fails
        my ( $dev, $ino, $nlink, $size, $mtime ) = ( stat($absfile) )[ 0, 1, 3, 7, 9 ];

        if ( !defined($dev) && !$missing ) {
            warn "skipping, cannot stat: $file\n";
            next;
        }

        # the counter is kept up to date even without -1
        if ( !$missing && $devino2count{"$dev:$ino"}++ && $opt{1} ) {

            # we've reported this file before
            next;
        }

        my $fmtime =
            $missing
            ? '00000000000000'
            : strftime( '%Y%m%d%H%M%S', localtime($mtime) );

        if ( defined($since) && $fmtime < $since ) {

            # the file is too old or missing
            next;
        }

        my $in_sz = $input[ $column{FILE_SIZE} ];
        if ( !$missing && $size ne $in_sz ) {

            # this is very rare, if it happens at all
            warn "size on disk: $size, while size in database: $in_sz for: $file\n";
        }

        # Same order as @my_columns: NR_LINKS, NR_BYTES, MODIF_TIME [, FILE_TYPE].
        # NR_BYTES carries the size recorded in the input.
        my @my_info = ( $missing ? 0 : $nlink, $in_sz, $fmtime );

        if ( $opt{f} ) {
            my $type;
            if ($missing) {
                $type = '-';
            }
            elsif ( $sz == 0 ) {
                $type = 'empty';    # without this, the result would be 'empty '
            }
            else {
                $type = eval { File::LibMagic::MagicFile($absfile) } // '?';
                $type =~ s/,/;/g;    # keep the CSV output intact
            }
            push( @my_info, $type );
        }

        print join( ',', @my_info, @input[@copied] ), "\n";
    }
}