#!/bin/sh
#
# sakai-files-dedup - deduplicate Sakai's uploaded files
#
# $Id$
#
# Designed to be called from /etc/cron.daily/.
#
# For testing, use e.g.
#
#     MIN_SIZE=1M sakai-files-dedup -n
#
# or
#
#     MIN_SIZE=1 sakai-files-dedup -na
#
# All command-line flags are passed through unchanged to ln-r, which is
# expected to live in the same directory as this script.
#
# Environment variables (all optional):
#   DEDUP_USER  account that owns the files; the script re-executes itself
#               as this user via sudo when started by anyone else
#   MIN_SIZE    only files larger than this are considered (find -size syntax)
#   RECIPIENT   if set, the summary is e-mailed to this recipient;
#               by default no email is sent and the results go to stdout

# defaults overridable in environment variables:
: "${DEDUP_USER:=tomcat8s11}"
: "${MIN_SIZE:=100M}"
#: "${RECIPIENT:=sakai-remove-duplicates}"

PATH=/usr/bin:/bin

# Die MESSAGE...
# Print a fatal error message, prefixed with the script name, to stderr
# and terminate the script with exit status 1.
Die()
{
    echo "${0}: fatal error: $*" >&2
    exit 1
}

# DieIfMissing COMMAND
# Abort unless COMMAND can be resolved by the shell.
DieIfMissing()
{
    command -v "$1" >/dev/null 2>&1 || Die command not found: "$1"
}

# DieIfNoUser USER
# Abort unless USER has an entry in the passwd database.
DieIfNoUser()
{
    getent passwd "$1" >/dev/null || Die user not found: "$1"
}

# execute as user $DEDUP_USER
DieIfNoUser "$DEDUP_USER"

# Use sudo if DEDUP_USER is someone else.  MIN_SIZE and RECIPIENT are handed
# over explicitly as VAR=value arguments to sudo.
# "$@" is just for passing flags to ln-r, such as -n!
if [ "$DEDUP_USER" != "$(whoami)" ]
then
    exec sudo -u "$DEDUP_USER" -g "$DEDUP_USER" \
        MIN_SIZE="$MIN_SIZE" RECIPIENT="$RECIPIENT" "$0" "$@"
fi

# From here on we are executing as user $DEDUP_USER.
# HOME is taken from the passwd entry (field 6), not from the environment.
HOME=$(getent passwd "$DEDUP_USER" | awk -F: '{print $6}')
D="$HOME"/tomcat8/sakai/files

# Locate ln-r next to the real (symlink-resolved) path of this script.
DieIfMissing realpath
ME=$(realpath "$0")
LN_R=$(dirname "$ME")/ln-r
[ -x "$LN_R" ] || Die command not found: ln-r

cd "$D" || Die cannot cd to "$D"

# DedupAndReport [FLAG...]
# Feed every regular file larger than $MIN_SIZE found below the entries
# matching 20?? to "ln-r -Iv", then print a one-line summary
# ("deduplicated N files, M bytes") or nothing at all if ln-r produced no
# output.  Any arguments are assumed to be flags; they are passed to ln-r.
# This is useful for testing with -n and -a.
DedupAndReport()
{
    # assume no spaces etc. in filenames
    # Pipeline: the last whitespace-separated field of every ln-r output line
    # (stderr included) is treated as a file name; perl counts those lines and
    # sums the sizes reported by stat() (element 7 of the stat list).
    find 20?? -type f -size +"$MIN_SIZE" |
        "$LN_R" -Iv "$@" 2>&1 |
        awk '{print $NF}' |
        perl -lne '
            BEGIN { $c = 0; $b = 0 };
            ++$c; $b += (stat($_))[7];
            END { print "deduplicated $c files, $b bytes" if $c > 0 }' |
        perl -wpe '1 while s/(\d+)(\d\d\d)/$1,$2/;'   # adds thousands separators
}

if [ -z "$RECIPIENT" ]
then
    # no recipient: the summary (which may be empty) goes to stdout
    DedupAndReport "$@"
else
    DEDUP=$(DedupAndReport "$@")
    if [ -n "$DEDUP" ]
    then
        # we have output: mail it, repeating the summary in the subject line
        DieIfMissing mail
        echo "$0: $DEDUP" |
            mail -s "removing duplicate files: $DEDUP" "$RECIPIENT"
    fi
fi