Backup - fixed distributed processing, improved error handling.

This commit is contained in:
Louwrentius 2010-05-14 12:00:04 +00:00
parent 61d8a24ed2
commit 7c2d336dba

29
ppss
View File

@ -94,7 +94,7 @@ SIZE_OF_ARRAY=""
SSH_SERVER="" # Remote server or 'master'. SSH_SERVER="" # Remote server or 'master'.
SSH_KEY="" # SSH key for ssh account. SSH_KEY="" # SSH key for ssh account.
SSH_KNOWN_HOSTS="" SSH_KNOWN_HOSTS=""
SSH_SOCKET="/tmp/ppss_ssh_socket" # Multiplex multiple SSH connections over 1 master. SSH_SOCKET="/tmp/ppss_ssh_socket-$$" # Multiplex multiple SSH connections over 1 master.
SSH_OPTS="-o BatchMode=yes -o ControlPath=$SSH_SOCKET \ SSH_OPTS="-o BatchMode=yes -o ControlPath=$SSH_SOCKET \
-o GlobalKnownHostsFile=./known_hosts \ -o GlobalKnownHostsFile=./known_hosts \
-o ControlMaster=auto \ -o ControlMaster=auto \
@ -107,7 +107,7 @@ SSH_OPTS_NOMP="-o BatchMode=yes -o GlobalKnownHostsFile=./known_hosts \
# Blowfish is faster but still secure. # Blowfish is faster but still secure.
SSH_MASTER_PID="" SSH_MASTER_PID=""
ITEM_LOCK_DIR="$PPSS_HOME_DIR/$PPSS_DIR/PPSS_ITEM_LOCK_DIR" # Remote directory on master used for item locking. ITEM_LOCK_DIR="$PPSS_DIR/PPSS_ITEM_LOCK_DIR" # Remote directory on master used for item locking.
PPSS_LOCAL_TMPDIR="$PPSS_DIR/PPSS_LOCAL_TMPDIR" # Local directory on slave for local processing. PPSS_LOCAL_TMPDIR="$PPSS_DIR/PPSS_LOCAL_TMPDIR" # Local directory on slave for local processing.
PPSS_LOCAL_OUTPUT="$PPSS_DIR/PPSS_LOCAL_OUTPUT" # Local directory on slave for local output. PPSS_LOCAL_OUTPUT="$PPSS_DIR/PPSS_LOCAL_OUTPUT" # Local directory on slave for local output.
DOWNLOAD_TO_NODE="0" # Transfer item to slave via (s)cp. DOWNLOAD_TO_NODE="0" # Transfer item to slave via (s)cp.
@ -122,7 +122,7 @@ DAEMON=0
case $ARCH in case $ARCH in
"Darwin") MD5=md5 ;; "Darwin") MD5=md5 ;;
"FreeBSD") MD5=md5 ;; "FreeBSD") MD5=md5 ;;
"SunOS") MD5="diget -a md5" ;; "SunOS") MD5="digest -a md5" ;;
"Linux") MD5=md5sum ;; "Linux") MD5=md5sum ;;
esac esac
@ -314,6 +314,8 @@ showusage_long () {
echo -e "--homedir | -H Directory in which PPSS is installed on the node." echo -e "--homedir | -H Directory in which PPSS is installed on the node."
echo -e " Default is '$PPSS_HOME_DIR'." echo -e " Default is '$PPSS_HOME_DIR'."
echo echo
echo -e "--script | -S Script to run on the node. PPSS must copy this script to the node."
echo
echo -e "Amazon EC2 platform specific options:" echo -e "Amazon EC2 platform specific options:"
echo echo
echo -e "--awskeypair | -P The Amazon EC2 SSH keypair that new instances should use." echo -e "--awskeypair | -P The Amazon EC2 SSH keypair that new instances should use."
@ -752,7 +754,6 @@ log () {
# Type 'DSPLY ERROR and WARN' is logged to the screen # Type 'DSPLY ERROR and WARN' is logged to the screen
# Any other log-type is only logged to the logfile. # Any other log-type is only logged to the logfile.
# #
TYPE="$1" TYPE="$1"
MESG="$2" MESG="$2"
TYPE_LENGTH=5 TYPE_LENGTH=5
@ -798,8 +799,6 @@ init_vars () {
echo "ERROR - PPSS requires $MD5. It may not be within the path or installed." echo "ERROR - PPSS requires $MD5. It may not be within the path or installed."
fi fi
create_working_directory
if [ "$DAEMON" == "1" ] if [ "$DAEMON" == "1" ]
then then
INPUT_LOCK="$SRC_DIR/INPUT_LOCK" INPUT_LOCK="$SRC_DIR/INPUT_LOCK"
@ -878,17 +877,19 @@ init_vars () {
does_file_exist "$PPSS_HOME_DIR/$JOB_LOG_DIR" does_file_exist "$PPSS_HOME_DIR/$JOB_LOG_DIR"
if [ ! "$?" == "0" ] if [ ! "$?" == "0" ]
then then
log DEBUG "Remote Job log directory $JOB_lOG_DIR does not exist. Creating." log DEBUG "Remote Job log directory $PPSS_HOME_DIR/$JOB_lOG_DIR does not exist. Creating."
exec_cmd "mkdir $PPSS_HOME_DIR/$JOB_LOG_DIR" exec_cmd "mkdir $PPSS_HOME_DIR/$JOB_LOG_DIR"
fi fi
elif [ ! -e "$JOB_LOG_DIR" ] fi
if [ ! -e "$JOB_LOG_DIR" ]
then then
mkdir -p "$JOB_LOG_DIR" mkdir -p "$JOB_LOG_DIR"
fi fi
if [ -z "$SSH_SERVER" ] if [ ! -z "$SSH_SERVER" ]
then then
ITEM_LOCK_DIR="$PPSS_DIR/ITEM_LOCK_DIR" ITEM_LOCK_DIR="$PPSS_HOME_DIR/$ITEM_LOCK_DIR"
fi fi
does_file_exist "$ITEM_LOCK_DIR" does_file_exist "$ITEM_LOCK_DIR"
@ -950,7 +951,7 @@ check_status () {
if [ ! "$ERROR" == "0" ] if [ ! "$ERROR" == "0" ]
then then
log DSPLY "$FUNCTION - $MESSAGE" log DSPLY "$FUNCTION - $MESSAGE"
set_status STOPPED set_status ERROR
cleanup cleanup
exit 1 exit 1
fi fi
@ -1573,8 +1574,6 @@ get_all_items () {
((count++)) ((count++))
done done
fi fi
fi fi
if [ "$DAEMON" == "1" ] if [ "$DAEMON" == "1" ]
@ -2150,7 +2149,7 @@ show_status () {
if [ ! -z "$ITEMS" ] && [ ! "$ITEMS" == "0" ] if [ ! -z "$ITEMS" ] && [ ! "$ITEMS" == "0" ]
then then
PROCESSED=`exec_cmd "ls -1 $ITEM_LOCK_DIR 2>/dev/null | wc -l" 1` 2>&1 >> /dev/null PROCESSED=`exec_cmd "ls -1 $PPSS_HOME_DIR/$ITEM_LOCK_DIR 2>/dev/null | wc -l" 1` 2>&1 >> /dev/null
TMP_STATUS=$((100 * $PROCESSED / $ITEMS)) TMP_STATUS=$((100 * $PROCESSED / $ITEMS))
log DSPLY "Status:\t\t$TMP_STATUS percent complete." log DSPLY "Status:\t\t$TMP_STATUS percent complete."
else else
@ -2201,6 +2200,7 @@ main () {
case $MODE in case $MODE in
node ) node )
create_working_directory
test_server test_server
init_vars init_vars
get_all_items get_all_items
@ -2315,6 +2315,7 @@ main () {
;; ;;
* ) * )
create_working_directory
init_vars init_vars
get_all_items get_all_items
listen_for_job "$MAX_NO_OF_RUNNING_JOBS" & 2>&1 >> /dev/null listen_for_job "$MAX_NO_OF_RUNNING_JOBS" & 2>&1 >> /dev/null