PPSS can now be sourced for unit testing.

This commit is contained in:
Louwrentius 2010-06-06 18:02:05 +00:00
parent 40b3ed8228
commit 708803688f

330
ppss
View File

@ -20,14 +20,17 @@
# "Patches or other contributions are always welcome!" # "Patches or other contributions are always welcome!"
# #
#
# Handling control-c for a clean shutdown. # Handling control-c for a clean shutdown.
#
trap 'kill_process' SIGINT trap 'kill_process' SIGINT
# Setting some vars.
SCRIPT_NAME="Distributed Parallel Processing Shell Script" SCRIPT_NAME="Distributed Parallel Processing Shell Script"
SCRIPT_VERSION="2.70" SCRIPT_VERSION="2.70"
#
# The first argument to this script can be a mode. # The first argument to this script can be a mode.
#
MODES="node start config stop pause continue deploy status erase kill ec2" MODES="node start config stop pause continue deploy status erase kill ec2"
for x in $MODES for x in $MODES
do do
@ -39,32 +42,20 @@ do
fi fi
done done
#
# The working directory of PPSS can be set with # The working directory of PPSS can be set with
# export PPSS_DIR=/path/to/workingdir # export PPSS_DIR=/path/to/workingdir
#
if [ -z "$PPSS_DIR" ] if [ -z "$PPSS_DIR" ]
then then
PPSS_DIR="ppss_dir" PPSS_DIR="ppss_dir"
fi fi
get_time_in_seconds () {

    #
    # Print the current time in seconds (the value of 'date +%s') on stdout.
    # Reads ARCH; stores the value in THE_TIME before echoing it.
    #
    case "$ARCH" in
        "SunOS")
            #
            # Dirty hack because this ancient operating system does not support +%s...
            # The third field of the 'time' line reported by truss is used instead.
            #
            THE_TIME=$(truss /usr/bin/date 2>&1 | grep ^time | awk '{ print $3 }')
            ;;
        *)
            THE_TIME=$(date +%s)
            ;;
    esac

    echo "$THE_TIME"
}
CONFIG="" CONFIG=""
HOSTNAME="`hostname`" HOSTNAME="`hostname`"
ARCH="`uname`" ARCH="`uname`"
PPSS_HOME_DIR="ppss-home" PPSS_HOME_DIR="ppss-home"
SOURCED="$0"
PID="$$" PID="$$"
GLOBAL_LOCK="$PPSS_DIR/PPSS-GLOBAL-LOCK-$PID" # Global lock file used by local PPSS instance. GLOBAL_LOCK="$PPSS_DIR/PPSS-GLOBAL-LOCK-$PID" # Global lock file used by local PPSS instance.
@ -88,7 +79,7 @@ PROCESSORS=""
STOP_KEY="$RANDOM$RANDOM$RANDOM" # This is a signal to the listener to stop. STOP_KEY="$RANDOM$RANDOM$RANDOM" # This is a signal to the listener to stop.
KILL_KEY="$RANDOM$RANDOM$RANDOM" # This is a signal to stop immediately and kill KILL_KEY="$RANDOM$RANDOM$RANDOM" # This is a signal to stop immediately and kill
RECURSION="1" # all running processes. RECURSION="1" # all running processes.
START_PPSS=`get_time_in_seconds` START_PPSS=""
STOP_PPSS="" STOP_PPSS=""
SIZE_OF_INPUT="" SIZE_OF_INPUT=""
@ -120,19 +111,6 @@ ITEM_ESCAPED=""
NODE_STATUS="$PPSS_DIR/status.txt" NODE_STATUS="$PPSS_DIR/status.txt"
DAEMON=0 DAEMON=0
case $ARCH in
"Darwin") MD5=md5 ;;
"FreeBSD") MD5=md5 ;;
"SunOS") MD5="digest -a md5" ;;
"Linux") MD5=md5sum ;;
esac
if [ "$ARCH" == "Darwin" ] || [ "$ARCH" == "FreeBSD" ]
then
MD5=md5
else
MD5=$MD5
fi
showusage_short () { showusage_short () {
@ -208,12 +186,6 @@ showusage_normal () {
echo echo
} }
if [ "$#" == "0" ]
then
showusage_short
exit 1
fi
showusage_long () { showusage_long () {
echo echo
@ -359,7 +331,7 @@ kill_process () {
exec_cmd () { exec_cmd () {
STATUS="0" STATUS=""
CMD="$1" CMD="$1"
NOMP="$2" # Disable multiplexing. NOMP="$2" # Disable multiplexing.
@ -378,9 +350,9 @@ exec_cmd () {
STATUS=$? STATUS=$?
fi fi
else else
log DEBUG "LOCAL EXEC"
eval "$CMD" eval "$CMD"
STATUS=$? STATUS=$?
log DEBUG "LOCAL EXEC - status is $STATUS"
fi fi
return $STATUS return $STATUS
} }
@ -391,11 +363,13 @@ does_file_exist () {
# this function makes remote or local checking of existence of items transparent. # this function makes remote or local checking of existence of items transparent.
# #
FILE="$1" FILE="$1"
`exec_cmd "ls -1 $FILE" >> /dev/null 2>&1` RES=`exec_cmd "ls -1 $FILE" 2>&1`
if [ "$?" == "0" ] if [ "$?" = "0" ]
then then
log DEBUG "$FILE does exist - $RES"
return 0 return 0
else else
log DEBUG "$FILE does not exist - $RES"
return 1 return 1
fi fi
} }
@ -405,9 +379,8 @@ check_for_interrupt () {
# #
# PPSS can be interrupted with a stop or pause command. # PPSS can be interrupted with a stop or pause command.
# #
does_file_exist "$STOP_SIGNAL" does_file_exist "$STOP_SIGNAL"
if [ "$?" == "0" ] if [ "$?" = "0" ]
then then
set_status "STOPPED" set_status "STOPPED"
log INFO "STOPPING job. Stop signal found." log INFO "STOPPING job. Stop signal found."
@ -416,7 +389,7 @@ check_for_interrupt () {
fi fi
does_file_exist "$PAUSE_SIGNAL" does_file_exist "$PAUSE_SIGNAL"
if [ "$?" == "0" ] if [ "$?" = "0" ]
then then
set_status "PAUZED" set_status "PAUZED"
log INFO "PAUSE: sleeping for $PAUSE_DELAY SECONDS." log INFO "PAUSE: sleeping for $PAUSE_DELAY SECONDS."
@ -465,13 +438,35 @@ add_var_to_config () {
fi fi
} }
# Process any command-line options that are specified." is_var_empty () {
while [ $# -gt 0 ]
do if [ -z "$1" ]
then
showusage_normal
cleanup
exit 1
fi
}
process_arguments () {
#
# Process any command-line options that are specified.
#
if [ "$#" = "0" ]
then
showusage_short
exit 1
fi
while [ $# -gt 0 ]
do
case $1 in case $1 in
--config|-C ) --config|-C )
CONFIG="$2" CONFIG="$2"
is_var_empty "$CONFIG"
if [ "$MODE" == "config" ] if [ "$MODE" == "config" ]
then then
@ -530,11 +525,13 @@ do
--sourcefile|-f ) --sourcefile|-f )
INPUT_FILE="$2" INPUT_FILE="$2"
is_var_empty "$INPUT_FILE"
add_var_to_config INPUT_FILE "$INPUT_FILE" add_var_to_config INPUT_FILE "$INPUT_FILE"
shift 2 shift 2
;; ;;
--sourcedir|-d ) --sourcedir|-d )
SRC_DIR="$2" SRC_DIR="$2"
is_var_empty "$SRC_DIR"
add_var_to_config SRC_DIR "$SRC_DIR" add_var_to_config SRC_DIR "$SRC_DIR"
shift 2 shift 2
;; ;;
@ -578,6 +575,7 @@ do
;; ;;
--command|-c ) --command|-c )
COMMAND="$2" COMMAND="$2"
is_var_empty "$COMMAND"
if [ "$MODE" == "config" ] if [ "$MODE" == "config" ]
then then
COMMAND=\'$COMMAND\' COMMAND=\'$COMMAND\'
@ -623,6 +621,7 @@ do
;; ;;
--key|-k ) --key|-k )
SSH_KEY="$2" SSH_KEY="$2"
is_var_empty "$SSH_KEY"
add_var_to_config SSH_KEY "$SSH_KEY" add_var_to_config SSH_KEY "$SSH_KEY"
if [ ! -z "$SSH_KEY" ] if [ ! -z "$SSH_KEY" ]
then then
@ -705,16 +704,26 @@ do
echo echo
exit 1;; exit 1;;
esac esac
done done
if [ "$DAEMON" == "1" ] && [ -z "$SRC_DIR" ] if [ -z "$SRC_DIR" ] && [ -z "$INPUT_FILE" ]
then then
showusage_short
echo
log ERROR "No source file or directory specified with -f or -d."
cleanup
exit 1
fi
if [ "$DAEMON" == "1" ] && [ -z "$SRC_DIR" ]
then
showusage_short showusage_short
echo echo
echo "Daemon mode requires an argument to the -d option as a place to put the lock dir." echo "Daemon mode requires an argument to the -d option as a place to put the lock dir."
echo "Read the on-line manual for more information." echo "Read the on-line manual for more information."
exit exit 1
fi fi
}
display_header () { display_header () {
@ -749,6 +758,52 @@ expand_str () {
echo "$STR" echo "$STR"
} }
are_we_sourced () {

    #
    # Decide whether this file was sourced rather than executed.
    #
    # SOURCED holds the value of $0. When the file is sourced, $0 is the name
    # of the invoking shell instead of the script name. That name may carry a
    # leading dash (login shell, e.g. "-bash") or a directory part (shell
    # started by path, e.g. "/bin/bash"), so both forms are matched.
    #
    # Returns 0 if the script is sourced, 1 otherwise.
    #
    case "${SOURCED#-}" in
        sh|bash|dash|*/sh|*/bash|*/dash)
            log DEBUG "This script is sourced."
            return 0
            ;;
        *)
            log DEBUG "This script is not sourced."
            return 1
            ;;
    esac
}
get_time_in_seconds () {

    #
    # Print the current time in seconds (the value of 'date +%s') on stdout.
    # Reads ARCH; stores the value in THE_TIME before echoing it.
    #
    case "$ARCH" in
        "SunOS")
            #
            # Dirty hack because this ancient operating system does not support +%s...
            # The third field of the 'time' line reported by truss is used instead.
            #
            THE_TIME=$(truss /usr/bin/date 2>&1 | grep ^time | awk '{ print $3 }')
            ;;
        *)
            THE_TIME=$(date +%s)
            ;;
    esac

    echo "$THE_TIME"
}
set_md5 () {

    #
    # Select the MD5 command for this platform and verify that it works.
    #
    # Reads ARCH. Sets MD5 for the known platforms; for any other platform
    # MD5 keeps whatever value it already had.
    #
    # Returns 0 if "$MD5" can digest a test string, 1 otherwise.
    #
    case $ARCH in
        "Darwin")   MD5=md5 ;;
        "FreeBSD")  MD5=md5 ;;
        "SunOS")    MD5="digest -a md5" ;;
        "Linux")    MD5=md5sum ;;
    esac

    #
    # With an empty MD5 the probe below would expand to nothing and always
    # succeed, so this case is rejected explicitly.
    #
    if [ -z "$MD5" ]
    then
        log ERROR "ERROR - PPSS requires an MD5 tool, but none is known for platform '$ARCH'."
        return 1
    fi

    #
    # MD5 is deliberately unquoted: it may hold a command plus arguments.
    #
    echo "test" | $MD5 > /dev/null 2>&1
    if [ "$?" != "0" ]
    then
        log ERROR "ERROR - PPSS requires $MD5. It may not be within the path or installed."
        return 1
    else
        return 0
    fi
}
log () { log () {
# #
@ -759,6 +814,15 @@ log () {
MESG="$2" MESG="$2"
TYPE_LENGTH=5 TYPE_LENGTH=5
#
# Performance hack. Don't go through all the code if not required.
#
if [ "$TYPE" = "DEBUG" ] && [ "$PPSS_DEBUG" == "0" ]
then
return
fi
TYPE_EXP=`expand_str "$TYPE"` TYPE_EXP=`expand_str "$TYPE"`
DATE=`date +%b\ %d\ %H:%M:%S` DATE=`date +%b\ %d\ %H:%M:%S`
@ -780,6 +844,7 @@ log () {
if [ "$TYPE" == "DSPLY" ] || [ "$TYPE" == "ERROR" ] || [ "$TYPE" == "WARN" ] && [ "$QUIET" == "0" ] if [ "$TYPE" == "DSPLY" ] || [ "$TYPE" == "ERROR" ] || [ "$TYPE" == "WARN" ] && [ "$QUIET" == "0" ]
then then
echo -e "$ECHO_MSG" echo -e "$ECHO_MSG"
elif [ "$TYPE" == "ERROR" ] && [ "$QUIET" == "1" ] elif [ "$TYPE" == "ERROR" ] && [ "$QUIET" == "1" ]
then then
echo -e "$ECHO_MSG" echo -e "$ECHO_MSG"
@ -794,17 +859,28 @@ log () {
# Init all vars # Init all vars
init_vars () { init_vars () {
echo "test" | $MD5 > /dev/null 2>&1 #
if [ ! "$?" == "0" ] # Get start time to measure how long PPSS has been running.
then #
echo "ERROR - PPSS requires $MD5. It may not be within the path or installed." START_PPSS=`get_time_in_seconds`
fi
#
# Check if MD5(SUM) is present on the system.
#
set_md5
#
# Is PPSS run as a daemon? Then use input locking, which is not required otherwise.
#
if [ "$DAEMON" == "1" ] if [ "$DAEMON" == "1" ]
then then
INPUT_LOCK="$SRC_DIR/INPUT_LOCK" INPUT_LOCK="$SRC_DIR/INPUT_LOCK"
fi fi
#
# For some strange reason, this value differs between operating systems due to
# different behaviour of the ps utility across operating systems.
#
if [ "$ARCH" == "Darwin" ] if [ "$ARCH" == "Darwin" ]
then then
MIN_JOBS=4 MIN_JOBS=4
@ -813,29 +889,16 @@ init_vars () {
MIN_JOBS=3 MIN_JOBS=3
fi fi
if [ -e "$LOGFILE" ] #
then # Create a remote homedir for PPSS
rm $LOGFILE #
fi
does_file_exist "$PPSS_HOME_DIR" does_file_exist "$PPSS_HOME_DIR"
if [ ! "$?" == "0" ] && [ ! -z "$SSH_SERVER" ] if [ ! "$?" = "0" ] && [ ! -z "$SSH_SERVER" ]
then then
log DEBUG "Remote PPSS home directory $PPSS_HOME_DIR does not exist. Creating." log DEBUG "Remote PPSS home directory $PPSS_HOME_DIR does not exist. Creating."
exec_cmd "mkdir -p $PPSS_HOME_DIR/$PPSS_DIR" exec_cmd "mkdir -p $PPSS_HOME_DIR/$PPSS_DIR"
fi fi
display_header
if [ -z "$COMMAND" ]
then
echo
log ERROR "No command specified."
echo
showusage_normal
cleanup
exit 1
fi
echo 1 > $GLOBAL_COUNTER_FILE echo 1 > $GLOBAL_COUNTER_FILE
@ -876,7 +939,7 @@ init_vars () {
if [ ! -z "$SSH_SERVER" ] if [ ! -z "$SSH_SERVER" ]
then then
does_file_exist "$PPSS_HOME_DIR/$JOB_LOG_DIR" does_file_exist "$PPSS_HOME_DIR/$JOB_LOG_DIR"
if [ ! "$?" == "0" ] if [ ! "$?" = "0" ]
then then
log DEBUG "Remote Job log directory $PPSS_HOME_DIR/$JOB_lOG_DIR does not exist. Creating." log DEBUG "Remote Job log directory $PPSS_HOME_DIR/$JOB_lOG_DIR does not exist. Creating."
exec_cmd "mkdir $PPSS_HOME_DIR/$JOB_LOG_DIR" exec_cmd "mkdir $PPSS_HOME_DIR/$JOB_LOG_DIR"
@ -894,7 +957,7 @@ init_vars () {
fi fi
does_file_exist "$ITEM_LOCK_DIR" does_file_exist "$ITEM_LOCK_DIR"
if [ ! "$?" == "0" ] if [ ! "$?" = "0" ]
then then
if [ ! -z "$SSH_SERVER" ] if [ ! -z "$SSH_SERVER" ]
then then
@ -903,7 +966,7 @@ init_vars () {
log DEBUG "Creating local item lock dir." log DEBUG "Creating local item lock dir."
fi fi
exec_cmd "mkdir $ITEM_LOCK_DIR" exec_cmd "mkdir $ITEM_LOCK_DIR"
if [ ! "$?" == "0" ] if [ ! "$?" ]
then then
log DEBUG "Failed to create item lock dir." log DEBUG "Failed to create item lock dir."
fi fi
@ -912,7 +975,7 @@ init_vars () {
if [ ! -z "$SSH_SERVER" ] if [ ! -z "$SSH_SERVER" ]
then then
does_file_exist "$REMOTE_OUTPUT_DIR" does_file_exist "$REMOTE_OUTPUT_DIR"
if [ ! "$?" == "0" ] if [ ! "$?" = "0" ]
then then
log DEBUG "Remote output dir $REMOTE_OUTPUT_DIR does not exist." log DEBUG "Remote output dir $REMOTE_OUTPUT_DIR does not exist."
exec_cmd "mkdir $REMOTE_OUTPUT_DIR" exec_cmd "mkdir $REMOTE_OUTPUT_DIR"
@ -1180,7 +1243,7 @@ get_no_of_cpus () {
then then
if [ "$ARCH" == "Linux" ] if [ "$ARCH" == "Linux" ]
then then
NUMBER=`grep ^processor $CPUINFO | wc -l` NUMBER=`grep -c ^processor $CPUINFO`
got_cpu_info "$?" got_cpu_info "$?"
elif [ "$ARCH" == "Darwin" ] elif [ "$ARCH" == "Darwin" ]
@ -1195,12 +1258,12 @@ get_no_of_cpus () {
elif [ "$ARCH" == "SunOS" ] elif [ "$ARCH" == "SunOS" ]
then then
NUMBER=`psrinfo | grep on-line | wc -l` NUMBER=`psrinfo | grep -c on-line`
got_cpu_info "$?" got_cpu_info "$?"
else else
if [ -e "$CPUINFO" ] if [ -e "$CPUINFO" ]
then then
NUMBER=`grep ^processor $CPUINFO | wc -l` NUMBER=`grep -c ^processor $CPUINFO`
got_cpu_info "$?" got_cpu_info "$?"
fi fi
fi fi
@ -1217,7 +1280,7 @@ get_no_of_cpus () {
if [ "$ARCH" == "Linux" ] if [ "$ARCH" == "Linux" ]
then then
PHYSICAL=`grep 'physical id' $CPUINFO` PHYSICAL=`grep 'physical id' $CPUINFO`
if [ "$?" == "0" ] if [ "$?" ]
then then
PHYSICAL=`grep 'physical id' $CPUINFO | sort | uniq | wc -l` PHYSICAL=`grep 'physical id' $CPUINFO | sort | uniq | wc -l`
if [ "$PHYSICAL" == "1" ] if [ "$PHYSICAL" == "1" ]
@ -1228,7 +1291,7 @@ get_no_of_cpus () {
fi fi
TMP=`grep 'core id' $CPUINFO` TMP=`grep 'core id' $CPUINFO`
if [ "$?" == "0" ] if [ "$?" ]
then then
log DEBUG "Starting job only for each physical core on all physical CPU(s)." log DEBUG "Starting job only for each physical core on all physical CPU(s)."
NUMBER=`grep 'core id' $CPUINFO | sort | uniq | wc -l` NUMBER=`grep 'core id' $CPUINFO | sort | uniq | wc -l`
@ -1240,7 +1303,7 @@ get_no_of_cpus () {
fi fi
else else
log INFO "No 'physical id' section found in $CPUINFO, typical for older cpus." log INFO "No 'physical id' section found in $CPUINFO, typical for older cpus."
NUMBER=`grep ^processor $CPUINFO | wc -l` NUMBER=`grep -c ^processor $CPUINFO`
got_cpu_info "$?" got_cpu_info "$?"
fi fi
elif [ "$ARCH" == "Darwin" ] elif [ "$ARCH" == "Darwin" ]
@ -1312,7 +1375,7 @@ get_global_lock () {
ERROR="$?" ERROR="$?"
if [ ! "$ERROR" == "0" ] if [ ! "$ERROR" == "0" ]
then then
random_delay $MAX_LOCK_DELAY #random_delay $MAX_LOCK_DELAY
continue continue
else else
break break
@ -1457,24 +1520,16 @@ lock_item () {
ITEM="$1" ITEM="$1"
LOCK_FILE_NAME=`echo "$ITEM" | $MD5 | awk '{ print $1 }'` LOCK_FILE_NAME=`echo "$ITEM" | $MD5 | awk '{ print $1 }'`
ITEM_LOCK_FILE="$ITEM_LOCK_DIR/$LOCK_FILE_NAME" ITEM_LOCK_FILE="$ITEM_LOCK_DIR/$LOCK_FILE_NAME"
log DEBUG "Trying to lock item $ITEM - $ITEM_LOCK_FILE."
exec_cmd "mkdir $ITEM_LOCK_FILE >> /dev/null 2>&1" exec_cmd "mkdir $ITEM_LOCK_FILE >> /dev/null 2>&1"
ERROR="$?" return "$?"
if [ "$ERROR" == "$?" ]
then
exec_cmd "touch $ITEM_LOCK_FILE/$HOSTNAME" # Record that item is claimed by node x.
fi
return "$ERROR"
} }
get_input_lock () { get_input_lock () {
while true while true
do do
exec_cmd "mkdir $INPUT_LOCK >> /dev/null 2>&1 " exec_cmd "mkdir $INPUT_LOCK >> /dev/null 2>&1 "
if [ "$?" == "0" ] if [ "$?" ]
then then
log DEBUG "Input lock is obtained..." log DEBUG "Input lock is obtained..."
break break
@ -1488,7 +1543,7 @@ get_input_lock () {
release_input_lock () { release_input_lock () {
exec_cmd "rm -rf $INPUT_LOCK" exec_cmd "rm -rf $INPUT_LOCK"
if [ "$?" == "0" ] if [ "$?" ]
then then
log DEBUG "Input lock was released..." log DEBUG "Input lock was released..."
return 0 return 0
@ -1544,6 +1599,7 @@ get_all_items () {
cleanup cleanup
exit 1 exit 1
fi fi
else else
ITEMS="" ITEMS=""
fi fi
@ -1636,8 +1692,13 @@ get_item () {
else else
((GLOBAL_COUNTER++)) ((GLOBAL_COUNTER++))
echo $GLOBAL_COUNTER > $GLOBAL_COUNTER_FILE echo $GLOBAL_COUNTER > $GLOBAL_COUNTER_FILE
if [ "$DISABLE_ITEM_LOCK" == "0" ]
then
lock_item "$ITEM" lock_item "$ITEM"
if [ ! "$?" == "0" ] else
log DEBUG "Item lock disabled."
fi
if [ ! "$?" ]
then then
log DEBUG "Item $ITEM is locked." log DEBUG "Item $ITEM is locked."
release_global_lock release_global_lock
@ -1701,16 +1762,11 @@ elapsed () {
SECS="$(expr $REMAINDER % 60)" SECS="$(expr $REMAINDER % 60)"
MINS="$(expr $(expr $REMAINDER - $SECS) / 60)" MINS="$(expr $(expr $REMAINDER - $SECS) / 60)"
RES=`printf 'Total processing time (hh:mm:ss): %02d:%02d:%02d' $HOURS $MINS $SECS` RES=$(printf "Total processing time (hh:mm:ss): %02d:%02d:%02d" $HOURS $MINS $SECS)
log DSPLY "$RES" log DSPLY "$RES"
} }
commando () { commando () {
log DEBUG "-------------------------------------"
if [ "$DAEMON" == "1" ]
then
log INFO "Processing item: $1 in DAEMON MODE"
fi
# #
# This function will start a chain reaction of events. # This function will start a chain reaction of events.
@ -1734,7 +1790,6 @@ commando () {
# #
ITEM="$1" ITEM="$1"
if [ "$RECURSION" == "1" ] if [ "$RECURSION" == "1" ]
then then
escape_item "$ITEM" escape_item "$ITEM"
@ -1766,11 +1821,7 @@ commando () {
DIR_NAME="$SRC_DIR" DIR_NAME="$SRC_DIR"
ITEM_NO_PATH="$ITEM" ITEM_NO_PATH="$ITEM"
OUTPUT_DIR="$PPSS_LOCAL_OUTPUT" OUTPUT_DIR="$PPSS_LOCAL_OUTPUT"
fi fi
#
# OUTPUT_DIR can be used in scripts or command lines.
#
else else
VIRTUAL="1" VIRTUAL="1"
DIR_NAME="" DIR_NAME=""
@ -1781,12 +1832,17 @@ commando () {
OUTPUT_FILE="$ITEM_NO_PATH" OUTPUT_FILE="$ITEM_NO_PATH"
log DEBUG "Processing item: $ITEM" #
log DEBUG "ITEM_NO_PATH is $ITEM_NO_PATH" # The following lines should only be enabled for debugging.
log DEBUG "Dirname is $DIR_NAME" #
log DEBUG "OUTPUT DIR IS $OUTPUT_DIR" #log DEBUG "Processing item: $ITEM"
log DEBUG "Virtual is $VIRTUAL" #log DEBUG "ITEM_NO_PATH is $ITEM_NO_PATH"
log DEBUG "OUTPUT FILE is $OUTPUT_FILE" #log DEBUG "Dirname is $DIR_NAME"
#log DEBUG "OUTPUT DIR IS $OUTPUT_DIR"
#log DEBUG "Virtual is $VIRTUAL"
#log DEBUG "OUTPUT FILE is $OUTPUT_FILE"
#
# #
# Decide if an item must be transfered from server to the node. # Decide if an item must be transfered from server to the node.
# or be processed in-place (NFS / SMB mount?) # or be processed in-place (NFS / SMB mount?)
@ -1818,7 +1874,6 @@ commando () {
# #
# Create the log file containing the output of the command. # Create the log file containing the output of the command.
# #
#LOG_FILE_NAME=`echo "$ITEM" | sed s/^\\\.//g | sed s/^\\\.\\\.//g | sed s/\\\///g | sed s/\\ /_/g`
LOG_FILE_NAME=`echo "$ITEM" | $MD5 | awk '{ print $1 }'` LOG_FILE_NAME=`echo "$ITEM" | $MD5 | awk '{ print $1 }'`
ITEM_LOG_FILE="$JOB_LOG_DIR/$LOG_FILE_NAME" ITEM_LOG_FILE="$JOB_LOG_DIR/$LOG_FILE_NAME"
@ -1839,11 +1894,17 @@ commando () {
# If the item is virtual, the item can contain special characters. # If the item is virtual, the item can contain special characters.
# These characters are stripped from the log file name, so this is used. # These characters are stripped from the log file name, so this is used.
# #
OUTPUT_DIR=$PPSS_LOCAL_OUTPUT/"$LOG_FILE_NAME" OUTPUT_DIR="$PPSS_LOCAL_OUTPUT"
fi fi
log DEBUG "Local output dir is $OUTPUT_DIR" log DEBUG "Local output dir is $OUTPUT_DIR"
#
# FIXME!
#
if [ "$PPSS_OUTPUT" == "1" ]
then
mkdir -p "$OUTPUT_DIR" mkdir -p "$OUTPUT_DIR"
fi
ERROR="" ERROR=""
# #
@ -1863,7 +1924,7 @@ commando () {
# #
BEFORE=`get_time_in_seconds` BEFORE=`get_time_in_seconds`
TMP=`echo $COMMAND | grep -i '$ITEM'` TMP=`echo $COMMAND | grep -i '$ITEM'`
if [ "$?" == "0" ] if [ "$?" ]
then then
eval "$COMMAND" >> "$ITEM_LOG_FILE" 2>&1 eval "$COMMAND" >> "$ITEM_LOG_FILE" 2>&1
ERROR="$?" ERROR="$?"
@ -1933,7 +1994,7 @@ commando () {
then then
log DEBUG "Uploading item log file $ITEM_LOG_FILE to master $PPSS_HOME_DIR/$JOB_LOG_DIR" log DEBUG "Uploading item log file $ITEM_LOG_FILE to master $PPSS_HOME_DIR/$JOB_LOG_DIR"
scp -q $SSH_OPTS $SSH_KEY "$ITEM_LOG_FILE" $USER@$SSH_SERVER:$PPSS_HOME_DIR/$JOB_LOG_DIR scp -q $SSH_OPTS $SSH_KEY "$ITEM_LOG_FILE" $USER@$SSH_SERVER:$PPSS_HOME_DIR/$JOB_LOG_DIR
if [ ! "$?" == "0" ] if [ ! "$?" ]
then then
log DEBUG "Uploading of item log file failed." log DEBUG "Uploading of item log file failed."
fi fi
@ -1983,7 +2044,7 @@ listen_for_job () {
RES=$((MAX_NO_OF_RUNNING_JOBS-DIED)) RES=$((MAX_NO_OF_RUNNING_JOBS-DIED))
if [ "$RES" == "1" ] && [ "$QUIET" == "0" ] if [ "$RES" == "1" ] && [ "$QUIET" == "0" ]
then then
log PRCNT "$((MAX_NO_OF_RUNNING_JOBS-DIED)) job is remaining. \n" log PRCNT "$((MAX_NO_OF_RUNNING_JOBS-DIED)) job is remaining. "
elif [ "$QUIET" == "0" ] elif [ "$QUIET" == "0" ]
then then
if [ "$DIED" == "1" ] if [ "$DIED" == "1" ]
@ -2004,7 +2065,6 @@ listen_for_job () {
# spawned, although disowned or backgrounded will be killed... # spawned, although disowned or backgrounded will be killed...
# #
PROCLIST=`ps a -o pid,pgid,ppid,command | grep [0-9] | grep $PID | grep -v -i grep` PROCLIST=`ps a -o pid,pgid,ppid,command | grep [0-9] | grep $PID | grep -v -i grep`
#echo "$PROCLIST" > proclist.txt
oldIFS=$IFS # save the field separator oldIFS=$IFS # save the field separator
IFS=$'\n' # new field separator, the end of line IFS=$'\n' # new field separator, the end of line
for x in `echo "$PROCLIST"` for x in `echo "$PROCLIST"`
@ -2177,7 +2237,7 @@ show_status () {
then then
STATUS=`get_status_of_node "$x" | awk '{ print $2 }'` STATUS=`get_status_of_node "$x" | awk '{ print $2 }'`
RES=`exec_cmd "grep -i $NODE ~/$PPSS_HOME_DIR/$JOB_LOG_DIR/* 2>/dev/null | wc -l " 1` RES=`exec_cmd "grep -i $NODE ~/$PPSS_HOME_DIR/$JOB_LOG_DIR/* 2>/dev/null | wc -l " 1`
if [ ! "$?" == "0" ] || [ -z "$RES" ] if [ ! "$?" ] || [ -z "$RES" ]
then then
RES=0 RES=0
fi fi
@ -2316,6 +2376,7 @@ main () {
* ) * )
create_working_directory create_working_directory
display_header
init_vars init_vars
get_all_items get_all_items
listen_for_job "$MAX_NO_OF_RUNNING_JOBS" & 2>&1 >> /dev/null listen_for_job "$MAX_NO_OF_RUNNING_JOBS" & 2>&1 >> /dev/null
@ -2324,12 +2385,25 @@ main () {
;; ;;
esac esac
} }
#
# This command starts the function that sets the whole framework in motion.
#
main
# if ! are_we_sourced
# Exit after all processes have finished. then
#
wait #
# First step: process all command-line arguments.
#
process_arguments "$@"
#
# This command starts the function that sets the whole framework in motion.
# But only if the file is not sourced.
#
main
#
# Exit after all processes have finished.
#
wait
fi