Switched ppss over to md5 checksumming. (backup)

This commit is contained in:
Louwrentius 2010-05-14 02:07:43 +00:00
parent 825d7ed0a1
commit 61d8a24ed2
2 changed files with 147 additions and 135 deletions

272
ppss
View File

@ -25,7 +25,7 @@ trap 'kill_process' SIGINT
# Setting some vars. # Setting some vars.
SCRIPT_NAME="Distributed Parallel Processing Shell Script" SCRIPT_NAME="Distributed Parallel Processing Shell Script"
SCRIPT_VERSION="2.63" SCRIPT_VERSION="2.65"
# The first argument to this script can be a mode. # The first argument to this script can be a mode.
MODES="node start config stop pause continue deploy status erase kill ec2" MODES="node start config stop pause continue deploy status erase kill ec2"
@ -119,6 +119,20 @@ ITEM_ESCAPED=""
NODE_STATUS="$PPSS_DIR/status.txt" NODE_STATUS="$PPSS_DIR/status.txt"
DAEMON=0 DAEMON=0
#
# Select the command used to compute MD5 checksums on this platform and
# store it in the global MD5.
#
# Globals:   ARCH (read)   - platform name the selection is based on.
#            MD5  (written) - checksum command, possibly with arguments.
#
# MD5 may hold a command plus arguments (see SunOS), so it must be
# expanded unquoted where it is used, e.g.: echo "$ITEM" | $MD5
#
set_md5_command () {

    case "$ARCH" in
        "Darwin"|"FreeBSD")
            MD5="md5"
            ;;
        "SunOS")
            # The Solaris utility is called 'digest'.
            MD5="digest -a md5"
            ;;
        "Linux")
            MD5="md5sum"
            ;;
        *)
            # Unknown platform: never leave MD5 empty. An empty MD5 turns
            # 'echo ... | $MD5' into a silent no-op that exits with 0 and
            # produces the same (empty) checksum for every item.
            MD5="md5sum"
            ;;
    esac
}

set_md5_command
showusage_short () { showusage_short () {
echo echo
@ -778,6 +792,12 @@ log () {
# Init all vars # Init all vars
init_vars () { init_vars () {
echo "test" | $MD5 > /dev/null 2>&1
if [ ! "$?" == "0" ]
then
echo "ERROR - PPSS requires $MD5. It may not be within the path or installed."
fi
create_working_directory create_working_directory
if [ "$DAEMON" == "1" ] if [ "$DAEMON" == "1" ]
@ -797,6 +817,13 @@ init_vars () {
then then
rm $LOGFILE rm $LOGFILE
fi fi
does_file_exist "$PPSS_HOME_DIR"
if [ ! "$?" == "0" ] && [ ! -z "$SSH_SERVER" ]
then
log DEBUG "Remote PPSS home directory $PPSS_HOME_DIR does not exist. Creating."
exec_cmd "mkdir -p $PPSS_HOME_DIR/$PPSS_DIR"
fi
display_header display_header
@ -846,30 +873,38 @@ init_vars () {
get_no_of_cpus $HYPERTHREADING get_no_of_cpus $HYPERTHREADING
fi fi
does_file_exist "$PPSS_HOME_DIR" if [ ! -z "$SSH_SERVER" ]
if [ ! "$?" == "0" ]
then then
log DEBUG "Remote PPSS home directory $PPSS_HOME_DIR does not exist. Creating." does_file_exist "$PPSS_HOME_DIR/$JOB_LOG_DIR"
exec_cmd "mkdir -p $PPSS_HOME_DIR/$PPSS_DIR" if [ ! "$?" == "0" ]
then
log DEBUG "Remote Job log directory $JOB_lOG_DIR does not exist. Creating."
exec_cmd "mkdir $PPSS_HOME_DIR/$JOB_LOG_DIR"
fi
elif [ ! -e "$JOB_LOG_DIR" ]
then
mkdir -p "$JOB_LOG_DIR"
fi fi
does_file_exist "$PPSS_HOME_DIR/$JOB_LOG_DIR" if [ -z "$SSH_SERVER" ]
if [ ! "$?" == "0" ]
then then
log DEBUG "Remote Job log directory $JOB_lOG_DIR does not exist. Creating." ITEM_LOCK_DIR="$PPSS_DIR/ITEM_LOCK_DIR"
exec_cmd "mkdir $PPSS_HOME_DIR/$JOB_LOG_DIR"
fi fi
does_file_exist "$ITEM_LOCK_DIR" does_file_exist "$ITEM_LOCK_DIR"
if [ ! "$?" == "0" ] if [ ! "$?" == "0" ]
then then
log DEBUG "Creating remote item lock dir." if [ ! -z "$SSH_SERVER" ]
then
log DEBUG "Creating remote item lock dir."
else
log DEBUG "Creating local item lock dir."
fi
exec_cmd "mkdir $ITEM_LOCK_DIR" exec_cmd "mkdir $ITEM_LOCK_DIR"
fi if [ ! "$?" == "0" ]
then
if [ ! -e "$JOB_LOG_DIR" ] log DEBUG "Failed to create item lock dir."
then fi
mkdir -p "$JOB_LOG_DIR"
fi fi
if [ ! -z "$SSH_SERVER" ] if [ ! -z "$SSH_SERVER" ]
@ -1415,35 +1450,18 @@ upload_item () {
lock_item () { lock_item () {
if [ ! -z "$SSH_SERVER" ] ITEM="$1"
LOCK_FILE_NAME=`echo "$ITEM" | $MD5 | awk '{ print $1 }'`
ITEM_LOCK_FILE="$ITEM_LOCK_DIR/$LOCK_FILE_NAME"
log DEBUG "Trying to lock item $ITEM - $ITEM_LOCK_FILE."
exec_cmd "mkdir $ITEM_LOCK_FILE >> /dev/null 2>&1"
ERROR="$?"
if [ "$ERROR" == "$?" ]
then then
ITEM="$1" exec_cmd "touch $ITEM_LOCK_FILE/$HOSTNAME" # Record that item is claimed by node x.
LOCK_FILE_NAME=`echo "$ITEM" | \
sed s/^\\\.//g | \
sed s/^\\\.\\\.//g | \
sed s/^\\\///g | \
sed s/\\\//\\\\\\ /g | \
sed s/\\ /\\\\\\\\\\\\\\ /g | \
sed s/\\'/\\\\\\\\\\\\\\'/g | \
sed s/\\\//\\\\\\\\\\\\\\ /g | \
sed s/\&/\\\\\\\\\\\\\\&/g | \
sed s/\;/\\\\\\\\\\\\\\;/g | \
sed s/\(/\\\\\\\\\\(/g | \
sed s/\)/\\\\\\\\\\)/g `
ITEM_LOCK_FILE="$ITEM_LOCK_DIR/$LOCK_FILE_NAME"
log DEBUG "Trying to lock item $ITEM - $ITEM_LOCK_FILE."
exec_cmd "mkdir $ITEM_LOCK_FILE >> /dev/null 2>&1"
ERROR="$?"
if [ "$ERROR" == "$?" ]
then
exec_cmd "touch $ITEM_LOCK_FILE/$HOSTNAME" # Record that item is claimed by node x.
fi
return "$ERROR"
fi fi
return "$ERROR"
} }
get_input_lock () { get_input_lock () {
@ -1800,7 +1818,8 @@ commando () {
# #
# Create the log file containing the output of the command. # Create the log file containing the output of the command.
# #
LOG_FILE_NAME=`echo "$ITEM" | sed s/^\\\.//g | sed s/^\\\.\\\.//g | sed s/\\\///g | sed s/\\ /_/g` #LOG_FILE_NAME=`echo "$ITEM" | sed s/^\\\.//g | sed s/^\\\.\\\.//g | sed s/\\\///g | sed s/\\ /_/g`
LOG_FILE_NAME=`echo "$ITEM" | $MD5 | awk '{ print $1 }'`
ITEM_LOG_FILE="$JOB_LOG_DIR/$LOG_FILE_NAME" ITEM_LOG_FILE="$JOB_LOG_DIR/$LOG_FILE_NAME"
# #
@ -1826,104 +1845,97 @@ commando () {
mkdir -p "$OUTPUT_DIR" mkdir -p "$OUTPUT_DIR"
ERROR=""
does_file_exist "$ITEM_LOG_FILE" #
if [ "$?" == "0" ] # Some formatting of item log files.
then #
log DEBUG "Skipping item $ITEM - already processed." DATE=`date +%b\ %d\ %H:%M:%S`
echo "===== PPSS Item Log File =====" > "$ITEM_LOG_FILE"
echo -e "Host:\t\t$HOSTNAME" >> "$ITEM_LOG_FILE"
echo -e "Process:\t$PID" >> "$ITEM_LOG_FILE"
echo -e "Item:\t\t$ITEM" >> "$ITEM_LOG_FILE"
echo -e "Start date:\t$DATE" >> "$ITEM_LOG_FILE"
echo -e "" >> "$ITEM_LOG_FILE"
#
# The actual execution of the command as specified by
# the -c option.
#
BEFORE=`get_time_in_seconds`
TMP=`echo $COMMAND | grep -i '$ITEM'`
if [ "$?" == "0" ]
then
eval "$COMMAND" >> "$ITEM_LOG_FILE" 2>&1
ERROR="$?"
MYPID="$!"
else else
ERROR="" eval '$COMMAND"$ITEM" >> "$ITEM_LOG_FILE" 2>&1'
# ERROR="$?"
# Some formatting of item log files. MYPID="$!"
# fi
DATE=`date +%b\ %d\ %H:%M:%S` AFTER=`get_time_in_seconds`
echo "===== PPSS Item Log File =====" > "$ITEM_LOG_FILE"
echo -e "Host:\t\t$HOSTNAME" >> "$ITEM_LOG_FILE"
echo -e "Process:\t$PID" >> "$ITEM_LOG_FILE"
echo -e "Item:\t\t$ITEM" >> "$ITEM_LOG_FILE"
echo -e "Start date:\t$DATE" >> "$ITEM_LOG_FILE"
echo -e "" >> "$ITEM_LOG_FILE"
#
# The actual execution of the command as specified by
# the -c option.
#
BEFORE=`get_time_in_seconds`
TMP=`echo $COMMAND | grep -i '$ITEM'`
if [ "$?" == "0" ]
then
eval "$COMMAND" >> "$ITEM_LOG_FILE" 2>&1
ERROR="$?"
MYPID="$!"
else
eval '$COMMAND"$ITEM" >> "$ITEM_LOG_FILE" 2>&1'
ERROR="$?"
MYPID="$!"
fi
AFTER=`get_time_in_seconds`
echo -e "" >> "$ITEM_LOG_FILE" echo -e "" >> "$ITEM_LOG_FILE"
# Some error logging. Success or fail. # Some error logging. Success or fail.
if [ ! "$ERROR" == "0" ] if [ ! "$ERROR" == "0" ]
then
echo -e "Status:\t\tFAILURE" >> "$ITEM_LOG_FILE"
else
echo -e "Status:\t\tSUCCESS" >> "$ITEM_LOG_FILE"
fi
#
# If part of a cluster, remove the downloaded item after
# it has been processed and uploaded as not to fill up disk space.
#
if [ "$DOWNLOAD_TO_NODE" == "1" ]
then
if [ -e "$ITEM" ]
then then
echo -e "Status:\t\tFAILURE" >> "$ITEM_LOG_FILE" rm -rf "$ITEM"
else else
echo -e "Status:\t\tSUCCESS" >> "$ITEM_LOG_FILE" log DEBUG "There is no local file to remove.. strange..."
fi fi
# fi
# If part of a cluster, remove the downloaded item after
# it has been processed and uploaded as not to fill up disk space.
#
if [ "$DOWNLOAD_TO_NODE" == "1" ]
then
if [ -e "$ITEM" ]
then
rm -rf "$ITEM"
else
log DEBUG "There is no local file to remove.. strange..."
fi
#
# Create remote output dir and transfer output to server.
#
escape_item "$DIR_NAME"
ITEM_OUTPUT_DIR="$REMOTE_OUTPUT_DIR/$ITEM_ESCAPED"
if [ "$DOWNLOAD_TO_NODE" == "0" ]
then
log DEBUG "Download to node is disabled."
else
if [ "$DIR_NAME" == "." ]
then
DIR_NAME=""
fi fi
fi
# #
# Create remote output dir and transfer output to server. # Upload the output file back to the server.
# #
escape_item "$DIR_NAME"
ITEM_OUTPUT_DIR="$REMOTE_OUTPUT_DIR/$ITEM_ESCAPED"
if [ "$DOWNLOAD_TO_NODE" == "0" ] upload_item "$OUTPUT_DIR" "$DIR_NAME"
#
# Upload the log file to the server.
#
elapsed "$BEFORE" "$AFTER" >> "$ITEM_LOG_FILE"
echo -e "" >> "$ITEM_LOG_FILE"
if [ ! -z "$SSH_SERVER" ]
then
log DEBUG "Uploading item log file $ITEM_LOG_FILE to master $PPSS_HOME_DIR/$JOB_LOG_DIR"
scp -q $SSH_OPTS $SSH_KEY "$ITEM_LOG_FILE" $USER@$SSH_SERVER:$PPSS_HOME_DIR/$JOB_LOG_DIR
if [ ! "$?" == "0" ]
then then
log DEBUG "Download to node is disabled." log DEBUG "Uploading of item log file failed."
else
if [ "$DIR_NAME" == "." ]
then
DIR_NAME=""
fi
fi
#
# Upload the output file back to the server.
#
upload_item "$OUTPUT_DIR" "$DIR_NAME"
#
# Upload the log file to the server.
#
elapsed "$BEFORE" "$AFTER" >> "$ITEM_LOG_FILE"
echo -e "" >> "$ITEM_LOG_FILE"
if [ ! -z "$SSH_SERVER" ]
then
log DEBUG "Uploading item log file $ITEM_LOG_FILE to master $PPSS_HOME_DIR/$JOB_LOG_DIR"
scp -q $SSH_OPTS $SSH_KEY "$ITEM_LOG_FILE" $USER@$SSH_SERVER:$PPSS_HOME_DIR/$JOB_LOG_DIR
if [ ! "$?" == "0" ]
then
log DEBUG "Uploading of item log file failed."
fi
fi fi
fi fi

View File

@ -1,7 +1,7 @@
#!/bin/bash #!/bin/bash
DEBUG="$1" DEBUG="$1"
VERSION="2.63" VERSION="2.65"
TMP_DIR="ppss" TMP_DIR="ppss"
PPSS=./ppss PPSS=./ppss
PPSS_DIR=ppss_dir PPSS_DIR=ppss_dir
@ -170,8 +170,8 @@ testSkippingOfProcessedItems () {
assertEquals "PPSS did not execute properly." 0 "$?" assertEquals "PPSS did not execute properly." 0 "$?"
assertNull "PPSS retured some errors..." "$RES" assertNull "PPSS retured some errors..." "$RES"
grep -i skip ./$PPSS_DIR/* >> /dev/null 2>&1 RES=`grep -c -i locked ./$PPSS_DIR/ppss-log* | tail -n 1 | cut -d ":" -f 2`
assertEquals "Skipping of items went wrong." 0 "$?" assertEquals "Skipping of items went wrong." 2 "$RES"
rename-ppss-dir $FUNCNAME-1 rename-ppss-dir $FUNCNAME-1
@ -183,8 +183,8 @@ testSkippingOfProcessedItems () {
assertEquals "PPSS did not execute properly." 0 "$?" assertEquals "PPSS did not execute properly." 0 "$?"
assertNull "PPSS retured some errors..." "$RES" assertNull "PPSS retured some errors..." "$RES"
grep -i skip ./$PPSS_DIR/* >> /dev/null 2>&1 RES=`grep -c -i locked ./$PPSS_DIR/ppss-log* | tail -n 1 | cut -d ":" -f 2`
assertEquals "Skipping of items went wrong." 0 "$?" assertEquals "Skipping of items went wrong." 8 "$RES"
rm -rf "/tmp/$TMP_DIR" rm -rf "/tmp/$TMP_DIR"
rename-ppss-dir $FUNCNAME-2 rename-ppss-dir $FUNCNAME-2