Distributed ppss almost finished

This commit is contained in:
Louwrentius 2009-01-25 03:36:21 +00:00
parent 1bc583a5e1
commit be8bf0886c

67
ppss.sh
View File

@ -42,9 +42,9 @@ SCRIPT_NAME="Parallel Processing Shell Script"
SCRIPT_VERSION="1.10" SCRIPT_VERSION="1.10"
RUNNING_SIGNAL="$0_is_running" RUNNING_SIGNAL="$0_is_running"
GLOBAL_LOCK="PPSS-$RANDOM-$RANDOM" GLOBAL_LOCK="PPSS-GLOBAL-LOCK"
PAUSE_SIGNAL="pause.txt" PAUSE_SIGNAL="pause.txt"
ARRAY_POINTER_FILE="array-pointer-$RANDOM-$RANDOM" ARRAY_POINTER_FILE="ppss-array-pointer"
JOB_LOG_DIR="JOB_LOG" JOB_LOG_DIR="JOB_LOG"
LOGFILE="ppss-log.txt" LOGFILE="ppss-log.txt"
MAX_DELAY=2 MAX_DELAY=2
@ -55,7 +55,7 @@ IFS_BACKUP="$IFS"
SSH_SERVER="" # Remote server or 'master'. SSH_SERVER="" # Remote server or 'master'.
SSH_KEY="" # SSH key for ssh account. SSH_KEY="" # SSH key for ssh account.
SSH_OPTS="-o \\"BatchMode=yes\\" -o \\"ControlPath /tmp/master-%r@%h:%p\\" -o \\"ControlMaster auto\\"" SSH_OPTS="-o BatchMode=yes -o ControlPath=/tmp/master-%r@%h:%p -o ControlMaster=auto -o ConnectTimeout=5"
SSH_MASTER_PID="" SSH_MASTER_PID=""
showusage () { showusage () {
@ -109,17 +109,17 @@ kill_process () {
else else
cleanup cleanup
echo -en "\033[1B" echo -en "\033[1B"
# The master SSH connection should be killed.
if [ ! -z "$SSH_MASTER_PID" ]
then
kill -9 "$SSH_MASTER_PID"
fi
log INFO "Finished." log INFO "Finished."
echo "" echo ""
exit 0 exit 0
fi fi
done done
# The master SSH connection should be killed.
if [ ! -z "$SSH_MASTER_PID" ]
then
kill -9 "$SSH_MASTER_PID"
fi
} }
@ -132,14 +132,14 @@ cleanup () {
rm $FIFO rm $FIFO
fi fi
if [ -e "$ARRAY_POINTER_FILE" ] && [ -z "$SSH_SERVER" ] if [ -e "$ARRAY_POINTER_FILE" ]
then then
rm $ARRAY_POINTER_FILE rm $ARRAY_POINTER_FILE
fi fi
if [ -e "$GLOBAL_LOCK" ] && [ -z "$SSH_SERVER" ] if [ -e "$GLOBAL_LOCK" ]
then then
rm -rf "$GLOBAL_LOCK" rm -rf $GLOBAL_LOCK
fi fi
if [ -e "$RUNNING_SIGNAL" ] if [ -e "$RUNNING_SIGNAL" ]
@ -242,12 +242,11 @@ done
# This function makes local and remote operation transparent. # This function makes local and remote operation transparent.
exec_cmd () { exec_cmd () {
CMD="$1" CMD="eval $1"
if [ ! -z "$SSH_SERVER" ] if [ ! -z "$SSH_SERVER" ]
then then
#ssh "$SSH_OPTS" "$SSH_KEY" "$SSH_SERVER" eval "$CMD" ssh $SSH_OPTS $SSH_KEY $SSH_SERVER $CMD
ssh "$SSH_SERVER" "$CMD"
else else
eval "$CMD" eval "$CMD"
fi fi
@ -257,7 +256,13 @@ exec_cmd () {
does_file_exist () { does_file_exist () {
FILE="$1" FILE="$1"
exec_cmd "if [ -e \"$FILE\" ]; then return 0; else return 1; fi" `exec_cmd "ls -1 $FILE >> /dev/null 2>&1"`
if [ "$?" == "0" ]
then
return 0
else
return 1
fi
} }
@ -297,12 +302,14 @@ init_vars () {
MAX_NO_OF_RUNNING_JOBS=`get_no_of_cpus $HYPERTHREADING` MAX_NO_OF_RUNNING_JOBS=`get_no_of_cpus $HYPERTHREADING`
fi fi
if [ ! -e "$JOB_LOG_DIR" ] does_file_exist "$JOB_LOG_DIR"
if [ ! "$?" == "0" ]
then then
log INFO "Job log directory $JOB_lOG_DIR does not exist. Creating." log INFO "Job log directory $JOB_lOG_DIR does not exist. Creating."
mkdir "$JOB_LOG_DIR" exec_cmd "mkdir $JOB_LOG_DIR"
mkdir "$JOB_LOG_DIR" >> /dev/null 2>&1
else else
log INFO "Job log directory $JOB_LOG_DIR exists, if it contains logs for items, these items will be skipiped." log INFO "Job log directory $JOB_LOG_DIR exists, if it contains logs for items, these items will be skipped."
fi fi
} }
@ -363,10 +370,10 @@ test_server () {
# Testing if the remote server works as expected. # Testing if the remote server works as expected.
if [ ! -z "$SSH_SERVER" ] if [ ! -z "$SSH_SERVER" ]
then then
exec_cmd "date" exec_cmd "date >> /dev/null"
check_status "$?" "$FUNCNAME" "Server $SSH_SERVER could not be reached" check_status "$?" "$FUNCNAME" "Server $SSH_SERVER could not be reached"
ssh -N -M "$SSH_OPTS" "$SSH_KEY" "$SSH_SERVER" & ssh -N -M $SSH_OPTS $SSH_KEY $SSH_SERVER &
SSH_MASTER_PID="$!" SSH_MASTER_PID="$!"
else else
log DEBUG "No remote server specified, assuming stand-alone mode." log DEBUG "No remote server specified, assuming stand-alone mode."
@ -515,8 +522,8 @@ get_all_items () {
then then
if [ ! -z "$SSH_SERVER" ] # Are we running stand-alone or as a slave?" if [ ! -z "$SSH_SERVER" ] # Are we running stand-alone or as a slave?"
then then
ITEMS=`exec_cmd ls -1 $SRC_DIR` ITEMS=`exec_cmd "ls -1 $SRC_DIR"`
check_status "$FUNCNAME" "Could not list files within remote source directory." check_status "$?" "$FUNCNAME" "Could not list files within remote source directory."
else else
ITEMS=`ls -1 $SRC_DIR` ITEMS=`ls -1 $SRC_DIR`
fi fi
@ -531,8 +538,8 @@ get_all_items () {
else else
if [ ! -z "$SSH_SERVER" ] # Are we running stand-alone or as a slave?" if [ ! -z "$SSH_SERVER" ] # Are we running stand-alone or as a slave?"
then then
scp "$SSH_KEY" "$SSH_SERVER:~/$INPUT_FILE" >> /dev/null 2>&! scp -q "$SSH_KEY" "$SSH_SERVER:~/$INPUT_FILE" >> /dev/null 2>&!
check_status "$FUNCNAME" "Could not copy input file." check_status "$?" "$FUNCNAME" "Could not copy input file."
fi fi
exec 10<$INPUT_FILE exec 10<$INPUT_FILE
@ -627,21 +634,21 @@ commando () {
LOG_FILE_NAME=`echo $ITEM | sed s/^\\.//g | sed s/^\\.\\.//g | sed s/\\\///g` LOG_FILE_NAME=`echo $ITEM | sed s/^\\.//g | sed s/^\\.\\.//g | sed s/\\\///g`
ITEM_LOG_FILE="$JOB_LOG_DIR/$LOG_FILE_NAME" ITEM_LOG_FILE="$JOB_LOG_DIR/$LOG_FILE_NAME"
does_file_exist "$ITEM_LOG_FILE" does_file_exist "./$ITEM_LOG_FILE"
if [ "$0" == "0" ] if [ "$?" == "0" ]
then then
log DEBUG "Skipping item $ITEM - already processed." log DEBUG "Skipping item $ITEM - already processed."
else else
EXECME='$COMMAND"$ITEM" > "$ITEM_LOG_FILE" 2>&1' EXECME='$COMMAND"$ITEM" > "./$ITEM_LOG_FILE" 2>&1'
eval "$EXECME" eval "$EXECME"
fi fi
if [ ! -z "$SSH_SERVER" ] if [ ! -z "$SSH_SERVER" ]
then then
get_global_lock #get_global_lock
scp "$SSH_KEY" "$ITEM_LOG_FILE" "$SSH_SERVER:~/$JOB_LOG" scp -q $SSH_OPTS $SSH_KEY $ITEM_LOG_FILE $SSH_SERVER:~/$JOB_LOG_DIR &
release_global_lock #release_global_lock
fi fi
start_single_worker start_single_worker