Parallel deployment of ppss + new mechanism for specifying output files.

This commit is contained in:
Louwrentius 2009-03-13 10:12:10 +00:00
parent ad2c345903
commit 74f2021032
1 changed files with 56 additions and 33 deletions

View File

@ -64,6 +64,7 @@ LISTENER_PID=""
IFS_BACKUP="$IFS" IFS_BACKUP="$IFS"
INTERVAL="30" # Polling interval to check if there are running jobs. INTERVAL="30" # Polling interval to check if there are running jobs.
CPUINFO=/proc/cpuinfo CPUINFO=/proc/cpuinfo
PROCESSORS=""
SSH_SERVER="" # Remote server or 'master'. SSH_SERVER="" # Remote server or 'master'.
SSH_KEY="" # SSH key for ssh account. SSH_KEY="" # SSH key for ssh account.
@ -495,7 +496,13 @@ init_vars () {
if [ -z "$MAX_NO_OF_RUNNING_JOBS" ] if [ -z "$MAX_NO_OF_RUNNING_JOBS" ]
then then
MAX_NO_OF_RUNNING_JOBS=`get_no_of_cpus $HYPERTHREADING` get_no_of_cpus $HYPERTHREADING
fi
if [ -e "$CPUINFO" ]
then
CPU=`cat /proc/cpuinfo | grep 'model name' | cut -d ":" -f 2 | sed -e s/^\ //g | sort | uniq`
log INFO "CPU: $CPU"
fi fi
log INFO "---------------------------------------------------------" log INFO "---------------------------------------------------------"
@ -607,7 +614,9 @@ erase_ppss () {
fi fi
} }
deploy_ppss () { deploy () {
NODE="$1"
ERROR=0 ERROR=0
set_error () { set_error () {
@ -618,6 +627,34 @@ deploy_ppss () {
fi fi
} }
ssh -q $USER@$NODE "mkdir $PPSS_HOME_DIR >> /dev/null 2>&1"
scp -q $SSH_OPTS $0 $USER@$NODE:~/$PPSS_HOME_DIR
set_error $?
scp -q $KEY $USER@$NODE:~/$PPSS_HOME_DIR
set_error $?
scp -q $CONFIG $USER@$NODE:~/$PPSS_HOME_DIR
set_error $?
scp -q known_hosts $USER@$NODE:~/$PPSS_HOME_DIR
set_error $?
scp -q $SCRIPT $USER@$NODE:~/$PPSS_HOME_DIR
set_error $?
if [ ! -z "$INPUT_FILE" ]
then
scp -q $INPUT_FILE $USER@$NODE:~/$PPSS_HOME_DIR
set_error $?
fi
if [ "$ERROR" == "0" ]
then
log INFO "PPSS installed on node $NODE."
else
log INFO "PPSS failed to install on $NODE."
fi
}
deploy_ppss () {
if [ -z "$NODES_FILE" ] if [ -z "$NODES_FILE" ]
then then
log INFO "ERROR - are you using the right option? -C ?" log INFO "ERROR - are you using the right option? -C ?"
@ -648,29 +685,7 @@ deploy_ppss () {
else else
for NODE in `cat $NODES_FILE` for NODE in `cat $NODES_FILE`
do do
ssh -q $USER@$NODE "mkdir $PPSS_HOME_DIR >> /dev/null 2>&1" deploy "$NODE" &
scp -q $SSH_OPTS $0 $USER@$NODE:~/$PPSS_HOME_DIR
set_error $?
scp -q $KEY $USER@$NODE:~/$PPSS_HOME_DIR
set_error $?
scp -q $CONFIG $USER@$NODE:~/$PPSS_HOME_DIR
set_error $?
scp -q known_hosts $USER@$NODE:~/$PPSS_HOME_DIR
set_error $?
scp -q $SCRIPT $USER@$NODE:~/$PPSS_HOME_DIR
set_error $?
if [ ! -z "$INPUT_FILE" ]
then
scp -q $INPUT_FILE $USER@$NODE:~/$PPSS_HOME_DIR
set_error $?
fi
if [ "$ERROR" == "0" ]
then
log INFO "PPSS installed on node $NODE."
else
log INFO "PPSS failed to install on $NODE."
fi
done done
fi fi
} }
@ -739,27 +754,32 @@ get_no_of_cpus () {
log DEBUG "Found $NUMBER logic processors." log DEBUG "Found $NUMBER logic processors."
elif [ "$HPT" == "no" ] elif [ "$HPT" == "no" ]
then then
log DEBUG "Hyperthreading is disabled." log INFO "Hyperthreading is disabled."
if [ "$ARCH" == "Linux" ] if [ "$ARCH" == "Linux" ]
then then
PHYSICAL=`grep 'physical id' $CPUINFO` PHYSICAL=`grep 'physical id' $CPUINFO`
if [ "$?" == "0" ] if [ "$?" == "0" ]
then then
PHYSICAL=`grep 'physical id' $CPUINFO | sort | uniq | wc -l` PHYSICAL=`grep 'physical id' $CPUINFO | sort | uniq | wc -l`
log DEBUG "Detected $PHYSICAL physical CPU(s)" if [ "$PHYSICAL" == "1" ]
then
log INFO "Found $PHYSICAL physical CPU."
else
log INFO "Found $PHYSICAL physical CPUs."
fi
TMP=`grep 'core id' $CPUINFO` TMP=`grep 'core id' $CPUINFO`
if [ "$?" == "0" ] if [ "$?" == "0" ]
then then
log DEBUG "Starting job only for each physical core on all physical CPU(s)." log DEBUG "Starting job only for each physical core on all physical CPU(s)."
NUMBER=`grep 'core id' $CPUINFO | sort | uniq | wc -l` NUMBER=`grep 'core id' $CPUINFO | sort | uniq | wc -l`
log DEBUG "Found $NUMBER physical cores." log INFO "Found $NUMBER physical cores."
else else
log DEBUG "Single core processor(s) detected (or you found a bug)." log INFO "Single core processor(s) detected."
log DEBUG "Starting job (only) for each physical CPU." log INFO "Starting job for each physical CPU."
NUMBER=$PHYSICAL NUMBER=$PHYSICAL
fi fi
else else
log DEBUG "No 'physical id' section found in $CPUINFO, is this a bug?." log INFO "No 'physical id' section found in $CPUINFO, is this a bug?."
NUMBER=`grep ^processor $CPUINFO | wc -l` NUMBER=`grep ^processor $CPUINFO | wc -l`
got_cpu_info "$?" got_cpu_info "$?"
fi fi
@ -780,7 +800,7 @@ get_no_of_cpus () {
if [ ! -z "$NUMBER" ] if [ ! -z "$NUMBER" ]
then then
echo "$NUMBER" MAX_NO_OF_RUNNING_JOBS=$NUMBER
else else
log INFO "$FUNCNAME ERROR - number of CPUs not obtained." log INFO "$FUNCNAME ERROR - number of CPUs not obtained."
exit 1 exit 1
@ -1073,6 +1093,8 @@ commando () {
ITEM="$1" ITEM="$1"
ITEM_NO_PATH="$1" ITEM_NO_PATH="$1"
OUTPUT_DIR=$PPSS_LOCAL_OUTPUT/"$ITEM_NO_PATH"
OUTPUT_FILE="$ITEM_NO_PATH"
log DEBUG "Processing item $ITEM" log DEBUG "Processing item $ITEM"
@ -1086,7 +1108,7 @@ commando () {
LOG_FILE_NAME=`echo "$ITEM" | sed s/^\\\.//g | sed s/^\\\.\\\.//g | sed s/\\\///g` LOG_FILE_NAME=`echo "$ITEM" | sed s/^\\\.//g | sed s/^\\\.\\\.//g | sed s/\\\///g`
ITEM_LOG_FILE="$JOB_LOG_DIR/$LOG_FILE_NAME" ITEM_LOG_FILE="$JOB_LOG_DIR/$LOG_FILE_NAME"
mkdir -p $PPSS_LOCAL_OUTPUT/"$ITEM_NO_PATH" mkdir -p "$OUTPUT_DIR"
does_file_exist "$ITEM_LOG_FILE" does_file_exist "$ITEM_LOG_FILE"
if [ "$?" == "0" ] if [ "$?" == "0" ]
@ -1286,6 +1308,7 @@ main () {
display_header display_header
log INFO "Deploying PPSS on nodes." log INFO "Deploying PPSS on nodes."
deploy_ppss deploy_ppss
wait
cleanup cleanup
exit 0 exit 0
;; ;;