Parallel deployment of ppss + new mechanism for specifying output files.
This commit is contained in:
parent
ad2c345903
commit
74f2021032
|
@ -64,6 +64,7 @@ LISTENER_PID=""
|
||||||
IFS_BACKUP="$IFS"
|
IFS_BACKUP="$IFS"
|
||||||
INTERVAL="30" # Polling interval to check if there are running jobs.
|
INTERVAL="30" # Polling interval to check if there are running jobs.
|
||||||
CPUINFO=/proc/cpuinfo
|
CPUINFO=/proc/cpuinfo
|
||||||
|
PROCESSORS=""
|
||||||
|
|
||||||
SSH_SERVER="" # Remote server or 'master'.
|
SSH_SERVER="" # Remote server or 'master'.
|
||||||
SSH_KEY="" # SSH key for ssh account.
|
SSH_KEY="" # SSH key for ssh account.
|
||||||
|
@ -495,7 +496,13 @@ init_vars () {
|
||||||
|
|
||||||
if [ -z "$MAX_NO_OF_RUNNING_JOBS" ]
|
if [ -z "$MAX_NO_OF_RUNNING_JOBS" ]
|
||||||
then
|
then
|
||||||
MAX_NO_OF_RUNNING_JOBS=`get_no_of_cpus $HYPERTHREADING`
|
get_no_of_cpus $HYPERTHREADING
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -e "$CPUINFO" ]
|
||||||
|
then
|
||||||
|
CPU=`cat /proc/cpuinfo | grep 'model name' | cut -d ":" -f 2 | sed -e s/^\ //g | sort | uniq`
|
||||||
|
log INFO "CPU: $CPU"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
log INFO "---------------------------------------------------------"
|
log INFO "---------------------------------------------------------"
|
||||||
|
@ -607,7 +614,9 @@ erase_ppss () {
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
deploy_ppss () {
|
deploy () {
|
||||||
|
|
||||||
|
NODE="$1"
|
||||||
|
|
||||||
ERROR=0
|
ERROR=0
|
||||||
set_error () {
|
set_error () {
|
||||||
|
@ -618,6 +627,34 @@ deploy_ppss () {
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ssh -q $USER@$NODE "mkdir $PPSS_HOME_DIR >> /dev/null 2>&1"
|
||||||
|
scp -q $SSH_OPTS $0 $USER@$NODE:~/$PPSS_HOME_DIR
|
||||||
|
set_error $?
|
||||||
|
scp -q $KEY $USER@$NODE:~/$PPSS_HOME_DIR
|
||||||
|
set_error $?
|
||||||
|
scp -q $CONFIG $USER@$NODE:~/$PPSS_HOME_DIR
|
||||||
|
set_error $?
|
||||||
|
scp -q known_hosts $USER@$NODE:~/$PPSS_HOME_DIR
|
||||||
|
set_error $?
|
||||||
|
scp -q $SCRIPT $USER@$NODE:~/$PPSS_HOME_DIR
|
||||||
|
set_error $?
|
||||||
|
if [ ! -z "$INPUT_FILE" ]
|
||||||
|
then
|
||||||
|
scp -q $INPUT_FILE $USER@$NODE:~/$PPSS_HOME_DIR
|
||||||
|
set_error $?
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "$ERROR" == "0" ]
|
||||||
|
then
|
||||||
|
log INFO "PPSS installed on node $NODE."
|
||||||
|
else
|
||||||
|
log INFO "PPSS failed to install on $NODE."
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
deploy_ppss () {
|
||||||
|
|
||||||
|
|
||||||
if [ -z "$NODES_FILE" ]
|
if [ -z "$NODES_FILE" ]
|
||||||
then
|
then
|
||||||
log INFO "ERROR - are you using the right option? -C ?"
|
log INFO "ERROR - are you using the right option? -C ?"
|
||||||
|
@ -648,29 +685,7 @@ deploy_ppss () {
|
||||||
else
|
else
|
||||||
for NODE in `cat $NODES_FILE`
|
for NODE in `cat $NODES_FILE`
|
||||||
do
|
do
|
||||||
ssh -q $USER@$NODE "mkdir $PPSS_HOME_DIR >> /dev/null 2>&1"
|
deploy "$NODE" &
|
||||||
scp -q $SSH_OPTS $0 $USER@$NODE:~/$PPSS_HOME_DIR
|
|
||||||
set_error $?
|
|
||||||
scp -q $KEY $USER@$NODE:~/$PPSS_HOME_DIR
|
|
||||||
set_error $?
|
|
||||||
scp -q $CONFIG $USER@$NODE:~/$PPSS_HOME_DIR
|
|
||||||
set_error $?
|
|
||||||
scp -q known_hosts $USER@$NODE:~/$PPSS_HOME_DIR
|
|
||||||
set_error $?
|
|
||||||
scp -q $SCRIPT $USER@$NODE:~/$PPSS_HOME_DIR
|
|
||||||
set_error $?
|
|
||||||
if [ ! -z "$INPUT_FILE" ]
|
|
||||||
then
|
|
||||||
scp -q $INPUT_FILE $USER@$NODE:~/$PPSS_HOME_DIR
|
|
||||||
set_error $?
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ "$ERROR" == "0" ]
|
|
||||||
then
|
|
||||||
log INFO "PPSS installed on node $NODE."
|
|
||||||
else
|
|
||||||
log INFO "PPSS failed to install on $NODE."
|
|
||||||
fi
|
|
||||||
done
|
done
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
@ -739,27 +754,32 @@ get_no_of_cpus () {
|
||||||
log DEBUG "Found $NUMBER logic processors."
|
log DEBUG "Found $NUMBER logic processors."
|
||||||
elif [ "$HPT" == "no" ]
|
elif [ "$HPT" == "no" ]
|
||||||
then
|
then
|
||||||
log DEBUG "Hyperthreading is disabled."
|
log INFO "Hyperthreading is disabled."
|
||||||
if [ "$ARCH" == "Linux" ]
|
if [ "$ARCH" == "Linux" ]
|
||||||
then
|
then
|
||||||
PHYSICAL=`grep 'physical id' $CPUINFO`
|
PHYSICAL=`grep 'physical id' $CPUINFO`
|
||||||
if [ "$?" == "0" ]
|
if [ "$?" == "0" ]
|
||||||
then
|
then
|
||||||
PHYSICAL=`grep 'physical id' $CPUINFO | sort | uniq | wc -l`
|
PHYSICAL=`grep 'physical id' $CPUINFO | sort | uniq | wc -l`
|
||||||
log DEBUG "Detected $PHYSICAL physical CPU(s)"
|
if [ "$PHYSICAL" == "1" ]
|
||||||
|
then
|
||||||
|
log INFO "Found $PHYSICAL physical CPU."
|
||||||
|
else
|
||||||
|
log INFO "Found $PHYSICAL physical CPUs."
|
||||||
|
fi
|
||||||
TMP=`grep 'core id' $CPUINFO`
|
TMP=`grep 'core id' $CPUINFO`
|
||||||
if [ "$?" == "0" ]
|
if [ "$?" == "0" ]
|
||||||
then
|
then
|
||||||
log DEBUG "Starting job only for each physical core on all physical CPU(s)."
|
log DEBUG "Starting job only for each physical core on all physical CPU(s)."
|
||||||
NUMBER=`grep 'core id' $CPUINFO | sort | uniq | wc -l`
|
NUMBER=`grep 'core id' $CPUINFO | sort | uniq | wc -l`
|
||||||
log DEBUG "Found $NUMBER physical cores."
|
log INFO "Found $NUMBER physical cores."
|
||||||
else
|
else
|
||||||
log DEBUG "Single core processor(s) detected (or you found a bug)."
|
log INFO "Single core processor(s) detected."
|
||||||
log DEBUG "Starting job (only) for each physical CPU."
|
log INFO "Starting job for each physical CPU."
|
||||||
NUMBER=$PHYSICAL
|
NUMBER=$PHYSICAL
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
log DEBUG "No 'physical id' section found in $CPUINFO, is this a bug?."
|
log INFO "No 'physical id' section found in $CPUINFO, is this a bug?."
|
||||||
NUMBER=`grep ^processor $CPUINFO | wc -l`
|
NUMBER=`grep ^processor $CPUINFO | wc -l`
|
||||||
got_cpu_info "$?"
|
got_cpu_info "$?"
|
||||||
fi
|
fi
|
||||||
|
@ -780,7 +800,7 @@ get_no_of_cpus () {
|
||||||
|
|
||||||
if [ ! -z "$NUMBER" ]
|
if [ ! -z "$NUMBER" ]
|
||||||
then
|
then
|
||||||
echo "$NUMBER"
|
MAX_NO_OF_RUNNING_JOBS=$NUMBER
|
||||||
else
|
else
|
||||||
log INFO "$FUNCNAME ERROR - number of CPUs not obtained."
|
log INFO "$FUNCNAME ERROR - number of CPUs not obtained."
|
||||||
exit 1
|
exit 1
|
||||||
|
@ -1073,6 +1093,8 @@ commando () {
|
||||||
|
|
||||||
ITEM="$1"
|
ITEM="$1"
|
||||||
ITEM_NO_PATH="$1"
|
ITEM_NO_PATH="$1"
|
||||||
|
OUTPUT_DIR=$PPSS_LOCAL_OUTPUT/"$ITEM_NO_PATH"
|
||||||
|
OUTPUT_FILE="$ITEM_NO_PATH"
|
||||||
|
|
||||||
log DEBUG "Processing item $ITEM"
|
log DEBUG "Processing item $ITEM"
|
||||||
|
|
||||||
|
@ -1086,7 +1108,7 @@ commando () {
|
||||||
LOG_FILE_NAME=`echo "$ITEM" | sed s/^\\\.//g | sed s/^\\\.\\\.//g | sed s/\\\///g`
|
LOG_FILE_NAME=`echo "$ITEM" | sed s/^\\\.//g | sed s/^\\\.\\\.//g | sed s/\\\///g`
|
||||||
ITEM_LOG_FILE="$JOB_LOG_DIR/$LOG_FILE_NAME"
|
ITEM_LOG_FILE="$JOB_LOG_DIR/$LOG_FILE_NAME"
|
||||||
|
|
||||||
mkdir -p $PPSS_LOCAL_OUTPUT/"$ITEM_NO_PATH"
|
mkdir -p "$OUTPUT_DIR"
|
||||||
|
|
||||||
does_file_exist "$ITEM_LOG_FILE"
|
does_file_exist "$ITEM_LOG_FILE"
|
||||||
if [ "$?" == "0" ]
|
if [ "$?" == "0" ]
|
||||||
|
@ -1286,6 +1308,7 @@ main () {
|
||||||
display_header
|
display_header
|
||||||
log INFO "Deploying PPSS on nodes."
|
log INFO "Deploying PPSS on nodes."
|
||||||
deploy_ppss
|
deploy_ppss
|
||||||
|
wait
|
||||||
cleanup
|
cleanup
|
||||||
exit 0
|
exit 0
|
||||||
;;
|
;;
|
||||||
|
|
Loading…
Reference in New Issue