Improved distributed ppss
This commit is contained in:
parent
e774b666ad
commit
695b65af3a
|
@ -38,7 +38,7 @@ trap 'kill_process; ' INT
|
||||||
|
|
||||||
# Setting some vars. Do not change.
|
# Setting some vars. Do not change.
|
||||||
SCRIPT_NAME="Distributed Parallel Processing Shell Script"
|
SCRIPT_NAME="Distributed Parallel Processing Shell Script"
|
||||||
SCRIPT_VERSION="2.20"
|
SCRIPT_VERSION="2.22"
|
||||||
|
|
||||||
# The first argument to this script is always the 'mode'.
|
# The first argument to this script is always the 'mode'.
|
||||||
MODE="$1"
|
MODE="$1"
|
||||||
|
@ -48,7 +48,7 @@ shift
|
||||||
# export PPSS_DIR=/path/to/workingdir
|
# export PPSS_DIR=/path/to/workingdir
|
||||||
if [ -z "$PPSS_DIR" ]
|
if [ -z "$PPSS_DIR" ]
|
||||||
then
|
then
|
||||||
PPSS_DIR="./ppss"
|
PPSS_DIR="ppss"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ ! -e "$PPSS_DIR" ]
|
if [ ! -e "$PPSS_DIR" ]
|
||||||
|
@ -159,7 +159,7 @@ showusage () {
|
||||||
echo
|
echo
|
||||||
echo -e "The following options are used for distributed execution of PPSS."
|
echo -e "The following options are used for distributed execution of PPSS."
|
||||||
echo
|
echo
|
||||||
echo -e "--server | -s Specifies the SSH server that is used for communication between nodes."
|
echo -e "--master | -m Specifies the SSH server that is used for communication between nodes."
|
||||||
echo -e " Using SSH, file locks are created, informing other nodes that an item "
|
echo -e " Using SSH, file locks are created, informing other nodes that an item "
|
||||||
echo -e " is locked. Also, often items, such as files, reside on this host. SCP "
|
echo -e " is locked. Also, often items, such as files, reside on this host. SCP "
|
||||||
echo -e " is used to transfer files from this host to nodes for local procesing."
|
echo -e " is used to transfer files from this host to nodes for local procesing."
|
||||||
|
@ -167,7 +167,7 @@ showusage () {
|
||||||
echo -e "--node | -n File containig a list of nodes that act as PPSS clients. One IP / DNS "
|
echo -e "--node | -n File containig a list of nodes that act as PPSS clients. One IP / DNS "
|
||||||
echo -e " name per line."
|
echo -e " name per line."
|
||||||
echo
|
echo
|
||||||
echo -e "--key | -k The SSH key that a node uses to connect to the server."
|
echo -e "--key | -k The SSH key that a node uses to connect to the master."
|
||||||
echo
|
echo
|
||||||
echo -e "--known-hosts | -K The file that contains the server public key. Can often be found on "
|
echo -e "--known-hosts | -K The file that contains the server public key. Can often be found on "
|
||||||
echo -e " hosts that already once connected to the server. See the file "
|
echo -e " hosts that already once connected to the server. See the file "
|
||||||
|
@ -243,11 +243,13 @@ exec_cmd () {
|
||||||
|
|
||||||
CMD="$1"
|
CMD="$1"
|
||||||
|
|
||||||
if [ ! -z "$SSH_SERVER" ] && [ "$SECURE_COPY" == "1" ]
|
if [ ! -z "$SSH_SERVER" ]
|
||||||
then
|
then
|
||||||
ssh $SSH_OPTS $SSH_KEY $USER@$SSH_SERVER $CMD
|
ssh $SSH_OPTS $SSH_KEY $USER@$SSH_SERVER $CMD
|
||||||
|
return $?
|
||||||
else
|
else
|
||||||
eval "$CMD"
|
eval "$CMD"
|
||||||
|
return $?
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -482,7 +484,7 @@ do
|
||||||
shift 2
|
shift 2
|
||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
--server|-s )
|
--master|-m )
|
||||||
SSH_SERVER="$2"
|
SSH_SERVER="$2"
|
||||||
add_var_to_config SSH_SERVER "$SSH_SERVER"
|
add_var_to_config SSH_SERVER "$SSH_SERVER"
|
||||||
shift 2
|
shift 2
|
||||||
|
@ -533,7 +535,7 @@ check_for_running_instances () {
|
||||||
JOBS=`ps axu | grep -v grep | grep ${USER} | grep -v -i screen | grep ppss.sh | wc -l`
|
JOBS=`ps axu | grep -v grep | grep ${USER} | grep -v -i screen | grep ppss.sh | wc -l`
|
||||||
#echo "$(date) : ${JOBS}"
|
#echo "$(date) : ${JOBS}"
|
||||||
get_min_jobs
|
get_min_jobs
|
||||||
log INFO "Minjobs is $MIN_JOBS"
|
log DEBUG "Minjobs is $MIN_JOBS"
|
||||||
|
|
||||||
if [ "$JOBS" -gt "$MIN_JOBS" ]
|
if [ "$JOBS" -gt "$MIN_JOBS" ]
|
||||||
then
|
then
|
||||||
|
@ -750,9 +752,15 @@ erase_ppss () {
|
||||||
then
|
then
|
||||||
for NODE in `cat $NODES_FILE`
|
for NODE in `cat $NODES_FILE`
|
||||||
do
|
do
|
||||||
log INFO "Erasing PPSS homedir $PPSS_HOME_DIR from node $NODE."
|
does_file_exist "ppss"
|
||||||
ssh -q $SSH_KEY $SSH_OPTS $USER@$NODE "./$PPSS_HOME_DIR/$0 kill"
|
if [ "$?" == "0" ]
|
||||||
ssh -q $SSH_KEY $SSH_OPTS $USER@$NODE "rm -rf $PPSS_HOME_DIR"
|
then
|
||||||
|
log INFO "Erasing PPSS homedir $PPSS_HOME_DIR from node $NODE."
|
||||||
|
ssh -q $SSH_KEY $SSH_OPTS $USER@$NODE "./$PPSS_HOME_DIR/$0 kill"
|
||||||
|
ssh -q $SSH_KEY $SSH_OPTS $USER@$NODE "rm -rf $PPSS_HOME_DIR"
|
||||||
|
else
|
||||||
|
log INFO "PPSS was not present on node $NODE."
|
||||||
|
fi
|
||||||
done
|
done
|
||||||
else
|
else
|
||||||
log INFO "Aborting.."
|
log INFO "Aborting.."
|
||||||
|
@ -1464,7 +1472,7 @@ start_all_workers () {
|
||||||
get_status_of_node () {
|
get_status_of_node () {
|
||||||
|
|
||||||
NODE="$1"
|
NODE="$1"
|
||||||
STATUS=`ssh -o ConnectTimeout=10 $SSH_KEY $USER@$NODE cat "$PPSS_HOME_DIR/$NODE_STATUS"`
|
STATUS=`ssh -o ConnectTimeout=10 $SSH_KEY $USER@$NODE cat "$PPSS_DIR/$NODE_STATUS"`
|
||||||
ERROR="$?"
|
ERROR="$?"
|
||||||
if [ ! "$ERROR" == "0" ]
|
if [ ! "$ERROR" == "0" ]
|
||||||
then
|
then
|
||||||
|
@ -1508,7 +1516,13 @@ show_status () {
|
||||||
for x in `cat $NODES_FILE`
|
for x in `cat $NODES_FILE`
|
||||||
do
|
do
|
||||||
NODE=`get_status_of_node "$x" | awk '{ print $1 }'`
|
NODE=`get_status_of_node "$x" | awk '{ print $1 }'`
|
||||||
RES=`exec_cmd "grep $NODE ~/$JOB_LOG_DIR/* | wc -l"`
|
RES=`exec_cmd "grep $NODE ~/$JOB_LOG_DIR/* >> /dev/null 2>&1"`
|
||||||
|
if [ ! "$ERROR" == "0" ]
|
||||||
|
then
|
||||||
|
RES=0
|
||||||
|
else
|
||||||
|
RES=`exec_cmd "grep $NODE ~/$JOB_LOG_DIR/* | wc -l"`
|
||||||
|
fi
|
||||||
let PROCESSED=$PROCESSED+$RES
|
let PROCESSED=$PROCESSED+$RES
|
||||||
STATUS=`get_status_of_node "$x" | awk '{ print $2 }'`
|
STATUS=`get_status_of_node "$x" | awk '{ print $2 }'`
|
||||||
LINE=`echo "$x $NODE $RES $STATUS" | awk '{ printf ("%-16s %-18s % 10s %10s\n",$1,$2,$3,$4) }'`
|
LINE=`echo "$x $NODE $RES $STATUS" | awk '{ printf ("%-16s %-18s % 10s %10s\n",$1,$2,$3,$4) }'`
|
||||||
|
|
Loading…
Reference in New Issue