Reworked OS detection and process detection, which differs for Linux and Mac OS X

This commit is contained in:
Louwrentius 2009-02-28 17:44:02 +00:00
parent 2edb204c4c
commit 39c1ecf542

71
ppss.sh
View File

@ -47,6 +47,7 @@ ARGS=$@
CONFIG="config.cfg" CONFIG="config.cfg"
DAEMON=0 DAEMON=0
HOSTNAME=`hostname` HOSTNAME=`hostname`
ARCH=`uname`
RUNNING_SIGNAL="$0_is_running" # Prevents running mutiple instances of PPSS.. RUNNING_SIGNAL="$0_is_running" # Prevents running mutiple instances of PPSS..
GLOBAL_LOCK="PPSS-GLOBAL-LOCK" # Global lock file used by local PPSS instance. GLOBAL_LOCK="PPSS-GLOBAL-LOCK" # Global lock file used by local PPSS instance.
PAUSE_SIGNAL="pause_signal" # Not implemented yet (pause processing). PAUSE_SIGNAL="pause_signal" # Not implemented yet (pause processing).
@ -75,6 +76,7 @@ PPSS_LOCAL_WORKDIR="PPSS_LOCAL_WORKDIR" # Local directory on slave for local pro
TRANSFER_TO_SLAVE="0" # Transfer item to slave via (s)cp. TRANSFER_TO_SLAVE="0" # Transfer item to slave via (s)cp.
SECURE_COPY="1" # If set, use SCP, Otherwise, use cp. SECURE_COPY="1" # If set, use SCP, Otherwise, use cp.
REMOTE_OUTPUT_DIR="" # Remote directory to which output must be uploaded. REMOTE_OUTPUT_DIR="" # Remote directory to which output must be uploaded.
SCRIPT="" # Custom user script that is executed by ppss.
showusage () { showusage () {
@ -185,7 +187,6 @@ check_for_interrupt () {
sleep $PAUSE_DELAY sleep $PAUSE_DELAY
check_for_interrupt check_for_interrupt
fi fi
} }
cleanup () { cleanup () {
@ -353,6 +354,11 @@ do
add_var_to_config SSH_SERVER "$SSH_SERVER" add_var_to_config SSH_SERVER "$SSH_SERVER"
shift 2 shift 2
;; ;;
-S )
SCRIPT="$2"
add_var_to_config SCRIPT "$SCRIPT"
shift 2
;;
-t ) -t )
TRANSFER_TO_SLAVE="1" TRANSFER_TO_SLAVE="1"
add_var_to_config TRANSFER_TO_SLAVE "$TRANSFER_TO_SLAVE" add_var_to_config TRANSFER_TO_SLAVE "$TRANSFER_TO_SLAVE"
@ -517,12 +523,21 @@ deploy_ppss () {
if [ -z "$KEY" ] || [ ! -e "$KEY" ] if [ -z "$KEY" ] || [ ! -e "$KEY" ]
then then
log INFO "ERROR - nodes require a key file." log INFO "ERROR - nodes require a key file."
cleanup
exit 1
fi
if [ ! -e "$SCRIPT" ]
then
log INFO "ERROR - script $SCRIPT not found."
cleanup
exit 1 exit 1
fi fi
if [ ! -e "$NODES_FILE" ] if [ ! -e "$NODES_FILE" ]
then then
log INFO "ERROR file $NODES with list of nodes does not exist." log INFO "ERROR file $NODES with list of nodes does not exist."
cleanup
exit 1 exit 1
else else
for NODE in `cat $NODES_FILE` for NODE in `cat $NODES_FILE`
@ -536,6 +551,13 @@ deploy_ppss () {
set_error $? set_error $?
scp -q known_hosts $USER@$NODE:~/$PPSS_HOME_DIR scp -q known_hosts $USER@$NODE:~/$PPSS_HOME_DIR
set_error $? set_error $?
scp -q $SCRIPT $USER@$NODE:~/$PPSS_HOME_DIR
set_error $?
scp -q $INPUT_FILE $USER@$NODE:~/$PPSS_HOME_DIR
set_error $?
if [ "$ERROR" == "0" ] if [ "$ERROR" == "0" ]
then then
log INFO "PPSS installed on node $NODE." log INFO "PPSS installed on node $NODE."
@ -590,16 +612,16 @@ get_no_of_cpus () {
if [ "$HPT" == "yes" ] if [ "$HPT" == "yes" ]
then then
if [ `uname` == "Linux" ] if [ "$ARCH" == "Linux" ]
then then
NUMBER=`cat /proc/cpuinfo | grep processor | wc -l` NUMBER=`cat /proc/cpuinfo | grep processor | wc -l`
got_cpu_info "$?" got_cpu_info "$?"
elif [ `uname` == "Darwin" ] elif [ "$ARCH" == "Darwin" ]
then then
NUMBER=`sysctl -a hw | grep -w logicalcpu | awk '{ print $2 }'` NUMBER=`sysctl -a hw | grep -w logicalcpu | awk '{ print $2 }'`
got_cpu_info "$?" got_cpu_info "$?"
elif [ `uname` == "FreeBSD" ] elif [ "$ARCH" == "FreeBSD" ]
then then
NUMBER=`sysctl hw.ncpu | awk '{ print $2 }'` NUMBER=`sysctl hw.ncpu | awk '{ print $2 }'`
got_cpu_info "$?" got_cpu_info "$?"
@ -609,7 +631,7 @@ get_no_of_cpus () {
fi fi
elif [ "$HPT" == "no" ] elif [ "$HPT" == "no" ]
then then
if [ `uname` == "Linux" ] if [ "$ARCH" == "Linux" ]
then then
RES=`cat /proc/cpuinfo | grep "cpu cores"` RES=`cat /proc/cpuinfo | grep "cpu cores"`
if [ "$?" == "0" ] if [ "$?" == "0" ]
@ -620,11 +642,11 @@ get_no_of_cpus () {
NUMBER=`cat /proc/cpuinfo | grep processor | wc -l` NUMBER=`cat /proc/cpuinfo | grep processor | wc -l`
got_cpu_info "$?" got_cpu_info "$?"
fi fi
elif [ `uname` == "Darwin" ] elif [ "$ARCH" == "Darwin" ]
then then
NUMBER=`sysctl -a hw | grep -w physicalcpu | awk '{ print $2 }'` NUMBER=`sysctl -a hw | grep -w physicalcpu | awk '{ print $2 }'`
got_cpu_info "$?" got_cpu_info "$?"
elif [ `uname` == "FreeBSD" ] elif [ "$ARCH" == "FreeBSD" ]
then then
NUMBER=`sysctl hw.ncpu | awk '{ print $2 }'` NUMBER=`sysctl hw.ncpu | awk '{ print $2 }'`
got_cpu_info "$?" got_cpu_info "$?"
@ -811,8 +833,13 @@ get_all_items () {
else else
if [ ! -z "$SSH_SERVER" ] # Are we running stand-alone or as a slave?" if [ ! -z "$SSH_SERVER" ] # Are we running stand-alone or as a slave?"
then then
scp -q $SSH_OPTS "$SSH_KEY" "$USER@$SSH_SERVER:~/$INPUT_FILE" >> /dev/null 2>&! log DEBUG "Running as slave, input file has been pushed (hopefully)."
check_status "$?" "$FUNCNAME" "Could not copy input file." if [ ! -e "$INPUT_FILE" ]
then
log INFO "ERROR - input file $INPUT_FILE does not exist."
fi
#scp -q $SSH_OPTS $SSH_KEY $USER@$SSH_SERVER:~/"$INPUT_FILE" >> /dev/null 2>&1
#check_status "$?" "$FUNCNAME" "Could not copy input file $INPUT_FILE."
fi fi
exec 10<$INPUT_FILE exec 10<$INPUT_FILE
@ -1049,14 +1076,25 @@ main () {
} }
# This command starts the that sets the whole framework in motion. # This command starts the that sets the whole framework in motion.
main main
#if [ $MODE == "node" ]
#then # Either start new jobs or exit, sleep in the meantime.
while true while true
do do
sleep 5 sleep 5
JOBS=`ps ax | grep -v grep | grep -v -i screen | grep ppss.sh | wc -l` JOBS=`ps ax | grep -v grep | grep -v -i screen | grep ppss.sh | wc -l`
log INFO "JOBS is jobs: $JOBS" log INFO "JOBS is jobs: $JOBS"
if [ "$JOBS" -gt "3" ]
MIN_JOBS=3
if [ "$ARCH" == "Darwin" ]
then
MIN_JOBS=4
elif [ "$ARCH" == "Linux" ]
then
MIN_JOBS=3
fi
if [ "$JOBS" -gt "$MIN_JOBS" ]
then then
log INFO "Sleeping $INTERVAL..." log INFO "Sleeping $INTERVAL..."
sleep $INTERVAL sleep $INTERVAL
@ -1076,6 +1114,7 @@ main
log INFO "Dying processes may display an error message." log INFO "Dying processes may display an error message."
kill_process kill_process
fi fi
done done
#fi
# Exit after all processes have finished.
wait wait