#!/usr/bin/env bash
#
# PPSS, the Parallel Processing Shell Script
#
# Copyright (c) 2009, Louwrentius
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the <organization> nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY Louwrentius ''AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL Louwrentius BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#------------------------------------------------------------------------------
# It should not be necessary to edit anything in this script.
# Of course you can if it is necessary for your needs.
# Send a patch if your changes may benefit others.
#------------------------------------------------------------------------------
# Handling control-c for a clean shutdown.
trap 'kill_process; ' INT

# Setting some vars. Do not change.
SCRIPT_NAME="Distributed Parallel Processing Shell Script"
SCRIPT_VERSION="2.12"

# The first argument to this script is always the 'mode'.
MODE="$1"
shift

CONFIG=""
HOSTNAME=$(hostname)
ARCH=$(uname)
RUNNING_SIGNAL="${0}_is_running"        # Prevents running multiple instances of PPSS.
GLOBAL_LOCK="PPSS-GLOBAL-LOCK"          # Global lock file used by local PPSS instance.
PAUSE_SIGNAL="pause_signal"             # Pause processing if this file is present.
PAUSE_DELAY=300                         # Polling every 5 minutes by default.
STOP_SIGNAL="stop_signal"               # Stop processing if this file is present.
ARRAY_POINTER_FILE="ppss-array-pointer" # Holds the index of the next item to hand out.
JOB_LOG_DIR="JOB_LOG"                   # Directory containing log files of processed items.
LOGFILE="ppss-log.txt"                  # General PPSS log file. Contains lots of info.
STOP=0                                  # STOP job.
MAX_DELAY=2
PERCENT="0"
PID="$$"
LISTENER_PID=""
IFS_BACKUP="$IFS"
INTERVAL="30"                           # Polling interval to check if there are running jobs.
CPUINFO=/proc/cpuinfo
PROCESSORS=""

SSH_SERVER=""                           # Remote server or 'master'.
SSH_KEY=""                              # SSH key for ssh account.
SSH_KNOWN_HOSTS=""
SSH_SOCKET="/tmp/PPSS-ssh-socket"       # Multiplex multiple SSH connections over 1 master.
SSH_OPTS="-o BatchMode=yes -o ControlPath=$SSH_SOCKET \
-o GlobalKnownHostsFile=./known_hosts \
-o ControlMaster=auto \
-o ConnectTimeout=5"
SSH_MASTER_PID=""

PPSS_HOME_DIR="ppss"
ITEM_LOCK_DIR="PPSS_ITEM_LOCK_DIR"      # Remote directory on master used for item locking.
PPSS_LOCAL_TMPDIR="PPSS_LOCAL_TMPDIR"   # Local directory on slave for local processing.
PPSS_LOCAL_OUTPUT="PPSS_LOCAL_OUTPUT"   # Local directory on slave for local output.
TRANSFER_TO_SLAVE="0"                   # Transfer item to slave via (s)cp.
SECURE_COPY="1"                         # If set, use SCP, Otherwise, use cp.
REMOTE_OUTPUT_DIR=""                    # Remote directory to which output must be uploaded.
SCRIPT=""                               # Custom user script that is executed by ppss.

# Print the usage/help text (name, version, modes, options and examples)
# to stdout. Reads SCRIPT_NAME and SCRIPT_VERSION.
showusage () {

    echo
    echo "$SCRIPT_NAME"
    echo "Version: $SCRIPT_VERSION"
    echo
    echo "PPSS is a Bash shell script that executes commands in parallel on a set "
    echo "of items, such as files, or lines in a file."
    echo
    echo "Usage: $0 MODE [ options ]"
    echo "or"
    echo "Usage: $0 MODE -C <config file>"
    echo
    echo "Modes are:"
    echo
    echo " standalone   For execution of PPSS on a single host."
    echo " node         For execution of PPSS on a node, that is part of a 'cluster'."
    echo " config       Generate a config file based on the supplied option parameters."
    echo " deploy       Deploy PPSS and related files on the specified nodes."
    echo " erase        Erase PPSS and related files from the specified nodes."
    echo
    echo " start        Starting PPSS on nodes."
    echo " pause        Pausing PPSS on all nodes."
    echo " stop         Stopping PPSS on all nodes."
    echo
    echo "Options are:"
    echo
    echo -e "--command | -c      Command to execute. Syntax: '<command> ' including the single quotes."
    echo -e "                    Example: -c 'ls -alh '. It is also possible to specify where an item "
    echo -e "                    must be inserted: 'cp \"\$ITEM\" /somedir'."
    echo
    echo -e "--sourcedir | -d    Directory that contains files that must be processed. Individual files"
    echo -e "                    are fed as an argument to the command that has been specified with -c."
    echo
    echo -e "--sourcefile | -f   Each single line of the supplied file will be fed as an item to the"
    echo -e "                    command that has been specified with -c."
    echo
    # The option parser accepts -C (capital) for the config file; -c is --command.
    echo -e "--config | -C       If the mode is config, a config file with the specified name will be"
    echo -e "                    generated based on all the options specified. In the other modes."
    echo -e "                    this option will result in PPSS reading the config file and start"
    echo -e "                    processing items based on the settings of this file."
    echo
    # The option parser implements --disable-ht; hyperthreading is used unless -j is given.
    echo -e "--disable-ht | -j   Disable hyperthreading. Is enabled by default."
    echo
    echo -e "--log | -l          Sets the name of the log file. The default is ppss-log.txt."
    echo
    echo -e "--processes | -p    Start the specified number of processes. Ignore the number of available"
    echo -e "                    CPU's."
    echo
    echo -e "The following options are used for distributed execution of PPSS."
    echo
    echo -e "--server | -s       Specifies the SSH server that is used for communication between nodes."
    echo -e "                    Using SSH, file locks are created, informing other nodes that an item "
    echo -e "                    is locked. Also, often items, such as files, reside on this host. SCP "
    echo -e "                    is used to transfer files from this host to nodes for local procesing."
    echo
    echo -e "--node | -n         File containig a list of nodes that act as PPSS clients. One IP / DNS "
    echo -e "                    name per line."
    echo
    echo -e "--key | -k          The SSH key that a node uses to connect to the server."
    echo
    echo -e "--known-hosts | -K  The file that contains the server public key. Can often be found on "
    echo -e "                    hosts that already once connected to the server. See the file "
    echo -e "                    ~/.ssh/known_hosts or else, manualy connect once and check this file."
    echo
    echo -e "--user | -u         The SSH user name that is used when logging in into the master SSH"
    echo -e "                    server."
    echo
    echo -e "--script | -S       Specifies the script/program that must be copied to the nodes for "
    echo -e "                    execution through PPSS. Only used in the deploy mode."
    echo -e "                    This option should be specified if necessary when generating a config."
    echo
    echo -e "--transfer | -t     This option specifies that an item will be downloaded by the node "
    echo -e "                    from the server or share to the local node for processing."
    echo
    echo -e "--no-scp | -b       Do not use scp for downloading items. Use cp instead. Assumes that a"
    echo -e "                    network file system (NFS/SMB) is mounted under a local mountpoint."
    echo
    echo -e "--outputdir | -o    Directory on server where processed files are put. If the result of "
    echo -e "                    encoding a wav file is an mp3 file, the mp3 file is put in the "
    echo -e "                    directory specified with this option."
    echo
    echo -e "Example: encoding some wav files to mp3 using lame:"
    echo
    echo -e "$0 standalone -c 'lame ' -d /path/to/wavfiles -j"
    echo
    echo -e "Running PPSS based on a configuration file."
    echo
    echo -e "$0 node -C config.cfg"
    echo
    echo -e "Running PPSS on a client as part of a cluster."
    echo
    # \$ITEM is escaped so the literal placeholder is printed, not its (empty) value.
    echo -e "$0 node -d /somedir -c 'cp \"\$ITEM\" /some/destination' -s 10.0.0.50 -u ppss -t -k ppss-key.key"
    echo
}

# INT trap handler: stop the listener, kill every other bash process running
# ppss.sh, then clean up and exit 0.
# Globals: LISTENER_PID, PID, SSH_MASTER_PID (read); JOBS (written).
kill_process () {

    if [ -n "$LISTENER_PID" ]; then
        kill "$LISTENER_PID" >> /dev/null 2>&1
    fi

    while true; do
        JOBS=$(ps ax | grep -v grep | grep -v -i screen | grep ppss.sh | grep -i bash | wc -l)
        if [ "$JOBS" -gt "2" ]; then
            for x in $(ps ax | grep -v grep | grep -v -i screen | grep ppss.sh | grep -i bash | awk '{ print $1 }'); do
                # Never kill ourselves.
                if [ ! "$x" == "$PID" ] && [ ! "$x" == "$$" ]; then
                    kill -9 "$x" >> /dev/null 2>&1
                fi
            done
            sleep 5
        else
            cleanup
            echo -en "\033[1B"
            # The master SSH connection should be killed.
            if [ ! -z "$SSH_MASTER_PID" ]; then
                kill -9 "$SSH_MASTER_PID"
            fi
            echo ""
            exit 0
        fi
    done
}

# Run a command string either on the SSH server or locally.
# Arguments: $1 - the command line, as a single string.
# When SSH_SERVER is set and SECURE_COPY is "1" the string is handed to ssh
# for execution on the server; otherwise it is eval'ed in this shell.
# Returns: the exit status of ssh or of the eval'ed command.
exec_cmd () {

    CMD="$1"

    if [ ! -z "$SSH_SERVER" ] && [ "$SECURE_COPY" == "1" ]; then
        # SSH_OPTS and SSH_KEY hold several words each and must be split.
        # CMD is quoted so it reaches the remote shell unmodified instead of
        # being word-split and glob-expanded locally first.
        ssh $SSH_OPTS $SSH_KEY "$USER@$SSH_SERVER" "$CMD"
    else
        eval "$CMD"
    fi
}
# This function makes remote or local checking of existence of items transparent.
# Arguments: $1 - path to check; it is listed with 'ls -1' through exec_cmd.
# Returns: 0 if the listing succeeded, 1 otherwise.
does_file_exist () {

    FILE="$1"

    # Call exec_cmd directly; only the exit status matters, output is discarded.
    if exec_cmd "ls -1 $FILE" >> /dev/null 2>&1; then
        return 0
    else
        return 1
    fi
}

# Check for the stop and pause signal files (via does_file_exist).
# Sets STOP="1" when STOP_SIGNAL exists. While PAUSE_SIGNAL exists, sleeps
# PAUSE_DELAY seconds and checks both signals again.
check_for_interrupt () {

    # A loop instead of recursion, so a long pause cannot grow the call stack.
    while true; do
        does_file_exist "$STOP_SIGNAL"
        if [ "$?" == "0" ]; then
            log INFO "STOPPING job. Stop signal found."
            STOP="1"
        fi

        does_file_exist "$PAUSE_SIGNAL"
        if [ ! "$?" == "0" ]; then
            return 0
        fi

        log INFO "PAUSE: sleeping for $PAUSE_DELAY SECONDS."
        sleep "$PAUSE_DELAY"
    done
}

# Remove the runtime files of this PPSS instance if they exist:
# FIFO, ARRAY_POINTER_FILE, GLOBAL_LOCK, RUNNING_SIGNAL and SSH_SOCKET.
cleanup () {

    #log DEBUG "$FUNCNAME - Cleaning up all temp files and processes."

    # All paths are quoted so names containing spaces are removed correctly.
    if [ -e "$FIFO" ]; then
        rm "$FIFO"
    fi

    if [ -e "$ARRAY_POINTER_FILE" ]; then
        rm "$ARRAY_POINTER_FILE"
    fi

    if [ -e "$GLOBAL_LOCK" ]; then
        rm -rf "$GLOBAL_LOCK"
    fi

    if [ -e "$RUNNING_SIGNAL" ]; then
        rm "$RUNNING_SIGNAL"
    fi

    if [ -e "$SSH_SOCKET" ]; then
        rm -rf "$SSH_SOCKET"
    fi
}
# Check if ppss is already running.
# Exits with status 1 when the RUNNING_SIGNAL file exists, unless MODE is
# "erase".
is_running () {

    if [ -e "$RUNNING_SIGNAL" ] && [ ! "$MODE" == "erase" ]; then
        echo
        log INFO "$0 is already running (lock file exists)."
        echo
        exit 1
    fi
}

# Append a VAR=VALUE line to the config file, but only in 'config' mode.
# Arguments: $1 - variable name, $2 - value.
# Globals: MODE, CONFIG (read); VAR, VALUE (written).
add_var_to_config () {

    if [ "$MODE" == "config" ]; then
        VAR="$1"
        VALUE="$2"

        # printf writes the value verbatim; 'echo -e' would interpret
        # backslash sequences that occur in the value.
        printf '%s=%s\n' "$VAR" "$VALUE" >> "$CONFIG"
    fi
}

# Process any command-line options that are specified.
#
# Options that take a value consume two words with 'shift 2 || shift': when
# the value is missing, 'shift 2' fails without changing anything, so the
# fallback drops the lone option instead of looping on it forever.
while [ $# -gt 0 ]; do
    case "$1" in
        --config|-C)
            CONFIG="$2"

            if [ "$MODE" == "config" ]; then
                if [ -e "$CONFIG" ]; then
                    echo "Do want to overwrite existing config file?"
                    read yn
                    if [ "$yn" == "y" ]; then
                        rm "$CONFIG"
                    else
                        echo "Aborting..."
                        cleanup
                        exit
                    fi
                fi
            else
                source "$CONFIG"
            fi

            # Turn a bare key path into an ssh option, but do not prefix it
            # twice when --key was already processed before --config.
            if [ ! -z "$SSH_KEY" ]; then
                case "$SSH_KEY" in
                    "-i "*) ;;
                    *) SSH_KEY="-i $SSH_KEY" ;;
                esac
            fi

            if [ ! -e "./known_hosts" ]; then
                if [ -z "$SSH_KNOWN_HOSTS" ]; then
                    # No known-hosts file configured: nothing to copy. Without
                    # this guard 'cat' ran with no file and read from stdin.
                    :
                elif [ -e "$SSH_KNOWN_HOSTS" ]; then
                    cat "$SSH_KNOWN_HOSTS" > ./known_hosts
                else
                    echo "File $SSH_KNOWN_HOSTS does not exist."
                    exit
                fi
            fi
            shift 2 || shift
            ;;
        --node|-n)
            NODES_FILE="$2"
            add_var_to_config NODES_FILE "$NODES_FILE"
            shift 2 || shift
            ;;
        --sourcefile|-f)
            INPUT_FILE="$2"
            add_var_to_config INPUT_FILE "$INPUT_FILE"
            shift 2 || shift
            ;;
        --sourcedir|-d)
            SRC_DIR="$2"
            add_var_to_config SRC_DIR "$SRC_DIR"
            shift 2 || shift
            ;;
        --command|-c)
            COMMAND=$2
            if [ "$MODE" == "config" ]; then
                # Stored single-quoted so the config file can be sourced.
                COMMAND=\'$COMMAND\'
                add_var_to_config COMMAND "$COMMAND"
            fi
            shift 2 || shift
            ;;
        --help|-h)
            showusage
            exit 1
            ;;
        --homedir|-H)
            if [ ! -z "$2" ]; then
                PPSS_HOME_DIR="$2"
                add_var_to_config PPSS_HOME_DIR "$PPSS_HOME_DIR"
            fi
            # Shift even when the value is empty, otherwise the loop never ends.
            shift 2 || shift
            ;;
        --disable-ht|-j)
            HYPERTHREADING=no
            add_var_to_config HYPERTHREADING "$HYPERTHREADING"
            shift 1
            ;;
        --log|-l)
            LOGFILE="$2"
            add_var_to_config LOGFILE "$LOGFILE"
            shift 2 || shift
            ;;
        --key|-k)
            SSH_KEY="$2"
            add_var_to_config SSH_KEY "$SSH_KEY"
            if [ ! -z "$SSH_KEY" ]; then
                SSH_KEY="-i $SSH_KEY"
            fi
            shift 2 || shift
            ;;
        --known-hosts|-K)
            SSH_KNOWN_HOSTS="$2"
            add_var_to_config SSH_KNOWN_HOSTS "$SSH_KNOWN_HOSTS"
            shift 2 || shift
            ;;
        --no-scp|-b)
            SECURE_COPY=0
            add_var_to_config SECURE_COPY "$SECURE_COPY"
            shift 1
            ;;
        --outputdir|-o)
            REMOTE_OUTPUT_DIR="$2"
            add_var_to_config REMOTE_OUTPUT_DIR "$REMOTE_OUTPUT_DIR"
            shift 2 || shift
            ;;
        --processes|-p)
            TMP="$2"
            if [ ! -z "$TMP" ]; then
                MAX_NO_OF_RUNNING_JOBS="$TMP"
                add_var_to_config MAX_NO_OF_RUNNING_JOBS "$MAX_NO_OF_RUNNING_JOBS"
            fi
            # Shift even when the value is empty, otherwise the loop never ends.
            shift 2 || shift
            ;;
        --server|-s)
            SSH_SERVER="$2"
            add_var_to_config SSH_SERVER "$SSH_SERVER"
            shift 2 || shift
            ;;
        --script|-S)
            SCRIPT="$2"
            add_var_to_config SCRIPT "$SCRIPT"
            shift 2 || shift
            ;;
        --transfer|-t)
            TRANSFER_TO_SLAVE="1"
            add_var_to_config TRANSFER_TO_SLAVE "$TRANSFER_TO_SLAVE"
            shift 1
            ;;
        --user|-u)
            USER="$2"
            add_var_to_config USER "$USER"
            shift 2 || shift
            ;;
        --version|-v)
            echo ""
            echo "$SCRIPT_NAME version $SCRIPT_VERSION"
            echo ""
            exit 0
            ;;
        *)
            showusage
            exit 1
            ;;
    esac
done

# Log the banner: script name, version and hostname, all at INFO level.
display_header () {

    log INFO "---------------------------------------------------------"
    log INFO "$SCRIPT_NAME version $SCRIPT_VERSION"
    # Was 'log INDO': log only echoes INFO lines, so this rule never reached the screen.
    log INFO "---------------------------------------------------------"
    log INFO "Hostname: $HOSTNAME"
}

# Init all vars
#
# Starts a fresh log file, prints the header, verifies that a command was
# given, and creates the runtime files and directories: the array pointer
# file, the FIFO (opened on file descriptor 42), the running signal, the job
# log directory, the item lock directory (only when SSH_SERVER is set) and the
# local tmp/output directories. Exits when COMMAND is empty or when
# REMOTE_OUTPUT_DIR cannot be listed.
init_vars () {

    if [ -e "$LOGFILE" ]; then
        rm "$LOGFILE"
    fi

    display_header

    if [ -z "$COMMAND" ]; then
        echo
        echo "ERROR - no command specified."
        echo
        showusage
        cleanup
        exit 1
    fi

    echo 0 > "$ARRAY_POINTER_FILE"

    FIFO=$(pwd)/fifo-$RANDOM-$RANDOM

    if [ ! -e "$FIFO" ]; then
        mkfifo -m 600 "$FIFO"
    fi

    exec 42<> "$FIFO"

    touch "$RUNNING_SIGNAL"

    # Only detect the CPU count when the user did not fix the job count.
    if [ -z "$MAX_NO_OF_RUNNING_JOBS" ]; then
        get_no_of_cpus "$HYPERTHREADING"
    fi

    if [ -e "$CPUINFO" ]; then
        # Read the file that was just tested, not a hard-coded /proc/cpuinfo.
        CPU=$(grep 'model name' "$CPUINFO" | cut -d ":" -f 2 | sed -e 's/^ //g' | sort | uniq)
        log INFO "CPU: $CPU"
    fi

    log INFO "---------------------------------------------------------"

    does_file_exist "$JOB_LOG_DIR"
    if [ ! "$?" == "0" ]; then
        log DEBUG "Job log directory $JOB_LOG_DIR does not exist. Creating."
        exec_cmd "mkdir $JOB_LOG_DIR"
    else
        log DEBUG "Job log directory $JOB_LOG_DIR exists."
    fi

    does_file_exist "$ITEM_LOCK_DIR"
    if [ ! "$?" == "0" ] && [ ! -z "$SSH_SERVER" ]; then
        log DEBUG "Creating remote item lock dir."
        exec_cmd "mkdir $ITEM_LOCK_DIR"
    fi

    # exec_cmd may have created the directory remotely; make sure it exists locally too.
    if [ ! -e "$JOB_LOG_DIR" ]; then
        mkdir "$JOB_LOG_DIR"
    fi

    does_file_exist "$REMOTE_OUTPUT_DIR"
    if [ ! "$?" == "0" ]; then
        echo "ERROR: remote output dir $REMOTE_OUTPUT_DIR does not exist."
        cleanup
        exit
    fi

    if [ ! -e "$PPSS_LOCAL_TMPDIR" ]; then
        mkdir "$PPSS_LOCAL_TMPDIR"
    fi

    if [ ! -e "$PPSS_LOCAL_OUTPUT" ]; then
        mkdir "$PPSS_LOCAL_OUTPUT"
    fi
}

# Print $1 right-padded with spaces to at least TYPE_LENGTH characters.
# Longer strings are printed unchanged.
# Arguments: $1 - the string to pad.
# Globals:   TYPE_LENGTH (read) - minimum width; treated as 0 when unset.
expand_str () {

    # printf pads in one step and, unlike echo, prints strings such as "-n" literally.
    printf '%-*s\n' "${TYPE_LENGTH:-0}" "$1"
}

# Append a timestamped message to LOGFILE; INFO messages are also echoed to stdout.
# Arguments: $1 - message type (e.g. INFO, DEBUG, ERROR), $2 - message text.
# Line format: "<date>: <type padded/cut to 6 chars> - <message>".
log () {

    TYPE="$1"
    MESG="$2"
    TMP_LOG=""
    TYPE_LENGTH=6           # Read by expand_str.

    TYPE_EXP=$(expand_str "$TYPE")

    DATE=$(date +%b\ %d\ %H:%M:%S)
    PREFIX="$DATE: ${TYPE_EXP:0:$TYPE_LENGTH} -"

    LOG_MSG="$PREFIX $MESG"

    echo -e "$LOG_MSG" >> "$LOGFILE"

    if [ "$TYPE" == "INFO" ]; then
        echo -e "$LOG_MSG"
    fi
}

# Abort the script when a previous step failed.
# Arguments: $1 - exit status to check, $2 - name of the calling function,
#            $3 - message to log.
# When $1 is not "0": logs "<function> - <message>", runs cleanup and exits 1.
check_status () {

    ERROR="$1"
    FUNCTION="$2"
    MESSAGE="$3"

    if [ ! "$ERROR" == "0" ]; then
        log INFO "$FUNCTION - $MESSAGE"
        cleanup
        exit 1
    fi
}

# Erase PPSS from every node listed in NODES_FILE, after the user answers "y".
# For each node it first runs "<PPSS_HOME_DIR>/<this script> kill" over ssh
# and then removes PPSS_HOME_DIR.
erase_ppss () {

    echo "Are you realy sure you want to erase PPSS from all nades!?"
    read YN

    if [ "$YN" == "y" ]; then
        for NODE in $(cat "$NODES_FILE"); do
            log INFO "Erasing PPSS homedir $PPSS_HOME_DIR from node $NODE."
            # SSH_OPTS and SSH_KEY hold several words each and must be split.
            ssh $SSH_OPTS $SSH_KEY "$USER@$NODE" "./$PPSS_HOME_DIR/$0 kill"
            ssh $SSH_OPTS $SSH_KEY "$USER@$NODE" "rm -rf $PPSS_HOME_DIR"
        done
    fi
}

# Install PPSS on one node: create PPSS_HOME_DIR there and copy this script,
# the SSH key, the config file, known_hosts and, when set, SCRIPT and
# INPUT_FILE into it. Logs whether the installation succeeded.
# Arguments: $1 - node to deploy to.
# Globals:   ERROR (written) - status of the last failed copy, 0 if none failed.
deploy () {

    NODE="$1"
    ERROR=0

    # Remember a failure without letting a later success overwrite it.
    set_error () {

        if [ ! "$1" == "0" ]; then
            ERROR=$1
        fi
    }

    # SSH_KEY has the form "-i <file>"; the second word is the key file.
    KEY=$(echo $SSH_KEY | cut -d " " -f 2)

    ssh -q $SSH_OPTS $SSH_KEY "$USER@$NODE" "mkdir $PPSS_HOME_DIR >> /dev/null 2>&1"
    scp -q $SSH_OPTS $SSH_KEY "$0" "$USER@$NODE:~/$PPSS_HOME_DIR"
    set_error $?
    scp -q $SSH_OPTS $SSH_KEY "$KEY" "$USER@$NODE:~/$PPSS_HOME_DIR"
    set_error $?
    scp -q $SSH_OPTS $SSH_KEY "$CONFIG" "$USER@$NODE:~/$PPSS_HOME_DIR"
    set_error $?
    scp -q $SSH_OPTS $SSH_KEY known_hosts "$USER@$NODE:~/$PPSS_HOME_DIR"
    set_error $?

    if [ ! -z "$SCRIPT" ]; then
        scp -q $SSH_OPTS $SSH_KEY "$SCRIPT" "$USER@$NODE:~/$PPSS_HOME_DIR"
        set_error $?
    fi

    if [ ! -z "$INPUT_FILE" ]; then
        scp -q $SSH_OPTS $SSH_KEY "$INPUT_FILE" "$USER@$NODE:~/$PPSS_HOME_DIR"
        set_error $?
    fi

    if [ "$ERROR" == "0" ]; then
        log INFO "PPSS installed on node $NODE."
    else
        log INFO "PPSS failed to install on $NODE."
    fi
}
# Deploy PPSS to all nodes listed in NODES_FILE, one background 'deploy' job
# per node. Exits 1 (after cleanup) when NODES_FILE is not set, the key file
# is missing, SCRIPT is set but missing, or NODES_FILE does not exist.
deploy_ppss () {

    if [ -z "$NODES_FILE" ]; then
        log INFO "ERROR - are you using the right option? -C ?"
        cleanup
        exit 1
    fi

    # SSH_KEY has the form "-i <file>"; the second word is the key file.
    KEY=$(echo $SSH_KEY | cut -d " " -f 2)
    if [ -z "$KEY" ] || [ ! -e "$KEY" ]; then
        log INFO "ERROR - nodes require a key file."
        cleanup
        exit 1
    fi

    if [ ! -e "$SCRIPT" ] && [ ! -z "$SCRIPT" ]; then
        log INFO "ERROR - script $SCRIPT not found."
        cleanup
        exit 1
    fi

    if [ ! -e "$NODES_FILE" ]; then
        # Was $NODES, which is never set, so the file name was missing from the message.
        log INFO "ERROR file $NODES_FILE with list of nodes does not exist."
        cleanup
        exit 1
    else
        for NODE in $(cat "$NODES_FILE"); do
            deploy "$NODE" &
        done
    fi
}
# Start PPSS in node mode on a remote node, inside a detached screen session
# named PPSS, using the config file CONFIG.
# Arguments: $1 - node to start PPSS on.
start_ppss_on_node () {

    NODE="$1"

    log INFO "Starting PPSS on node $NODE."
    # SSH_OPTS and SSH_KEY hold several words each and must be split.
    ssh $SSH_OPTS $SSH_KEY "$USER@$NODE" "cd $PPSS_HOME_DIR ; screen -d -m -S PPSS ./ppss.sh node --config $CONFIG"
}

# Verify that the SSH server can be reached and start the master SSH connection.
# With SSH_SERVER set: runs a trivial command through exec_cmd, aborts via
# check_status on failure, then starts 'ssh -N -M' in the background and
# stores its PID in SSH_MASTER_PID. Without SSH_SERVER only a DEBUG line is
# logged.
test_server () {

    # Testing if the remote server works as expected.
    if [ ! -z "$SSH_SERVER" ]; then
        exec_cmd "date >> /dev/null"
        check_status "$?" "$FUNCNAME" "Server $SSH_SERVER could not be reached"

        ssh -N -M $SSH_OPTS $SSH_KEY "$USER@$SSH_SERVER" &
        SSH_MASTER_PID="$!"
    else
        log DEBUG "No remote server specified, assuming stand-alone mode."
    fi
}

# Determine how many jobs to run in parallel and store it in MAX_NO_OF_RUNNING_JOBS.
# Arguments: $1 - "yes" (or empty) to count logical processors,
#                 "no" to count physical cores only.
# Globals:   ARCH, CPUINFO (read); HPT, NUMBER, PHYSICAL, ERROR,
#            MAX_NO_OF_RUNNING_JOBS (written).
# Exits 1 when no number could be obtained.
get_no_of_cpus () {

    # Use hyperthreading or not?
    HPT=$1
    NUMBER=""

    if [ -z "$HPT" ]; then
        HPT=yes
    fi

    got_cpu_info () {

        ERROR="$1"
        check_status "$ERROR" "$FUNCNAME" "cannot determine number of cpu cores. Specify with -p."
    }

    if [ "$HPT" == "yes" ]; then
        if [ "$ARCH" == "Darwin" ]; then
            NUMBER=$(sysctl -a hw | grep -w logicalcpu | awk '{ print $2 }')
            got_cpu_info "$?"
        elif [ "$ARCH" == "FreeBSD" ]; then
            NUMBER=$(sysctl hw.ncpu | awk '{ print $2 }')
            got_cpu_info "$?"
        else
            # Linux and unknown systems: count processor entries. 'grep -c'
            # keeps grep's own exit status; 'grep | wc -l' always reported
            # the status of wc, so a failure was never detected.
            NUMBER=$(grep -c ^processor "$CPUINFO")
            got_cpu_info "$?"
        fi
        log INFO "Found $NUMBER logic processors."

    elif [ "$HPT" == "no" ]; then
        log INFO "Hyperthreading is disabled."

        if [ "$ARCH" == "Linux" ]; then
            if grep -q 'physical id' "$CPUINFO"; then
                PHYSICAL=$(grep 'physical id' "$CPUINFO" | sort | uniq | wc -l | tr -d ' ')
                if [ "$PHYSICAL" == "1" ]; then
                    log INFO "Found $PHYSICAL physical CPU."
                else
                    log INFO "Found $PHYSICAL physical CPUs."
                fi

                if grep -q 'core id' "$CPUINFO"; then
                    log DEBUG "Starting job only for each physical core on all physical CPU(s)."
                    # Count unique (physical id, core id) pairs. Counting
                    # unique core ids alone reports the cores of a single
                    # CPU, because every CPU numbers its cores from 0.
                    NUMBER=$(awk -F: '/^physical id/ { p = $2 } /^core id/ { print p ":" $2 }' "$CPUINFO" \
                        | sort -u | wc -l | tr -d ' ')
                    log INFO "Found $NUMBER physical cores."
                else
                    log INFO "Single core processor(s) detected."
                    log INFO "Starting job for each physical CPU."
                    NUMBER=$PHYSICAL
                fi
            else
                log INFO "No 'physical id' section found in $CPUINFO, is this a bug?."
                NUMBER=$(grep -c ^processor "$CPUINFO")
                got_cpu_info "$?"
            fi
        elif [ "$ARCH" == "Darwin" ]; then
            NUMBER=$(sysctl -a hw | grep -w physicalcpu | awk '{ print $2 }')
            got_cpu_info "$?"
        elif [ "$ARCH" == "FreeBSD" ]; then
            NUMBER=$(sysctl hw.ncpu | awk '{ print $2 }')
            got_cpu_info "$?"
        else
            NUMBER=$(grep "cpu cores" "$CPUINFO" | cut -d ":" -f 2 | uniq | sed -e 's/ //g')
            got_cpu_info "$?"
        fi
    fi

    if [ ! -z "$NUMBER" ]; then
        MAX_NO_OF_RUNNING_JOBS=$NUMBER
    else
        log INFO "$FUNCNAME ERROR - number of CPUs not obtained."
        exit 1
    fi
}

# Sleep for a random number of seconds in the range 0 .. ($1 - 1).
# Arguments: $1 - upper bound (exclusive); required.
# Globals:   ARGS, NUMBER (written).
# Exits 1 when no argument is given.
random_delay () {

    ARGS="$1"

    if [ -z "$ARGS" ]; then
        log ERROR "$FUNCNAME Function random delay, no argument specified."
        exit 1
    fi

    NUMBER=$(( RANDOM % ARGS ))
    sleep "$NUMBER"
}

# Try to take the global lock by creating the GLOBAL_LOCK directory.
# Returns: 0 when the directory was created (lock obtained), 1 otherwise.
global_lock () {

    mkdir "$GLOBAL_LOCK" > /dev/null 2>&1
    ERROR="$?"

    if [ ! "$ERROR" == "0" ]; then
        return 1
    else
        return 0
    fi
}

# Block until the global lock has been obtained, waiting a random delay of
# less than MAX_DELAY seconds between attempts.
get_global_lock () {

    while true; do
        global_lock
        ERROR="$?"
        if [ "$ERROR" == "0" ]; then
            break
        fi
        random_delay "$MAX_DELAY"
    done
}

# Release the global lock by removing the GLOBAL_LOCK directory.
release_global_lock () {

    rm -rf "$GLOBAL_LOCK"
}

# Report whether background jobs are still listed by 'jobs'.
# Returns: 0 when 'jobs' prints more than one line, 1 otherwise.
are_jobs_running () {

    NUMBER_OF_PROCS=$(jobs | wc -l)

    if [ "$NUMBER_OF_PROCS" -gt "1" ]; then
        return 0
    else
        return 1
    fi
}

# Copy an item into the local work directory ./PPSS_LOCAL_TMPDIR when
# TRANSFER_TO_SLAVE is "1"; otherwise only log that nothing is transferred.
# Uses scp when SECURE_COPY is "1" and SSH_SERVER is set, cp otherwise.
# Arguments: $1 - path of the item.
download_item () {

    ITEM="$1"
    ITEM_NO_PATH=$(basename "$ITEM")

    if [ "$TRANSFER_TO_SLAVE" == "1" ]; then
        log DEBUG "Transfering item $ITEM_NO_PATH to local disk."

        if [ "$SECURE_COPY" == "1" ] && [ ! -z "$SSH_SERVER" ]; then
            # The remote side of scp is parsed by a shell, so every space in
            # the path is escaped with a backslash.
            ITEM_ESCAPED=$(printf '%s\n' "$ITEM" | sed 's: :\\ :g')
            scp -q $SSH_OPTS $SSH_KEY "$USER@$SSH_SERVER:$ITEM_ESCAPED" "./$PPSS_LOCAL_TMPDIR"
            log DEBUG "Exit code of remote transfer is $?"
        else
            cp "$ITEM" "./$PPSS_LOCAL_TMPDIR"
            log DEBUG "Exit code of local transfer is $?"
        fi
    else
        log DEBUG "No transfer of item $ITEM_NO_PATH to local workpath."
    fi
}

# Copy a processed item to REMOTE_OUTPUT_DIR.
# Does nothing (returns 0) when TRANSFER_TO_SLAVE is "0". With SECURE_COPY
# "1" the item is sent with scp to SSH_SERVER and removed locally after a
# successful upload; otherwise it is copied with cp.
# Arguments: $1 - path of the item to upload.
upload_item () {

    ITEM="$1"

    if [ "$TRANSFER_TO_SLAVE" == "0" ]; then
        log DEBUG "File transfer is disabled."
        return 0
    fi

    log DEBUG "Uploading item $ITEM."
    if [ "$SECURE_COPY" == "1" ]; then
        # SSH_OPTS is passed like in every other scp call of this script, so
        # the upload uses the same batch mode, known_hosts file and control
        # socket settings.
        scp $SSH_OPTS $SSH_KEY "$ITEM" "$USER@$SSH_SERVER:$REMOTE_OUTPUT_DIR"
        ERROR="$?"
        if [ ! "$ERROR" == "0" ]; then
            log INFO "ERROR - uploading of $ITEM via SCP failed."
        else
            log DEBUG "Upload of item $ITEM success"
            rm "$ITEM"
        fi
    else
        cp "$ITEM" "$REMOTE_OUTPUT_DIR"
        ERROR="$?"
        if [ ! "$ERROR" == "0" ]; then
            log DEBUG "ERROR - uploading of $ITEM vi CP failed."
        fi
    fi
}

# Try to lock an item by creating a directory for it in ITEM_LOCK_DIR through
# exec_cmd. Only active when SSH_SERVER is set; otherwise returns 0 without
# doing anything.
# Arguments: $1 - the item to lock.
# Globals:   ITEM, LOCK_FILE_NAME, ITEM_LOCK_FILE, ERROR (written).
# Returns:   the exit status of the mkdir command (0 = lock obtained).
lock_item () {

    if [ ! -z "$SSH_SERVER" ]; then
        ITEM="$1"

        # Derive the lock name from the item: drop a leading "." and a
        # leading "..", remove all slashes and spaces, and put a backslash in
        # front of ' & ( ) because the name is parsed by a shell in exec_cmd.
        LOCK_FILE_NAME=$(printf '%s\n' "$ITEM" \
            | sed -e 's/^\.//' -e 's/^\.\.//' -e 's,[/ ],,g' -e "s/['&()]/\\\\&/g")
        ITEM_LOCK_FILE="$ITEM_LOCK_DIR/$LOCK_FILE_NAME"
        log DEBUG "Trying to lock item $ITEM - $ITEM_LOCK_FILE."
        exec_cmd "mkdir $ITEM_LOCK_FILE >> /dev/null 2>&1"
        ERROR="$?"
        return "$ERROR"
    fi
}

# Fill ARRAY with all items that must be processed.
# Without INPUT_FILE the items are the entries of SRC_DIR (listed through
# exec_cmd when SSH_SERVER is set, locally otherwise); with INPUT_FILE every
# line of that file is an item.
# Globals: ARRAY, SIZE_OF_ARRAY, ITEMS, count (written).
# Exits 1 when no items were found, or when INPUT_FILE is missing on a node.
get_all_items () {

    count=0

    if [ -z "$INPUT_FILE" ]; then
        if [ ! -z "$SSH_SERVER" ]; then   # Are we running stand-alone or as a slave?
            ITEMS=$(exec_cmd "ls -1 $SRC_DIR")
            check_status "$?" "$FUNCNAME" "Could not list files within remote source directory."
        else
            # An empty SRC_DIR lists the current directory, as before.
            ITEMS=$(ls -1 ${SRC_DIR:+"$SRC_DIR"})
        fi

        # Read line by line, so names are neither split on spaces nor
        # expanded as glob patterns. Empty lines are skipped.
        while IFS= read -r x; do
            if [ -z "$x" ]; then
                continue
            fi
            ARRAY[$count]="$x"
            count=$((count + 1))
        done <<< "$ITEMS"
    else
        if [ ! -z "$SSH_SERVER" ]; then   # Are we running stand-alone or as a slave?
            log DEBUG "Running as slave, input file has been pushed (hopefully)."
            if [ ! -e "$INPUT_FILE" ]; then
                log INFO "ERROR - input file $INPUT_FILE does not exist."
                cleanup
                exit 1
            fi
        fi

        exec 10<"$INPUT_FILE"

        # -r keeps backslashes intact; the '|| [ -n ... ]' part also picks up
        # a last line that has no trailing newline.
        while read -r LINE <&10 || [ -n "$LINE" ]; do
            ARRAY[$count]=$LINE
            count=$((count + 1))
        done

        exec 10>&-
    fi

    SIZE_OF_ARRAY="${#ARRAY[@]}"
    if [ "$SIZE_OF_ARRAY" -le "0" ]; then
        log INFO "ERROR: source file/dir seems to be empty."
        cleanup
        exit 1
    fi
}

# Select the next item that can be processed and store it in ITEM.
# The position is kept in ARRAY_POINTER_FILE and updated under the global
# lock. Empty items and items that cannot be locked are skipped. A selected
# item is handed to download_item.
# Returns: 0 - ITEM holds the next item,
#          1 - a stop was requested or ARRAY is empty,
#          2 - all items have been handed out.
get_item () {

    # A loop instead of recursion: each skipped item starts the next round.
    while true; do
        check_for_interrupt

        if [ "$STOP" == "1" ]; then
            return 1
        fi

        get_global_lock

        SIZE_OF_ARRAY="${#ARRAY[@]}"

        # Return error if the array is empty.
        if [ "$SIZE_OF_ARRAY" -le "0" ]; then
            release_global_lock
            return 1
        fi

        # This variable is used to walk through all array items.
        ARRAY_POINTER=$(cat "$ARRAY_POINTER_FILE")

        # Gives a status update on the current progress..
        PERCENT=$(( 100 * ARRAY_POINTER / SIZE_OF_ARRAY ))
        log INFO "Currently $PERCENT percent complete. Processed $ARRAY_POINTER of $SIZE_OF_ARRAY items."
        #echo -en "\033[1A"

        # Check if all items have been processed.
        if [ "$ARRAY_POINTER" -ge "$SIZE_OF_ARRAY" ]; then
            release_global_lock
            return 2
        fi

        # Select an item and move the pointer past it.
        ITEM="${ARRAY[$ARRAY_POINTER]}"
        ARRAY_POINTER=$((ARRAY_POINTER + 1))
        echo "$ARRAY_POINTER" > "$ARRAY_POINTER_FILE"

        if [ -z "$ITEM" ]; then
            release_global_lock
            continue
        fi

        lock_item "$ITEM"
        if [ ! "$?" == "0" ]; then
            log DEBUG "Item $ITEM is locked."
            release_global_lock
            continue
        fi

        log DEBUG "Got lock on $ITEM, processing."
        release_global_lock
        download_item "$ITEM"
        return 0
    done
}

# Fetch one item with get_item and write it to the FIFO.
#
# Globals read: ITEM (set by get_item), FIFO
# Returns: 0 - an item was written to $FIFO
#          1 - get_item did not deliver an item
start_single_worker () {

    local status

    get_item
    status=$?
    if [ "$status" != "0" ]
    then
        log DEBUG "Item empty, we are probably almost finished."
        return 1
    fi

    # The write happens under the global lock. printf is used because echo
    # would treat an item such as "-n" as an option, and the redirect target
    # is quoted so a FIFO path containing whitespace still works.
    get_global_lock
    printf '%s\n' "$ITEM" > "$FIFO"
    release_global_lock
    return 0
}
2009-03-02 20:57:28 +00:00
# Print the time between two timestamps as hours, minutes and seconds.
#
# Arguments: $1 - start time in seconds
#            $2 - end time in seconds
# Outputs:   one line on stdout: "Elapsed time (h:m:s): H:M:S"
#            (the fields are not zero-padded)
elapsed () {
    local before="$1"
    local after="$2"
    local total hours mins secs

    # Shell arithmetic replaces the former chain of expr calls; integer
    # division truncates, which yields the same values.
    total=$(( $after - $before ))
    hours=$(( total / 3600 ))
    mins=$(( (total % 3600) / 60 ))
    secs=$(( total % 60 ))

    echo "Elapsed time (h:m:s): $hours:$mins:$secs"
}
2009-01-10 12:32:40 +00:00
# Run $COMMAND for a single item and record the result in an item log file.
#
# Arguments: $1 - the item to process
# Globals read:    COMMAND, TRANSFER_TO_SLAVE, SRC_DIR, INPUT_FILE,
#                  PPSS_LOCAL_TMPDIR, PPSS_LOCAL_OUTPUT, JOB_LOG_DIR, HOSTNAME,
#                  SSH_SERVER, SSH_OPTS, SSH_KEY, USER
# Globals written: ITEM, ITEM_NO_PATH, OUTPUT_DIR, OUTPUT_FILE, LOG_FILE_NAME,
#                  ITEM_LOG_FILE, DATE, EXECME, BEFORE, AFTER, ERROR
#                  (these stay global because $COMMAND is run through eval and
#                  may refer to them, e.g. $ITEM, $OUTPUT_DIR, $OUTPUT_FILE)
# Returns: the exit status of start_single_worker, which is called at the end
#          to request the next item.
commando () {

    ITEM="$1"
    ITEM_NO_PATH=$(basename "$ITEM")
    OUTPUT_DIR="$PPSS_LOCAL_OUTPUT/$ITEM_NO_PATH"
    # This VAR can be used in scripts or command lines.
    OUTPUT_FILE="$ITEM_NO_PATH"

    log DEBUG "Processing item $ITEM"

    # Work out the path that is handed to the command.
    if [ "$TRANSFER_TO_SLAVE" == "0" ]
    then
        if [ -z "$SRC_DIR" ] && [ -n "$INPUT_FILE" ]
        then
            log DEBUG "Using item straight from INPUT FILE"
        else
            ITEM="$SRC_DIR/$ITEM"
        fi
    else
        ITEM="./$PPSS_LOCAL_TMPDIR/$ITEM_NO_PATH"
    fi

    # The log file name is the item path with one leading ".", then a leading
    # "..", and finally every "/" removed.
    LOG_FILE_NAME="${ITEM#.}"
    LOG_FILE_NAME="${LOG_FILE_NAME#..}"
    LOG_FILE_NAME="${LOG_FILE_NAME//\//}"
    ITEM_LOG_FILE="$JOB_LOG_DIR/$LOG_FILE_NAME"

    if ! mkdir -p "$OUTPUT_DIR"
    then
        log INFO "ERROR - could not create output directory $OUTPUT_DIR."
    fi

    if does_file_exist "$ITEM_LOG_FILE"
    then
        log DEBUG "Skipping item $ITEM - already processed."
    else
        ERROR=""

        # Some formatting of item log files. printf '%s' keeps backslashes in
        # item names literal, which echo -e did not.
        DATE=$(date '+%b %d %H:%M:%S')
        {
            echo "===== PPSS Item Log File ====="
            printf 'Host:\t\t%s\n' "$HOSTNAME"
            printf 'Item:\t\t%s\n' "$ITEM"
            printf 'Start date:\t%s\n' "$DATE"
            echo ""
        } > "$ITEM_LOG_FILE"

        # The actual execution of the command. If the command text mentions
        # $ITEM itself it is evaluated as given; otherwise the item is
        # appended directly after the command text.
        # NOTE(review): eval runs $COMMAND as shell code; it must come from a
        # trusted source.
        if printf '%s\n' "$COMMAND" | grep -qiF -- '$ITEM'
        then
            EXECME="$COMMAND"
        else
            EXECME='$COMMAND"$ITEM"'
        fi
        BEFORE="$(date +%s)"
        eval "$EXECME" >> "$ITEM_LOG_FILE" 2>&1
        ERROR="$?"
        AFTER="$(date +%s)"

        echo "" >> "$ITEM_LOG_FILE"

        # Some error logging. Success or fail.
        if [ "$ERROR" != "0" ]
        then
            printf 'Status:\t\tError - something went wrong.\n' >> "$ITEM_LOG_FILE"
        else
            printf 'Status:\t\tSucces - item has been processed.\n' >> "$ITEM_LOG_FILE"
        fi

        # Remove the local copy of the item once it has been processed.
        if [ "$TRANSFER_TO_SLAVE" == "1" ]
        then
            if [ -e "$ITEM" ]
            then
                rm -- "$ITEM"
            else
                log DEBUG "ERROR Something went wrong removing item $ITEM from local work dir."
            fi
        fi

        #upload_item "$PPSS_LOCAL_OUTPUT/$ITEM_NO_PATH/*"
        elapsed "$BEFORE" "$AFTER" >> "$ITEM_LOG_FILE"
        echo "" >> "$ITEM_LOG_FILE"

        if [ -n "$SSH_SERVER" ]
        then
            log DEBUG "Uploading item log file $ITEM_LOG_FILE to master."
            # SSH_OPTS and SSH_KEY are left unquoted on purpose: they may hold
            # several words (SSH_KEY is built as "-i <file>" in this script).
            if ! scp -q $SSH_OPTS $SSH_KEY "$ITEM_LOG_FILE" "$USER@$SSH_SERVER:~/$JOB_LOG_DIR/"
            then
                log INFO "ERROR - uploading of item log file failed."
            fi
        fi
    fi

    start_single_worker
    return $?
}
2009-02-28 15:51:00 +00:00
# This is the listener service. It listens on the pipe for events.
# A job is executed for every event received.
2009-01-10 12:32:40 +00:00
# Read events line by line from file descriptor 42 and start a background
# commando job for each of them. Returns when read reaches end of input.
#
# The descriptor must already be open when this function is called. Arguments
# passed to the function are not used.
listen_for_job () {

    local event

    log INFO "Listener started."
    # IFS= and -r hand the line to commando unchanged; a plain read would drop
    # backslashes and trim leading/trailing whitespace from item names.
    while IFS= read -r event <&42
    do
        commando "$event" &
    done
}
2009-01-10 12:32:40 +00:00
# This starts a number of parallel workers, based on the number of parallel jobs allowed.
# Call start_single_worker once for every allowed parallel job.
#
# Globals read: MAX_NO_OF_RUNNING_JOBS
start_all_workers () {

    # The counter is local so a caller's own "i" is not overwritten.
    local i=0

    if [ "$MAX_NO_OF_RUNNING_JOBS" == "1" ]
    then
        log INFO "Starting $MAX_NO_OF_RUNNING_JOBS worker."
    else
        log INFO "Starting $MAX_NO_OF_RUNNING_JOBS workers."
    fi

    while [ "$i" -lt "$MAX_NO_OF_RUNNING_JOBS" ]
    do
        start_single_worker
        i=$((i + 1))
    done
}
2009-02-28 18:42:16 +00:00
# Log which percentage of the items has been picked up so far.
#
# The total is the number of entries in $SRC_DIR, or the number of lines in
# $INPUT_FILE when that is set; the number handled is the number of entries in
# $ITEM_LOCK_DIR. All three counts are obtained through exec_cmd.
#
# Globals read:    CONFIG (sourced first), INPUT_FILE, SRC_DIR, ITEM_LOCK_DIR
# Globals written: SSH_KEY (prefixed with "-i " when set)
# Returns: 1 when the total is not a positive number, otherwise the status of
#          the final log call.
show_status () {

    local total processed percent

    source "$CONFIG"
    if [ -n "$SSH_KEY" ]
    then
        SSH_KEY="-i $SSH_KEY"
    fi

    if [ -z "$INPUT_FILE" ]
    then
        total=$(exec_cmd "ls -1 $SRC_DIR | wc -l")
    else
        total=$(exec_cmd "cat $INPUT_FILE | wc -l")
    fi
    # Error output of this call is discarded; a missing lock directory is
    # counted as zero items below.
    processed=$(exec_cmd "ls -1 $ITEM_LOCK_DIR | wc -l" 2>/dev/null)

    # Some wc implementations pad their output with blanks.
    total="${total//[[:space:]]/}"
    processed="${processed//[[:space:]]/}"

    # Refuse to divide by zero or by something that is not a number.
    case "$total" in
        ''|*[!0-9]*)
            total=0
            ;;
    esac
    if [ "$total" -le 0 ]
    then
        log INFO "ERROR - could not determine the number of items to process."
        return 1
    fi

    case "$processed" in
        ''|*[!0-9]*)
            processed=0
            ;;
    esac

    percent=$((100 * processed / total))
    log INFO "$percent percent complete."
}
2009-01-04 11:30:38 +00:00
2009-01-10 12:32:40 +00:00
# If this is called, the whole framework will execute.
# Entry point: dispatch on $MODE.
#
#   node|standalone  initialise, collect all items, start the listener in the
#                    background and start the workers; this is the only mode
#                    that returns to the caller
#   start            call start_ppss_on_node for every node in $NODES_FILE
#   config           write the configuration file
#   stop|pause       create the stop / pause signal through exec_cmd
#   continue         remove whichever of those signals exists
#   deploy|erase     call deploy_ppss / erase_ppss
#   status           show progress
#   kill             signal local bash processes whose ps entry mentions ppss
#   anything else    show usage and exit 1
#
# Every mode except node|standalone ends the script with exit.
main () {

    is_running

    case "$MODE" in
        node|standalone )
            init_vars
            test_server
            get_all_items
            listen_for_job "$MAX_NO_OF_RUNNING_JOBS" &
            LISTENER_PID=$!
            start_all_workers
            ;;
        start )
            # This option only starts all nodes.
            display_header
            if [ ! -e "$NODES_FILE" ]
            then
                # Report the file that was actually tested ($NODES_FILE).
                log INFO "ERROR file $NODES_FILE with list of nodes does not exist."
                cleanup
                exit 1
            else
                # Word splitting of the file content is intended here: every
                # whitespace-separated word is one node.
                for NODE in $(cat "$NODES_FILE")
                do
                    start_ppss_on_node "$NODE"
                done
            fi
            cleanup
            exit 0
            ;;
        config )
            display_header
            log INFO "Generating configuration file $CONFIG"
            add_var_to_config PPSS_LOCAL_TMPDIR "$PPSS_LOCAL_TMPDIR"
            add_var_to_config PPSS_LOCAL_OUTPUT "$PPSS_LOCAL_OUTPUT"
            cleanup
            exit 0
            ;;
        stop )
            display_header
            log INFO "Stopping PPSS on all nodes."
            exec_cmd "touch $STOP_SIGNAL"
            cleanup
            exit
            ;;
        pause )
            display_header
            log INFO "Pausing PPSS on all nodes."
            exec_cmd "touch $PAUSE_SIGNAL"
            cleanup
            exit
            ;;
        continue )
            display_header
            if does_file_exist "$STOP_SIGNAL"
            then
                log INFO "Continuing processing, please use $0 start to start PPSS on al nodes."
                exec_cmd "rm -f $STOP_SIGNAL"
            fi
            if does_file_exist "$PAUSE_SIGNAL"
            then
                log INFO "Continuing PPSS on all nodes."
                exec_cmd "rm -f $PAUSE_SIGNAL"
            fi
            cleanup
            exit
            ;;
        deploy )
            display_header
            log INFO "Deploying PPSS on nodes."
            deploy_ppss
            # Wait for background jobs before cleaning up.
            wait
            cleanup
            exit 0
            ;;
        status )
            display_header
            show_status
            cleanup
            exit 0
            ;;
        erase )
            display_header
            log INFO "Erasing PPSS from all nodes."
            erase_ppss
            cleanup
            exit 0
            ;;
        kill )
            for x in $(ps ux | grep ppss | grep -v grep | grep bash | awk '{ print $2 }')
            do
                # This script matches the filter too; skip our own PID so the
                # cleanup below is still reached.
                if [ "$x" != "$$" ]
                then
                    kill "$x"
                fi
            done
            cleanup
            exit 0
            ;;
        * )
            showusage
            exit 1
            ;;
    esac
}
2009-01-10 12:32:40 +00:00
# This call sets the whole framework in motion.
2009-01-04 11:30:38 +00:00
main

# Either start new jobs or exit, sleep in the meantime.
# Every pass waits five seconds, counts the ps entries that mention ppss.sh
# (ignoring grep itself and screen sessions) and compares that count with a
# per-platform minimum.
while :
do
    sleep 5
    JOBS=$(ps ax | grep -v grep | grep -v -i screen | grep ppss.sh | wc -l)
    log DEBUG "There are $JOBS running processes."

    # NOTE(review): the thresholds presumably account for the processes this
    # script keeps running itself - confirm before changing them.
    case "$ARCH" in
        Darwin )
            MIN_JOBS=4
            ;;
        * )
            MIN_JOBS=3
            ;;
    esac

    if [ "$JOBS" -gt "$MIN_JOBS" ]
    then
        # Still busy: check again after the configured interval.
        log DEBUG "Sleeping $INTERVAL seconds."
        sleep $INTERVAL
    else
        echo -en "\033[1B"
        log INFO "There are no more running jobs, so we must be finished."
        echo -en "\033[1B"
        log INFO "Killing listener and remainig processes."
        log INFO "Dying processes may display an error message."
        # NOTE(review): kill_process presumably ends this script; nothing in
        # this loop breaks out of it otherwise.
        kill_process
    fi
done

# Exit after all processes have finished.
wait