#!/usr/bin/env bash
#
# PPSS, the Parallel Processing Shell Script
#
# Copyright (c) 2010, Louwrentius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# See <http://www.gnu.org/licenses/>
# for a copy of the GNU General Public License
#
# "Patches or other contributions are always welcome!"
#
#
#
# Handling control-c for a clean shutdown.
#
trap 'kill_process' SIGINT

SCRIPT_NAME="Distributed Parallel Processing Shell Script"
SCRIPT_VERSION="2.90"

#
# The first argument to this script can be a mode. If it matches one of
# the known modes, it is stored in MODE and removed from the argument list
# so that only the options remain.
#
MODES="node start config stop pause continue deploy status erase kill"
for x in $MODES
do
    if [ "$x" == "$1" ]
    then
        MODE="$1"
        shift
        break
    fi
done
#
# The working directory of PPSS can be set with
# export PPSS_DIR=/path/to/workingdir
#
if [ -z "$PPSS_DIR" ]
then
    PPSS_DIR="ppss_dir"
fi

CONFIG=""
HOSTNAME="$(hostname)"
ARCH="$(uname)"
PPSS_HOME_DIR="ppss-home"
SOURCED="$0"
PID="$$"
PAUSE_SIGNAL="$PPSS_HOME_DIR/$PPSS_DIR/pause_signal"   # Pause processing if this file is present.
PAUSE_DELAY="60"                                        # Seconds to sleep between checks while paused.
STOP_SIGNAL="$PPSS_HOME_DIR/$PPSS_DIR/stop_signal"     # Stop processing if this file is present.
GLOBAL_COUNTER=1
LISTOFITEMS="$PPSS_DIR/INPUT_FILE-$PID"
JOB_LOG_DIR="$PPSS_DIR/job_log"                         # Directory containing log files of processed items.
LOGFILE="$PPSS_DIR/ppss-log-$PID.txt"                   # General PPSS log file. Contains lots of info.
QUIET="0"
STOP="0"                                                # STOP job.
MAX_DELAY="0"                                           # MAX DELAY between jobs.
MAX_LOCK_DELAY="9"
PERCENT="0"
LISTENER_PID=""
IFS_BACKUP="$IFS"
CPUINFO="/proc/cpuinfo"
PROCESSORS=""
START_KEY="$RANDOM$RANDOM$RANDOM"
KILL_KEY="$RANDOM$RANDOM$RANDOM"                        # This is a signal to stop immediately and kill
                                                        # all running processes.
QUEUE=""
INOTIFY=""
RECURSION="1"                                           # Set to 0 by --no-recursion / -r.
START_PPSS=""
STOP_PPSS=""
SIZE_OF_INPUT=""
LOCAL_LOCKING="1"
LIST_OF_PROCESSED_ITEMS="$PPSS_DIR/LIST_OF_PROCESSED_ITEMS"
PROCESSED_ITEMS=""
UNPROCESSED_ITEMS=""
ACTIVE_WORKERS="0"
DAEMON_POLLING_INTERVAL="10"
STAT=""
DAEMON_FILE_AGE="4"
ENABLE_INPUT_LOCK="0"
PROCESSING_TIME=""
NODE_ID="NODE_ID"

SSH_SERVER=""                                           # Remote server or 'master'.
SSH_KEY=""                                              # SSH key for ssh account.
SSH_KNOWN_HOSTS=""
SSH_SOCKET="$PPSS_DIR/ppss_ssh_socket-$$"               # Multiplex multiple SSH connections over 1 master.
# NOTE(review): "Cipher=blowfish" is a legacy setting; current OpenSSH releases
# may ignore or reject it - confirm against the ssh versions in use.
SSH_OPTS="-o BatchMode=yes -o ControlPath=$SSH_SOCKET \
    -o GlobalKnownHostsFile=./known_hosts \
    -o ControlMaster=auto \
    -o Cipher=blowfish \
    -o ConnectTimeout=10 "
# Same options without connection multiplexing (no ControlPath/ControlMaster).
SSH_OPTS_NOMP="-o BatchMode=yes -o GlobalKnownHostsFile=./known_hosts \
    -o Cipher=blowfish \
    -o ConnectTimeout=10 "
                                                        # Blowfish is faster but still secure.
SSH_MASTER_PID=""

ITEM_LOCK_DIR="$PPSS_DIR/PPSS_ITEM_LOCK_DIR"            # Remote directory on master used for item locking.
PPSS_LOCAL_TMPDIR="$PPSS_DIR/PPSS_LOCAL_TMPDIR"         # Local directory on slave for local processing.
PPSS_LOCAL_OUTPUT="$PPSS_DIR/PPSS_LOCAL_OUTPUT"         # Local directory on slave for local output.
DOWNLOAD_TO_NODE="0"                                    # Transfer item to slave via (s)cp.
UPLOAD_TO_SERVER="0"                                    # Transfer output back to server via (s)cp.
SECURE_COPY="1"                                         # If set, use SCP, Otherwise, use cp.
REMOTE_OUTPUT_DIR=""                                    # Remote directory to which output must be uploaded.
SCRIPT=""                                               # Custom user script that is executed by ppss.
ITEM_ESCAPED=""
DISABLE_SKIPPING=0
PPSS_NODE_STATUS="$PPSS_DIR/NODE_STATUS"
NODE_STATUS_FILE="$PPSS_NODE_STATUS/$HOSTNAME-status.txt"
DAEMON=0
EMAIL=""

REGISTER=""                                             # For STACK
STACK=""
TMP_STACK=""
showusage_short () {
    #
    # Print the condensed usage summary with a few examples to stdout.
    # Uses $0, $SCRIPT_NAME and $SCRIPT_VERSION.
    #
    echo
    echo "|P|P|S|S| $SCRIPT_NAME $SCRIPT_VERSION"
    echo
    echo "usage: $0 [ -d <sourcedir> | -f <sourcefile> ] [ -c '<command> \"\$ITEM\"' ]"
    echo " [ -C <configfile> ] [ -j ] [ -l <logfile> ] [ -p <# jobs> ]"
    echo " [ -q ] [ -D <delay> ] [ -h ] [ --help ] [ -r ] [ --daemon ]"
    echo
    echo "Examples:"
    echo " $0 -d /dir/with/some/files -c 'gzip '"
    echo " $0 -d /dir/with/some/files -c 'cp \"\$ITEM\" /tmp' -p 2"
    echo " $0 -f <file> -c 'wget -q -P /destination/directory \"\$ITEM\"' -p 10"
    echo
}
showusage_normal () {
    #
    # Print the usage text for stand-alone mode to stdout (shown for -h and
    # when a required option value is missing).
    # Uses $0, $SCRIPT_NAME and $SCRIPT_VERSION.
    #
    echo
    echo "|P|P|S|S| $SCRIPT_NAME $SCRIPT_VERSION"
    echo
    echo "PPSS is a Bash shell script that executes commands in parallel on a set"
    echo "of items, such as files in a directory, or lines in a file. The purpose"
    echo "of PPSS is to make it simple to benefit from multiple CPUs or CPU cores."
    echo
    echo "This short summary only discusses options for stand-alone mode. For a"
    echo "full listing of all options, run PPSS with the options --help"
    echo
    echo "Usage $0 [ options ]"
    echo
    echo -e "--command | -c Command to execute. Syntax: '<command> ' including the single quotes."
    echo -e " Example: -c 'ls -alh '. It is also possible to specify where an item "
    echo -e " must be inserted: 'cp \"\$ITEM\" /somedir'."
    echo
    echo -e "--sourcedir | -d Directory that contains files that must be processed. Individual files"
    echo -e " are fed as an argument to the command that has been specified with -c."
    echo
    echo -e "--sourcefile | -f Each single line of the supplied file will be fed as an item to the"
    echo -e " command that has been specified with -c. Read input from stdin with"
    echo -e " -f -"
    echo
    echo -e "--config | -C If the mode is config, a config file with the specified name will be"
    echo -e " generated based on all the options specified. In the other modes."
    echo -e " this option will result in PPSS reading the config file and start"
    echo -e " processing items based on the settings of this file."
    echo
    echo -e "--disable-ht | -j Disable hyper threading. Is enabled by default."
    echo
    echo -e "--log | -l Sets the name of the log file. The default is ppss-log-<pid>.txt."
    echo
    echo -e "--processes | -p Start the specified number of processes. Ignore the number of available"
    echo -e " CPUs."
    echo
    echo -e "--quiet | -q Shows no output except for a progress indication using percents."
    echo
    echo -e "--delay | -D Adds an initial random delay to the start of all parallel jobs to spread"
    echo -e " the load. The delay (seconds) is only used at the start of all 'threads'."
    echo
    echo -e "--daemon Daemon mode. Do not exit after items are processed, but keep looking "
    echo -e " for new items and process them. Read the manual how to use this!"
    echo -e " See --help for important additional options regarding daemon mode."
    echo
    echo -e "--no-recursion|-r By default, recursion of directories is enabled when the -d option is "
    echo -e " used. If this is not preferred, this can be disabled with this option "
    echo -e " Only files within the specified directory will be processed."
    echo
    echo -e "--email | -e PPSS sends an e-mail if PPSS has finished. It is also used if processing"
    echo -e " of an item has failed (configurable, see -h). "
    echo
    echo -e "--help Extended help, including options for distributed mode and Amazon EC2."
    echo
    echo -e "Example: encoding some wav files to mp3 using lame:"
    echo
    echo -e "$0 -d /path/to/wavfiles -c 'lame '"
    echo
    echo -e "Extended usage: use --help"
    echo
}
showusage_long () {
    #
    # Print the extended usage text (--help) to stdout, including the modes
    # and the options for distributed operation and Amazon EC2.
    # Uses $0, $SCRIPT_NAME, $SCRIPT_VERSION and $PPSS_HOME_DIR.
    #
    echo
    echo "|P|P|S|S| $SCRIPT_NAME $SCRIPT_VERSION"
    echo
    echo "PPSS is a Bash shell script that executes commands in parallel on a set "
    echo "of items, such as files in a directory, or lines in a file."
    echo
    echo "Usage: $0 [ MODE ] [ options ]"
    echo
    echo "Modes are optional and mainly used for running in distributed mode. Modes are:"
    echo
    echo " config Generate a config file based on the supplied option parameters."
    echo " deploy Deploy PPSS and related files on the specified nodes."
    echo " erase Erase PPSS and related files from the specified nodes."
    echo
    echo " start Starting PPSS on nodes."
    echo " pause Pausing PPSS on all nodes."
    echo " stop Stopping PPSS on all nodes."
    echo " continue Continuing PPSS on all nodes."
    echo " node Running PPSS as a node, requires additional options."
    echo
    echo "Options are:"
    echo
    echo -e "--command | -c Command to execute. Syntax: '<command> ' including the single quotes."
    echo -e " Example: -c 'ls -alh '. It is also possible to specify where an item "
    echo -e " must be inserted: 'cp \"\$ITEM\" /somedir'."
    echo
    echo -e "--sourcedir | -d Directory that contains files that must be processed. Individual files"
    echo -e " are fed as an argument to the command that has been specified with -c."
    echo
    echo -e "--sourcefile | -f Each single line of the supplied file will be fed as an item to the"
    echo -e " command that has been specified with -c. Instead of a file, stdin can"
    echo -e " be specified like \"-f -\" in order to 'pipe' items to ppss."
    echo -e " Example: cat file | ppss -f - -c 'echo '"
    echo
    echo -e "--config | -C If the mode is config, a config file with the specified name will be"
    echo -e " generated based on all the options specified. In the other modes."
    echo -e " this option will result in PPSS reading the config file and start"
    echo -e " processing items based on the settings of this file."
    echo
    echo -e "--disable-ht | -j Disable hyper threading. Is enabled by default."
    echo
    echo -e "--log | -l Sets the name of the log file. The default is ppss-log-<pid>.txt."
    echo
    echo -e "--processes | -p Start the specified number of processes. Ignore the number of available"
    echo -e " CPUs."
    echo
    echo -e "--quiet | -q Shows no output except for a progress indication using percents."
    echo
    echo -e "--delay | -D Adds an initial random delay to the start of all parallel jobs to spread"
    echo -e " the load. The delay is only used at the start of all 'threads'."
    echo
    echo -e "--daemon Do not exit after items are processed, but keep looking for new items"
    echo -e " and process them. Read the manual how to use this!"
    echo
    echo -e "--interval Specifies the polling interval when running in daemon mode. Polls every"
    echo -e " x seconds for new items to process."
    echo
    echo -e "--file-age When not using inotify, specify how many seconds must have passed before"
    echo -e " a file may be processed to prevent files being processed while being "
    echo -e " written to."
    echo
    echo -e "--disable-inotify If for some reason, inotify must not be used, use this option to disable"
    echo -e " usage of inotify. Regular polling will be used."
    echo
    echo -e "--enable-input-lock When PPSS is run in daemon mode, create a directory INPUT_LOCK to"
    echo -e " signal that items are processed and may not be touched by PPSS."
    echo -e " Once this directory is removed, PPSS will start processing items."
    echo
    # The original text listed --no-recursion twice in a row; it is printed once here.
    echo -e "--no-recursion|-r By default, recursion of directories is enabled when the -d option is "
    echo -e " used. If this is not preferred, this can be disabled with this option."
    echo -e " Only files within the specified directory will be processed."
    echo
    echo -e "The following options are used for distributed execution of PPSS."
    echo
    echo -e "--master | -m Specifies the SSH server that is used for communication between nodes."
    echo -e " Using SSH, file locks are created, informing other nodes that an item "
    echo -e " is locked. If items are files that must be processed, they must reside"
    echo -e " on this host. SCP is used to transfer files from this host to nodes"
    echo -e " for local processing."
    echo
    echo -e "--node | -n File containing a list of nodes that act as PPSS clients. One IP / DNS"
    echo -e " name per line."
    echo
    echo -e "--key | -k The SSH key that a node uses to connect to the master."
    echo
    echo -e "--known-hosts | -K The file that contains the server public key. Can often be found on "
    echo -e " hosts that already once connected to the server. See the file "
    echo -e " ~/.ssh/known_hosts or else, manually connect once and check this file."
    echo
    echo -e "--user | -u The SSH user name that is used by the node when logging in into the"
    echo -e " master SSH server."
    echo
    echo -e "--script | -S Specifies the script/program that must be copied to the nodes for"
    echo -e " execution through PPSS. Only used in the deploy mode."
    echo -e " This option should be specified if necessary when generating a config."
    echo
    echo -e "--download This option specifies that an item will be downloaded by the node"
    echo -e " from the server or share to the local node for processing."
    echo
    echo -e "--upload This option specifies that the output file will be copied back to"
    echo -e " the server, the --outputdir option is mandatory."
    echo
    echo -e "--no-scp | -b Do not use scp for downloading items. Use cp instead. Assumes that a"
    echo -e " network file system (NFS/SMB) is mounted under a local mount point."
    echo
    echo -e "--outputdir | -o Directory on server where processed files are put. If the result of "
    echo -e " encoding a wav file is an mp3 file, the mp3 file is put in the "
    echo -e " directory specified with this option."
    echo
    echo -e "--homedir | -H Directory in which PPSS is installed on the node."
    echo -e " Default is '$PPSS_HOME_DIR'."
    echo
    echo -e "--script | -S Script to run on the node. PPSS must copy this script to the node."
    echo
    echo -e "Amazon EC2 platform specific options:"
    echo
    echo -e "--awskeypair | -P The Amazon EC2 SSH keypair that new instances should use."
    echo
    echo -e "--AMI | -A The Amazon Machine Image that should be used to create new"
    echo -e " running instances."
    echo
    echo -e "--type | -T The type of EC2 instance that should be created."
    echo -e " Example: c1.xlarge or m1.medium"
    echo
    echo -e "--security | -G The security group that should be used for networking access."
    echo
    echo -e "--instances | -I The number of instances that should be started."
    echo
    echo
    echo -e "Example: encoding some wav files to mp3 using lame:"
    echo
    echo -e "$0 -c 'lame ' -d /path/to/wavfiles -j "
    echo
    echo -e "Running PPSS based on a configuration file."
    echo
    echo -e "$0 -C config.cfg"
    echo
    echo -e "Generating a configuration file. Wavs are converted to mp3. SCP is used for data transfer."
    echo
    echo -e "$0 config -C ppss-config.cfg -d /some/dir -o output --download --upload -K known_hosts \\"
    echo -e "-k ppss-key.dsa -n nodes.txt -m 10.0.0.100 \\"
    echo -e "-c 'lame --quiet \"\$ITEM\" -o \"\$OUTPUT_DIR/\$OUTPUT_FILE\".mp3' "
    echo
    echo -e "Running PPSS on a client as part of a cluster."
    echo
    echo -e "$0 node -d /somedir -c 'cp \"\$ITEM\" /some/destination' -m 10.0.0.50 -u ppss -k ppss-key.key"
    echo
}
kill_process () {
    #
    # SIGINT handler: write the kill key into the FIFO.
    #
    # The key is only written when $FIFO is an existing named pipe. Appending
    # to a missing path would create a regular file there, and an empty $FIFO
    # would only produce a redirection error.
    #
    # Returns 1 when there is no FIFO to write to.
    #
    if [ -p "$FIFO" ]
    then
        echo "$KILL_KEY" >> "$FIFO"
    else
        return 1
    fi
}
exec_cmd () {
    #
    # Execute a command line, on the master through SSH when SSH_SERVER is
    # set, or locally (via eval) otherwise.
    #
    # $1 - the command line, as a single string.
    # $2 - optional; "1" uses SSH_OPTS_NOMP (no connection multiplexing)
    #      instead of SSH_OPTS.
    #
    # Returns the exit status of the executed command.
    # Sets the globals CMD, NOMP and STATUS.
    #
    STATUS=""
    CMD="$1"
    NOMP="$2"   # Disable multiplexing.

    # NOTE(review): on FreeBSD the command line is prefixed with "bash", which
    # makes bash treat the first word of the command as a script - confirm intent.
    if [[ "$ARCH" == "FreeBSD" ]]
    then
        CMD="bash $CMD"
    fi

    if [ ! -z "$SSH_SERVER" ]
    then
        # SSH_OPTS* and SSH_KEY hold several words and are split on purpose.
        # CMD is quoted so that globs are expanded remotely, not locally.
        if [ -z "$NOMP" ]
        then
            # log DEBUG "REMOTE EXEC"
            # log DEBUG "$USER@$SSH_SERVER $CMD"
            ssh $SSH_OPTS $SSH_KEY "$USER@$SSH_SERVER" "$CMD"
            STATUS=$?
        elif [ "$NOMP" == "1" ]
        then
            # log DEBUG "REMOTE EXEC NO MP"
            ssh $SSH_OPTS_NOMP $SSH_KEY "$USER@$SSH_SERVER" "$CMD"
            STATUS=$?
        fi
    else
        eval "$CMD"
        STATUS=$?
        # log DEBUG "LOCAL EXEC - status is $STATUS"
    fi

    # STATUS stays empty when nothing was executed (NOMP set, but not "1").
    return ${STATUS:-0}
}
does_file_exist () {
    #
    # This function makes remote or local checking of existence of items
    # transparent: the check is an "ls -1" executed through exec_cmd.
    #
    # $1 - path to check. It is inserted into the command line unquoted.
    #
    # Returns 0 if the listing succeeded, 1 otherwise.
    # Sets the globals FILE and RES (output of the listing).
    #
    FILE="$1"
    if RES=$(exec_cmd "ls -1 $FILE" 2>&1)
    then
        #log DEBUG "$FILE does exist"
        return 0
    else
        #log DEBUG "$FILE does not exist"
        return 1
    fi
}
check_for_interrupt () {
    #
    # PPSS can be interrupted with a stop or pause command.
    #
    # Stop signal present:  status becomes STOPPED, STOP is set to "1" and
    #                       1 is returned.
    # Pause signal present: status becomes PAUSED and the function sleeps
    #                       PAUSE_DELAY seconds before checking both signals
    #                       again.
    # Otherwise:            status becomes RUNNING and 0 is returned.
    #
    # A loop is used instead of recursion so that a long pause does not keep
    # nesting function calls.
    #
    while true
    do
        if does_file_exist "$STOP_SIGNAL"
        then
            set_status "STOPPED"
            log INFO "STOPPING job. Stop signal found."
            STOP="1"
            return 1
        fi

        if does_file_exist "$PAUSE_SIGNAL"
        then
            set_status "PAUSED"
            log INFO "PAUSE: sleeping for $PAUSE_DELAY SECONDS."
            sleep "$PAUSE_DELAY"
        else
            set_status "RUNNING"
            return 0
        fi
    done
}
cleanup () {
    #
    # Remove the FIFO and the SSH control socket, if they exist.
    #
    log DEBUG "$FUNCNAME - Cleaning up all temp files and processes."

    if [ -e "$FIFO" ]
    then
        rm -- "$FIFO"
    fi

    if [ -e "$SSH_SOCKET" ]
    then
        rm -rf -- "$SSH_SOCKET"
    fi
}
add_var_to_config () {
    #
    # In config mode, append a "NAME=VALUE" line to the config file $CONFIG.
    # Does nothing in any other mode.
    #
    # $1 - variable name
    # $2 - value, written as-is. printf is used instead of "echo -e" so that
    #      backslashes in the value are not interpreted as escape sequences.
    #
    # Sets the globals VAR and VALUE.
    #
    if [ "$MODE" == "config" ]
    then
        VAR="$1"
        VALUE="$2"
        printf '%s=%s\n' "$VAR" "$VALUE" >> "$CONFIG"
    fi
}
is_var_empty () {
    #
    # Guard for mandatory option values: when $1 is empty, show the normal
    # usage text, clean up and terminate the script with exit status 1.
    # A non-empty $1 is accepted silently.
    #
    [ -n "$1" ] && return 0

    showusage_normal
    cleanup
    exit 1
}
detect_source_dir_nfs_exported () {
    #
    # Determine whether SRC_DIR, or one of the directories above it, is
    # listed in the NFS exports file. inotify does not play well with NFS,
    # so callers use the result to decide whether inotify may be enabled.
    #
    # $1 - optional path of the exports file (default: /etc/exports).
    #
    # Returns 1 if the source directory is NFS exported, 0 if it is not.
    # Sets the global NFS (1 = exported, 0 = not exported).
    #
    # The walk starts at SRC_DIR itself, so an export of exactly SRC_DIR is
    # detected too. As before, "/" and "." are never treated as a match.
    #
    local exports_file="${1:-/etc/exports}"
    local export_path=""
    local directory=""

    log DEBUG "Executing $FUNCNAME"

    # Always start from a known value instead of inheriting an old one.
    NFS=0

    if [ -e "$exports_file" ]
    then
        log DEBUG "NFS $exports_file found."
        while IFS= read -r export_path
        do
            # Ignore a trailing slash on the exported path.
            if [ "${#export_path}" -gt 1 ]
            then
                export_path="${export_path%/}"
            fi

            directory="$SRC_DIR"
            while [ "${#directory}" -gt 1 ] && [ "${directory%/}" != "$directory" ]
            do
                directory="${directory%/}"
            done

            while [ -n "$directory" ] && [ "$directory" != "/" ] && [ "$directory" != "." ]
            do
                if [ "$export_path" = "$directory" ]
                then
                    NFS=1
                    break 2
                fi
                directory=$(dirname "$directory")
            done
        done < <(awk '/^\// { print $1 }' "$exports_file")
    fi

    if [ "$NFS" = "1" ]
    then
        log INFO "Source directory is NFS exported. Disabling inotify."
        return 1
    else
        log INFO "Source directory is NOT NFS exported. Enabling inotify."
        return 0
    fi
}
detect_inotify () {
    #
    # Decide whether inotify is used. INOTIFY is set to 1 only if
    # /usr/bin/inotifywait exists, inotify has not been disabled explicitly
    # (INOTIFY=0) and the source directory is not NFS exported.
    # In all other cases INOTIFY is set to 0.
    #
    if [ -e /usr/bin/inotifywait ] && [ ! "$INOTIFY" = "0" ] && detect_source_dir_nfs_exported
    then
        INOTIFY=1
    else
        INOTIFY=0
    fi
}
process_arguments () {
    #
    # Process any command-line options that are specified.
    #
    # Arguments: the script's options (the mode has already been removed).
    #
    # Every recognised option sets its global variable and, in config mode,
    # is written to the config file through add_var_to_config. Options that
    # take a value are checked with is_var_empty, which shows the usage and
    # exits when the value is missing. Without that check "shift 2" fails on
    # a trailing option and the loop never ends.
    #
    # The function exits with status 1 on unknown options or inconsistent
    # input, and returns normally otherwise.
    #
    if [ "$#" = "0" ]
    then
        showusage_short
        exit 1
    fi

    while [ $# -gt 0 ]
    do
        case $1 in
            --config|-C )
                CONFIG="$2"
                is_var_empty "$CONFIG"

                if [ "$MODE" == "config" ]
                then
                    if [ -e "$CONFIG" ]
                    then
                        echo "Do you want to overwrite existing config file? [y/n]"
                        read -r yn
                        if [ "$yn" == "y" ] || [ "$yn" == "yes" ]
                        then
                            rm "$CONFIG"
                        else
                            echo "Aborting..."
                            cleanup
                            exit 1
                        fi
                    fi
                else
                    source "$CONFIG"
                fi

                # Turn the key file into an ssh option, unless --key already did.
                if [ ! -z "$SSH_KEY" ]
                then
                    case "$SSH_KEY" in
                        "-i "*) ;;
                        *) SSH_KEY="-i $SSH_KEY" ;;
                    esac
                fi

                # Only try to copy a known hosts file if one is configured.
                # With an empty name, "cat" would block reading from stdin.
                if [ ! -e "./known_hosts" ] && [ -n "$SSH_KNOWN_HOSTS" ]
                then
                    if [ -e "$SSH_KNOWN_HOSTS" ]
                    then
                        if [ ! "$SSH_KNOWN_HOSTS" == "known_hosts" ]
                        then
                            cat "$SSH_KNOWN_HOSTS" > ./known_hosts
                        fi
                    else
                        echo "File $SSH_KNOWN_HOSTS does not exist."
                        exit 1
                    fi
                fi
                shift 2 ;;
            --working-dir|-w )
                is_var_empty "$2"
                PPSS_DIR="$2"
                add_var_to_config PPSS_DIR "$PPSS_DIR"
                shift 2 ;;
            --node|-n )
                is_var_empty "$2"
                NODES_FILE="$2"
                add_var_to_config NODES_FILE "$NODES_FILE"
                shift 2 ;;
            --sourcefile|-f )
                INPUT_FILE="$2"
                is_var_empty "$INPUT_FILE"
                add_var_to_config INPUT_FILE "$INPUT_FILE"
                shift 2 ;;
            --sourcedir|-d )
                SRC_DIR="$2"
                is_var_empty "$SRC_DIR"
                add_var_to_config SRC_DIR "$SRC_DIR"
                shift 2 ;;
            --delay|-D )
                is_var_empty "$2"
                MAX_DELAY="$2"
                add_var_to_config MAX_DELAY "$MAX_DELAY"
                shift 2 ;;
            --disable-inotify )
                INOTIFY=0
                add_var_to_config INOTIFY "$INOTIFY"
                shift 1 ;;
            --enable-input-lock )
                ENABLE_INPUT_LOCK=1
                add_var_to_config ENABLE_INPUT_LOCK "$ENABLE_INPUT_LOCK"
                shift 1 ;;
            --daemon )
                DAEMON="1"
                QUIET="1"
                detect_inotify
                add_var_to_config DAEMON "$DAEMON"
                add_var_to_config QUIET "$QUIET"
                add_var_to_config INOTIFY "$INOTIFY"
                shift 1 ;;
            --interval )
                is_var_empty "$2"
                DAEMON_POLLING_INTERVAL="$2"
                add_var_to_config DAEMON_POLLING_INTERVAL "$DAEMON_POLLING_INTERVAL"
                shift 2 ;;
            --file-age )
                is_var_empty "$2"
                # The supplied value was previously never stored.
                DAEMON_FILE_AGE="$2"
                add_var_to_config DAEMON_FILE_AGE "$DAEMON_FILE_AGE"
                shift 2 ;;
            --email|-e )
                is_var_empty "$2"
                EMAIL="$2"
                add_var_to_config EMAIL "$EMAIL"
                shift 2 ;;
            --awskeypair|-P )
                is_var_empty "$2"
                AWS_KEYPAIR="$2"
                add_var_to_config AWS_KEYPAIR "$AWS_KEYPAIR"
                shift 2 ;;
            --AMI|-A )
                is_var_empty "$2"
                AMI_ID="$2"
                add_var_to_config AMI_ID "$AMI_ID"
                shift 2 ;;
            --type|-T )
                is_var_empty "$2"
                INSTANCE_TYPE="$2"
                add_var_to_config INSTANCE_TYPE "$INSTANCE_TYPE"
                shift 2 ;;
            --security|-G )
                is_var_empty "$2"
                SECURITY_GROUP="$2"
                add_var_to_config SECURITY_GROUP "$SECURITY_GROUP"
                shift 2 ;;
            --instances|-I )
                is_var_empty "$2"
                NUM_NODES="$2"
                add_var_to_config NUM_NODES "$NUM_NODES"
                shift 2 ;;
            --command|-c )
                COMMAND="$2"
                is_var_empty "$COMMAND"
                if [ "$MODE" == "config" ]
                then
                    # Single quotes keep the command intact when the config is sourced.
                    COMMAND="'$COMMAND'"
                    add_var_to_config COMMAND "$COMMAND"
                fi
                shift 2 ;;
            -h )
                showusage_normal
                exit 1 ;;
            --help )
                showusage_long
                exit 1 ;;
            --homedir|-H )
                is_var_empty "$2"
                PPSS_HOME_DIR="$2"
                # Was written as PPSS_DIR, which overwrote the working directory setting.
                add_var_to_config PPSS_HOME_DIR "$PPSS_HOME_DIR"
                shift 2 ;;
            --disable-ht|-j )
                HYPERTHREADING=no
                add_var_to_config HYPERTHREADING "$HYPERTHREADING"
                shift 1 ;;
            --log|-l )
                is_var_empty "$2"
                LOGFILE="$2"
                add_var_to_config LOGFILE "$LOGFILE"
                shift 2 ;;
            --no-recursion|-r )
                RECURSION="0"
                # Was written as LOGFILE, which overwrote the log file setting.
                add_var_to_config RECURSION "$RECURSION"
                shift 1 ;;
            --workingdir|-w )
                # NOTE(review): "-w" is already matched by --working-dir above,
                # so only the long form reaches this branch.
                is_var_empty "$2"
                WORKINGDIR="$2"
                add_var_to_config WORKINGDIR "$WORKINGDIR"
                shift 2 ;;
            --key|-k )
                SSH_KEY="$2"
                is_var_empty "$SSH_KEY"
                add_var_to_config SSH_KEY "$SSH_KEY"
                if [ ! -z "$SSH_KEY" ]
                then
                    SSH_KEY="-i $SSH_KEY"
                fi
                shift 2 ;;
            --known-hosts|-K )
                is_var_empty "$2"
                SSH_KNOWN_HOSTS="$2"
                add_var_to_config SSH_KNOWN_HOSTS "$SSH_KNOWN_HOSTS"
                shift 2 ;;
            --no-scp|-b )
                SECURE_COPY=0
                add_var_to_config SECURE_COPY "$SECURE_COPY"
                shift 1 ;;
            --outputdir|-o )
                is_var_empty "$2"
                REMOTE_OUTPUT_DIR="$2"
                add_var_to_config REMOTE_OUTPUT_DIR "$REMOTE_OUTPUT_DIR"
                shift 2 ;;
            --processes|-p )
                is_var_empty "$2"
                MAX_NO_OF_RUNNING_JOBS="$2"
                add_var_to_config MAX_NO_OF_RUNNING_JOBS "$MAX_NO_OF_RUNNING_JOBS"
                shift 2 ;;
            --master|-m )
                is_var_empty "$2"
                SSH_SERVER="$2"
                add_var_to_config SSH_SERVER "$SSH_SERVER"
                shift 2 ;;
            --script|-S )
                is_var_empty "$2"
                SCRIPT="$2"
                add_var_to_config SCRIPT "$SCRIPT"
                shift 2 ;;
            --download )
                DOWNLOAD_TO_NODE="1"
                add_var_to_config DOWNLOAD_TO_NODE "$DOWNLOAD_TO_NODE"
                shift 1 ;;
            --upload )
                # The output directory (-o) must precede --upload on the command line.
                if [ -z "$REMOTE_OUTPUT_DIR" ]
                then
                    echo "ERROR: no server-side output directory specified with -o"
                    exit 1
                fi
                UPLOAD_TO_SERVER="1"
                add_var_to_config UPLOAD_TO_SERVER "$UPLOAD_TO_SERVER"
                shift 1 ;;
            --quiet|-q )
                QUIET="1"
                add_var_to_config QUIET "$QUIET"
                shift 1 ;;
            --user|-u )
                is_var_empty "$2"
                USER="$2"
                add_var_to_config USER "$USER"
                shift 2 ;;
            --version|-v )
                echo ""
                echo "$SCRIPT_NAME version $SCRIPT_VERSION"
                echo ""
                exit 0 ;;
            * )
                showusage_short
                echo
                echo "Unknown option $1 "
                echo
                exit 1 ;;
        esac
    done

    if [ -z "$SRC_DIR" ] && [ -z "$INPUT_FILE" ]
    then
        showusage_short
        echo
        echo "No source file or directory specified with -f or -d."
        exit 1
    fi

    if [ ! -e "$SRC_DIR" ] && [ -z "$MODE" ] && [ -z "$INPUT_FILE" ]
    then
        showusage_short
        echo
        echo "Source directory $SRC_DIR does not exist."
        exit 1
    fi

    if [ "$SRC_DIR" == "." ]
    then
        echo
        echo "PPSS is not designed to process items from within the directory"
        echo "it is being run. PPSS will start to process its own files from"
        echo "its working directory $PPSS_DIR which is probably not what you"
        echo "want. Are you sure you want to continue?"
        echo
        read -r YN
        # Continue only on "y" or "Y". The old "||" test was always true,
        # so the script exited whatever the answer was.
        if [ ! "$YN" == "y" ] && [ ! "$YN" == "Y" ]
        then
            exit 1
        fi
    fi

    if [ "$DAEMON" == "1" ] && [ -z "$SRC_DIR" ]
    then
        showusage_short
        echo
        echo "Daemon monitors a specified directory (with the -d option) for files to process."
        echo "Read the on-line manual for more information."
        exit 1
    fi
}
display_header () {
    #
    # Log the start-up banner (script name, version and hostname) with
    # log type DSPLY.
    #
    log DSPLY ""
    log DSPLY "========================================================="
    log DSPLY " |P|P|S|S| "
    log DSPLY "$SCRIPT_NAME vers. $SCRIPT_VERSION"
    log DSPLY "========================================================="
    log DSPLY "Hostname:\t\t$HOSTNAME"
    log DSPLY "---------------------------------------------------------"
}
create_working_directory () {
    #
    # Make sure the PPSS working directory $PPSS_DIR is present; it is
    # created, including missing parent directories, when nothing exists
    # at that path yet.
    #
    [ -e "$PPSS_DIR" ] || mkdir -p "$PPSS_DIR"
}
expand_str () {
    #
    # Print $1, padded on the right with spaces up to TYPE_LENGTH characters.
    # Longer strings are printed unchanged. When TYPE_LENGTH is not set, no
    # padding is added (previously this produced a test error).
    #
    # $1 - the string to pad
    #
    local str="$1"
    local width="${TYPE_LENGTH:-0}"

    while [ "${#str}" -lt "$width" ]
    do
        str="$str "
    done
    echo "$str"
}
are_we_sourced () {
    #
    # Compare the file name part of $SOURCED (the script's $0) with "ppss".
    #
    # Returns 1 when the name is "ppss", 0 otherwise.
    # Sets the global RES to the file name part.
    #
    # Parameter expansion replaces the unquoted "basename" call, which broke
    # on paths containing spaces and on names such as "-bash".
    #
    RES="${SOURCED##*/}"
    if [ "$RES" = "ppss" ]
    then
        return 1
    else
        return 0
    fi
}
get_time_in_seconds () {
    #
    # Write the current time, in seconds, to stdout. The value is also left
    # in THE_TIME.
    #
    case "$ARCH" in
        SunOS)
            #
            # Dirty hack because this ancient operating system does not support +%s...
            #
            THE_TIME=$(truss /usr/bin/date 2>&1 | grep ^time | awk '{ print $3 }')
            ;;
        *)
            THE_TIME="$(date +%s)"
            ;;
    esac
    echo "$THE_TIME"
}
set_md5 () {
    #
    # Select the MD5 command for this platform (global MD5) and verify that
    # it can actually be executed.
    #
    # Returns 0 if the command works, 1 (after logging an error) if not.
    #
    case $ARCH in
        "Darwin")  MD5=md5 ;;
        "FreeBSD") MD5=md5 ;;
        "SunOS")   MD5="digest -a md5" ;;
        "Linux")   MD5=md5sum ;;
        # Unknown platform: keep a preset MD5, otherwise try md5sum.
        *)         MD5="${MD5:-md5sum}" ;;
    esac

    # MD5 may hold a command plus arguments, so it is expanded unquoted.
    # The old check '[ ! "$?" ]' could never be true; test the command itself.
    if echo "test" | $MD5 > /dev/null 2>&1
    then
        return 0
    else
        log ERROR "ERROR - PPSS requires $MD5. It may not be within the path or installed."
        return 1
    fi
}
set_stat () {
    #
    # In daemon mode without inotify, select the platform's stat command
    # (global STAT) and verify that it can be executed. In every other
    # situation nothing is done.
    #
    # Returns 0 if stat is not needed or works, 1 (after logging an error)
    # if it cannot be executed.
    #
    if [ "$DAEMON" != "1" ] || [ "$INOTIFY" != "0" ]
    then
        return 0
    fi

    case $ARCH in
        "Darwin")  STAT="stat -f%m" ;;
        "FreeBSD") STAT="stat -f%m" ;;
        "SunOS")   STAT="gstat -c%Y" ;;
        "Linux")   STAT="stat -c%Y" ;;
        # Unknown platform: keep a preset STAT, otherwise try the GNU syntax.
        *)         STAT="${STAT:-stat -c%Y}" ;;
    esac

    # STAT holds a command plus arguments, so it is expanded unquoted.
    # The old check '[ ! "$?" ]' could never be true; test the command itself.
    if $STAT . > /dev/null 2>&1
    then
        return 0
    else
        log ERROR "ERROR - PPSS daemon mode requires stat. It may not be within the path or installed."
        return 1
    fi
}
log () {
    #
    # Write a message to the log file and/or the screen.
    #
    # $1 - log type (DEBUG, INFO, WARN, ERROR, DSPLY or PRCNT)
    # $2 - the message
    #
    # Log file: INFO, ERROR, WARN and DSPLY are always written to $LOGFILE.
    #           When PPSS_DEBUG is set and not "0", every type is written.
    # Screen:   DSPLY, ERROR and WARN are shown when QUIET is "0".
    #           ERROR is also shown when QUIET is "1".
    #           PRCNT rewrites the current line (carriage return, no newline).
    #
    # Sets the globals TYPE, MESG, TYPE_LENGTH, TYPE_EXP, DATE, PREFIX,
    # PREFIX_SMALL, ECHO_MSG and LOG_MSG.
    #
    TYPE="$1"
    MESG="$2"
    TYPE_LENGTH=5

    #
    # Performance hack. Don't go through all the code if not required.
    #
    if [ "$TYPE" = "DEBUG" ] && [ "$PPSS_DEBUG" == "0" ]
    then
        return
    fi

    # expand_str pads the type to TYPE_LENGTH so the log columns line up.
    TYPE_EXP=$(expand_str "$TYPE")
    DATE=$(date +%b\ %d\ %H:%M:%S)
    PREFIX="$DATE: ${TYPE_EXP:0:$TYPE_LENGTH}"
    PREFIX_SMALL="$DATE: "

    if [ ! "$TYPE" = "ERROR" ]
    then
        ECHO_MSG="$PREFIX_SMALL $MESG"
    else
        ECHO_MSG="$PREFIX_SMALL [ERROR] $MESG"
    fi

    LOG_MSG="$PREFIX $MESG"

    if [ ! -z "$PPSS_DEBUG" ] && [ ! "$PPSS_DEBUG" == "0" ]
    then
        echo -e "$LOG_MSG" >> "$LOGFILE"
    elif [ "$TYPE" == "INFO" ] || [ "$TYPE" == "ERROR" ] || [ "$TYPE" == "WARN" ] || [ "$TYPE" == "DSPLY" ]
    then
        echo -e "$LOG_MSG" >> "$LOGFILE"
    fi

    # "||" and "&&" are evaluated left to right with equal precedence, so the
    # QUIET test applies to all three types.
    if [ "$TYPE" == "DSPLY" ] || [ "$TYPE" == "ERROR" ] || [ "$TYPE" == "WARN" ] && [ "$QUIET" == "0" ]
    then
        echo -e "$ECHO_MSG"
    elif [ "$TYPE" == "ERROR" ] && [ "$QUIET" == "1" ]
    then
        echo -e "$ECHO_MSG"
    fi

    if [ "$TYPE" == "PRCNT" ]
    then
        echo -en "\r$ECHO_MSG"
    fi
}
init_vars () {
    #
    # Initialise the run: record the start time, check the helper commands,
    # create and open the FIFO, report the CPU and create the directories
    # PPSS needs, locally and - when SSH_SERVER is set - on the master.
    #

    #
    # Get start time to measure how long PPSS has been running.
    #
    START_PPSS=$(get_time_in_seconds)

    #
    # Check if MD5(SUM) is present on the system.
    #
    set_md5

    #
    # Check if stat is present and works on the system if daemon mode is enabled.
    #
    set_stat

    #
    # Is PPSS run as a daemon? Then use input locking, which is not required otherwise.
    #
    if [ "$DAEMON" == "1" ]
    then
        INPUT_LOCK="$SRC_DIR/INPUT_LOCK"
    fi

    #
    # For some strange reason, this value differs on different operating systems due to
    # different behaviour between the ps utility across operating systems.
    # NOTE(review): MIN_JOBS is left unset on other platforms - confirm the callers cope.
    #
    if [ "$ARCH" == "Darwin" ]
    then
        MIN_JOBS=4
    elif [ "$ARCH" == "Linux" ]
    then
        MIN_JOBS=3
    fi

    FIFO="$PPSS_DIR"/ppss-fifo-$RANDOM-$RANDOM

    if [ ! -e "$FIFO" ]
    then
        mkfifo -m 600 "$FIFO"
    fi

    # Keep the FIFO open for reading and writing on file descriptor 42.
    exec 42<> "$FIFO"

    set_status "RUNNING"

    if [ -e "$CPUINFO" ]
    then
        CPU=$(grep 'model name' "$CPUINFO" | cut -d ":" -f 2 | sed -e 's/^ //g' | sort | uniq)
        log DSPLY "CPU: $CPU"
    elif [ "$ARCH" == "Darwin" ]
    then
        MODEL=$(system_profiler SPHardwareDataType | grep "Processor Name" | cut -d ":" -f 2)
        SPEED=$(system_profiler SPHardwareDataType | grep "Processor Speed" | cut -d ":" -f 2)
        log DSPLY "CPU: $MODEL $SPEED"
    elif [ "$ARCH" == "SunOS" ]
    then
        CPU=$(psrinfo -v | grep MHz | cut -d " " -f 4,8 | awk '{ printf ("Processor architecture: %s @ %s MHz.\n", $1,$2) }' | head -n 1)
        log DSPLY "$CPU"
    else
        log DSPLY "CPU: Cannot determine. Provide a patch for your arch!"
        log DSPLY "Arch is $ARCH"
    fi

    if [ -z "$MAX_NO_OF_RUNNING_JOBS" ]
    then
        # Left unquoted on purpose: no argument is passed when HYPERTHREADING is empty.
        get_no_of_cpus $HYPERTHREADING
    fi

    if [ ! -z "$SSH_SERVER" ]
    then
        if ! does_file_exist "$PPSS_HOME_DIR/$JOB_LOG_DIR"
        then
            log DEBUG "Remote Job log directory $PPSS_HOME_DIR/$JOB_LOG_DIR does not exist. Creating."
            exec_cmd "mkdir $PPSS_HOME_DIR/$JOB_LOG_DIR"
        fi
    fi

    if [ ! -e "$JOB_LOG_DIR" ]
    then
        mkdir -p "$JOB_LOG_DIR"
    fi

    # In distributed mode the item lock directory lives on the master.
    if [ ! -z "$SSH_SERVER" ]
    then
        ITEM_LOCK_DIR="$PPSS_HOME_DIR/$ITEM_LOCK_DIR"
    fi

    if ! does_file_exist "$ITEM_LOCK_DIR"
    then
        if [ ! -z "$SSH_SERVER" ]
        then
            log DEBUG "Creating remote item lock dir."
        else
            log DEBUG "Creating local item lock dir."
        fi

        # The old check '[ ! "$?" ]' could never be true; test the command itself.
        if ! exec_cmd "mkdir $ITEM_LOCK_DIR"
        then
            log DEBUG "Failed to create item lock dir."
        fi
    fi

    if [ ! -z "$SSH_SERVER" ]
    then
        if ! does_file_exist "$REMOTE_OUTPUT_DIR"
        then
            log DEBUG "Remote output dir $REMOTE_OUTPUT_DIR does not exist."
            exec_cmd "mkdir $REMOTE_OUTPUT_DIR"
        fi
    fi

    if [ ! -e "$PPSS_LOCAL_TMPDIR" ]
    then
        mkdir "$PPSS_LOCAL_TMPDIR"
    fi

    if [ ! -e "$PPSS_LOCAL_OUTPUT" ]
    then
        mkdir "$PPSS_LOCAL_OUTPUT"
    fi

    if [ ! -e "$PPSS_NODE_STATUS" ]
    then
        mkdir -p "$PPSS_NODE_STATUS"
    fi
}
upload_status () {
    #
    # Copy the node status file ($NODE_STATUS_FILE) into the node status
    # directory on the server with scp and log (DEBUG) whether that worked.
    #
    if scp -q $SSH_OPTS $SSH_KEY $NODE_STATUS_FILE $USER@$SSH_SERVER:$PPSS_HOME_DIR/$PPSS_NODE_STATUS/
    then
        log DEBUG "Uploaded status to server ok."
        return
    fi
    log DEBUG "Uploaded status to server failed."
}
set_status () {
    #
    # Publish the state of this node to the server.
    #
    # $1 - status word (e.g. RUNNING, STOPPED, ERROR).
    #
    # Only does something when $SSH_SERVER is set. Writes one line
    # "<node id> <hostname> <status> <number of processed items>" to
    # $NODE_STATUS_FILE and uploads it with upload_status.
    #
    if [ -z "$SSH_SERVER" ]
    then
        return 0
    fi

    STATUS="$1"

    # The list of processed items may not exist yet (nothing has been
    # processed so far); report zero instead of an empty field.
    NO_PROCESSED=0
    if [ -e "$LIST_OF_PROCESSED_ITEMS" ]
    then
        NO_PROCESSED=$(wc -l < "$LIST_OF_PROCESSED_ITEMS" | awk '{ print $1 }')
    fi

    NODE=$(cat "$PPSS_DIR/$NODE_ID")
    echo "$NODE $HOSTNAME $STATUS $NO_PROCESSED" > "$NODE_STATUS_FILE"
    upload_status
}
check_status () {
    #
    # Abort the script when a previous step failed.
    #
    # $1 - exit code to check, $2 - name of the calling function,
    # $3 - message to display.
    #
    # A code of "0" returns immediately; anything else displays the message,
    # sets the node status to ERROR, runs cleanup and exits with 1.
    #
    ERROR="$1"
    FUNCTION="$2"
    MESSAGE="$3"

    [ "$ERROR" == "0" ] && return 0

    log DSPLY "$FUNCTION - $MESSAGE"
    set_status ERROR
    cleanup
    exit 1
}
erase_ppss () {
    #
    # Ask for confirmation on stdin and, when the answer is "yes" or "YES",
    # remove the PPSS home directory from every node listed in $NODES_FILE
    # through ssh. Any other answer aborts.
    #
    echo "Are you realy sure you want to erase PPSS from all nodes!? (YES/NO)"
    read YN
    case "$YN" in
        yes|YES)
            for NODE in $(cat $NODES_FILE)
            do
                log DSPLY "Erasing PPSS homedir $PPSS_HOME_DIR from node $NODE."
                ssh -q $SSH_KEY $SSH_OPTS $USER@$NODE "rm -rf $PPSS_HOME_DIR"
            done
            ;;
        *)
            log DSPLY "Aborting.."
            ;;
    esac
}
stack_push_tmp () {
    #
    # Append $1 as a new last line of the newline separated $TMP_STACK.
    #
    TMP1="$1"
    [ -z "$TMP_STACK" ] || TMP_STACK="$TMP_STACK"$'\n'
    TMP_STACK="$TMP_STACK$TMP1"
}
stack_push () {
    #
    # Put $1 on top of $STACK (one item per line, newest item first).
    #
    line="$1"
    case "$STACK" in
        "") STACK="$line" ;;
        *)  STACK="$line"$'\n'"$STACK" ;;
    esac
}
unprocessed_stack_push () {
    #
    # Put $1 on top of $UNPROCESSED_ITEMS (one item per line, newest first).
    #
    # The emptiness test must look at UNPROCESSED_ITEMS itself; testing
    # PROCESSED_ITEMS either throws away earlier entries or adds a stray
    # empty line to the list.
    #
    line="$1"
    if [ -z "$UNPROCESSED_ITEMS" ]
    then
        UNPROCESSED_ITEMS="$line"
    else
        UNPROCESSED_ITEMS="$line"$'\n'"$UNPROCESSED_ITEMS"
    fi
}
processed_stack_push () {
    #
    # Put $1 on top of $PROCESSED_ITEMS (one item per line, newest first).
    #
    line="$1"
    case "$PROCESSED_ITEMS" in
        "") PROCESSED_ITEMS="$line" ;;
        *)  PROCESSED_ITEMS="$line"$'\n'"$PROCESSED_ITEMS" ;;
    esac
}
stack_pop () {
    #
    # Take the top (first) line off $STACK.
    #
    # The popped item is stored in $REGISTER and the remaining lines are put
    # back in $STACK (and, as before, mirrored in $TMP_STACK).
    # Returns 0 when an item was popped, 1 when the stack was empty.
    #
    # The stack is split on newlines only, so items containing spaces or
    # glob characters come back exactly as they were pushed.
    #
    local rest="$STACK"

    # Skip empty entries at the top of the stack.
    while [[ "$rest" == $'\n'* ]]
    do
        rest="${rest#$'\n'}"
    done

    if [[ "$rest" == *$'\n'* ]]
    then
        REGISTER="${rest%%$'\n'*}"
        STACK="${rest#*$'\n'}"
    else
        REGISTER="$rest"
        STACK=""
    fi
    TMP_STACK="$STACK"

    [ -n "$REGISTER" ]
}
2011-08-06 18:08:41 +00:00
is_screen_installed () {
    #
    # Check over ssh whether 'screen' can be started on node $1.
    #
    # Returns 0 at once when DISABLE_SCREEN_TEST is "1". Returns 1 and logs
    # an error when the remote test command fails.
    #
    [ "$DISABLE_SCREEN_TEST" == "1" ] && return 0

    NODE="$1"
    if ssh -q $SSH_OPTS_NODE $SSH_KEY $USER@$NODE "screen -m -D -S test ls" > /dev/null 2>&1
    then
        log DEBUG "'Screen' is installed on node $NODE."
    else
        log ERROR "The 'Screen' command may not be installed on node $NODE."
        log ERROR "Or some other SSH related error occurred."
        return 1
    fi
}
2010-01-31 22:31:22 +00:00
deploy () {
    #
    # Install PPSS on a single node.
    #
    # $1 - host name of the node.
    #
    # Opens an ssh master connection to the node, creates the PPSS
    # directories there and copies this script, the ssh key, the config file,
    # known_hosts and (when set) $SCRIPT and $INPUT_FILE into the PPSS home
    # directory. The outcome is reported with log DSPLY.
    #
    NODE="$1"
    SSH_SOCKET="ppss_ssh_socket-$NODE"
    SSH_OPTS_NODE="-o BatchMode=yes -o ControlPath=$SSH_SOCKET \
                   -o GlobalKnownHostsFile=./known_hosts \
                   -o ControlMaster=auto \
                   -o Cipher=blowfish \
                   -o ConnectTimeout=5 "

    ERROR=0

    # Remember a failure: ERROR becomes (and stays) 1 once any step
    # reports a non-zero exit code.
    set_error () {
        if [ ! "$1" == "0" ]
        then
            ERROR=1
        fi
    }

    ssh -N -M $SSH_OPTS_NODE $SSH_KEY $USER@$NODE &
    SSH_PID=$!

    # The result is only logged by is_screen_installed; it does not
    # influence ERROR.
    is_screen_installed "$NODE"

    KEY=$(echo $SSH_KEY | cut -d " " -f 2)

    # The item lock directory is taken from $ITEM_LOCK_DIR; without the '$'
    # a directory literally called ITEM_LOCK_DIR was created on the node.
    ssh -q $SSH_OPTS_NODE $SSH_KEY $USER@$NODE "cd ~ && mkdir -p $PPSS_HOME_DIR && mkdir -p $PPSS_HOME_DIR/$JOB_LOG_DIR && mkdir -p $PPSS_HOME_DIR/$ITEM_LOCK_DIR >> /dev/null 2>&1"
    set_error $?
    ssh -q $SSH_OPTS_NODE $SSH_KEY $USER@$NODE "cd ~ && cd $PPSS_HOME_DIR && cd $PPSS_DIR && echo $NODE > $NODE_ID"
    set_error $?
    scp -q $SSH_OPTS_NODE $SSH_KEY $0 $USER@$NODE:~/$PPSS_HOME_DIR
    set_error $?
    scp -q $SSH_OPTS_NODE $SSH_KEY $KEY $USER@$NODE:~/$PPSS_HOME_DIR
    set_error $?
    scp -q $SSH_OPTS_NODE $SSH_KEY $CONFIG $USER@$NODE:~/$PPSS_HOME_DIR
    set_error $?
    scp -q $SSH_OPTS_NODE $SSH_KEY known_hosts $USER@$NODE:~/$PPSS_HOME_DIR
    set_error $?

    if [ ! -z "$SCRIPT" ]
    then
        scp -q $SSH_OPTS_NODE $SSH_KEY $SCRIPT $USER@$NODE:~/$PPSS_HOME_DIR
        set_error $?
    fi
    if [ ! -z "$INPUT_FILE" ]
    then
        scp -q $SSH_OPTS_NODE $SSH_KEY $INPUT_FILE $USER@$NODE:~/$PPSS_HOME_DIR
        set_error $?
    fi

    if [ "$ERROR" == "0" ]
    then
        log DSPLY "PPSS installed on node $NODE."
    else
        log DSPLY "PPSS failed to install on $NODE."
    fi

    # Close the ssh master connection again.
    kill $SSH_PID
}
deploy_ppss () {
    #
    # Deploy PPSS to all nodes listed in $NODES_FILE.
    #
    # Exits with 1 when the nodes file, the private ssh key or the
    # configured $SCRIPT cannot be found. Otherwise starts one background
    # 'deploy' per node and logs whether $SSH_SERVER is itself a node.
    #
    if [[ -z "$NODES_FILE" || ! -e "$NODES_FILE" ]]
    then
        log ERROR "No file containing list of nodes missing / not specified."
        set_status ERROR
        cleanup
        exit 1
    fi

    exec_cmd "mkdir $PPSS_HOME_DIR/$PPSS_NODE_STATUS"

    # The key file is the second word of $SSH_KEY.
    KEY=$(echo $SSH_KEY | cut -d " " -f 2)
    if [[ -z "$KEY" || ! -e "$KEY" ]]
    then
        log ERROR "Private SSH key $KEY not found."
        cleanup
        set_status "ERROR"
        exit 1
    fi

    if [[ -n "$SCRIPT" && ! -e "$SCRIPT" ]]
    then
        log ERROR "Script $SCRIPT not found."
        set_status "ERROR"
        cleanup
        exit 1
    fi

    INSTALLED_ON_SSH_SERVER=0
    for NODE in $(cat $NODES_FILE)
    do
        deploy "$NODE" &
        # Short pause between the start of two deployments.
        case "$ARCH" in
            SunOS) sleep 1 ;;
            *)     sleep 0.1 ;;
        esac
        if [ "$NODE" == "$SSH_SERVER" ]
        then
            INSTALLED_ON_SSH_SERVER=1
        fi
    done

    case "$INSTALLED_ON_SSH_SERVER" in
        0) log DEBUG "SSH SERVER $SSH_SERVER is not a node." ;;
        *) log DEBUG "SSH SERVER $SSH_SERVER is also a node." ;;
    esac
}
start_ppss_on_node () {
    #
    # Start PPSS in node mode on node $1, inside a detached screen session
    # named PPSS, using the config file in the PPSS home directory.
    #
    NODE="$1"
    local remote_cmd="cd $PPSS_HOME_DIR ; screen -d -m -S PPSS ~/$PPSS_HOME_DIR/$0 node --config ~/$PPSS_HOME_DIR/$CONFIG"

    log DSPLY "Starting PPSS on node $NODE."
    ssh $SSH_KEY $USER@$NODE \
        -o ConnectTimeout=5 \
        -o GlobalKnownHostsFile=./known_hosts \
        "$remote_cmd"
}
test_server () {
    #
    # Test whether the remote server works as expected.
    #
    # Without $SSH_SERVER only a debug message is logged (stand-alone mode).
    # Otherwise a test command is executed through exec_cmd, an ssh master
    # connection is started in the background (pid kept in $SSH_MASTER_PID)
    # and the remote PPSS directory is created when it does not exist yet.
    #
    if [ -z "$SSH_SERVER" ]
    then
        log DEBUG "No remote server specified, assuming stand-alone mode."
        return
    fi

    exec_cmd "date >> /dev/null"
    check_status "$?" "$FUNCNAME" "Server $SSH_SERVER could not be reached"

    ssh -N -M $SSH_OPTS $SSH_KEY $USER@$SSH_SERVER &
    SSH_MASTER_PID="$!"
    log DEBUG "SSH Master pid is $SSH_MASTER_PID"
    log DSPLY "Connected to server: $SSH_SERVER"

    if ! does_file_exist "$PPSS_HOME_DIR/$PPSS_DIR" && [ -n "$SSH_SERVER" ]
    then
        log DEBUG "Remote PPSS home directory $PPSS_HOME_DIR/$PPSS_DIR does not exist. Creating."
        exec_cmd "mkdir -p $PPSS_HOME_DIR/$PPSS_DIR"
    fi
}
get_no_of_cpus () {
    #
    # Determine how many jobs to run in parallel and store the result in
    # MAX_NO_OF_RUNNING_JOBS.
    #
    # $1 - "yes" (default when empty): count logical processors.
    #      "no": count physical cores only (hyperthreading disabled).
    #
    # Exits with 1 when no number could be obtained.
    #
    HPT=$1
    NUMBER=""
    if [ -z "$HPT" ]
    then
        HPT=yes
    fi

    # Stop the script (through check_status) when the detection command failed.
    got_cpu_info () {
        ERROR="$1"
        check_status "$ERROR" "$FUNCNAME" "cannot determine number of cpu cores. Specify with -p."
    }

    if [ "$HPT" == "yes" ]
    then
        if [ "$ARCH" == "Linux" ]
        then
            NUMBER=$(grep -c ^processor "$CPUINFO")
            got_cpu_info "$?"
        elif [ "$ARCH" == "Darwin" ]
        then
            NUMBER=$(sysctl -a hw | grep -w logicalcpu | awk '{ print $2 }')
            got_cpu_info "$?"
        elif [ "$ARCH" == "FreeBSD" ]
        then
            NUMBER=$(sysctl hw.ncpu | awk '{ print $2 }')
            got_cpu_info "$?"
        elif [ "$ARCH" == "SunOS" ]
        then
            NUMBER=$(psrinfo | grep -c on-line)
            got_cpu_info "$?"
        elif [ -e "$CPUINFO" ]
        then
            NUMBER=$(grep -c ^processor "$CPUINFO")
            got_cpu_info "$?"
        fi
        if [ ! -z "$NUMBER" ]
        then
            log DSPLY "Found $NUMBER logic processors."
        fi
    elif [ "$HPT" == "no" ]
    then
        log DSPLY "Hyperthreading is disabled."
        if [ "$ARCH" == "Linux" ]
        then
            # Test the exit status of grep itself: '[ "$?" ]' is true for
            # any value, which made the fallback branches unreachable.
            if grep -q 'physical id' "$CPUINFO"
            then
                PHYSICAL=$(grep 'physical id' "$CPUINFO" | sort -u | wc -l | awk '{ print $1 }')
                if [ "$PHYSICAL" == "1" ]
                then
                    log DSPLY "Found $PHYSICAL physical CPU."
                else
                    log DSPLY "Found $PHYSICAL physical CPUs."
                fi
                if grep -q 'core id' "$CPUINFO"
                then
                    log DEBUG "Starting job only for each physical core on all physical CPU(s)."
                    NUMBER=$(grep 'core id' "$CPUINFO" | sort -u | wc -l | awk '{ print $1 }')
                    log DSPLY "Found $NUMBER physical cores."
                else
                    log DSPLY "Single core processor(s) detected."
                    log DSPLY "Starting job for each physical CPU."
                    NUMBER=$PHYSICAL
                fi
            else
                log INFO "No 'physical id' section found in $CPUINFO, typical for older cpus."
                NUMBER=$(grep -c ^processor "$CPUINFO")
                got_cpu_info "$?"
            fi
        elif [ "$ARCH" == "Darwin" ]
        then
            NUMBER=$(sysctl -a hw | grep -w physicalcpu | awk '{ print $2 }')
            got_cpu_info "$?"
        elif [ "$ARCH" == "FreeBSD" ]
        then
            NUMBER=$(sysctl hw.ncpu | awk '{ print $2 }')
            got_cpu_info "$?"
        else
            NUMBER=$(grep "cpu cores" "$CPUINFO" | cut -d ":" -f 2 | uniq | sed -e 's/ //g')
            got_cpu_info "$?"
        fi
    fi

    if [ ! -z "$NUMBER" ]
    then
        MAX_NO_OF_RUNNING_JOBS=$NUMBER
    else
        log ERROR "Number of CPUs not obtained."
        log ERROR "Please specify manually with -p."
        set_status "ERROR"
        exit 1
    fi
}
random_delay () {
    #
    # Sleep for a random number of seconds between 0 and $1 - 1.
    # Without an argument an error is logged and the script exits with 1.
    #
    ARGS="$1"
    if [ -n "$ARGS" ]
    then
        NUMBER=$RANDOM
        (( NUMBER %= $ARGS ))
        sleep "$NUMBER"
        return
    fi

    log ERROR "$FUNCNAME Function random delay, no argument specified."
    set_status "ERROR"
    exit 1
}
#
# Store an escaped copy of $1 in ITEM_ESCAPED ($1 itself is kept in TMP).
#
# The item is piped through a chain of sed substitutions, one for each of
# these characters: space, single quote, |, &, ;, >, <, ( and ). Each
# substitution puts backslash escaping in front of the character.
# Callers in this file pass the result to does_file_exist, exec_cmd and
# scp remote paths; presumably the amount of backslashes is tuned to survive
# those extra quoting layers - TODO confirm before changing any of them.
#
# NOTE(review): the timestamp line in the middle of the pipeline looks like
# an artifact of the blame view this file was copied from - confirm against
# the upstream source.
#
escape_item () {
TMP="$1"
ITEM_ESCAPED=`echo "$TMP" | \
sed s/\\ /\\\\\\\\\\\\\\ /g | \
sed s/\\'/\\\\\\\\\\\\\\'/g | \
sed s/\\|/\\\\\\\\\\\\\\|/g | \
sed s/\&/\\\\\\\\\\\\\\&/g | \
sed s/\;/\\\\\\\\\\\\\\;/g | \
sed s/\>/\\\\\\\\\\>/g | \
sed s/\</\\\\\\\\\\</g | \
2010-01-31 22:31:22 +00:00
sed s/\(/\\\\\\\\\\(/g | \
sed s/\)/\\\\\\\\\\)/g `
}
download_item () {
    #
    # Copy item $1 from the source location to $PPSS_LOCAL_TMPDIR.
    #
    # Returns 1 at once unless DOWNLOAD_TO_NODE is "1" and VIRTUAL is not
    # "1". The item is looked up with does_file_exist; when it is not found
    # it is marked virtual (VIRTUAL=1) and nothing is transferred. Otherwise
    # it is fetched with scp (SECURE_COPY=1 and an ssh server) or with cp.
    #
    if [ "$DOWNLOAD_TO_NODE" != "1" ] || [ "$VIRTUAL" == "1" ]
    then
        return 1
    fi

    ITEM="$1"
    VIRTUAL="0"
    ERR_STATE="0"

    escape_item "$ITEM"
    if [ "$RECURSION" = "1" ]
    then
        does_file_exist "$ITEM_ESCAPED"
        ERR_STATE="$?"
        DOWNLOAD_ITEM="$ITEM"
        LOCAL_DIR=$(dirname "$DOWNLOAD_ITEM")
    else
        does_file_exist "$SRC_DIR/$ITEM_ESCAPED"
        ERR_STATE="$?"
        DOWNLOAD_ITEM="$SRC_DIR/$ITEM"
    fi

    case "$ERR_STATE" in
        0)
            log DEBUG "$FUNCNAME Remote item $ITEM exists"
            VIRTUAL=0
            ;;
        *)
            log DEBUG "$FUNCNAME Remote item $ITEM does NOT exist"
            VIRTUAL=1
            ;;
    esac

    if [ "$DOWNLOAD_TO_NODE" != "1" ] || [ "$VIRTUAL" != "0" ]
    then
        log DEBUG "No transfer of item $ITEM to local workpath."
        return
    fi

    log DEBUG "Transfering item $ITEM from source to local disk."
    if [ "$SECURE_COPY" == "1" ] && [ -n "$SSH_SERVER" ]
    then
        escape_item "$DOWNLOAD_ITEM"
        if [ "$RECURSION" == "1" ]
        then
            # Rebuild the directory of the item below the local tmp dir.
            mkdir -p "$PPSS_LOCAL_TMPDIR/$LOCAL_DIR"
            log DEBUG "$SSH_SERVER:$ITEM_ESCAPED $PPSS_LOCAL_TMPDIR/$LOCAL_DIR"
            scp -q $SSH_OPTS $SSH_KEY $USER@$SSH_SERVER:"$ITEM_ESCAPED" ./$PPSS_LOCAL_TMPDIR/"$LOCAL_DIR"
            log DEBUG "Exit code of remote transfer is $?"
        else
            log DEBUG "$SSH_SERVER:$ITEM_ESCAPED $PPSS_LOCAL_TMPDIR"
            scp -q $SSH_OPTS $SSH_KEY $USER@$SSH_SERVER:"$ITEM_ESCAPED" $PPSS_LOCAL_TMPDIR
            log DEBUG "Exit code of remote transfer is $?"
        fi
    else
        cp "$ITEM" $PPSS_LOCAL_TMPDIR
        log DEBUG "Exit code of local transfer is $?"
    fi
}
upload_item () {
    #
    # Transfer the output of an item to its destination.
    #
    # $1 - local output item (OUTPUT_ITEM), $2 - item directory (ITEMDIR).
    #
    # Returns 1 when UPLOAD_TO_SERVER is not "1". With SECURE_COPY=1 the
    # contents of $1 are copied with scp to the remote output directory and
    # the local copy is removed on success; otherwise $1 is copied to $2
    # with cp.
    #
    if [ "$UPLOAD_TO_SERVER" != "1" ]
    then
        log DEBUG "Upload to server is disabled."
        return 1
    fi

    OUTPUT_ITEM="$1"
    ITEMDIR="$2"
    log DEBUG "Uploading item $OUTPUT_ITEM."

    if [ "$SECURE_COPY" != "1" ]
    then
        cp "$OUTPUT_ITEM" "$ITEMDIR"
        ERROR="$?"
        if [ "$ERROR" != "0" ]
        then
            log DEBUG "ERROR - uploading of $OUTPUT_ITEM vi CP failed."
        fi
        return
    fi

    # With recursion the remote directory mirrors the item directory.
    if [ "$RECURSION" = "1" ]
    then
        escape_item "$REMOTE_OUTPUT_DIR$ITEMDIR"
    else
        escape_item "$REMOTE_OUTPUT_DIR"
    fi
    DIR_ESCAPED="$ITEM_ESCAPED"
    exec_cmd "mkdir -p $DIR_ESCAPED"

    scp -q $SSH_OPTS $SSH_KEY "$OUTPUT_ITEM"/* $USER@$SSH_SERVER:"$DIR_ESCAPED"
    ERROR="$?"
    if [ "$ERROR" == "0" ]
    then
        log DEBUG "Upload of item $OUTPUT_ITEM success"
        rm -rf ./"$OUTPUT_ITEM"
    else
        log ERROR "Uploading of $OUTPUT_ITEM via SCP failed."
    fi
}
lock_item () {
    #
    # Try to lock item $1 by creating a directory named after its MD5 hash
    # in $ITEM_LOCK_DIR.
    #
    # Returns the exit code of the mkdir executed through exec_cmd (0 means
    # the lock was obtained). In inotify daemon mode no lock is taken and 0
    # is returned.
    #
    if [ "$INOTIFY" = "1" ] && [ "$DAEMON" = "1" ]
    then
        return 0
    fi

    ITEM="$1"
    LOCK_FILE_NAME_MD5=$(echo "$ITEM" | $MD5 | awk '{ print $1 }')
    ITEM_LOCK_FILE="$ITEM_LOCK_DIR/$LOCK_FILE_NAME_MD5"
    log DEBUG "Locking item $ITEM_LOCK_FILE"
    exec_cmd "mkdir $ITEM_LOCK_FILE >> /dev/null 2>&1"
    ERROR="$?"
    return "$ERROR"
}
2010-04-15 21:46:27 +00:00
get_input_lock () {
    #
    # Obtain the input lock by creating the directory $INPUT_LOCK through
    # exec_cmd. While the mkdir fails (the lock is held by someone else)
    # the attempt is repeated every 5 seconds.
    #
    # The exit status of exec_cmd is tested directly; '[ "$?" ]' is true
    # for every value, so the lock always seemed to be obtained.
    #
    until exec_cmd "mkdir $INPUT_LOCK >> /dev/null 2>&1 "
    do
        log DEBUG "Input lock is present...sleeping.."
        sleep 5
    done
    log DEBUG "Input lock is obtained..."
}
release_input_lock () {
    #
    # Release the input lock by removing $INPUT_LOCK through exec_cmd.
    # Returns 0 when the removal succeeded and 1 when it failed.
    #
    # The exit status of exec_cmd is tested directly; '[ "$?" ]' is true
    # for every value, so a failure was never noticed.
    #
    if exec_cmd "rm -rf $INPUT_LOCK"
    then
        log DEBUG "Input lock was released..."
        return 0
    fi
    log ERROR "Input lock was already gone...this should never happen..."
    return 1
}
2010-07-14 21:06:11 +00:00
list_all_input_items () {
    #
    # Print every line of the file $LISTOFITEMS on stdout, unchanged.
    #
    # 'IFS= read -r' keeps backslashes and surrounding white space intact,
    # and the extra test prints a last line that has no trailing newline.
    #
    while IFS= read -r line || [ -n "$line" ]
    do
        printf '%s\n' "$line"
    done < "$LISTOFITEMS"
}
remove_processed_items_from_input_file () {
    #
    # Remove all items that have already been processed from the input
    # list.
    #
    # Processed items are read from the file $LIST_OF_PROCESSED_ITEMS (when
    # it exists) into PROCESSED_ITEMS. Every line of $LISTOFITEMS that is
    # not in that list is kept, in its original order, in UNPROCESSED_ITEMS
    # and written back to $LISTOFITEMS.
    #
    # Returns 1 without touching the input list when there are no processed
    # items, when MODE is "status" or when the input list is missing/empty.
    #
    UNPROCESSED_ITEMS=""
    if [ -e "$LIST_OF_PROCESSED_ITEMS" ]
    then
        PROCESSED_ITEMS=$(cat "$LIST_OF_PROCESSED_ITEMS")
    fi
    log DEBUG "Running $FUNCNAME"

    if [ -z "$PROCESSED_ITEMS" ]
    then
        log DEBUG "Variable processed_items is empty."
        return 1
    fi
    if [ "$MODE" = "status" ]
    then
        log DEBUG "Mode is status."
        return 1
    fi
    if [ ! -e "$LISTOFITEMS" ]
    then
        echo "$LISTOFITEMS does not exist!"
        return 1
    fi
    # -s: file exists and is not empty ('wc -l file' also prints the file
    # name, so comparing its output with "0" never matched).
    if [ ! -s "$LISTOFITEMS" ]
    then
        echo "$LISTOFITEMS exists but is empty."
        return 1
    fi

    log DEBUG "Now removing processed items from input."
    # Single pass: the processed items (stdin) are loaded into a lookup
    # table, then every non-empty input line that is not in it is printed.
    # Lines are compared as exact strings; nothing is word-split or globbed.
    UNPROCESSED_ITEMS=$(printf '%s\n' "$PROCESSED_ITEMS" | \
        awk 'NR == FNR { if ($0 != "") seen[$0] = 1; next }
             $0 != "" && !($0 in seen)' - "$LISTOFITEMS")
    printf '%s\n' "$UNPROCESSED_ITEMS" > "$LISTOFITEMS"
}
2010-01-31 22:31:22 +00:00
get_all_items () {
    #
    # Build the list of items to process in the file $LISTOFITEMS.
    #
    # The items come from (a) the files in $SRC_DIR, listed locally or, when
    # $SSH_SERVER is set, on the server, or (b) $INPUT_FILE, where "-"
    # means stdin. Already processed items are removed from the list, the
    # number of lines is stored in SIZE_OF_INPUT and GLOBAL_COUNTER is reset
    # to 1. Exits with 1 when there is nothing to do (non-daemon mode).
    #
    # The input lock is released only when this call acquired it.
    #
    local input_locked=0
    if [ "$DAEMON" == "1" ] && [ "$INOTIFY" = "0" ] && [ "$ENABLE_INPUT_LOCK" = "1" ]
    then
        get_input_lock
        input_locked=1
    fi

    GLOBAL_COUNTER=1
    if [ -e "$LISTOFITEMS" ]
    then
        rm "$LISTOFITEMS"
    fi
    count=0

    if [ -z "$INPUT_FILE" ]
    then
        if [ ! -z "$SSH_SERVER" ] # Are we running stand-alone or as a node?
        then
            if [ "$RECURSION" == "1" ]
            then
                exec_cmd "find $SRC_DIR/ ! -type d" > "$LISTOFITEMS"
                check_status "$?" "$FUNCNAME" "Could not list files within remote source directory."
            else
                log DEBUG "Recursion is disabled."
                # NOTE(review): '-depth 1' is BSD find syntax; GNU find
                # probably needs -maxdepth here - confirm on Linux.
                exec_cmd "find $SRC_DIR/ -depth 1 ! -type d" > "$LISTOFITEMS"
                check_status "$?" "$FUNCNAME" "Could not list files within remote source directory."
            fi
        elif [ -e "$SRC_DIR" ]
        then
            if [ "$RECURSION" == "1" ]
            then
                log DEBUG "Recursion is enabled."
                find "$SRC_DIR"/ ! -type d >> "$LISTOFITEMS"
                check_status "$?" "$FUNCNAME" "Could not list files within local source directory."
            else
                log DEBUG "Recursion is disabled."
                # NOTE(review): see the remark about '-depth 1' above.
                find "$SRC_DIR"/ -depth 1 ! -type d >> "$LISTOFITEMS"
                check_status "$?" "$FUNCNAME" "Could not list files within local source directory."
            fi
            if [ ! -e "$LISTOFITEMS" ]
            then
                log ERROR "Local input file is not created, something is wrong. Bug?"
                set_status "ERROR"
                cleanup
                exit 1
            fi
        else
            ITEMS=""
        fi
    else # Using an input file as the source of our items or STDIN.
        if [ ! -z "$SSH_SERVER" ] # Are we running stand-alone or as a node?
        then
            log DEBUG "Running as node, input file has been pushed (hopefully)."
        fi
        if [ ! -e "$INPUT_FILE" ] && [ ! "$INPUT_FILE" == "-" ]
        then
            log ERROR "Input file $INPUT_FILE does not exist."
            set_status "ERROR"
            cleanup
            exit 1
        fi
        if [ ! "$INPUT_FILE" == "-" ]
        then
            cp "$INPUT_FILE" "$LISTOFITEMS"
            check_status "$?" "$FUNCNAME" "Copy of input file failed!"
        else
            log DEBUG "Reading from stdin.."
            # -r / empty IFS: keep backslashes and white space of the items.
            while IFS= read -r LINE
            do
                printf '%s\n' "$LINE" >> "$LISTOFITEMS"
            done
        fi
        if [ ! -e "$LISTOFITEMS" ]
        then
            log ERROR "Input is empty."
            infanticide
            terminate_listener
            cleanup
            exit 1
        fi
    fi

    remove_processed_items_from_input_file

    if [ "$input_locked" = "1" ]
    then
        release_input_lock
    fi

    # No list file (e.g. the source directory does not exist) means no input.
    if [ -e "$LISTOFITEMS" ]
    then
        SIZE_OF_INPUT=$(wc -l "$LISTOFITEMS" | awk '{ print $1 }')
    else
        SIZE_OF_INPUT=0
    fi

    if [ "$SIZE_OF_INPUT" -le "0" ] && [ "$DAEMON" = "0" ]
    then
        log ERROR "Source file/dir seems to be empty."
        set_status "STOPPED"
        cleanup
        exit 1
    fi
}
get_item () {
    #
    # Fetch the next item from $LISTOFITEMS into $ITEM.
    #
    # Line number GLOBAL_COUNTER of the list is read and the counter is
    # advanced. Empty lines and items that cannot be locked are skipped.
    # Locking (followed by download_item) is only done when $SSH_SERVER is
    # set or LOCAL_LOCKING is "1".
    #
    # Returns 0 when $ITEM holds an item, 1 when a stop was requested or
    # no items are left.
    #
    while true
    do
        check_for_interrupt
        if [ "$STOP" == "1" ]
        then
            log DEBUG "Found stop signal."
            return 1
        fi

        # Return error if the size of the list is unknown.
        if [ -z "$SIZE_OF_INPUT" ]
        then
            log DEBUG "Got no size of input..."
            return 1
        fi

        # Return error if the list is empty.
        if [ "$SIZE_OF_INPUT" -le "0" ]
        then
            return 1
        fi

        # Check if all items have been processed.
        if [ "$GLOBAL_COUNTER" -gt "$SIZE_OF_INPUT" ]
        then
            log DEBUG "Counter $GLOBAL_COUNTER is greater than sizeof input $SIZE_OF_INPUT."
            return 1
        fi

        ITEM="$(sed -n $GLOBAL_COUNTER\p $LISTOFITEMS)"
        ((GLOBAL_COUNTER++))

        if [ -z "$ITEM" ]
        then
            log DEBUG "Item was emtpy..."
            continue
        fi

        if [ -z "$SSH_SERVER" ] && [ ! "$LOCAL_LOCKING" = "1" ]
        then
            return 0
        fi

        lock_item "$ITEM"
        LOCK="$?"
        if [ "$LOCK" = "0" ]
        then
            log DEBUG "Got lock on $ITEM"
            download_item "$ITEM"
            return 0
        fi
        log DEBUG "Item $ITEM is locked."
    done
}
start_new_worker () {
    #
    # This function kicks the listener to start a worker process by
    # writing $START_KEY to the fifo. Nothing is written when the script
    # is being sourced (are_we_sourced succeeds).
    #
    if are_we_sourced
    then
        return 0
    fi
    echo "$START_KEY" >> "$FIFO"
    return $?
}
stop-ppss () {
    #
    # Take the stop time, compute the total running time since $START_PPSS
    # with 'elapsed' and display the resulting $PROCESSING_TIME.
    #
    STOP_PPSS=$(get_time_in_seconds)
    elapsed "$START_PPSS" "$STOP_PPSS"
    log DSPLY "$PROCESSING_TIME"
}
2010-01-31 22:31:22 +00:00
elapsed () {
    #
    # Compute the time between two timestamps (in seconds).
    #
    # $1 - start time, $2 - end time.
    #
    # Sets ELAPSED (seconds), HOURS, MINS, SECS and REMAINDER, and stores
    # the text "Total processing time (hh:mm:ss): HH:MM:SS" in
    # PROCESSING_TIME.
    #
    # Shell arithmetic replaces five calls to the external 'expr'.
    #
    BEFORE="$1"
    AFTER="$2"
    ELAPSED=$(( AFTER - BEFORE ))
    HOURS=$(( ELAPSED / 3600 ))
    REMAINDER=$(( ELAPSED % 3600 ))
    MINS=$(( REMAINDER / 60 ))
    SECS=$(( REMAINDER % 60 ))
    PROCESSING_TIME=$(printf "Total processing time (hh:mm:ss): %02d:%02d:%02d" "$HOURS" "$MINS" "$SECS")
}
mail_on_error () {
    #
    # Mail the log file of a failed item to $EMAIL.
    #
    # $1 - item, $2 - log file of the item.
    #
    # Only active when MAIL_ON_ERROR is "1"; the result of sending the mail
    # is logged.
    #
    ITEM="$1"
    LOGFILE="$2"

    [ "$MAIL_ON_ERROR" = "1" ] || return 0

    if cat "$LOGFILE" | mail -s "$HOSTNAME - PPSS: procesing failed for item." "$EMAIL"
    then
        log DEBUG "Error mail sent."
    else
        log ERROR "Sending of error email failed."
    fi
}
commando () {
    #
    # Process one item ($1) with the user supplied $COMMAND.
    #
    # This function will start a chain reaction of events.
    #
    # The commando executes a command on an item and, when finished,
    # executes start_new_worker, which asks the listener for a new worker.
    # The listener receives the next item and executes this commando
    # function on it. So in essence, the commando function keeps calling
    # itself indirectly until no items are left. This forms a single
    # working queue. By executing multiple start_new_worker functions based
    # on the CPU cores available, parallel processing is achieved, with a
    # queue for each core.
    #
    # Steps: determine whether the item exists as a file, pick the output
    # directory, run the command with its output in an item log file,
    # clean up a downloaded item, upload the output and the log file.
    #
    ERR_STATE=0
    VIRTUAL=0
    ITEM="$1"

    #
    # Test if the item exists (is physical or virtual).
    # Example: a file is physical, a URL is virtual.
    #
    escape_item "$ITEM"
    if [ "$RECURSION" == "1" ]
    then
        does_file_exist "$ITEM_ESCAPED"
    else
        does_file_exist "$SRC_DIR/$ITEM_ESCAPED"
    fi
    ERR_STATE="$?"

    #
    # If recursion is used, a file name of an item may not be unique.
    # The same file name can be used for files in different directories.
    # Therefore, the output directory must reflect the original directory
    # structure. If recursion is not used, the output directory is named
    # after the item. The output of a virtual item goes straight into
    # the local output directory.
    #
    if [ "$ERR_STATE" == "0" ]
    then
        VIRTUAL="0"
        if [ "$RECURSION" == "1" ]
        then
            DIR_NAME=$(dirname "$ITEM")
            ITEM_NO_PATH=$(basename "$ITEM")
            OUTPUT_DIR=$PPSS_LOCAL_OUTPUT/"$DIR_NAME"/
        else
            DIR_NAME="$SRC_DIR"
            ITEM_NO_PATH="$ITEM"
            OUTPUT_DIR=$PPSS_LOCAL_OUTPUT/"$ITEM_NO_PATH"
        fi
    else
        VIRTUAL="1"
        DIR_NAME=""
        ITEM_NO_PATH="$ITEM"
        OUTPUT_DIR="$PPSS_LOCAL_OUTPUT"
    fi
    OUTPUT_FILE="$ITEM_NO_PATH"

    #
    # Decide if an item must be transferred from server to the node
    # or be processed in-place (NFS / SMB mount?)
    #
    if [ "$DOWNLOAD_TO_NODE" == "0" ]
    then
        if [ "$VIRTUAL" == "1" ]
        then
            log DEBUG "Item is virtual, thus not downloading."
        else
            log DEBUG "Using item straight from the server, no copy."
            if [ "$RECURSION" == "0" ]
            then
                ITEM="$SRC_DIR/$ITEM"
            fi
        fi
    elif [ "$RECURSION" == "1" ]
    then
        ITEM="$PPSS_LOCAL_TMPDIR/$ITEM"
    else
        ITEM="$PPSS_LOCAL_TMPDIR/$ITEM_NO_PATH"
    fi

    #
    # The log file of an item is named after the MD5 hash of the item.
    # An existing log file means the item has been processed before.
    #
    LOG_FILE_NAME=$(echo "$ITEM" | $MD5 | awk '{ print $1 }')
    ITEM_LOG_FILE="$JOB_LOG_DIR/$LOG_FILE_NAME"
    if [ -e "$ITEM_LOG_FILE" ] && [ "$DISABLE_SKIPPING" = "0" ]
    then
        log DEBUG "Item is already processed, skipping..."
        start_new_worker
        return 0
    fi

    #
    # Create the local output directory that will contain the output of
    # the command. Example: when converting wav to mp3, the mp3 will be
    # put in this directory.
    #
    if [ ! -z "$OUTPUT_DIR" ]
    then
        log DEBUG "Local output dir is $OUTPUT_DIR"
        mkdir -p "$OUTPUT_DIR"
    fi

    ERROR=""

    #
    # Some formatting of item log files. printf '%s' writes the item name
    # as-is, also when it contains backslashes.
    #
    DATE=$(date +%b\ %d\ %H:%M:%S)
    {
        echo "===== PPSS Item Log File ====="
        printf 'Host:\t\t%s\n' "$HOSTNAME"
        printf 'Process:\t%s\n' "$PID"
        printf 'Item:\t\t%s\n' "$ITEM"
        printf 'Start date:\t%s\n' "$DATE"
        echo
    } > "$ITEM_LOG_FILE"

    #
    # The actual execution of the command as specified by the -c option.
    # A command that mentions $ITEM itself is evaluated as it is; otherwise
    # the item is appended to the command as its last argument.
    #
    BEFORE=$(get_time_in_seconds)
    if printf '%s\n' "$COMMAND" | grep -q -i -F '$ITEM'
    then
        eval "$COMMAND" >> "$ITEM_LOG_FILE" 2>&1
        ERROR="$?"
    else
        eval '$COMMAND"$ITEM" >> "$ITEM_LOG_FILE" 2>&1'
        ERROR="$?"
    fi
    AFTER=$(get_time_in_seconds)

    echo >> "$ITEM_LOG_FILE"

    # Some error logging. Success or fail.
    if [ ! "$ERROR" == "0" ]
    then
        mail_on_error "$ITEM" "$ITEM_LOG_FILE"
        printf 'Status:\t\tFAILURE\n' >> "$ITEM_LOG_FILE"
    else
        printf 'Status:\t\tSUCCESS\n' >> "$ITEM_LOG_FILE"
    fi

    #
    # If part of a cluster, remove the downloaded item after it has been
    # processed as not to fill up disk space.
    #
    if [ "$DOWNLOAD_TO_NODE" == "1" ]
    then
        if [ -e "$ITEM" ]
        then
            rm -rf "$ITEM"
        else
            log DEBUG "There is no local file to remove.. strange..."
        fi
    fi

    #
    # Determine the remote output dir.
    #
    escape_item "$DIR_NAME"
    ITEM_OUTPUT_DIR="$REMOTE_OUTPUT_DIR/$ITEM_ESCAPED"
    if [ "$DOWNLOAD_TO_NODE" == "0" ]
    then
        log DEBUG "Download to node is disabled."
    elif [ "$DIR_NAME" == "." ]
    then
        DIR_NAME=""
    fi

    #
    # Upload the output back to the server.
    #
    upload_item "$OUTPUT_DIR" "$DIR_NAME"

    #
    # Finish the log file and upload it to the server.
    #
    elapsed "$BEFORE" "$AFTER"
    echo "$PROCESSING_TIME" >> "$ITEM_LOG_FILE"
    echo >> "$ITEM_LOG_FILE"
    if [ ! -z "$SSH_SERVER" ]
    then
        log DEBUG "Uploading item log file $ITEM_LOG_FILE to master $PPSS_HOME_DIR/$JOB_LOG_DIR"
        # Test scp itself: '[ ! "$?" ]' is never true, so a failed upload
        # was never reported.
        if ! scp -q $SSH_OPTS $SSH_KEY "$ITEM_LOG_FILE" $USER@$SSH_SERVER:$PPSS_HOME_DIR/$JOB_LOG_DIR
        then
            log DEBUG "Uploading of item log file failed."
        fi
    fi

    start_new_worker
}
infanticide () {
log DEBUG "Running $FUNCNAME"
#
# This code is run if ctrl+c is pressed. Very important to prevent
# any child processes running after the parent has died. Keeps the system clean.
#
# This command kills all processes that are related to the master
# process as defined by $PID. All processes that have ever been
# spawned, although disowned or backgrounded will be killed...
#
# PROCLIST holds the lines of the ps listing (pid, pgid, ppid, command)
# that contain a digit and contain the text of $PID, without the grep
# processes themselves.
# NOTE(review): 'grep $PID' matches the number anywhere in the line, so
# lines of unrelated processes may be selected as well - confirm.
#
PROCLIST=`ps a -o pid,pgid,ppid,command | grep [0-9] | grep $PID | grep -v -i grep`
oldIFS=$IFS # save the field separator
IFS=$'\n' # new field separator, the end of line
for x in `echo "$PROCLIST"`
do
# Column 3 of a ps line is the parent pid, column 1 the pid itself.
MYPPID=`echo $x | awk '{ print $3 }'`
MYPID=`echo $x | awk '{ print $1 }'`
# A process is killed only when its parent is neither $PID nor 1 and it
# is not the master process ($PID) itself.
if [ ! "$MYPPID" == "$PID" ] && [ ! "$MYPPID" == "1" ]
then
if [ ! "$MYPID" == "$PID" ]
then
log DEBUG "Killing process $MYPID"
kill $MYPID >> /dev/null 2>&1
else
log DEBUG "Not killing master process..$MYPID.."
fi
else
# Processes whose parent is $PID or 1 are left alone.
log DEBUG "Not killing listener process. $MYPID.."
fi
done
# Restore the field separator saved above.
IFS=$oldIFS
}
run_command () {
    #
    # Start a background worker (commando) for an item.
    #
    # $1 - item to process; when empty the item is popped from the stack.
    #
    # Nothing is started when the maximum number of workers is already
    # active, or when the item is empty or a directory. A started item is
    # appended to $LIST_OF_PROCESSED_ITEMS and ACTIVE_WORKERS is increased.
    #
    INPUT="$1"
    log DEBUG "Current active workers is $ACTIVE_WORKERS"

    if ! [ "$ACTIVE_WORKERS" -lt "$MAX_NO_OF_RUNNING_JOBS" ]
    then
        log DEBUG "Maximum number of workers are bussy, no more additional workers..."
        return
    fi

    if [ -z "$INPUT" ]
    then
        stack_pop
        INPUT="$REGISTER"
    fi
    log INFO "Now processing $INPUT"

    if [ -z "$INPUT" ] || [ -d "$INPUT" ]
    then
        log DEBUG "Item is a directory or is empty."
        return 0
    fi

    commando "$INPUT" &
    MYPID="$!"
    disown
    PIDS="$PIDS $MYPID"
    ((ACTIVE_WORKERS++))
    log DEBUG "Increasing active workers to $ACTIVE_WORKERS"
    echo "$INPUT" >> "$LIST_OF_PROCESSED_ITEMS"
    return 0
}
display_jobs_remaining () {
    #
    # Show how many jobs (ACTIVE_WORKERS) are still running.
    # Nothing is shown unless QUIET is "0".
    #
    [ "$QUIET" == "0" ] || return 0

    if [ "$ACTIVE_WORKERS" == "1" ]
    then
        log PRCNT "One job is remaining. "
    else
        log PRCNT "$((ACTIVE_WORKERS)) jobs are remaining. "
    fi
}
2010-07-21 06:24:25 +00:00
show_eta () {
    #
    # Display an estimated time of completion.
    #
    # The estimate is only shown when QUIET is not "1", on every fifth value
    # of GLOBAL_COUNTER, and once more than MAX_NO_OF_RUNNING_JOBS items
    # count as processed. It is based on the average time per item since
    # $START_PPSS.
    #
    CURRENT_PROCESSED=$((GLOBAL_COUNTER-MAX_NO_OF_RUNNING_JOBS))
    TOTAL="$SIZE_OF_INPUT"
    START_TIME=$START_PPSS
    NOW=$(get_time_in_seconds)
    MODULO=$((GLOBAL_COUNTER % 5 ))

    [ "$QUIET" = "1" ] && return 0
    [ "$CURRENT_PROCESSED" -le "0" ] && return 0
    [ "$MODULO" = "0" ] || return 0

    RUNNING_TIME=$((NOW-START_TIME))
    if [ ! "$RUNNING_TIME" -le "0" ] && [ ! "$CURRENT_PROCESSED" = "0" ] && [ "$CURRENT_PROCESSED" -gt "$MAX_NO_OF_RUNNING_JOBS" ]
    then
        TIME_PER_ITEM=$(( RUNNING_TIME / ( CURRENT_PROCESSED - MAX_NO_OF_RUNNING_JOBS ) ))
        log DEBUG "Time per item is $TIME_PER_ITEM seconds."
        TOTAL_TIME=$(( ($TIME_PER_ITEM * SIZE_OF_INPUT) + $TIME_PER_ITEM ))
        TOTAL_TIME_IN_SECONDS=$((START_TIME+TOTAL_TIME))
        # Darwin and other systems take an epoch time in a different way.
        case "$ARCH" in
            Darwin) DATE=$(date -r $TOTAL_TIME_IN_SECONDS) ;;
            *)      DATE=$(date -d @$TOTAL_TIME_IN_SECONDS) ;;
        esac
        echo
        log DSPLY "ETA: $DATE"
        echo -en "\033[2A"
    fi
}
display_progress () {
    #
    # Show the progress as a percentage (not in daemon mode).
    #
    # SIZE_OF_INPUT is re-read from $LISTOFITEMS and PERCENT is computed
    # from GLOBAL_COUNTER. While workers are active and FINISHED is "0" the
    # progress is displayed; FINISHED becomes 1 once 100 percent is reached.
    #
    [ "$DAEMON" = "0" ] || return 0

    SIZE_OF_INPUT=$(wc -l "$LISTOFITEMS" | awk '{ print $1 }')
    PERCENT=$((100 * $GLOBAL_COUNTER / $SIZE_OF_INPUT ))

    if [ "$ACTIVE_WORKERS" == "0" ] || [ ! "$FINISHED" == "0" ]
    then
        return 0
    fi

    if [ "$QUIET" == "0" ]
    then
        log PRCNT "Currently $PERCENT percent complete. Processed $GLOBAL_COUNTER of $SIZE_OF_INPUT."
        show_eta
    elif [ "$DAEMON" == "0" ]
    then
        echo -en "\r$PERCENT% --"
    fi

    if [ "$PERCENT" == "100" ]
    then
        if [ "$QUIET" == "1" ]
        then
            echo
        fi
        FINISHED=1
    fi
}
2010-01-31 22:31:22 +00:00
terminate_listener () {
    #
    # Finish a PPSS run: stop the ssh master connection (when there is
    # one), set the status to STOPPED, display the total processing time,
    # send the optional notification mail to $EMAIL and run cleanup.
    #
    log DEBUG "Running $FUNCNAME"

    if [ -z "$SSH_MASTER_PID" ]
    then
        log DEBUG "SSH master PID is empty."
    else
        kill "$SSH_MASTER_PID"
    fi

    set_status "STOPPED"
    log DEBUG "Listener stopped."

    # The closing lines are the same whether or not 100 percent was reached.
    echo
    stop-ppss
    log DSPLY "Finished. Consult $JOB_LOG_DIR for job output."

    if [ "$QUIET" == "1" ]
    then
        echo
    fi

    if [ -n "$EMAIL" ]
    then
        if ! echo "PPSS job finished." | mail -s "$HOSTNAME - PPSS has finished." "$EMAIL"
        then
            log ERROR "Sending os status mail failed."
        fi
    fi

    cleanup
}
#
# Watch SRC_DIR with inotifywait and write the path of every reported
# non-directory entry to FIFO, one path per line.
#
# inotifywait runs in monitor mode (-m), reports 'close' events (-e close),
# is quiet (-q) and prints "<watched dir><file name>" for each event.
#
inotify_listener () {
    inotifywait "$SRC_DIR" -m -e close -q --format '%w%f' |
    while IFS= read -r line    # IFS= keeps leading/trailing blanks of a name
    do
        if [ ! -d "$line" ]; then
            # printf, unlike echo, never interprets the path as an option.
            printf '%s\n' "$line" > "$FIFO"
        fi
    done
}
#
# Check whether an item is absent from PROCESSED_ITEMS.
#
# Arguments: $1 - the item to look for.
# Returns:   0 when the item is not among the (whitespace separated) words
#            of PROCESSED_ITEMS, 1 when it is, or when no argument was given.
# Globals written: VAR, STATUS, x
#
is_item_unprocessed () {

    VAR="$1"
    STATUS=0

    if [ -z "$VAR" ]; then
        log DEBUG "$FUNCNAME: something is wrong, no argument received."
        return 1
    fi

    # PROCESSED_ITEMS is deliberately unquoted: it is walked word by word.
    for x in $PROCESSED_ITEMS; do
        case "$x" in
            "$VAR") STATUS=1 ;;
        esac
    done

    log DEBUG "Is item $VAR unprocessed: $STATUS"
    return "$STATUS"
}
2010-07-18 08:14:47 +00:00
#
# Decide whether an item may be processed, based on its age.
#
# Arguments: $1 - the item (possibly a path).
# Returns:   0 when the item does not exist on disk, or when the time since
#            the value printed by the $STAT command exceeds DAEMON_FILE_AGE;
#            1 otherwise.
# Globals written: ITEM, NOW, FILEDATE, ELAPSED
#
is_item_file_and_unmodified () {

    ITEM="$1"

    if [ ! -e "$ITEM" ]; then
        log DEBUG "$FUNCNAME: file does not exist."
        return 0
    fi

    NOW=$(date +%s)
    # STAT holds a command line and is intentionally left unquoted.
    FILEDATE=$($STAT "$ITEM")
    ELAPSED="$(expr $NOW - $FILEDATE)"

    if [ "$ELAPSED" -gt "$DAEMON_FILE_AGE" ]; then
        log DEBUG "$FUNCNAME File $ITEM is aged $ELAPSED"
        return 0
    fi

    log DEBUG "$FUNCNAME File $ITEM too young $ELAPSED"
    return 1
}
#
# Handle one item in daemon mode.
#
# An item that is_item_unprocessed rejects is ignored. Otherwise, when
# is_item_file_and_unmodified accepts it, the item is appended to FIFO and
# handed to processed_stack_push; if not, it goes to stack_push.
#
# Arguments: $1 - the item.
#
process_item_as_daemon () {

    ITEM="$1"

    is_item_unprocessed "$ITEM" || return 0

    if is_item_file_and_unmodified "$ITEM"; then
        echo "$ITEM" >> "$FIFO"
        processed_stack_push "$ITEM"
        return
    fi

    stack_push "$ITEM"
}
#
# Endless polling loop for daemon mode.
#
# Each round: refresh the item list (get_all_items), feed every item that
# get_item yields to process_item_as_daemon, do the same for every entry
# that stack_pop yields in REGISTER, then sleep DAEMON_POLLING_INTERVAL.
#
daemon_listener () {
    for (( ; ; )); do
        get_all_items

        while get_item; do
            process_item_as_daemon "$ITEM"
        done

        while stack_pop; do
            process_item_as_daemon "$REGISTER"
        done

        sleep "$DAEMON_POLLING_INTERVAL"
    done
}
#
# Run daemon_listener in the background, detach it from this shell's job
# table and append its PID to PIDS (also left in MYPID).
#
start_daemon_listener () {
    daemon_listener &
    MYPID=$!
    disown
    PIDS="${PIDS} ${MYPID}"
}
#
# Reset ACTIVE_WORKERS to 0, run inotify_listener in the background, detach
# it from this shell's job table and append its PID to PIDS (also left in
# MYPID).
#
start_inotify_listener () {
    ACTIVE_WORKERS=0

    inotify_listener &
    MYPID=$!
    disown
    PIDS="${PIDS} ${MYPID}"
}
#
# Start the background listener that belongs to daemon mode.
#
# Does nothing but log when DAEMON is not "1". Otherwise INOTIFY selects
# between start_inotify_listener ("1") and start_daemon_listener (anything
# else).
#
start_as_daemon () {

    if [ "$DAEMON" != "1" ]; then
        log DEBUG "Daemon mode disabled."
        return
    fi

    log DEBUG "Daemon mode enabled."

    case "$INOTIFY" in
        1)
            log INFO "Linux inotify enabled."
            start_inotify_listener
            ;;
        *)
            start_daemon_listener
            log INFO "Linux inotify disabled."
            ;;
    esac
}
#
# Lower ACTIVE_WORKERS by one, but never below zero.
#
decrease_active_workers () {
    [ "$ACTIVE_WORKERS" -gt 0 ] || return 0
    ACTIVE_WORKERS=$(( ACTIVE_WORKERS - 1 ))
}
#
# The listener: reads events, one per line, from file descriptor 42 and
# dispatches them until there is nothing left to do.
#
# Three kinds of events are recognised:
#   - START_KEY: one worker slot is accounted for (decrease_active_workers).
#                Outside daemon mode the next item is fetched and run; when
#                no item is left and ACTIVE_WORKERS reached 0 the loop ends.
#                In daemon mode run_command is called without an argument.
#   - KILL_KEY:  infanticide is called and the loop ends.
#   - anything else is taken to be an item: it is pushed with stack_push
#     and run_command is called without an argument.
#
# When the loop ends (or fd 42 reaches end-of-file) terminate_listener runs.
#
# Note: callers pass an argument, but positional parameters are not used
# here; MAX_NO_OF_RUNNING_JOBS is read as a global.
#
# Globals written: FINISHED, ACTIVE_WORKERS, PIDS
#
listen_for_job () {

    FINISHED=0
    ACTIVE_WORKERS="$MAX_NO_OF_RUNNING_JOBS"
    PIDS=""

    log DEBUG "Listener started."
    start_as_daemon

    # -r: an event may be a file name; backslashes in it must survive.
    while read -r event <&42
    do
        display_progress
        log INFO "Current active workers is $ACTIVE_WORKERS"

        if [ "$event" = "$START_KEY" ]; then
            decrease_active_workers
            log DEBUG "Got a 'start-key' event"

            if [ "$DAEMON" = "0" ]; then
                if get_item; then
                    log DEBUG "Got an item, running command..."
                    run_command "$ITEM"
                else
                    log DEBUG "No more new items..."
                    if [ "$ACTIVE_WORKERS" = "0" ]; then
                        # No items left and no workers left.
                        break
                    else
                        display_jobs_remaining
                    fi
                fi
            else
                log DEBUG "Daemon mode: a worker finished..."
                run_command
            fi
        elif [ "$event" == "$KILL_KEY" ]; then
            infanticide
            break
        else
            log DEBUG "Event is an item."
            stack_push "$event"
            run_command
        fi
    done

    terminate_listener
}
2010-01-31 22:31:22 +00:00
#
# Announce and start MAX_NO_OF_RUNNING_JOBS workers.
#
# When DAEMON is not "0" and INOTIFY is "1" only the announcement is made
# and no worker is started here. After each started worker random_delay is
# called with MAX_DELAY, unless MAX_DELAY is "0".
#
# Globals written: i
#
start_all_workers () {

    case "$MAX_NO_OF_RUNNING_JOBS" in
        1) log DSPLY "Starting one (1) single worker." ;;
        *) log DSPLY "Starting $MAX_NO_OF_RUNNING_JOBS parallel workers." ;;
    esac

    if [ "$DAEMON" == "0" ]; then
        log DSPLY "---------------------------------------------------------"
    elif [ "$INOTIFY" = "1" ]; then
        return 0
    fi

    i=0
    while [ "$i" -lt "$MAX_NO_OF_RUNNING_JOBS" ]
    do
        start_new_worker
        log DEBUG "Starting worker $i"
        i=$(( i + 1 ))
        [ "$MAX_DELAY" == "0" ] || random_delay "$MAX_DELAY"
    done
}
#
# Fetch the status files of all nodes from the server and print one
# formatted line per node, followed by the total number of processed items.
#
# Arguments: $1 - scratch file that receives the raw ssh output; it is
#                 removed again before returning.
#
# Every line of the fetched data is read as "IP HOST STATUS PROCESSED".
# A missing or non-numeric fourth field counts as 0, so one malformed line
# (for example an ssh error message, which is captured too) cannot break
# the total.
#
# Globals read:    SSH_OPTS, SSH_KEY, USER, SSH_SERVER, PPSS_HOME_DIR,
#                  PPSS_NODE_STATUS
# Globals written: RESULT_FILE, IP, HOST, STATUS, RES, LINE, PROCESSED
# IFS is left untouched.
#
get_status_of_nodes () {

    local rest

    RESULT_FILE="$1"

    # SSH_OPTS and SSH_KEY hold several words each and must stay unquoted.
    # The glob is quoted locally so that the remote side expands it.
    ssh $SSH_OPTS $SSH_KEY "$USER@$SSH_SERVER" cat "$PPSS_HOME_DIR/$PPSS_NODE_STATUS/*" > "$RESULT_FILE" 2>&1

    # Read through fd 3 so nothing inside the loop can consume the data.
    # The '|| [ -n ... ]' part keeps a last line without trailing newline.
    while read -r IP HOST STATUS RES rest <&3 || [ -n "$IP" ]
    do
        # Skip blank lines.
        [ -n "$IP" ] || continue

        case "$RES" in
            ''|*[!0-9]*) RES="0" ;;
        esac

        # 10# forces base ten, so a count such as "08" is not read as octal.
        PROCESSED=$(( ${PROCESSED:-0} + 10#$RES ))

        LINE=$(echo "$IP $HOST $RES $STATUS" | awk '{ printf ("%-16s %-18s % 10s %10s\n",$1,$2,$3,$4) }')
        log DSPLY "$LINE"
    done 3< "$RESULT_FILE"

    log DSPLY "---------------------------------------------------------"
    LINE=$(echo "$PROCESSED" | awk '{ printf ("Total processed: % 29s\n",$1) }')
    log DSPLY "$LINE"

    rm -- "$RESULT_FILE"
}
#
# Print an overview of a distributed run: percentage complete, number of
# nodes, number of items and, through get_status_of_nodes, one line per node.
#
# Globals read:    CONFIG (sourced), SSH_KEY, LISTOFITEMS, PPSS_HOME_DIR,
#                  ITEM_LOCK_DIR, NODES_FILE
# Globals written: SSH_KEY, ITEMS, PROCESSED, TMP_STATUS, TMP_NO, HEADER,
#                  RESULT_FILE
#
show_status () {

    . "$CONFIG"

    # Turn a bare key path into the matching ssh option.
    if [ -n "$SSH_KEY" ]; then
        SSH_KEY="-i $SSH_KEY"
    fi

    get_all_items

    ITEMS=$(wc -l "$LISTOFITEMS" | awk '{ print $1 }')

    if [ -n "$ITEMS" ] && [ ! "$ITEMS" == "0" ]; then
        PROCESSED=$(exec_cmd "ls -1 $PPSS_HOME_DIR/$ITEM_LOCK_DIR 2>/dev/null | wc -l" 1)

        # Some wc implementations pad the count with blanks; a failed
        # command yields nothing or an error text. Only a plain number may
        # reach the arithmetic below.
        PROCESSED=${PROCESSED//[[:space:]]/}
        case "$PROCESSED" in
            ''|*[!0-9]*) PROCESSED=0 ;;
        esac

        TMP_STATUS=$(( 100 * PROCESSED / ITEMS ))
        log DSPLY "Status:\t\t$TMP_STATUS percent complete."
    else
        log DSPLY "Status: UNKNOWN - is PPSS deployed on nodes?"
    fi

    if [ -n "$NODES_FILE" ]; then
        TMP_NO=$(cat "$NODES_FILE" | wc -l)
        log DSPLY "Nodes:\t $TMP_NO"
    fi

    log DSPLY "Items:\t\t$ITEMS"

    log DSPLY "---------------------------------------------------------"
    HEADER=$(echo IP-address Hostname Processed Status | awk '{ printf ("%-16s %-18s % 10s %10s\n",$1,$2,$3,$4) }')
    log DSPLY "$HEADER"
    log DSPLY "---------------------------------------------------------"

    # get_status_of_nodes accumulates into PROCESSED, so start from zero.
    PROCESSED=0

    # Scratch file for the raw node status; get_status_of_nodes removes it.
    # Prefer an unpredictable name from mktemp, fall back to a random name
    # in the current directory when mktemp is not usable.
    RESULT_FILE=$(mktemp "${TMPDIR:-/tmp}/ppss-status.XXXXXX" 2>/dev/null) \
        || RESULT_FILE="$RANDOM$RANDOM.deleteme"

    get_status_of_nodes "$RESULT_FILE"
}
#
# Entry point: run the action that belongs to MODE.
#
#   node      - prepare the working directory, test the server, then start
#               the listener and the workers.
#   start     - call start_ppss_on_node for every entry of NODES_FILE.
#   config    - write the configuration file.
#   stop      - create STOP_SIGNAL through exec_cmd.
#   pause     - create PAUSE_SIGNAL through exec_cmd.
#   continue  - remove STOP_SIGNAL and/or PAUSE_SIGNAL when present.
#   deploy    - run deploy_ppss and wait for it, logging to ppss-deploy.txt.
#   status    - test the server and show the status overview.
#   erase     - run erase_ppss.
#   kill      - kill every bash process of this user that matches 'ppss'.
#   (other)   - stand-alone run: start the listener and the workers.
#
# All modes except 'node' and the default one leave the script through
# 'exit'. In those two modes the listener is started in the background and
# its PID is kept in LISTENER_PID.
#
main () {

    case "$MODE" in
        node )
            create_working_directory
            test_server
            init_vars
            get_all_items
            # The listener's output is deliberately left on the terminal.
            listen_for_job "$MAX_NO_OF_RUNNING_JOBS" &
            LISTENER_PID=$!
            start_all_workers
            ;;
        start )
            # This option only starts all nodes.
            LOGFILE=/dev/null
            display_header
            if [ ! -e "$NODES_FILE" ]
            then
                log ERROR "File $NODES_FILE with list of nodes does not exist."
                set_status "STOPPED"
                cleanup
                exit 1
            else
                # One node per word of the file; splitting is intended.
                for NODE in $(cat "$NODES_FILE")
                do
                    start_ppss_on_node "$NODE"
                done
            fi
            cleanup
            exit 0
            ;;
        config )
            LOGFILE=/dev/null
            display_header
            log DSPLY "Generating configuration file $CONFIG"
            add_var_to_config PPSS_LOCAL_TMPDIR "$PPSS_LOCAL_TMPDIR"
            add_var_to_config PPSS_LOCAL_OUTPUT "$PPSS_LOCAL_OUTPUT"
            cleanup
            exit 0
            ;;
        stop )
            LOGFILE=/dev/null
            display_header
            log DSPLY "Stopping PPSS on all nodes."
            exec_cmd "touch $STOP_SIGNAL"
            cleanup
            exit 0
            ;;
        pause )
            LOGFILE=/dev/null
            display_header
            log DSPLY "Pausing PPSS on all nodes."
            exec_cmd "touch $PAUSE_SIGNAL"
            cleanup
            exit 0
            ;;
        continue )
            LOGFILE=/dev/null
            display_header
            if does_file_exist "$STOP_SIGNAL"
            then
                log DSPLY "Continuing processing, please use $0 start to start PPSS on al nodes."
                exec_cmd "rm -f $STOP_SIGNAL"
            fi
            if does_file_exist "$PAUSE_SIGNAL"
            then
                log DSPLY "Continuing PPSS on all nodes."
                exec_cmd "rm -f $PAUSE_SIGNAL"
            fi
            cleanup
            exit 0
            ;;
        deploy )
            LOGFILE=ppss-deploy.txt
            display_header
            log DSPLY "Deploying PPSS on nodes. See ppss-deploy.txt for details."
            deploy_ppss
            # Wait for whatever deploy_ppss left running in the background.
            wait
            cleanup
            exit 0
            ;;
        status )
            LOGFILE=/dev/null
            display_header
            test_server
            show_status
            exit 0
            ;;
        erase )
            LOGFILE=/dev/null
            display_header
            log DSPLY "Erasing PPSS from all nodes."
            erase_ppss
            cleanup
            exit 0
            ;;
        kill )
            LOGFILE=/dev/null
            for x in $(ps ux | grep ppss | grep -v grep | grep bash | awk '{ print $2 }')
            do
                kill "$x"
            done
            cleanup
            exit 0
            ;;
        * )
            create_working_directory
            display_header
            init_vars
            get_all_items
            # The listener's output is deliberately left on the terminal.
            listen_for_job "$MAX_NO_OF_RUNNING_JOBS" &
            LISTENER_PID=$!
            start_all_workers
            ;;
    esac
}
#
# PPSS can be sourced. This is mainly for testing purposes (unit tests).
# Nothing below is executed when the file is sourced.
#
are_we_sourced || {
    #
    # First step: process all command-line arguments.
    #
    process_arguments "$@"

    #
    # This command sets the whole framework in motion.
    #
    main

    #
    # Exit after all processes have finished.
    #
    wait
}