This commit is contained in:
Louwrentius 2010-07-21 06:24:25 +00:00
parent 2c42f7568e
commit 654b52d91f
2 changed files with 100 additions and 18 deletions

116
ppss
View File

@ -26,7 +26,7 @@
trap 'kill_process' SIGINT
SCRIPT_NAME="Distributed Parallel Processing Shell Script"
SCRIPT_VERSION="2.81"
SCRIPT_VERSION="2.83"
#
# The first argument to this script can be a mode.
@ -89,6 +89,7 @@ ACTIVE_WORKERS="0"
DAEMON_POLLING_INTERVAL="10"
STAT=""
DAEMON_FILE_AGE="4"
ENABLE_INPUT_LOCK="0"
SSH_SERVER="" # Remote server or 'master'.
SSH_KEY="" # SSH key for ssh account.
@ -118,6 +119,7 @@ ITEM_ESCAPED=""
DISABLE_SKIPPING=0
NODE_STATUS="$PPSS_DIR/status.txt"
DAEMON=0
EMAIL=""
REGISTER="" # For STACK
STACK=""
@ -136,6 +138,7 @@ showusage_short () {
echo " $0 -d /dir/with/some/files -c 'gzip '"
echo " $0 -d /dir/with/some/files -c 'cp \"\$ITEM\" /tmp' -p 2"
echo " $0 -f <file> -c 'wget -q -P /destination/directory \"\$ITEM\"' -p 10"
echo
}
showusage_normal () {
@ -178,15 +181,19 @@ showusage_normal () {
echo -e "--quiet | -q Shows no output except for a progress indication using percents."
echo
echo -e "--delay | -D Adds an initial random delay to the start of all parallel jobs to spread"
echo -e " the load. The delay is only used at the start of all 'threads'."
echo -e " the load. The delay (seconds) is only used at the start of all 'threads'."
echo
echo -e "--daemon Do not exit after items are professed, but keep looking for new items"
echo -e " and process them. Read the manual how to use this!"
echo -e "--daemon Daemon mode. Do not exit after items are professed, but keep looking "
echo -e " for new items and process them. Read the manual how to use this!"
echo -e " See --help for important additional options regarding daemon mode."
echo
echo -e "--no-recursion|-r By default, recursion of directories is enabled when the -d option is "
echo -e " used. If this is not prefered, this can be disabled with this option "
echo -e " Only files within the specified directory will be processed."
echo
echo -e "--email | -e PPSS sends an e-mail if PPSS has finished. It is also used if processing"
echo -e " of an item has failed (configurable, see -h). "
echo
echo -e "--help Extended help, including options for distributed mode and Amazon EC2."
echo
echo -e "Example: encoding some wav files to mp3 using lame:"
@ -254,6 +261,24 @@ showusage_long () {
echo -e "--daemon Do not exit after items are professed, but keep looking for new items"
echo -e " and process them. Read the manual how to use this!"
echo
echo -e "--interval Specifies the polling interval when running in daemon mode. Polls every"
echo -e " x seconds for new items to process."
echo
echo -e "--file-age When not using inotify, specify how many seconds must have passed before"
echo -e " a file may be processed to prevent files being processed while being "
echo -e " written to."
echo
echo -e "--disable-inotify If for some reason, inotify must not be used, use this option to disable"
echo -e " usage of inotify. Regular polling will be used."
echo
echo -e "--enable-input-lock When PPSS is run in daemon mode, create a directory INPUT_LOCK to"
echo -e " signal that items are processed and may not be touched by PPSS."
echo -e " Once this directory is removed, PPSS will start processing items."
echo
echo -e "--no-recursion|-r By default, recursion of directories is enabled when the -d option is "
echo -e " used. If this is not prefered, this can be disabled with this option "
echo -e " Only files within the specified directory will be processed."
echo
echo -e "--no-recursion|-r By default, recursion of directories is enabled when the -d option is "
echo -e " used. If this is not prefered, this can be disabled with this option."
echo -e " Only files within the specified directory will be processed."
@ -449,7 +474,6 @@ is_var_empty () {
fi
}
detect_inotify () {
if [ -e /usr/bin/inotifywait ] && [ ! "$INOTIFY" = "0" ]
@ -546,6 +570,10 @@ process_arguments () {
INOTIFY=0
add_var_to_config INOTIFY "$INOTIFY"
shift 1 ;;
--enable-input-lock)
ENABLE_INPUT_LOCK=1
add_var_to_config ENABLE_INPUT_LOCK "$ENABLE_INPUT_LOCK"
shift 1 ;;
--daemon)
DAEMON="1"
QUIET="1"
@ -563,6 +591,11 @@ process_arguments () {
is_var_empty "$2"
add_var_to_config DAEMON_FILE_AGE "$DAEMON_FILE_AGE"
shift 2 ;;
--email|-e)
is_var_empty "$2"
EMAIL="$2"
add_var_to_config EMAIL "$EMAIL"
shift 2 ;;
--awskeypair|-P)
AWS_KEYPAIR="$2"
add_var_to_config AWS_KEYPAIR "$AWS_KEYPAIR"
@ -1435,13 +1468,9 @@ random_delay () {
fi
NUMBER=$RANDOM
let "NUMBER %= $ARGS"
if [ "$ARCH" == "SunOS" ]
then
sleep "$NUMBER"
else
sleep "0.$NUMBER"
fi
sleep "$NUMBER"
}
escape_item () {
@ -1709,7 +1738,7 @@ remove_processed_items_from_input_file () {
get_all_items () {
if [ "$DAEMON" == "1" ]
if [ "$DAEMON" == "1" ] && [ "$INOTIFY" = "0" ] && [ "$ENABLE_INPUT_LOCK" = "1" ]
then
GLOBAL_COUNTER=1
get_input_lock
@ -1900,7 +1929,24 @@ elapsed () {
MINS="$(expr $(expr $REMAINDER - $SECS) / 60)"
RES=$(printf "Total processing time (hh:mm:ss): %02d:%02d:%02d" $HOURS $MINS $SECS)
log DSPLY "$RES"
echo "$RES"
}
# Mails the log file of a failed item to the configured e-mail address.
#
# Globals:   MAIL_ON_ERROR (read) - a mail is only sent when this is "1".
#            EMAIL (read)         - recipient address.
#            HOSTNAME (read)      - used in the subject line.
# Arguments: $1 - the item that failed (accepted for interface
#                 compatibility; not used in the mail itself).
#            $2 - path to the log file of that item, sent as the mail body.
# Outputs:   reports the outcome through log (DEBUG on success, ERROR on
#            failure).
mail_on_error () {

    # Keep the log file path local. Assigning it to a global LOGFILE (and
    # $1 to a global ITEM) would overwrite same-named variables of the
    # caller. NOTE(review): LOGFILE looks like a name the log function may
    # rely on - confirm.
    local item_log="$2"

    if [ "$MAIL_ON_ERROR" = "1" ]
    then
        if [ -z "$EMAIL" ]
        then
            # Without a recipient there is nothing sensible to hand to mail.
            log ERROR "Sending of error email failed: no e-mail address configured."
        elif mail -s "$HOSTNAME - PPSS: procesing failed for item." "$EMAIL" < "$item_log"
        then
            log DEBUG "Error mail sent."
        else
            # Also reached when the item log file cannot be read.
            log ERROR "Sending of error email failed."
        fi
    fi
}
commando () {
@ -2073,6 +2119,7 @@ commando () {
# Some error logging. Success or fail.
if [ ! "$ERROR" == "0" ]
then
mail_on_error "$ITEM" "$ITEM_LOG_FILE"
echo -e "Status:\t\tFAILURE" >> "$ITEM_LOG_FILE"
else
echo -e "Status:\t\tSUCCESS" >> "$ITEM_LOG_FILE"
@ -2182,7 +2229,7 @@ run_command () {
INPUT="$REGISTER"
fi
log DEBUG "Now processing $INPUT"
log INFO "Now processing $INPUT"
if [ ! -d "$INPUT" ] && [ ! -z "$INPUT" ]
then
@ -2217,6 +2264,31 @@ display_jobs_remaining () {
fi
}
# Displays the estimated time at which processing will be finished.
#
# The time spent so far is extrapolated over the whole input plus one
# extra item of margin, and the result is added to the start time.
#
# Globals:   GLOBAL_COUNTER, MAX_NO_OF_RUNNING_JOBS, SIZE_OF_INPUT,
#            START_PPSS (start time in seconds), ARCH (all read).
# Outputs:   an empty line on stdout, then "ETA: <date>" through log DSPLY.
# Returns:   0 without any output while the processed count is not positive.
show_eta () {

    # Presumably the jobs that are still running are not counted as done.
    local processed=$((GLOBAL_COUNTER-MAX_NO_OF_RUNNING_JOBS))

    if [ "$processed" -le "0" ]
    then
        return 0
    fi

    local now
    now=$(get_time_in_seconds)

    # Multiply before dividing: computing a whole-second time per item first
    # truncates to 0 for items that take less than a second on average,
    # which would make the ETA equal to the start time.
    local seconds_elapsed=$((now-START_PPSS))
    local total_time=$(( seconds_elapsed * (SIZE_OF_INPUT + 1) / processed ))
    local eta_in_seconds=$((START_PPSS+total_time))

    local eta
    if [ "$ARCH" = "Darwin" ]
    then
        # BSD date takes the epoch seconds through -r.
        eta=$(date -r "$eta_in_seconds")
    else
        eta=$(date -d "@$eta_in_seconds")
    fi

    echo
    log DSPLY "ETA: $eta"
}
display_progress () {
if [ "$DAEMON" = "0" ]
@ -2228,6 +2300,7 @@ display_progress () {
if [ "$QUIET" == "0" ]
then
log PRCNT "Currently $PERCENT percent complete. Processed $GLOBAL_COUNTER of $SIZE_OF_INPUT items."
show_eta
elif [ "$DAEMON" == "0" ]
then
echo -en "\r$PERCENT%"
@ -2251,7 +2324,6 @@ terminate_listener () {
if [ ! -z "$SSH_MASTER_PID" ]
then
log DEBUG "SSH master PID is $SSH_MASTER_PID"
kill "$SSH_MASTER_PID"
else
log DEBUG "SSH master PID is empty."
@ -2275,6 +2347,16 @@ terminate_listener () {
then
echo
fi
if [ ! -z "$EMAIL" ]
then
echo "PPSS job finished." | mail -s "$HOSTNAME - PPSS has finished." "$EMAIL"
if [ ! "$?" = "0" ]
then
log ERROR "Sending os status mail failed."
fi
fi
cleanup
}
@ -2422,7 +2504,7 @@ listen_for_job () {
while read event <& 42
do
log DEBUG "Current active workers is $ACTIVE_WORKERS"
log INFO "Current active workers is $ACTIVE_WORKERS"
if [ "$event" = "$START_KEY" ]
then
@ -2455,7 +2537,7 @@ listen_for_job () {
infanticide
break
else
log DEBUG "Event $event is an item!"
log DEBUG "Event is an item."
stack_push "$event"
run_command
fi

View File

@ -1,7 +1,7 @@
#!/bin/bash
DEBUG="$1"
VERSION="2.81"
VERSION="2.82"
TMP_DIR="/tmp/ppss"
PPSS=./ppss
PPSS_DIR=ppss_dir