Major rework, no longer using arrays. Arrays don't scale and require enormous amounts of memory when processing large input files. Will be released as 2.70.
This commit is contained in:
		
							parent
							
								
									9feea3fb59
								
							
						
					
					
						commit
						0db3781021
					
				
							
								
								
									
										93
									
								
								ppss
									
									
									
									
									
								
							
							
						
						
									
										93
									
								
								ppss
									
									
									
									
									
								
							| @ -25,7 +25,7 @@ trap 'kill_process' SIGINT | ||||
| 
 | ||||
| # Setting some vars.  | ||||
| SCRIPT_NAME="Distributed Parallel Processing Shell Script" | ||||
| SCRIPT_VERSION="2.65" | ||||
| SCRIPT_VERSION="2.70" | ||||
| 
 | ||||
| # The first argument to this script can be a mode. | ||||
| MODES="node start config stop pause continue deploy status erase kill ec2" | ||||
| @ -73,6 +73,9 @@ PAUSE_DELAY="60"                                          # Polling every 1 minu | ||||
| STOP_SIGNAL="$PPSS_HOME_DIR/$PPSS_DIR/stop_signal"                     # Stop processing if this file is present. | ||||
| ARRAY_POINTER_FILE="$PPSS_DIR/ppss-array-pointer-$PID"  # Pointer for keeping track of processed items. | ||||
| ARRAY="" | ||||
| GLOBAL_COUNTER="" | ||||
| GLOBAL_COUNTER_FILE="$PPSS_DIR/ppss-input-counter-$PID" | ||||
| LOCAL_INPUT_FILE="$PPSS_DIR/INPUT_FILE-$PID" | ||||
| JOB_LOG_DIR="$PPSS_DIR/job_log"                         # Directory containing log files of processed items. | ||||
| LOGFILE="$PPSS_DIR/ppss-log-$PID.txt"                     # General PPSS log file. Contains lots of info. | ||||
| QUIET="0" | ||||
| @ -435,9 +438,9 @@ cleanup () { | ||||
|         rm "$FIFO" | ||||
|     fi | ||||
| 
 | ||||
|     if [ -e "$ARRAY_POINTER_FILE" ]  | ||||
|     if [ -e "$GLOBAL_COUNTER_FILE" ]  | ||||
|     then | ||||
|         rm "$ARRAY_POINTER_FILE" | ||||
|         rm "$GLOBAL_COUNTER_FILE" | ||||
|     fi | ||||
| 
 | ||||
|     if [ -e "$GLOBAL_LOCK" ]  | ||||
| @ -836,7 +839,7 @@ init_vars () { | ||||
|         exit 1 | ||||
|     fi | ||||
| 
 | ||||
|     echo 0 > $ARRAY_POINTER_FILE | ||||
|     echo 1 > $GLOBAL_COUNTER_FILE | ||||
| 
 | ||||
|     FIFO=/tmp/ppss-fifo-$RANDOM-$RANDOM | ||||
| 
 | ||||
| @ -1508,15 +1511,17 @@ get_all_items () { | ||||
| 
 | ||||
|     if [ -z "$INPUT_FILE" ] | ||||
|     then | ||||
|         if [ ! -z "$SSH_SERVER" ] # Are we running stand-alone or as a slave?" | ||||
|         if [ ! -z "$SSH_SERVER" ] # Are we running stand-alone or as a node?" | ||||
|         then | ||||
|             if [ "$RECURSION" == "1" ] | ||||
|             then | ||||
|                 ITEMS=`exec_cmd "find $SRC_DIR/ ! -type d"` | ||||
|                 #ITEMS=`exec_cmd "find $SRC_DIR/ ! -type d"` | ||||
|                 `exec_cmd "find $SRC_DIR/ ! -type d"` >> "$LOCAL_INPUT_FILE" | ||||
|                 check_status "$?" "$FUNCNAME" "Could not list files within remote source directory." | ||||
|             else | ||||
|                 log DEBUG "Recursion is disabled." | ||||
|                 ITEMS=`exec_cmd "ls -1 $SRC_DIR"` | ||||
|                 #ITEMS=`exec_cmd "ls -1 $SRC_DIR"` | ||||
|                 `exec_cmd "ls -1 $SRC_DIR"` >> "$LOCAL_INPUT_FILE" | ||||
|                 check_status "$?" "$FUNCNAME" "Could not list files within remote source directory." | ||||
|             fi | ||||
|         else  | ||||
| @ -1524,29 +1529,31 @@ get_all_items () { | ||||
|             then | ||||
|                 if [ "$RECURSION" == "1" ] | ||||
|                 then | ||||
|                     ITEMS=`find "$SRC_DIR/" ! -type d` | ||||
|                     log DEBUG "Recursion is enabled." | ||||
|                     #ITEMS=`find "$SRC_DIR/" ! -type d` | ||||
|                     `find "$SRC_DIR/" ! -type d >> "$LOCAL_INPUT_FILE"`  | ||||
|                     check_status "$?" "$FUNCNAME" "Could not list files within local source directory." | ||||
|                 else | ||||
|                     log DEBUG "Recursion is disabled." | ||||
|                     ITEMS=`ls -1 "$SRC_DIR"` | ||||
|                     #ITEMS=`ls -1 "$SRC_DIR"` | ||||
|                     `ls -1 "$SRC_DIR" >> "$LOCAL_INPUT_FILE"` | ||||
|                     check_status "$?" "$FUNCNAME" "Could not list files within local source directory." | ||||
|                 fi | ||||
|                 if [ ! -e "$LOCAL_INPUT_FILE" ] | ||||
|                 then | ||||
|                     log ERROR "Local input file is not created, something is wrong. Bug?" | ||||
|                     set_status "ERROR" | ||||
|                     cleanup  | ||||
|                     exit 1 | ||||
|                 fi | ||||
|             else | ||||
|                 ITEMS="" | ||||
|             fi | ||||
|         fi | ||||
|         IFS=$'\n' | ||||
| 
 | ||||
|         for x in $ITEMS | ||||
|         do | ||||
|             ARRAY[$count]="$x" | ||||
|             ((count++)) | ||||
|         done | ||||
|         IFS=$IFS_BACKUP | ||||
|     else | ||||
|         if [ ! -z "$SSH_SERVER" ] # Are we running stand-alone or as a slave?" | ||||
|         then | ||||
|             log DEBUG "Running as slave, input file has been pushed (hopefully)." | ||||
|             log DEBUG "Running as node, input file has been pushed (hopefully)." | ||||
|         fi | ||||
|         if [ ! -e "$INPUT_FILE" ] && [ ! "$INPUT_FILE" == "-" ] | ||||
|         then | ||||
| @ -1558,22 +1565,13 @@ get_all_items () { | ||||
|      | ||||
|         if [ ! "$INPUT_FILE" == "-" ] | ||||
|         then | ||||
|          | ||||
|             exec 10<"$INPUT_FILE" | ||||
| 
 | ||||
|             while read LINE <&10 | ||||
|             do | ||||
|                 ARRAY[$count]=$LINE | ||||
|                 ((count++)) | ||||
|             done | ||||
| 
 | ||||
|             exec 10>&- | ||||
|             cp "$INPUT_FILE" "$LOCAL_INPUT_FILE" | ||||
|             check_status "$?" "$FUNCNAME" "Copy of input file failed!" | ||||
|         else | ||||
|             log DEBUG "Reading from stdin.." | ||||
|             while read LINE | ||||
|             do | ||||
|                 ARRAY[$count]=$LINE | ||||
|                 ((count++)) | ||||
|                 echo "$LINE" >> "$LOCAL_INPUT_FILE" | ||||
|             done | ||||
|         fi | ||||
|     fi | ||||
| @ -1583,8 +1581,8 @@ get_all_items () { | ||||
|         release_input_lock | ||||
|     fi | ||||
| 
 | ||||
|     SIZE_OF_ARRAY="${#ARRAY[@]}" | ||||
|     if [ "$SIZE_OF_ARRAY" -le "0" ] | ||||
|     SIZE_OF_INPUT=$(wc -l "$LOCAL_INPUT_FILE" | awk '{ print $1 }') | ||||
|     if [ "$SIZE_OF_INPUT" -le "0" ] | ||||
|     then | ||||
|         log ERROR "Source file/dir seems to be empty." | ||||
|         set_status STOPPED | ||||
| @ -1604,12 +1602,12 @@ get_item () { | ||||
|      | ||||
|     get_global_lock | ||||
| 
 | ||||
|     SIZE_OF_ARRAY="${#ARRAY[@]}" | ||||
| 
 | ||||
|     SIZE_OF_INPUT=$(wc -l "$LOCAL_INPUT_FILE" | awk '{ print $1 }') | ||||
|     log DEBUG "sizeofinput $SIZE_OF_INPUT" | ||||
|     # | ||||
|     # Return error if the array is empty. | ||||
|     # | ||||
|     if [ "$SIZE_OF_ARRAY" -le "0" ] | ||||
|     if [ "$SIZE_OF_INPUT" -le "0" ]  | ||||
|     then | ||||
|         release_global_lock | ||||
|         return 1 | ||||
| @ -1618,28 +1616,31 @@ get_item () { | ||||
|     # | ||||
|     # This variable is used to walk thtough all array items. | ||||
|     # | ||||
|     ARRAY_POINTER=`cat $ARRAY_POINTER_FILE` | ||||
|     GLOBAL_COUNTER=$(cat $GLOBAL_COUNTER_FILE) | ||||
|     log DEBUG "globalcounter $GLOBAL_COUNTER"    | ||||
|      | ||||
|     # | ||||
|     # Check if all items have been processed. | ||||
|     # | ||||
|     if [ "$ARRAY_POINTER" -ge "$SIZE_OF_ARRAY" ] | ||||
|     if [ "$GLOBAL_COUNTER" -gt "$SIZE_OF_INPUT" ] | ||||
|     then | ||||
|         release_global_lock | ||||
|         return 1 | ||||
|     fi | ||||
| 
 | ||||
|     ITEM="${ARRAY[$ARRAY_POINTER]}"  | ||||
|     ITEM="$(sed -n $GLOBAL_COUNTER\p $LOCAL_INPUT_FILE)" | ||||
|     log DEBUG "item dus is $ITEM" | ||||
| 
 | ||||
|     if [ -z "$ITEM" ] | ||||
|     then | ||||
|         ((ARRAY_POINTER++)) | ||||
|         echo $ARRAY_POINTER > $ARRAY_POINTER_FILE | ||||
|         ((GLOBAL_COUNTER++)) | ||||
|         log DEBUG "Item was emtpy..." | ||||
|         echo $GLOBAL_COUNTER > $GLOBAL_COUNTER_FILE | ||||
|         release_global_lock | ||||
|         get_item | ||||
|     else | ||||
|         ((ARRAY_POINTER++)) | ||||
|         echo $ARRAY_POINTER > $ARRAY_POINTER_FILE | ||||
|         ((GLOBAL_COUNTER++)) | ||||
|         echo $GLOBAL_COUNTER > $GLOBAL_COUNTER_FILE | ||||
|         lock_item "$ITEM" | ||||
|         if [ ! "$?" == "0" ] | ||||
|         then | ||||
| @ -2039,15 +2040,15 @@ listen_for_job () { | ||||
|         fi | ||||
| 
 | ||||
|         get_global_lock | ||||
|         SIZE_OF_ARRAY="${#ARRAY[@]}" | ||||
|         ARRAY_POINTER=`cat $ARRAY_POINTER_FILE` | ||||
|         SIZE_OF_INPUT=$(wc -l "$LOCAL_INPUT_FILE" | awk '{ print $1 }') | ||||
|         GLOBAL_COUNTER=$(cat $GLOBAL_COUNTER_FILE) | ||||
|         release_global_lock | ||||
|         PERCENT=$((100 * $ARRAY_POINTER / $SIZE_OF_ARRAY )) | ||||
|         PERCENT=$((100 * $GLOBAL_COUNTER / $SIZE_OF_INPUT )) | ||||
|         if [ "$DIED" == "0" ] && [ "$FINISHED" == "0" ]  | ||||
|         then | ||||
|             if [ "$QUIET" == "0" ] | ||||
|             then | ||||
|                 log PRCNT "Currently $PERCENT percent complete. Processed $ARRAY_POINTER of $SIZE_OF_ARRAY items."  | ||||
|                 log PRCNT "Currently $PERCENT percent complete. Processed $GLOBAL_COUNTER of $SIZE_OF_INPUT items."  | ||||
|             elif [ "$DAEMON" == "0" ] | ||||
|             then | ||||
|                 echo -en "\r$PERCENT%" | ||||
|  | ||||
| @ -1,7 +1,7 @@ | ||||
| #!/bin/bash | ||||
| 
 | ||||
| DEBUG="$1" | ||||
| VERSION="2.65" | ||||
| VERSION="2.70" | ||||
| TMP_DIR="ppss" | ||||
| PPSS=./ppss | ||||
| PPSS_DIR=ppss_dir | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user