Merge pull request #740 from thomasjacquin/removeBadImages-changes-fixes

removeBadImages.sh: misc changes/fixes
AllskyTeam · Nov 5, 2021 · 98212d4 · 98212d4
2 parents ad701d7 + 8ecff7e
commit 98212d4
Showing 1 changed file with 163 additions and 52 deletions.
diff --git a/scripts/removeBadImages.sh b/scripts/removeBadImages.sh
@@ -1,80 +1,191 @@
 #!/bin/bash
 
-REMOVE_BAD_IMAGES_THRESHOLD_LOW=${REMOVE_BAD_IMAGES_THRESHOLD_LOW:-0}	# in case not in config.sh file
-REMOVE_BAD_IMAGES_THRESHOLD_HIGH=${REMOVE_BAD_IMAGES_THRESHOLD_HIGH:-0}	# in case not in config.sh file
+ME="$(basename "${BASH_ARGV0}")"
 
-ME="$(basename "$BASH_ARGV0")"	# Include script name in output so it's easier to find in the log file
+source "${ALLSKY_HOME}/variables.sh"
+source "${ALLSKY_CONFIG}/config.sh"
+source "${ALLSKY_SCRIPTS}/filename.sh"
 
-if [ $# -ne 1 -o 'x$1' == 'x-h' ] ; then
-	echo "Remove images with corrupt data which might mess up startrails and keograms"
-	echo "usage: $ME <directory>"
-	exit 1
+usage()	# Print the help text, then exit with the status passed in ${1}.
+{
+	retcode="${1}"	# non-zero means a usage error: the "Usage:" line is then wrapped in ${RED} ... ${NC}
+	echo
+	echo "Remove images with corrupt data which might mess up startrails and keograms."
+	[ "${retcode}" -ne 0 ] && echo -en "${RED}"
+	echo -n "Usage: ${ME} [--help] [--debug]  directory  [file]"
+	[ "${retcode}" -ne 0 ] && echo -e "${NC}"	# also ends the "Usage:" line; when retcode is 0 the bare echo below ends it
+	echo
+	echo "You must enter the arguments in the above order."
+# TODO: use getopts to allow any order
+	echo "Turning on debug will indicate bad images but will not remove them."
+	echo "If 'file' is specified, only that file in 'directory' will be checked,"
+	echo "otherwise all files in 'directory' will be checked."
+	exit ${retcode}	# terminates the script; usage() never returns to its caller
+}
+[ "${1}" = "-h" -o "${1}" = "--help" ] && usage 0
+if [ "${1}" = "-d" -o "${1}" = "--debug" ]; then
+	DEBUG="true"
+	r="would be removed"
+	shift
+else
+	DEBUG="false"
+	r="removed"
+fi
+
+[ $# -eq 0 -o $# -gt 2 ] && usage 1
+
+DATE="${1}"
+FILE="${2}"
+
+# If we're running in debug mode don't display ${ME} since it makes the output harder to read.
+if [ ${DEBUG} = "true" -o "${ON_TTY}" = "1" ]; then
+	ME=""
+else
+	ME="${ME}:"
+fi
+if [ ! -d "${DATE}" ]; then
+	echo -e "${RED}${ME} '${DATE}' is not a directory${NC}"
+	exit 2
 fi
 
-if [ \! -d "$1" ] ; then
-	echo "$ME: $1 is not a directory"
-	exit 1
+if [ "${FILE}" != "" -a ! -f "${DATE}/${FILE}" ]; then
+	echo -e "${RED}${ME} '${FILE}' not found in '${DATE}'${NC}"
+	exit 2
 fi
-DIR=$1
-
-# Super simple: find the full size image-*jpg and image-*png files (not the
-# thumbnails) and ask imagemagick to compute a histogram (which is discarded)
-# in order to capture the diagnostics from libjpeg. Will have to benchmark to
-# be sure, but I suspect it's faster to produce histogram output than another
-# image format which would be discarded anyway. If an input image does produce
-# a warning message grep will match it and it will be deleted.
-#
-# This leaves us just images that decompress properly and won't introduce junk
-# into the processing pipeline.
-#
-# Why on G-d's green earth would I do something like this? Because for whatever
-# reason, my raspberry pi produces corrupt captures occasionally and this tool
-# means I get good startrails and keograms in the morning.
-#
+
+DARK_MODE=$(jq -r '.darkframe' "${CAMERA_SETTINGS}")
+if [ "${DARK_MODE}" = "1" ]; then
+	# Disable low brightness check since darks will have extremely low brightness.
+	# But continue with the other checks in case the dark file is corrupted.
+	REMOVE_BAD_IMAGES_THRESHOLD_LOW=0
+fi
+
+# Find the full size image-*jpg and image-*png files (not the thumbnails) and
+# have "convert" compute a histogram (which is discarded),
+# in order to capture any error messages.
+# If an image DOES produce an error message grep will match it and the file will be deleted.
+
+# Doing this allows good startrails and keograms to be produced on machines that
+# sometimes produce corrupt or zero-length files.
+
 # If GNU Parallel is installed (it's not by default), then blast through and
 # clean all the images as fast as possible without regard for CPU utilization.
 
 # Use IMAGE_FILES and ERROR_WORDS to avoid duplicating them.
 # Remove 0-length files ("insufficient image data") and files too dim or bright.
-# $DIR may end in a "/" so there will be "//" in the filenames, but there's no harm in that.
-cd $DIR
-IMAGE_FILES="$( find . -type f \( -iname image-\*.jpg -o -iname image-\*.png \) \! -ipath \*thumbnail\* )"
-ERROR_WORDS="Huffman|Bogus|Corrupt|Invalid|Trunc|Missing|insufficient image data|no decode delegate"
+# ${DATE} may end in a "/" so there will be "//" in the filenames, but there's no harm in that.
+
+cd "${DATE}" || exit 2	# never search for (or delete) files relative to the wrong directory
+if [ "${FILE}" != "" ]; then
+	IMAGE_FILES="${FILE}"	# only the single file named on the command line
+else
+	IMAGE_FILES="$( find . -type f -iname "${FILENAME}-*.${EXTENSION}" \! -ipath '*thumbnail*' )"
+fi
+ERROR_WORDS="Huffman|Bogus|Corrupt|Invalid|Trunc|Missing|insufficient image data|no decode delegate|no images defined"
 
-TMP=badError.txt
+TMP="${ALLSKY_TMP}/badError.txt"	# receives stderr from each "convert" run
 
+# Save all output to a temp file so we don't potentially swamp the system log file.
+OUTPUT="${ALLSKY_TMP}/removeBadImages.log"
+: > "${OUTPUT}"	# start empty; quoted so a path containing spaces is not an "ambiguous redirect"
+
+typeset -i num_bad=0
 if which parallel > /dev/null ; then
-	echo $IMAGE_FILES | \
-		parallel -- "convert {} histogram:/dev/null 2>&1 | egrep -q "$ERROR_WORDS" && rm -vf {}"
+	# Debug mode only reports bad files. {} stays bare so parallel shell-quotes the name.
+	rm="&& rm -vf {}"
+	[ "${DEBUG}" = "true" ] && rm="&& echo '${r}' {}"
+	# Feed one name per line (parallel takes each input line as one argument) and
+	# single-quote the regex so its '|' and spaces reach egrep, not the spawned shell.
+	printf '%s\n' ${IMAGE_FILES} | \
+		parallel -- "convert {} histogram:/dev/null 2>&1 | egrep -q '${ERROR_WORDS}' ${rm}"
 	# xxxxxxxxxx need to add THRESHOLD checking here and remove bad thumbnails...
 	# xxxxxxxxxx Can we replace "rm -vf" above with "echo" and redirect output to the tmp file,
-	# xxxxxxxxxx then do a "for f in $(cat $TMP); do" and remove the files that way?
+	# xxxxxxxxxx then do a "for f in $(< ${TMP}); do" and remove the files that way?
+
 else
-	typeset -i num_bad=0
+	# If the low threshold is 0 it's disabled.
+	# If the high one is 0 or above 100 it's disabled (a mean can never exceed 100).
+	# Default both to 0 (disabled) in case they are not set in config.sh.
+	HIGH=${REMOVE_BAD_IMAGES_THRESHOLD_HIGH:-0}
+	if [ "${HIGH}" -gt 100 ]; then
+		HIGH=0
+	fi
+	LOW=${REMOVE_BAD_IMAGES_THRESHOLD_LOW:-0}
+
+	# If we're processing a whole directory assume it's done in the background so "nice" it.
+	# If we're only processing one file we want it done quickly.
+	if [ "${FILE}" = "" ]; then
+			NICE="nice"
+	else
+			NICE=""
+	fi
+
 	for f in ${IMAGE_FILES} ; do
-		MEAN=$(nice convert "$f" -colorspace Gray -format "%[fx:image.mean]" info: 2> $TMP)
 		BAD=""
-		egrep -q "$ERROR_WORDS" $TMP
-		RET=$?
-		if [ $RET -eq 0 ] ; then
-			rm -f "$f" "thumbnails/$f"
-			BAD="'$f' (corrupt file: $(cat $TMP))"
-			let num_bad=num_bad+1
+
+		if [ ! -s "${f}" ]; then
+			BAD="'${f}' (zero length)"
 		else
-			# Multiply MEAN by 100 to convert to integer (0-100 %) since bash doesn't work with floats.
-			MEAN=$(echo "$MEAN" | awk '{ printf("%d", $1 * 100); }')
-			if [ $MEAN -lt $REMOVE_BAD_IMAGES_THRESHOLD_LOW -o $MEAN -gt $REMOVE_BAD_IMAGES_THRESHOLD_HIGH ]; then
-				rm -f "$f" "thumbnails/$f"
-				BAD="'$f' (bad threshold: MEAN=$MEAN)"
-				let num_bad=num_bad+1
+			# MEAN is a number between 0.0 and 1.0.
+			MEAN=$(${NICE} convert "${f}" -colorspace Gray -format "%[fx:image.mean]" info: 2> "${TMP}")
+			egrep -q "${ERROR_WORDS}" "${TMP}"
+
+			if [ $? -eq 0 ]; then	# at least one error word was found in the output
+				BAD="'${f}' (corrupt file: $(< "${TMP}"))"
+
+			else
+				# Multiply MEAN by 100 to convert to integer (0-100 %) since
+				# bash doesn't work with floats.
+				MEAN=$(echo "${MEAN} * 100" | bc)
+				MSG=""
+
+				if [ ${HIGH} -ne 0 ]; then
+					x=$(echo "${MEAN} > ${HIGH}" | bc)
+					if [ ${x} -eq 1 ]; then
+						BAD="'${f}' (above threshold: MEAN=${MEAN}, threshold = ${HIGH})"
+					elif [ ${DEBUG} = "true" ]; then
+						MSG="===== OK: ${f}, MEAN=${MEAN}, HIGH=${HIGH}, LOW=${LOW}"
+					fi
+				fi
+
+				# An image can't be both HIGH and LOW so if it was HIGH don't check for LOW.
+				if [ "${BAD}" = "" -a ${LOW} -ne 0 ]; then
+					x=$(echo "${MEAN} < ${LOW}" | bc)
+					if [ ${x} -eq 1 ]; then
+						BAD="'${f}' (below threshold: MEAN=${MEAN}, threshold = ${LOW})"
+					elif [ ${DEBUG} = "true" -a "${MSG}" = "" ]; then
+						MSG="===== OK: ${f}, MEAN=${MEAN}, HIGH=${HIGH}, LOW=${LOW}"
+					fi
+				fi
+
+				if [ ${DEBUG} = "true" -a "${BAD}" = "" -a "${MSG}" != "" ]; then
+					echo "${MSG}"
+				fi
 			fi
+
+		fi
+
+		if [ "${BAD}" != "" ]; then
+			echo "${r} ${BAD}" >> "${OUTPUT}"
+			[ ${DEBUG} = "false" ] && rm -f "${f}" "thumbnails/${f}"
+			let num_bad=num_bad+1
 		fi
-		[ "$BAD" != "" ] && echo "$ME: Removed $BAD"
 	done
+
 	if [ $num_bad -eq 0 ]; then
-		echo "$ME: No bad files found."
+		# If only one file, "no news is good news".
+		[ "${FILE}" = "" ] && echo -e "\n${ME} ${GREEN}No bad files found.${NC}"
+		rm -f "${OUTPUT}"
 	else
-		echo "$ME: $num_bad bad file(s) found and removed."
+		if [ "${FILE}" = "" ]; then
+			echo "${ME} ${num_bad} bad file(s) found and ${r}. See ${OUTPUT}."
+			# Do NOT remove ${OUTPUT} in case the user wants to look at it.
+		else	# only 1 file so show it
+			echo "${ME} File is bad: $(< "${OUTPUT}")"
+			rm -f "${OUTPUT}"
+		fi
 	fi
 fi
-rm -f $TMP
+rm -f "${TMP}"
+
+exit $(( num_bad > 255 ? 255 : num_bad ))	# exit statuses wrap modulo 256; cap so 256 bad files can't exit 0