From b2a06e248387a21d2dfcdd802fb61f6b09a0c0c1 Mon Sep 17 00:00:00 2001 From: Carl Wilson Date: Thu, 16 Mar 2023 02:12:38 +0000 Subject: [PATCH] TIFF: Continue validation even if data is unaligned Copy of #750 submitted by @david-russo, many thanks. Couldn't sort the conflict from the web UI. Storing data unaligned to word or byte boundaries can decrease read performance but needn't halt file validation entirely as it doesn't prevent the file from being read. This will allow other potentially more serious issues to also be reported. This commit also makes the 'byteoffset' configuration option affect the reporting of unaligned IFDs the same way it does unaligned IFD Entry values (by treating them as valid when set to 'true'). --- .../hul/ois/jhove/module/TiffModule.java | 7 +- .../hul/ois/jhove/module/tiff/IFD.java | 375 +++++++++--------- 2 files changed, 189 insertions(+), 193 deletions(-) diff --git a/jhove-modules/tiff-hul/src/main/java/edu/harvard/hul/ois/jhove/module/TiffModule.java b/jhove-modules/tiff-hul/src/main/java/edu/harvard/hul/ois/jhove/module/TiffModule.java index 0fd14c34a..6f958697d 100644 --- a/jhove-modules/tiff-hul/src/main/java/edu/harvard/hul/ois/jhove/module/TiffModule.java +++ b/jhove-modules/tiff-hul/src/main/java/edu/harvard/hul/ois/jhove/module/TiffModule.java @@ -1174,7 +1174,12 @@ protected List parseIFDs(long offset, RepInfo info, if ((next & 1) != 0) { String mess = MessageFormat.format(MessageConstants.TIFF_HUL_59.getMessage(), next); JhoveMessage message = JhoveMessages.getMessageInstance(MessageConstants.TIFF_HUL_59.getId(), mess); - throw new TiffException(message); + if (_byteOffsetIsValid) { + info.setMessage(new InfoMessage(message)); + } else { + info.setMessage(new ErrorMessage(message)); + info.setWellFormed(false); + } } if (list.size() > 50) { throw new TiffException(MessageConstants.TIFF_HUL_60); diff --git a/jhove-modules/tiff-hul/src/main/java/edu/harvard/hul/ois/jhove/module/tiff/IFD.java b/jhove-modules/tiff-hul/src/main/java/edu/harvard/hul/ois/jhove/module/tiff/IFD.java index edfb84140..6f393644e 100644 --- a/jhove-modules/tiff-hul/src/main/java/edu/harvard/hul/ois/jhove/module/tiff/IFD.java +++ b/jhove-modules/tiff-hul/src/main/java/edu/harvard/hul/ois/jhove/module/tiff/IFD.java @@ -29,8 +29,7 @@ /** * Encapsulation of a TIFF image file directory (IFD). */ -public abstract class IFD -{ +public abstract class IFD { /****************************************************************** * DEBUGGING FIELDS. @@ -55,7 +54,6 @@ public abstract class IFD /** Global parameters IFD. */ public static final int GLOBALPARAMETERS = 4; - /** Undefined value for integer tags. */ public static final int NULL = -1; @@ -136,15 +134,16 @@ public abstract class IFD * CLASS CONSTRUCTOR. ******************************************************************/ - /** Instantiate an IFD object. + /** + * Instantiate an IFD object. + * * @param offset IFD offset * @param info Representation information * @param raf TIFF file * @param bigEndian True if big-endian file */ public IFD(long offset, RepInfo info, RandomAccessFile raf, - boolean bigEndian) - { + boolean bigEndian) { _offset = offset; _info = info; _raf = raf; @@ -166,31 +165,36 @@ public IFD(long offset, RepInfo info, RandomAccessFile raf, * PUBLIC INSTANCE METHODS. ******************************************************************/ - /** Get any errors discovered during parsing. + /** + * Get any errors discovered during parsing. + * * @return list of strings with errors */ - public List getErrors() - { + public List getErrors() { return _errors; } - /** Get the offset of the next IFD. + /** + * Get the offset of the next IFD. + * * @return next */ - public long getNext() - { + public long getNext() { return _next; } - /** Get the IFD offset. + /** + * Get the IFD offset. + * * @return IFD offset */ - public long getOffset() - { + public long getOffset() { return _offset; } - /** Get the IFD properties. + /** + * Get the IFD properties. + * * @param rawOutput: boolean * @return Property * @throws edu.harvard.hul.ois.jhove.module.tiff.TiffException @@ -198,32 +202,36 @@ public long getOffset() public abstract Property getProperty(boolean rawOutput) throws TiffException; - /** Get the TIFF version. + /** + * Get the TIFF version. + * * @return TIFF version */ - public int getVersion() - { + public int getVersion() { return _version; } - /** Return true if this is the first IFD. + /** + * Return true if this is the first IFD. + * * @return if it's first? */ - public boolean isFirst() - { + public boolean isFirst() { return _first; } - /** Return true if this is the thumbnail IFD. + /** + * Return true if this is the thumbnail IFD. + * * @return if it's a thumbnail */ - public boolean isThumbnail() - { + public boolean isThumbnail() { return _thumbnail; } - - /** Lookup IFD tag. + /** + * Lookup IFD tag. + * * @param tag * @param type * @param count @@ -233,54 +241,53 @@ public boolean isThumbnail() public abstract void lookupTag(int tag, int type, long count, long value) throws TiffException; - /** Parse the IFD.Errors are not suppressed, and odd byte offsets for + /** + * Parse the IFD.Errors are not suppressed, and odd byte offsets for * tags not allowed. * * @return The offset of the next IFD * @throws edu.harvard.hul.ois.jhove.module.tiff.TiffException */ public long parse() - throws TiffException - { + throws TiffException { return parse(false, false); } - /** Parse the IFD. + /** + * Parse the IFD. + * * @param byteOffsetIsValid If true, allow offsets on odd byte boundaries * @param suppressErrors If true, return IFD even with errors * @return The offset of the next IFD * @throws edu.harvard.hul.ois.jhove.module.tiff.TiffException */ public long parse(boolean byteOffsetIsValid, boolean suppressErrors) - throws TiffException - { + throws TiffException { try { return parse(byteOffsetIsValid); - } - catch (TiffException e) { + } catch (TiffException e) { // If we got a TiffException and we're suppressing errors, // cover over the exception and issue an info message; // but we can't follow the IFD chain further. if (suppressErrors) { - _info.setMessage - (new InfoMessage(e.getJhoveMessage(), e.getOffset())); + _info.setMessage(new InfoMessage(e.getJhoveMessage(), e.getOffset())); return 0; } throw e; } } - - /** Parse the IFD.Errors are not suppressed. + /** + * Parse the IFD.Errors are not suppressed. * * @param byteOffsetIsValid If true, allow offsets on odd byte boundaries * @return The offset of the next IFD * @throws edu.harvard.hul.ois.jhove.module.tiff.TiffException */ public long parse(boolean byteOffsetIsValid) - throws TiffException - { - /* Start at the IFD offset, read the number of entries, then + throws TiffException { + /* + * Start at the IFD offset, read the number of entries, then * read the entire IFD. */ long offset = _offset; @@ -299,13 +306,11 @@ public long parse(boolean byteOffsetIsValid) /* Read the offset of the next IFD (or 0 if none). */ offset += len; _next = ModuleBase.readUnsignedInt(_raf, _bigEndian); - } - catch (Exception e) { + } catch (Exception e) { throw new TiffException(MessageConstants.TIFF_HUL_1, offset); } - DataInputStream ifdStream = - new DataInputStream(new ByteArrayInputStream(buffer)); + DataInputStream ifdStream = new DataInputStream(new ByteArrayInputStream(buffer)); try { int prevTag = 0; @@ -325,11 +330,11 @@ public long parse(boolean byteOffsetIsValid) _bigEndian, null); /* Skip over tags with unknown type. */ if (type < BYTE || type > IFD) { - String subMess = MessageFormat.format(MessageConstants.TIFF_HUL_3_SUB.getMessage(), type, Integer.valueOf(tag)); + String subMess = MessageFormat.format(MessageConstants.TIFF_HUL_3_SUB.getMessage(), type, + Integer.valueOf(tag)); _info.setMessage(new ErrorMessage(MessageConstants.TIFF_HUL_3, subMess, _offset + 4 + 12*i)); - } - else { + } else { /* Type gives indication of the TIFF version. */ if (SBYTE <= type && type <= IFD) { _version = 6; @@ -340,29 +345,32 @@ public long parse(boolean byteOffsetIsValid) long value = ModuleBase.readUnsignedInt(ifdStream, _bigEndian, null); if (calcValueSize(type, count) > 4) { - /* Value is the word-aligned offset of the actual - * value. */ + /* + * Value is the word-aligned offset of the actual + * value. + */ if ((value & 1) != 0) { final String mess = MessageFormat.format(MessageConstants.TIFF_HUL_4.getMessage(), value); - JhoveMessage message = JhoveMessages.getMessageInstance(MessageConstants.TIFF_HUL_4.getId(), mess); + JhoveMessage message = JhoveMessages.getMessageInstance(MessageConstants.TIFF_HUL_4.getId(), + mess); if (byteOffsetIsValid) { _info.setMessage(new InfoMessage(message, _offset + 10 + 12*i)); - } - else { - throw new TiffException(message,_offset + 10 + 12*i); - } + } else { + _info.setMessage(new ErrorMessage(message, _offset + 10 + 12 * i)); + _info.setWellFormed(false); } } - else { - /* Value is the actual value; pass the offset of - * the value. */ + } else { + /* + * Value is the actual value; pass the offset of + * the value. + */ value = _offset + 10 + 12*i; } lookupTag(tag, type, count, value); } } - } - catch (IOException e) { + } catch (IOException e) { throw new TiffException(MessageConstants.TIFF_HUL_5, _offset + 2); } postParseInitialization(); @@ -370,21 +378,23 @@ public long parse(boolean byteOffsetIsValid) return _next; } - /** Sets flag indicating whether this is the first IFD. + /** + * Sets flag indicating whether this is the first IFD. + * * @param first: true if it's the first IFD */ - public void setFirst(boolean first) - { + public void setFirst(boolean first) { _first = first; } - /** Sets flag indicating whether this is the "thumbnail" IFD. + /** + * Sets flag indicating whether this is the "thumbnail" IFD. * The second IFD in the top-level chain is assumed to be * the Thumbnail IFD. + * * @param thumbnail: flag true if this is the thumbnail IFD */ - public void setThumbnail(boolean thumbnail) - { + public void setThumbnail(boolean thumbnail) { _thumbnail = thumbnail; } @@ -408,8 +418,7 @@ public void setThumbnail(boolean thumbnail) * */ protected Property addBitmaskProperty(String name, long value, - String [] labels, boolean rawOutput) - { + String[] labels, boolean rawOutput) { Property prop = null; if (!rawOutput) { List list = new LinkedList<>(); @@ -419,10 +428,11 @@ protected Property addBitmaskProperty(String name, long value, list.add(labels[i]); } } - } - catch (Exception e) { - final String mess = MessageFormat.format(MessageConstants.TIFF_HUL_66.getMessage(), name, Long.valueOf(value)); - _errors.add(new ErrorMessage(JhoveMessages.getMessageInstance(MessageConstants.TIFF_HUL_66.getId(), mess))); + } catch (Exception e) { + final String mess = MessageFormat.format(MessageConstants.TIFF_HUL_66.getMessage(), name, + Long.valueOf(value)); + _errors.add( + new ErrorMessage(JhoveMessages.getMessageInstance(MessageConstants.TIFF_HUL_66.getId(), mess))); } prop = new Property(name, PropertyType.STRING, PropertyArity.LIST, list); @@ -451,16 +461,16 @@ protected Property addBitmaskProperty(String name, long value, * @return property representing an integer value */ protected Property addIntegerProperty(String name, int value, - String [] labels, boolean rawOutput) - { + String[] labels, boolean rawOutput) { Property prop = null; if (!rawOutput) { try { prop = new Property(name, PropertyType.STRING, labels[value]); - } - catch (ArrayIndexOutOfBoundsException aioobe) { - final String mess = MessageFormat.format(MessageConstants.TIFF_HUL_66.getMessage(), name, Long.valueOf(value)); - _errors.add(new ErrorMessage(JhoveMessages.getMessageInstance(MessageConstants.TIFF_HUL_66.getId(), mess))); + } catch (ArrayIndexOutOfBoundsException aioobe) { + final String mess = MessageFormat.format(MessageConstants.TIFF_HUL_66.getMessage(), name, + Long.valueOf(value)); + _errors.add( + new ErrorMessage(JhoveMessages.getMessageInstance(MessageConstants.TIFF_HUL_66.getId(), mess))); } } if (prop == null) { @@ -490,8 +500,7 @@ protected Property addIntegerProperty(String name, int value, */ protected Property addIntegerProperty(String name, int value, String [] labels, int [] index, - boolean rawOutput) - { + boolean rawOutput) { Property prop = null; if (!rawOutput) { int n = -1; @@ -503,10 +512,11 @@ protected Property addIntegerProperty(String name, int value, } if (n > -1) { prop = new Property(name, PropertyType.STRING, labels[n]); - } - else { - final String mess = MessageFormat.format(MessageConstants.TIFF_HUL_66.getMessage(), name, Long.valueOf(value)); - _errors.add(new ErrorMessage(JhoveMessages.getMessageInstance(MessageConstants.TIFF_HUL_66.getId(), mess))); + } else { + final String mess = MessageFormat.format(MessageConstants.TIFF_HUL_66.getMessage(), name, + Long.valueOf(value)); + _errors.add( + new ErrorMessage(JhoveMessages.getMessageInstance(MessageConstants.TIFF_HUL_66.getId(), mess))); } } if (prop == null) { @@ -534,18 +544,18 @@ protected Property addIntegerProperty(String name, int value, */ protected Property addIntegerArrayProperty(String name, int [] value, String [] labels, - boolean rawOutput) - { + boolean rawOutput) { Property prop = null; if (!rawOutput) { String [] s = new String[value.length]; for (int i=0; i 127) { sb.append(byteToHex(c)); - } - else { + } else { sb.append((char) c); } } return sb.toString(); } - /** Reads an array of strings from the TIFF file. + /** + * Reads an array of strings from the TIFF file. * * @param count Number of strings to read * @param value Offset from which to read @@ -675,8 +683,7 @@ protected String readASCII(long count, long value) * @throws IOException */ protected String [] readASCIIArray(long count, long value) - throws IOException - { + throws IOException { _raf.seek(value); int nstrs = 0; @@ -689,20 +696,20 @@ protected String readASCII(long count, long value) if (b == 0) { list.add(strbuf.toString()); strbuf.setLength(0); - } - else { + } else { // Escape characters that aren't ASCII. There really shouldn't // be any, and if there are, we don't know how they're encoded. if (b < 32 || b > 127) { strbuf.append(byteToHex((byte) b)); - } - else { + } else { strbuf.append((char) b); } } } - /* We can't use ArrayList.toArray because that returns an - Object[], not a String[] ... sigh. */ + /* + * We can't use ArrayList.toArray because that returns an + * Object[], not a String[] ... sigh. + */ String [] strs = new String[nstrs]; ListIterator iter = list.listIterator(); for (int i=0; itrue if file is big-endian, * false if little-endian. */ - public boolean isBigEndian() - { + public boolean isBigEndian() { return _bigEndian; } - - /****************************************************************** * PRIVATE CLASS METHODS. ******************************************************************/ /** * Check the tag entry count. + * * @param tag Tag entry value * @param count Tag entry count * @param minCount Tag count * @throws edu.harvard.hul.ois.jhove.module.tiff.TiffException */ protected static void checkCount(int tag, long count, int minCount) - throws TiffException - { + throws TiffException { if (count < minCount) { String mess = MessageFormat.format(MessageConstants.TIFF_HUL_6.getMessage(), tag, Integer.valueOf(minCount), Long.valueOf(count)); @@ -1101,14 +1091,15 @@ protected static void checkCount(int tag, long count, int minCount) /** * Check that the count is compatible with array instanciation. + * * @param tag Tag entry value * @param count Tag entry count */ protected static void checkCountArray(int tag, long count) - throws TiffException - { + throws TiffException { if (count > Integer.MAX_VALUE) { - String mess = MessageFormat.format(MessageConstants.TIFF_HUL_6.getMessage(), Integer.valueOf(tag), Integer.valueOf(Integer.MAX_VALUE), Long.valueOf(count)); + String mess = MessageFormat.format(MessageConstants.TIFF_HUL_6.getMessage(), Integer.valueOf(tag), + Integer.valueOf(Integer.MAX_VALUE), Long.valueOf(count)); JhoveMessage message = JhoveMessages.getMessageInstance(MessageConstants.TIFF_HUL_6.getId(), mess); throw new TiffException(message); } @@ -1116,23 +1107,26 @@ protected static void checkCountArray(int tag, long count) /** * Check the tag entry type. + * * @param tag Tag entry value * @param type Tag entry type * @param expected Tag * @throws edu.harvard.hul.ois.jhove.module.tiff.TiffException */ protected static void checkType(int tag, int type, int expected) - throws TiffException - { - /* Readers are supposed to accept BYTE, SHORT or LONG for any - * unsigned integer field. */ + throws TiffException { + /* + * Readers are supposed to accept BYTE, SHORT or LONG for any + * unsigned integer field. + */ if ((type == BYTE || type == SHORT || type == LONG || type == IFD) && (expected == BYTE || expected == SHORT || expected == LONG || expected == IFD)) { return; // it's OK } if (type != expected) { - String mess = MessageFormat.format(MessageConstants.TIFF_HUL_7.getMessage(), tag, Integer.valueOf(expected), Integer.valueOf(type)); + String mess = MessageFormat.format(MessageConstants.TIFF_HUL_7.getMessage(), tag, Integer.valueOf(expected), + Integer.valueOf(type)); JhoveMessage message = JhoveMessages.getMessageInstance(MessageConstants.TIFF_HUL_7.getId(), mess); throw new TiffException(message); } @@ -1140,6 +1134,7 @@ protected static void checkType(int tag, int type, int expected) /** * Check the tag entry type. + * * @param tag Tag entry value * @param type Tag entry type * @param type1 Tag type @@ -1147,8 +1142,7 @@ protected static void checkType(int tag, int type, int expected) * @throws edu.harvard.hul.ois.jhove.module.tiff.TiffException */ protected static void checkType(int tag, int type, int type1, int type2) - throws TiffException - { + throws TiffException { if (type != type1 && type != type2) { String mess = MessageFormat.format(MessageConstants.TIFF_HUL_8.getMessage(), tag, Integer.valueOf(type1), Integer.valueOf(type2), Integer.valueOf(type)); @@ -1157,8 +1151,7 @@ protected static void checkType(int tag, int type, int type1, int type2) } } - protected static Rational average(Rational r1, Rational r2) - { + protected static Rational average(Rational r1, Rational r2) { long d1 = r1.getDenominator(); long d2 = r2.getDenominator(); @@ -1171,7 +1164,6 @@ protected static Rational average(Rational r1, Rational r2) f1.getDenominator()); } - /****************************************************************** * PRIVATE INSTANCE METHODS. ******************************************************************/ @@ -1186,8 +1178,7 @@ private String byteToHex(byte c) { int b = nibbles[i]; if (b >= 10) { b += (int) 'A' - 10; - } - else { + } else { b += (int) '0'; } retval.append((char) b);