From e529026aaf11e181a8a61957b892ce6ac98161be Mon Sep 17 00:00:00 2001 From: Desmond Sisson Date: Tue, 30 Nov 2021 14:41:32 -0800 Subject: [PATCH 1/5] Lowercase the filename comparison in getCodec() --- .../hadoop/io/compress/CompressionCodecFactory.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java index 1fa7fd4b52be5..fb812829e82ae 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java @@ -29,6 +29,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.hadoop.util.StringUtils.toLowerCase; + /** * A factory that will find the correct codec for a given filename. */ @@ -67,10 +69,10 @@ private void addCodec(CompressionCodec codec) { codecsByClassName.put(codec.getClass().getCanonicalName(), codec); String codecName = codec.getClass().getSimpleName(); - codecsByName.put(StringUtils.toLowerCase(codecName), codec); + codecsByName.put(toLowerCase(codecName), codec); if (codecName.endsWith("Codec")) { codecName = codecName.substring(0, codecName.length() - "Codec".length()); - codecsByName.put(StringUtils.toLowerCase(codecName), codec); + codecsByName.put(toLowerCase(codecName), codec); } } @@ -199,7 +201,7 @@ public CompressionCodec getCodec(Path file) { if (codecs != null) { String filename = file.getName(); String reversedFilename = - new StringBuilder(filename).reverse().toString(); + toLowerCase(new StringBuilder(filename).reverse().toString()); SortedMap subMap = codecs.headMap(reversedFilename); if (!subMap.isEmpty()) { @@ -247,7 +249,7 @@ public CompressionCodec getCodecByName(String codecName) { if (codec == null) { // trying to get the codec by name in case the name was specified // instead a class - codec = codecsByName.get(StringUtils.toLowerCase(codecName)); + codec = codecsByName.get(toLowerCase(codecName)); } return codec; } From e3e496732b6349b73e0f7ae7b508eb99c0e9c84d Mon Sep 17 00:00:00 2001 From: Desmond Sisson Date: Tue, 30 Nov 2021 15:39:08 -0800 Subject: [PATCH 2/5] New tests against capitalized filenames, added rich error messages over NPE --- .../hadoop/io/compress/TestCodecFactory.java | 29 +++++++++++++++++-- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodecFactory.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodecFactory.java index edab634a0b877..20af8babb1799 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodecFactory.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodecFactory.java @@ -29,6 +29,7 @@ import org.junit.Test; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; import static org.junit.Assert.fail; public class TestCodecFactory { @@ -137,9 +138,15 @@ private static CompressionCodecFactory setClasses(Class[] classes) { private static void checkCodec(String msg, Class expected, CompressionCodec actual) { - assertEquals(msg + " unexpected codec found", - expected.getName(), - actual.getClass().getName()); + if (expected == null) { + assertNull(msg + " expected null", actual); + } else if (actual == null) { + fail(msg + " result was null"); + } else { + assertEquals(msg + " unexpected codec found", + expected.getName(), + actual.getClass().getName()); + } } @Test @@ -153,6 +160,8 @@ public void testFinding() { codec = factory.getCodec(new Path("/tmp/foo.gz")); checkCodec("default factory for .gz", GzipCodec.class, codec); + codec = factory.getCodec(new Path("/tmp/foo.GZ")); + checkCodec("default factory for .GZ", GzipCodec.class, codec); codec = factory.getCodecByClassName(GzipCodec.class.getCanonicalName()); checkCodec("default factory for gzip codec", GzipCodec.class, codec); codec = factory.getCodecByName("gzip"); @@ -168,6 +177,8 @@ public void testFinding() { codec = factory.getCodec(new Path("/tmp/foo.bz2")); checkCodec("default factory for .bz2", BZip2Codec.class, codec); + codec = factory.getCodec(new Path("/tmp/foo.BZ2")); + checkCodec("default factory for .BZ2", BZip2Codec.class, codec); codec = factory.getCodecByClassName(BZip2Codec.class.getCanonicalName()); checkCodec("default factory for bzip2 codec", BZip2Codec.class, codec); codec = factory.getCodecByName("bzip2"); @@ -221,16 +232,22 @@ public void testFinding() { FooBarCodec.class}); codec = factory.getCodec(new Path("/tmp/.foo.bar.gz")); checkCodec("full factory gz codec", GzipCodec.class, codec); + codec = factory.getCodec(new Path("/tmp/.foo.bar.GZ")); + checkCodec("full factory GZ codec", GzipCodec.class, codec); codec = factory.getCodecByClassName(GzipCodec.class.getCanonicalName()); checkCodec("full codec gz codec", GzipCodec.class, codec); codec = factory.getCodec(new Path("/tmp/foo.bz2")); checkCodec("full factory for .bz2", BZip2Codec.class, codec); + codec = factory.getCodec(new Path("/tmp/foo.BZ2")); + checkCodec("full factory for .BZ2", BZip2Codec.class, codec); codec = factory.getCodecByClassName(BZip2Codec.class.getCanonicalName()); checkCodec("full codec bzip2 codec", BZip2Codec.class, codec); codec = factory.getCodec(new Path("/tmp/foo.bar")); checkCodec("full factory bar codec", BarCodec.class, codec); + codec = factory.getCodec(new Path("/tmp/foo.BAR")); + checkCodec("full factory BAR codec", BarCodec.class, codec); codec = factory.getCodecByClassName(BarCodec.class.getCanonicalName()); checkCodec("full factory bar codec", BarCodec.class, codec); codec = factory.getCodecByName("bar"); @@ -240,6 +257,8 @@ public void testFinding() { codec = factory.getCodec(new Path("/tmp/foo/baz.foo.bar")); checkCodec("full factory foo bar codec", FooBarCodec.class, codec); + codec = factory.getCodec(new Path("/tmp/foo/baz.FOO.bar")); + checkCodec("full factory FOO bar codec", FooBarCodec.class, codec); codec = factory.getCodecByClassName(FooBarCodec.class.getCanonicalName()); checkCodec("full factory foo bar codec", FooBarCodec.class, codec); codec = factory.getCodecByName("foobar"); @@ -249,6 +268,8 @@ public void testFinding() { codec = factory.getCodec(new Path("/tmp/foo.foo")); checkCodec("full factory foo codec", FooCodec.class, codec); + codec = factory.getCodec(new Path("/tmp/FOO.FOO")); + checkCodec("full factory FOO codec", FooCodec.class, codec); codec = factory.getCodecByClassName(FooCodec.class.getCanonicalName()); checkCodec("full factory foo codec", FooCodec.class, codec); codec = factory.getCodecByName("foo"); @@ -259,6 +280,8 @@ public void testFinding() { factory = setClasses(new Class[]{NewGzipCodec.class}); codec = factory.getCodec(new Path("/tmp/foo.gz")); checkCodec("overridden factory for .gz", NewGzipCodec.class, codec); + codec = factory.getCodec(new Path("/tmp/foo.GZ")); + checkCodec("overridden factory for .GZ", NewGzipCodec.class, codec); codec = factory.getCodecByClassName(NewGzipCodec.class.getCanonicalName()); checkCodec("overridden factory for gzip codec", NewGzipCodec.class, codec); From 7e2c848bd3fffd4af372141db9cfc10544a3ba16 Mon Sep 17 00:00:00 2001 From: Desmond Sisson Date: Tue, 30 Nov 2021 16:00:07 -0800 Subject: [PATCH 3/5] Fix error message for expected null --- .../java/org/apache/hadoop/io/compress/TestCodecFactory.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodecFactory.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodecFactory.java index 20af8babb1799..42df50d70edeb 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodecFactory.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodecFactory.java @@ -139,7 +139,7 @@ private static CompressionCodecFactory setClasses(Class[] classes) { private static void checkCodec(String msg, Class expected, CompressionCodec actual) { if (expected == null) { - assertNull(msg + " expected null", actual); + assertNull(msg, actual); } else if (actual == null) { fail(msg + " result was null"); } else { From 917f0b3ae113249e011da56b8cd94f2dc514c4f1 Mon Sep 17 00:00:00 2001 From: Desmond Sisson Date: Tue, 30 Nov 2021 21:42:17 -0800 Subject: [PATCH 4/5] Remove StringUtils import and separate lowercase filename --- .../io/compress/CompressionCodecFactory.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java index fb812829e82ae..6dbf0e6721641 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java @@ -29,8 +29,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.hadoop.util.StringUtils.toLowerCase; - /** * A factory that will find the correct codec for a given filename. */ @@ -69,10 +67,10 @@ private void addCodec(CompressionCodec codec) { codecsByClassName.put(codec.getClass().getCanonicalName(), codec); String codecName = codec.getClass().getSimpleName(); - codecsByName.put(toLowerCase(codecName), codec); + codecsByName.put(StringUtils.toLowerCase(codecName), codec); if (codecName.endsWith("Codec")) { codecName = codecName.substring(0, codecName.length() - "Codec".length()); - codecsByName.put(toLowerCase(codecName), codec); + codecsByName.put(StringUtils.toLowerCase(codecName), codec); } } @@ -201,12 +199,14 @@ public CompressionCodec getCodec(Path file) { if (codecs != null) { String filename = file.getName(); String reversedFilename = - toLowerCase(new StringBuilder(filename).reverse().toString()); + new StringBuilder(filename).reverse().toString(); + String lowerReversedFilename = + StringUtils.toLowerCase(reversedFilename); SortedMap subMap = - codecs.headMap(reversedFilename); + codecs.headMap(lowerReversedFilename); if (!subMap.isEmpty()) { String potentialSuffix = subMap.lastKey(); - if (reversedFilename.startsWith(potentialSuffix)) { + if (lowerReversedFilename.startsWith(potentialSuffix)) { result = codecs.get(potentialSuffix); } } @@ -249,7 +249,7 @@ public CompressionCodec getCodecByName(String codecName) { if (codec == null) { // trying to get the codec by name in case the name was specified // instead a class - codec = codecsByName.get(toLowerCase(codecName)); + codec = codecsByName.get(StringUtils.toLowerCase(codecName)); } return codec; } From 225c7dfbc3c5dcc51889b1b8c7a91bff9fc30aa0 Mon Sep 17 00:00:00 2001 From: Desmond Sisson Date: Wed, 1 Dec 2021 12:07:39 -0800 Subject: [PATCH 5/5] Fix all checkstyle issues in changed files. --- .../io/compress/CompressionCodecFactory.java | 136 +++++++++--------- .../hadoop/io/compress/TestCodecFactory.java | 20 +-- 2 files changed, 81 insertions(+), 75 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java index 6dbf0e6721641..a195ed4e77fd4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java @@ -40,7 +40,7 @@ public class CompressionCodecFactory { LoggerFactory.getLogger(CompressionCodecFactory.class.getName()); private static final ServiceLoader CODEC_PROVIDERS = - ServiceLoader.load(CompressionCodec.class); + ServiceLoader.load(CompressionCodec.class); /** * A map from the reversed filename suffixes to the codecs. @@ -49,15 +49,15 @@ public class CompressionCodecFactory { */ private SortedMap codecs = null; - /** - * A map from the reversed filename suffixes to the codecs. - * This is probably overkill, because the maps should be small, but it - * automatically supports finding the longest matching suffix. - */ - private Map codecsByName = null; + /** + * A map from the reversed filename suffixes to the codecs. + * This is probably overkill, because the maps should be small, but it + * automatically supports finding the longest matching suffix. + */ + private Map codecsByName = null; /** - * A map from class names to the codecs + * A map from class names to the codecs. */ private HashMap codecsByClassName = null; @@ -80,8 +80,8 @@ private void addCodec(CompressionCodec codec) { @Override public String toString() { StringBuilder buf = new StringBuilder(); - Iterator> itr = - codecs.entrySet().iterator(); + Iterator> itr = + codecs.entrySet().iterator(); buf.append("{ "); if (itr.hasNext()) { Map.Entry entry = itr.next(); @@ -110,8 +110,8 @@ public String toString() { */ public static List> getCodecClasses( Configuration conf) { - List> result - = new ArrayList>(); + List> result = + new ArrayList>(); // Add codec classes discovered via service loading synchronized (CODEC_PROVIDERS) { // CODEC_PROVIDERS is a lazy collection. Synchronize so it is @@ -202,8 +202,8 @@ public CompressionCodec getCodec(Path file) { new StringBuilder(filename).reverse().toString(); String lowerReversedFilename = StringUtils.toLowerCase(reversedFilename); - SortedMap subMap = - codecs.headMap(lowerReversedFilename); + SortedMap subMap = + codecs.headMap(lowerReversedFilename); if (!subMap.isEmpty()) { String potentialSuffix = subMap.lastKey(); if (lowerReversedFilename.startsWith(potentialSuffix)) { @@ -226,57 +226,57 @@ public CompressionCodec getCodecByClassName(String classname) { return codecsByClassName.get(classname); } - /** - * Find the relevant compression codec for the codec's canonical class name - * or by codec alias. - *

- * Codec aliases are case insensitive. - *

- * The code alias is the short class name (without the package name). - * If the short class name ends with 'Codec', then there are two aliases for - * the codec, the complete short class name and the short class name without - * the 'Codec' ending. For example for the 'GzipCodec' codec class name the - * alias are 'gzip' and 'gzipcodec'. - * - * @param codecName the canonical class name of the codec - * @return the codec object - */ - public CompressionCodec getCodecByName(String codecName) { - if (codecsByClassName == null) { - return null; - } - CompressionCodec codec = getCodecByClassName(codecName); - if (codec == null) { - // trying to get the codec by name in case the name was specified - // instead a class - codec = codecsByName.get(StringUtils.toLowerCase(codecName)); - } - return codec; + /** + * Find the relevant compression codec for the codec's canonical class name + * or by codec alias. + *

+ * Codec aliases are case insensitive. + *

+ * The code alias is the short class name (without the package name). + * If the short class name ends with 'Codec', then there are two aliases for + * the codec, the complete short class name and the short class name without + * the 'Codec' ending. For example for the 'GzipCodec' codec class name the + * alias are 'gzip' and 'gzipcodec'. + * + * @param codecName the canonical class name of the codec + * @return the codec object + */ + public CompressionCodec getCodecByName(String codecName) { + if (codecsByClassName == null) { + return null; } + CompressionCodec codec = getCodecByClassName(codecName); + if (codec == null) { + // trying to get the codec by name in case the name was specified + // instead a class + codec = codecsByName.get(StringUtils.toLowerCase(codecName)); + } + return codec; + } - /** - * Find the relevant compression codec for the codec's canonical class name - * or by codec alias and returns its implemetation class. - *

- * Codec aliases are case insensitive. - *

- * The code alias is the short class name (without the package name). - * If the short class name ends with 'Codec', then there are two aliases for - * the codec, the complete short class name and the short class name without - * the 'Codec' ending. For example for the 'GzipCodec' codec class name the - * alias are 'gzip' and 'gzipcodec'. - * - * @param codecName the canonical class name of the codec - * @return the codec class - */ - public Class getCodecClassByName( - String codecName) { - CompressionCodec codec = getCodecByName(codecName); - if (codec == null) { - return null; - } - return codec.getClass(); + /** + * Find the relevant compression codec for the codec's canonical class name + * or by codec alias and returns its implemetation class. + *

+ * Codec aliases are case insensitive. + *

+ * The code alias is the short class name (without the package name). + * If the short class name ends with 'Codec', then there are two aliases for + * the codec, the complete short class name and the short class name without + * the 'Codec' ending. For example for the 'GzipCodec' codec class name the + * alias are 'gzip' and 'gzipcodec'. + * + * @param codecName the canonical class name of the codec + * @return the codec class + */ + public Class getCodecClassByName( + String codecName) { + CompressionCodec codec = getCodecByName(codecName); + if (codec == null) { + return null; } + return codec.getClass(); + } /** * Removes a suffix from a filename, if it has it. @@ -325,8 +325,12 @@ public static void main(String[] args) throws Exception { len = in.read(buffer); } } finally { - if(out != null) { out.close(); } - if(in != null) { in.close(); } + if(out != null) { + out.close(); + } + if(in != null) { + in.close(); + } } } else { CompressionInputStream in = null; @@ -340,7 +344,9 @@ public static void main(String[] args) throws Exception { len = in.read(buffer); } } finally { - if(in != null) { in.close(); } + if(in != null) { + in.close(); + } } } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodecFactory.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodecFactory.java index 42df50d70edeb..7461ea36f59a3 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodecFactory.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodecFactory.java @@ -46,8 +46,8 @@ public Configuration getConf() { } @Override - public CompressionOutputStream createOutputStream(OutputStream out) - throws IOException { + public CompressionOutputStream createOutputStream(OutputStream out) + throws IOException { return null; } @@ -63,21 +63,21 @@ public Compressor createCompressor() { @Override public CompressionInputStream createInputStream(InputStream in, - Decompressor decompressor) - throws IOException { + Decompressor decompressor) + throws IOException { return null; } @Override - public CompressionInputStream createInputStream(InputStream in) - throws IOException { + public CompressionInputStream createInputStream(InputStream in) + throws IOException { return null; } @Override public CompressionOutputStream createOutputStream(OutputStream out, - Compressor compressor) - throws IOException { + Compressor compressor) + throws IOException { return null; } @@ -126,7 +126,7 @@ public String getDefaultExtension() { } /** - * Returns a factory for a given set of codecs + * Returns a factory for a given set of codecs. * @param classes the codec classes to include * @return a new factory */ @@ -152,7 +152,7 @@ private static void checkCodec(String msg, @Test public void testFinding() { CompressionCodecFactory factory = - new CompressionCodecFactory(new Configuration()); + new CompressionCodecFactory(new Configuration()); CompressionCodec codec = factory.getCodec(new Path("/tmp/foo.bar")); assertEquals("default factory foo codec", null, codec); codec = factory.getCodecByClassName(BarCodec.class.getCanonicalName());