diff --git a/hudi-hadoop-common/src/main/java/org/apache/hudi/storage/hadoop/HoodieHadoopStorage.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/storage/hadoop/HoodieHadoopStorage.java index c125d8e24900e..6d5549c7ef988 100644 --- a/hudi-hadoop-common/src/main/java/org/apache/hudi/storage/hadoop/HoodieHadoopStorage.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/storage/hadoop/HoodieHadoopStorage.java @@ -299,6 +299,8 @@ public boolean createNewFile(StoragePath path) throws IOException { @Override public void close() throws IOException { - fs.close(); + // Don't close the wrapped `FileSystem` object. + // This will end up closing it for every thread since it + // could be cached across JVM. We don't own that object anyway. } } diff --git a/hudi-hadoop-common/src/test/java/org/apache/hudi/storage/hadoop/TestHoodieHadoopStorage.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/storage/hadoop/TestHoodieHadoopStorage.java index e34f858b85909..8d0f64d0c6bd2 100644 --- a/hudi-hadoop-common/src/test/java/org/apache/hudi/storage/hadoop/TestHoodieHadoopStorage.java +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/storage/hadoop/TestHoodieHadoopStorage.java @@ -25,6 +25,11 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; +import org.junit.jupiter.api.Test; + +import java.io.IOException; + +import static org.junit.jupiter.api.Assertions.assertSame; /** * Tests {@link HoodieHadoopStorage}. @@ -49,4 +54,20 @@ protected Object getConf() { conf.set(CONF_KEY, CONF_VALUE); return conf; } + + @Test + void testClose() throws IOException { + Configuration conf = new Configuration(); + String path = getTempDir(); + FileSystem fileSystem = HadoopFSUtils.getFs(path, conf, true); + HoodieStorage storage = new HoodieHadoopStorage(fileSystem); + storage.close(); + // This validates that HoodieHadoopStorage#close does not close the underlying FileSystem + // object. If the underlying FileSystem object is closed, the cache of the object based on + // the path is closed and removed, which causes problems if it is reused elsewhere. Fetching + // the FileSystem object on the same path again in this case returns a different object, + // which can be caught here. + assertSame(fileSystem, storage.getFileSystem()); + assertSame(fileSystem, HadoopFSUtils.getFs(getTempDir(), conf, true)); + } }