diff --git a/build.gradle b/build.gradle
index 6e888eebcb92..b64810e5a450 100644
--- a/build.gradle
+++ b/build.gradle
@@ -756,6 +756,7 @@ project(':iceberg-hive-metastore') {
       exclude group: 'com.tdunning', module: 'json'
       exclude group: 'javax.transaction', module: 'transaction-api'
       exclude group: 'com.zaxxer', module: 'HikariCP'
+      exclude group: 'org.apache.hadoop', module: 'hadoop-yarn-server-resourcemanager'
     }
 
     // By default, hive-exec is a fat/uber jar and it exports a guava library
@@ -772,6 +773,7 @@ project(':iceberg-hive-metastore') {
       exclude group: 'org.apache.calcite'
       exclude group: 'org.apache.calcite.avatica'
       exclude group: 'com.google.code.findbugs', module: 'jsr305'
+      exclude group: 'org.apache.hadoop', module: 'hadoop-yarn-server-resourcemanager'
     }
 
     testImplementation(libs.hive2.metastore) {
@@ -788,6 +790,10 @@ project(':iceberg-hive-metastore') {
       exclude group: 'com.tdunning', module: 'json'
       exclude group: 'javax.transaction', module: 'transaction-api'
       exclude group: 'com.zaxxer', module: 'HikariCP'
+      exclude group: 'org.apache.hadoop', module: 'hadoop-yarn-server-common'
+      exclude group: 'org.apache.hadoop', module: 'hadoop-yarn-server-applicationhistoryservice'
+      exclude group: 'org.apache.hadoop', module: 'hadoop-yarn-server-resourcemanager'
+      exclude group: 'org.apache.hadoop', module: 'hadoop-yarn-server-web-proxy'
     }
 
     compileOnly(libs.hadoop3.client) {
diff --git a/core/src/main/java/org/apache/iceberg/hadoop/HadoopFileIO.java b/core/src/main/java/org/apache/iceberg/hadoop/HadoopFileIO.java
index a4ac5e2ff67a..b9fd4903b1ef 100644
--- a/core/src/main/java/org/apache/iceberg/hadoop/HadoopFileIO.java
+++ b/core/src/main/java/org/apache/iceberg/hadoop/HadoopFileIO.java
@@ -18,18 +18,27 @@
  */
 package org.apache.iceberg.hadoop;
 
+import java.io.Closeable;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.UncheckedIOException;
+import java.util.AbstractMap;
+import java.util.Collection;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Future;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.function.Function;
+import java.util.stream.Collectors;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.io.wrappedio.WrappedIO;
 import org.apache.iceberg.exceptions.RuntimeIOException;
 import org.apache.iceberg.io.BulkDeletionFailureException;
 import org.apache.iceberg.io.DelegateFileIO;
@@ -37,6 +46,11 @@
 import org.apache.iceberg.io.InputFile;
 import org.apache.iceberg.io.OutputFile;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+import org.apache.iceberg.relocated.com.google.common.collect.Multimaps;
+import org.apache.iceberg.relocated.com.google.common.collect.SetMultimap;
+import org.apache.iceberg.relocated.com.google.common.collect.Sets;
 import org.apache.iceberg.relocated.com.google.common.collect.Streams;
 import org.apache.iceberg.util.SerializableMap;
 import org.apache.iceberg.util.SerializableSupplier;
@@ -173,10 +187,36 @@ public void deletePrefix(String prefix) {
     }
   }
 
+  /**
+   * Delete files.
+   *
+   * @param pathsToDelete The paths to delete
+   * @throws BulkDeletionFailureException failure to delete files.
+   */
   @Override
-  public void deleteFiles(Iterable<String> pathsToDelete) throws BulkDeletionFailureException {
+  public void deleteFiles(final Iterable<String> pathsToDelete)
+      throws BulkDeletionFailureException {
+    Iterable<String> targetPaths = pathsToDelete;
+    try {
+      final List<Map.Entry<Path, String>> pathsNotDeleted = hadoopBulkDelete(targetPaths);
+      if (pathsNotDeleted.isEmpty()) {
+        return;
+      }
+      // one or more files were not deleted.
+      targetPaths =
+          pathsNotDeleted.stream()
+              .map(
+                  entry -> {
+                    LOG.info("Failed to delete {} cause: {}", entry.getKey(), entry.getValue());
+                    return entry.getKey().toString();
+                  })
+              .collect(Collectors.toList());
+    } catch (RuntimeException e) {
+      LOG.warn("Failed to use bulk delete; falling back to single delete calls", e);
+    }
+
     AtomicInteger failureCount = new AtomicInteger(0);
-    Tasks.foreach(pathsToDelete)
+    Tasks.foreach(targetPaths)
         .executeWith(executorService())
         .retry(DELETE_RETRY_ATTEMPTS)
         .stopRetryOn(FileNotFoundException.class)
@@ -187,12 +227,108 @@ public void deleteFiles(Iterable<String> pathsToDelete) throws BulkDeletionFailu
               failureCount.incrementAndGet();
             })
         .run(this::deleteFile);
-
     if (failureCount.get() != 0) {
       throw new BulkDeletionFailureException(failureCount.get());
     }
   }
 
+  /**
+   * Delete files through the Hadoop Bulk Delete API.
+   *
+   * @param pathnames paths to delete.
+   * @return All paths which could not be deleted, and the reason
+   * @throws UncheckedIOException if an IOE was raised in the invoked methods.
+   * @throws RuntimeException if interrupted while waiting for deletions to complete.
+   */
+  private List<Map.Entry<Path, String>> hadoopBulkDelete(Iterable<String> pathnames) {
+
+    LOG.debug("Using bulk delete operation to delete files");
+
+    SetMultimap<Path, Path> fsMap = Multimaps.newSetMultimap(Maps.newHashMap(), Sets::newHashSet);
+    Map<Path, Integer> fsPageSizeMap = Maps.newHashMap();
+    List<Map.Entry<Path, String>> filesNotDeleted = Lists.newArrayList();
+    List<Future<List<Map.Entry<Path, String>>>> deletionTasks = Lists.newArrayList();
+    final Path rootPath = new Path("/");
+    final Configuration conf = hadoopConf.get();
+    for (String name : pathnames) {
+      Path target = new Path(name);
+      LOG.debug("Deleting '{}' mapped to path '{}'", name, target);
+      final FileSystem fs;
+      try {
+        fs = Util.getFs(target, conf);
+      } catch (Exception e) {
+        LOG.warn("Failed to load filesystem for path: {}", target, e);
+        filesNotDeleted.add(new AbstractMap.SimpleImmutableEntry<>(target, e.toString()));
+        continue;
+      }
+      Path fsRoot = fs.makeQualified(rootPath);
+      int pageSize;
+      if (!fsPageSizeMap.containsKey(fsRoot)) {
+        pageSize = WrappedIO.bulkDelete_pageSize(fs, rootPath);
+        fsPageSizeMap.put(fsRoot, pageSize);
+      } else {
+        pageSize = fsPageSizeMap.get(fsRoot);
+      }
+
+      Set<Path> pathsForFilesystem = fsMap.get(fsRoot);
+      Path targetPath = fs.makeQualified(target);
+      pathsForFilesystem.add(targetPath);
+
+      if (pathsForFilesystem.size() == pageSize) {
+        Collection<Path> paths = Sets.newHashSet(pathsForFilesystem);
+        deletionTasks.add(executorService().submit(() -> deleteBatch(fs, fsRoot, paths)));
+        fsMap.removeAll(fsRoot);
+      }
+    }
+
+    for (Map.Entry<Path, Collection<Path>> pathsToDeleteByFileSystem : fsMap.asMap().entrySet()) {
+      Path fsRoot = pathsToDeleteByFileSystem.getKey();
+      deletionTasks.add(
+          executorService()
+              .submit(
+                  () ->
+                      deleteBatch(
+                          Util.getFs(fsRoot, conf), fsRoot, pathsToDeleteByFileSystem.getValue())));
+    }
+
+    LOG.debug("Waiting for {} deletion tasks to complete", deletionTasks.size());
+
+    for (Future<List<Map.Entry<Path, String>>> deletionTask : deletionTasks) {
+      try {
+        List<Map.Entry<Path, String>> failedDeletions = deletionTask.get();
+        failedDeletions.forEach(
+            entry -> {
+              LOG.debug("Failed to delete object at path {}: {}", entry.getKey(), entry.getValue());
+              filesNotDeleted.add(entry);
+            });
+      } catch (ExecutionException e) {
+        LOG.warn("Exception during batch deletion", e.getCause());
+        // this failure is only logged; the paths of the failed batch are not added to filesNotDeleted
+      } catch (InterruptedException e) {
+        Thread.currentThread().interrupt();
+        deletionTasks.stream().filter(task -> !task.isDone()).forEach(task -> task.cancel(true));
+        throw new RuntimeException("Interrupted when waiting for deletions to complete", e);
+      }
+    }
+    return filesNotDeleted;
+  }
+
+  /**
+   * Blocking batch delete.
+   *
+   * @param fs filesystem.
+   * @param fsRoot root of the filesystem
+   * @param paths paths to delete.
+   * @return Paths which couldn't be deleted and the error messages
+   * @throws UncheckedIOException IO problem
+   */
+  private List<Map.Entry<Path, String>> deleteBatch(
+      FileSystem fs, final Path fsRoot, Collection<Path> paths) {
+
+    LOG.debug("Deleting batch of {} files under {}", paths.size(), fsRoot);
+    return WrappedIO.bulkDelete_delete(fs, fsRoot, paths);
+  }
+
   private int deleteThreads() {
     int defaultValue = Runtime.getRuntime().availableProcessors() * DEFAULT_DELETE_CORE_MULTIPLE;
     return conf().getInt(DELETE_FILE_PARALLELISM, defaultValue);
@@ -213,10 +349,11 @@ private ExecutorService executorService() {
   /**
    * This class is a simple adaptor to allow for using Hadoop's RemoteIterator as an Iterator.
+   * Forwards {@link #close()} to the delegate if it is Closeable.
    *
    * @param <E> element type
    */
-  private static class AdaptingIterator<E> implements Iterator<E>, RemoteIterator<E> {
+  private static class AdaptingIterator<E> implements Iterator<E>, RemoteIterator<E>, Closeable {
     private final RemoteIterator<E> delegate;
 
     AdaptingIterator(RemoteIterator<E> delegate) {
@@ -240,5 +377,17 @@ public E next() {
         throw new UncheckedIOException(e);
       }
     }
+
+    @Override
+    public void close() throws IOException {
+      if (delegate instanceof Closeable) {
+        ((Closeable) delegate).close();
+      }
+    }
+
+    @Override
+    public String toString() {
+      return delegate.toString();
+    }
   }
 }
diff --git a/core/src/test/java/org/apache/iceberg/hadoop/TestHadoopFileIO.java b/core/src/test/java/org/apache/iceberg/hadoop/TestHadoopFileIO.java
index 238c18c8cf03..413378ed27f0 100644
--- a/core/src/test/java/org/apache/iceberg/hadoop/TestHadoopFileIO.java
+++ b/core/src/test/java/org/apache/iceberg/hadoop/TestHadoopFileIO.java
@@ -25,19 +25,21 @@
 import java.io.IOException;
 import java.io.UncheckedIOException;
 import java.nio.file.Files;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Random;
 import java.util.UUID;
-import java.util.Vector;
 import java.util.stream.Collectors;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.iceberg.TestHelpers;
 import org.apache.iceberg.common.DynMethods;
+import org.apache.iceberg.exceptions.RuntimeIOException;
 import org.apache.iceberg.io.BulkDeletionFailureException;
 import org.apache.iceberg.io.FileIO;
 import org.apache.iceberg.io.FileIOParser;
+import org.apache.iceberg.io.FileInfo;
 import org.apache.iceberg.io.ResolvingFileIO;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
 import org.apache.iceberg.relocated.com.google.common.collect.Lists;
@@ -82,20 +84,30 @@ public void testListPrefix() {
             });
 
     long totalFiles = scaleSizes.stream().mapToLong(Integer::longValue).sum();
-    assertThat(Streams.stream(hadoopFileIO.listPrefix(parent.toUri().toString())).count())
+    final String parentString = parent.toUri().toString();
+    final List<FileInfo> files =
+        Streams.stream(hadoopFileIO.listPrefix(parentString)).collect(Collectors.toList());
+    assertThat(files.size())
+        .describedAs("Files found under %s", parentString)
         .isEqualTo(totalFiles);
+
+    final Iterator<String> locations = files.stream().map(FileInfo::location).iterator();
+    hadoopFileIO.deleteFiles(() -> locations);
+
+    assertThat(Streams.stream(hadoopFileIO.listPrefix(parentString)).count())
+        .describedAs("Files found under %s after bulk delete", parentString)
+        .isEqualTo(0);
   }
 
   @Test
   public void testFileExists() throws IOException {
     Path parent = new Path(tempDir.toURI());
-    Path randomFilePath = new Path(parent, "random-file-" + UUID.randomUUID());
-    fs.createNewFile(randomFilePath);
+    Path randomFilePath = file(new Path(parent, "random-file-" + UUID.randomUUID()));
 
     // check existence of the created file
-    assertThat(hadoopFileIO.newInputFile(randomFilePath.toUri().toString()).exists()).isTrue();
+    assertPathExists(randomFilePath);
 
     fs.delete(randomFilePath, false);
-    assertThat(hadoopFileIO.newInputFile(randomFilePath.toUri().toString()).exists()).isFalse();
+    assertPathDoesNotExist(randomFilePath);
   }
 
   @Test
@@ -132,17 +144,82 @@ public void testDeleteFiles() {
     List<Path> filesCreated = createRandomFiles(parent, 10);
     hadoopFileIO.deleteFiles(
         filesCreated.stream().map(Path::toString).collect(Collectors.toList()));
-    filesCreated.forEach(
-        file -> assertThat(hadoopFileIO.newInputFile(file.toString()).exists()).isFalse());
+    filesCreated.forEach(this::assertPathDoesNotExist);
   }
 
   @Test
   public void testDeleteFilesErrorHandling() {
+    Path parent = new Path(tempDir.toURI());
     List<String> filesCreated =
         random.ints(2).mapToObj(x -> "fakefsnotreal://file-" + x).collect(Collectors.toList());
+    filesCreated.add(new Path(parent, "file-not-exist").toUri().toString());
+    Path file = file(new Path(parent, "file-which-exists-" + UUID.randomUUID()));
+    filesCreated.add(file.toUri().toString());
+
     assertThatThrownBy(() -> hadoopFileIO.deleteFiles(filesCreated))
+        .describedAs("Exception raised by deleteFiles()")
         .isInstanceOf(BulkDeletionFailureException.class)
-        .hasMessage("Failed to delete 2 files");
+        .hasMessage("Failed to delete 2 files")
+        .matches(
+            (e) -> ((BulkDeletionFailureException) e).numberFailedObjects() == 2,
+            "Wrong number of failures");
+    assertPathDoesNotExist(file);
+  }
+
+  @Test
+  public void testBulkDeleteEmptyList() {
+    hadoopFileIO.deleteFiles(Lists.newArrayList());
+  }
+
+  @Test
+  public void testBulkDeleteEmptyDirectory() throws IOException {
+    Path parent = new Path(tempDir.toURI());
+    final Path dir = new Path(parent, "dir-" + UUID.randomUUID());
+    fs.mkdirs(dir);
+    final Path file = file(new Path(parent, "file-" + UUID.randomUUID()));
+    List<String> paths = Lists.newArrayList(dir.toUri().toString(), file.toUri().toString());
+    hadoopFileIO.deleteFiles(paths);
+    assertPathDoesNotExist(file);
+    assertPathDoesNotExist(dir);
+  }
+
+  @Test
+  public void testBulkDeleteNonEmptyDirectory() {
+    Path parent = new Path(tempDir.toURI());
+    final Path dir = new Path(parent, "dir-" + UUID.randomUUID());
+    final Path file = file(new Path(parent, "file-" + UUID.randomUUID()));
+    final Path child = file(new Path(dir, "file-" + UUID.randomUUID()));
+    List<String> paths = Lists.newArrayList(dir.toUri().toString(), file.toUri().toString());
+    assertThatThrownBy(() -> hadoopFileIO.deleteFiles(paths))
+        .describedAs("Exception raised by deleteFiles()")
+        .isInstanceOf(BulkDeletionFailureException.class)
+        .hasMessageContaining("Failed")
+        .matches(
+            e -> ((BulkDeletionFailureException) e).numberFailedObjects() == 1,
+            "Wrong number of failures");
+    assertPathDoesNotExist(file);
+    assertPathExists(child);
+  }
+
+  @Test
+  public void testDeleteEmptyDirectory() throws IOException {
+    Path parent = new Path(tempDir.toURI());
+    final Path dir = new Path(parent, "dir-" + UUID.randomUUID());
+    fs.mkdirs(dir);
+    hadoopFileIO.deleteFile(dir.toUri().toString());
+    assertPathDoesNotExist(dir);
+  }
+
+  @Test
+  public void testDeleteNonEmptyDirectory() {
+    Path parent = new Path(tempDir.toURI());
+    final Path dir = new Path(parent, "dir-" + UUID.randomUUID());
+    final Path file = file(new Path(dir, "file"));
+    assertThatThrownBy(() -> hadoopFileIO.deleteFile(dir.toUri().toString()))
+        .describedAs("Exception raised by deleteFile(%s)", dir)
+        .isInstanceOf(RuntimeIOException.class)
+        .hasMessageContaining("Failed to delete file:");
+    assertPathExists(file);
   }
 
   @ParameterizedTest
@@ -218,21 +295,38 @@ private static void testJsonParser(HadoopFileIO hadoopFileIO, File tempDir) thro
     }
   }
 
+  /**
+   * Create a zero byte file at a path, overwriting any file which is there.
+   *
+   * @param path path
+   * @return the path of the file
+   */
+  Path file(Path path) {
+    try {
+      hadoopFileIO.newOutputFile(path.toString()).createOrOverwrite().close();
+    } catch (IOException e) {
+      throw new UncheckedIOException(e);
+    }
+    return path;
+  }
+
   private List<Path> createRandomFiles(Path parent, int count) {
-    Vector<Path> paths = new Vector<>();
-    random
+    return random
         .ints(count)
         .parallel()
-        .forEach(
-            i -> {
-              try {
-                Path path = new Path(parent, "file-" + i);
-                paths.add(path);
-                fs.createNewFile(path);
-              } catch (IOException e) {
-                throw new UncheckedIOException(e);
-              }
-            });
-    return paths;
+        .mapToObj(i -> file(new Path(parent, "file-" + i)))
+        .collect(Collectors.toList());
+  }
+
+  private void assertPathDoesNotExist(Path path) {
+    assertThat(hadoopFileIO.newInputFile(path.toUri().toString()).exists())
+        .describedAs("File %s must not exist", path)
+        .isFalse();
+  }
+
+  private void assertPathExists(Path path) {
+    assertThat(hadoopFileIO.newInputFile(path.toUri().toString()).exists())
+        .describedAs("File %s must exist", path)
+        .isTrue();
   }
 }
diff --git a/docs/docs/aws.md b/docs/docs/aws.md
index b6123a6ae977..2f68e09f6c02 100644
--- a/docs/docs/aws.md
+++ b/docs/docs/aws.md
@@ -33,7 +33,8 @@ or individual AWS client packages (Glue, S3, DynamoDB, KMS, STS) if you would li
 All the default AWS clients use the [Apache HTTP Client](https://mvnrepository.com/artifact/software.amazon.awssdk/apache-client) for HTTP connection management.
-This dependency is not part of the AWS SDK bundle and needs to be added separately.
+This dependency is included as a shaded library in the AWS SDK bundle JAR,
+but if the individual client libraries are used, it needs to be added separately.
 To choose a different HTTP client library such as [URL Connection HTTP Client](https://mvnrepository.com/artifact/software.amazon.awssdk/url-connection-client),
 see the section [client customization](#aws-client-customization) for more details.
 
@@ -422,18 +423,13 @@ There is no redundant consistency wait and check which might negatively impact p
 Before `S3FileIO` was introduced, many Iceberg users choose to use `HadoopFileIO` to write data to S3 through the [S3A FileSystem](https://github.com/apache/hadoop/blob/trunk/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java).
 As introduced in the previous sections, `S3FileIO` adopts the latest AWS clients and S3 features for optimized security and performance
- and is thus recommended for S3 use cases rather than the S3A FileSystem.
+and is thus recommended for S3 use cases rather than the S3A FileSystem.
 
 `S3FileIO` writes data with `s3://` URI scheme, but it is also compatible with schemes written by the S3A FileSystem.
 This means for any table manifests containing `s3a://` or `s3n://` file paths, `S3FileIO` is still able to read them.
 This feature allows people to easily switch from S3A to `S3FileIO`.
 
-If for any reason you have to use S3A, here are the instructions:
-
-1. To store data using S3A, specify the `warehouse` catalog property to be an S3A path, e.g. `s3a://my-bucket/my-warehouse`
-2. For `HiveCatalog`, to also store metadata using S3A, specify the Hadoop config property `hive.metastore.warehouse.dir` to be an S3A path.
-3. Add [hadoop-aws](https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws) as a runtime dependency of your compute engine.
-4. Configure AWS settings based on [hadoop-aws documentation](https://hadoop.apache.org/docs/current/hadoop-aws/tools/hadoop-aws/index.html) (make sure you check the version, S3A configuration varies a lot based on the version you use).
+If you do wish to use S3A, see the "Using the Hadoop S3A Connector" section below.
 
 ### S3 Write Checksum Verification
 
@@ -747,6 +743,24 @@ Users can use catalog properties to override the defaults. For example, to confi
 --conf spark.sql.catalog.my_catalog.http-client.apache.max-connections=5
 ```
 
+## Using the Hadoop S3A Connector
+
+To use the S3A connector, follow these steps:
+
+1. Specify the `warehouse` catalog property to be an S3A path, e.g. `s3a://my-bucket/my-warehouse`
+2. For `HiveCatalog` to also store metadata using S3A, specify the Hadoop config property `hive.metastore.warehouse.dir` to be an S3A path.
+3. Add [hadoop-aws](https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws) as a runtime dependency of your compute engine. This library must be the exact same version as the other Hadoop binaries on the classpath.
+4. Put a compatible version of the AWS SDK on the classpath (see the example below).
+5. Configure AWS settings based on the [hadoop-aws documentation](https://hadoop.apache.org/docs/current/hadoop-aws/tools/hadoop-aws/index.html).
+
+```shell
+spark-sql --conf spark.sql.catalog.my_catalog=org.apache.iceberg.spark.SparkCatalog \
+    --conf spark.sql.catalog.my_catalog.warehouse=s3a://my-bucket/my/key/prefix \
+    --conf spark.sql.catalog.my_catalog.type=glue \
+    --conf spark.sql.catalog.my_catalog.io-impl=org.apache.iceberg.hadoop.HadoopFileIO
+```
+If any problems are encountered, consult the S3A [troubleshooting](https://hadoop.apache.org/docs/current/hadoop-aws/tools/hadoop-aws/troubleshooting_s3a.html) documentation.
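+
+For example, the connector and a matching AWS SDK can be supplied to a Spark job through `--packages`; the versions below are only illustrative and must match the Hadoop release already on your classpath:
+
+```shell
+spark-sql --packages org.apache.hadoop:hadoop-aws:3.4.1,software.amazon.awssdk:bundle:2.24.6 \
+    --conf spark.sql.catalog.my_catalog=org.apache.iceberg.spark.SparkCatalog \
+    --conf spark.sql.catalog.my_catalog.warehouse=s3a://my-bucket/my/key/prefix \
+    --conf spark.sql.catalog.my_catalog.io-impl=org.apache.iceberg.hadoop.HadoopFileIO
+```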
+
 ## Run Iceberg on AWS
 
 ### Amazon Athena
diff --git a/flink/v1.19/build.gradle b/flink/v1.19/build.gradle
index 05fdddd63ccf..3abfd654bf68 100644
--- a/flink/v1.19/build.gradle
+++ b/flink/v1.19/build.gradle
@@ -222,6 +222,10 @@ project(":iceberg-flink:iceberg-flink-runtime-${flinkMajorVersion}") {
     exclude group: 'org.apache.calcite'
     exclude group: 'org.apache.calcite.avatica'
     exclude group: 'com.google.code.findbugs', module: 'jsr305'
+    exclude group: 'org.apache.hadoop', module: 'hadoop-yarn-server-common'
+    exclude group: 'org.apache.hadoop', module: 'hadoop-yarn-server-applicationhistoryservice'
+    exclude group: 'org.apache.hadoop', module: 'hadoop-yarn-server-resourcemanager'
+    exclude group: 'org.apache.hadoop', module: 'hadoop-yarn-server-web-proxy'
   }
 }
 
diff --git a/flink/v1.20/build.gradle b/flink/v1.20/build.gradle
index 16d1d76ec1ec..24ae180e0f27 100644
--- a/flink/v1.20/build.gradle
+++ b/flink/v1.20/build.gradle
@@ -222,6 +222,10 @@ project(":iceberg-flink:iceberg-flink-runtime-${flinkMajorVersion}") {
     exclude group: 'org.apache.calcite'
     exclude group: 'org.apache.calcite.avatica'
     exclude group: 'com.google.code.findbugs', module: 'jsr305'
+    exclude group: 'org.apache.hadoop', module: 'hadoop-yarn-server-common'
+    exclude group: 'org.apache.hadoop', module: 'hadoop-yarn-server-applicationhistoryservice'
+    exclude group: 'org.apache.hadoop', module: 'hadoop-yarn-server-resourcemanager'
+    exclude group: 'org.apache.hadoop', module: 'hadoop-yarn-server-web-proxy'
   }
 }
 
diff --git a/orc/src/main/java/org/apache/iceberg/orc/FileIOFSUtil.java b/orc/src/main/java/org/apache/iceberg/orc/FileIOFSUtil.java
index 2570799ef93e..7f555749877d 100644
--- a/orc/src/main/java/org/apache/iceberg/orc/FileIOFSUtil.java
+++ b/orc/src/main/java/org/apache/iceberg/orc/FileIOFSUtil.java
@@ -22,6 +22,7 @@
 import java.io.IOException;
 import java.io.OutputStream;
 import java.net.URI;
+import org.apache.hadoop.fs.BulkDelete;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileStatus;
@@ -83,6 +84,11 @@ public boolean delete(Path f, boolean recursive) throws IOException {
       throw new UnsupportedOperationException();
     }
 
+    @Override
+    public BulkDelete createBulkDelete(Path path) throws IllegalArgumentException, IOException {
+      throw new UnsupportedOperationException();
+    }
+
     @Override
     public FileStatus[] listStatus(Path f) throws FileNotFoundException, IOException {
       throw new UnsupportedOperationException();
diff --git a/spark/v3.4/build.gradle b/spark/v3.4/build.gradle
index 32abdf2962b2..d23df105293c 100644
--- a/spark/v3.4/build.gradle
+++ b/spark/v3.4/build.gradle
@@ -33,6 +33,8 @@ configure(sparkProjects) {
       force "com.fasterxml.jackson.module:jackson-module-scala_${scalaVersion}:${libs.versions.jackson214.get()}"
       force "com.fasterxml.jackson.core:jackson-databind:${libs.versions.jackson214.get()}"
       force "com.fasterxml.jackson.core:jackson-core:${libs.versions.jackson214.get()}"
+      force "org.apache.hadoop:hadoop-client-api:${libs.versions.hadoop3.get()}"
+      force "org.apache.hadoop:hadoop-client-runtime:${libs.versions.hadoop3.get()}"
     }
   }
 }
diff --git a/spark/v3.5/build.gradle b/spark/v3.5/build.gradle
index af7a1d74d1b3..d541e839a67f 100644
--- a/spark/v3.5/build.gradle
+++ b/spark/v3.5/build.gradle
@@ -33,7 +33,8 @@ configure(sparkProjects) {
       force "com.fasterxml.jackson.module:jackson-module-scala_${scalaVersion}:${libs.versions.jackson215.get()}"
      force "com.fasterxml.jackson.core:jackson-databind:${libs.versions.jackson215.get()}"
force "com.fasterxml.jackson.core:jackson-core:${libs.versions.jackson215.get()}" - } + force "org.apache.hadoop:hadoop-client-api:${libs.versions.hadoop3.get()}" + force "org.apache.hadoop:hadoop-client-runtime:${libs.versions.hadoop3.get()}" } } } }