From 5fe42dbb818b97a314e1f7ef6f046d5dda947af9 Mon Sep 17 00:00:00 2001 From: Alex Jo Date: Wed, 18 Dec 2024 14:18:55 -0700 Subject: [PATCH] Fix S3InputStream's handling of large skips When the skip(n) method is called, the MAX_SKIP_BYTES check is skipped, resulting in the call potentially blocking for a long time. Instead of delegating to the underlying stream, set the nextReadPosition value. This allows the next read to decide if it is best to keep the existing s3 object stream or open a new one. This behavior matches the implementations for Azure and GCS. --- .../java/io/trino/filesystem/s3/S3InputStream.java | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/lib/trino-filesystem-s3/src/main/java/io/trino/filesystem/s3/S3InputStream.java b/lib/trino-filesystem-s3/src/main/java/io/trino/filesystem/s3/S3InputStream.java index 6bcbc6e128ee..0122cf1093a8 100644 --- a/lib/trino-filesystem-s3/src/main/java/io/trino/filesystem/s3/S3InputStream.java +++ b/lib/trino-filesystem-s3/src/main/java/io/trino/filesystem/s3/S3InputStream.java @@ -29,6 +29,7 @@ import java.io.IOException; import java.io.InterruptedIOException; +import static java.lang.Math.clamp; import static java.lang.Math.max; import static java.util.Objects.requireNonNull; @@ -126,14 +127,10 @@ public long skip(long n) throws IOException { ensureOpen(); - seekStream(false); - return reconnectStreamIfNecessary(() -> { - long skip = doSkip(n); - streamPosition += skip; - nextReadPosition += skip; - return skip; - }); + long skipSize = clamp(n, 0, length != null ? length - nextReadPosition : Integer.MAX_VALUE); + nextReadPosition += skipSize; + return skipSize; } @Override