From 679fa3dc36c9613f60794b7e097d34acb4c19daf Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Wed, 9 Oct 2019 11:14:59 +0100 Subject: [PATCH] HADOOP-16644. Do a HEAD after a PUT to get the modtime. WiP: no tests. What would a test look like? Best to use a mock that fixes the remote time so that it always differs slightly from the local time. Alternatively, make the clock of the S3A FS patchable, which is potentially the most flexible option. Change-Id: I2c99752647f522991b1f89dd9c43f3a2e9b98bf5 --- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 29 +++++++++++++++++-- .../hadoop/fs/s3a/impl/StatusProbeEnum.java | 12 ++++++++ 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 26f16a7b23271..8985cc2601e7b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -3420,9 +3420,32 @@ void finishedWrite(String key, long length, String eTag, String versionId, activeState = stateToClose; } S3Guard.addAncestors(metadataStore, p, ttlTimeProvider, activeState); - S3AFileStatus status = createUploadFileStatus(p, - S3AUtils.objectRepresentsDirectory(key, length), length, - getDefaultBlockSize(p), username, eTag, versionId); + boolean isDir = objectRepresentsDirectory(key, length); + S3AFileStatus status = null; + status = createUploadFileStatus(p, + isDir, length, + getDefaultBlockSize(p), username, , versionId); + // do a HEAD to pick up the real timestamp. This is a PITA but + // it is the only way to defend against clock drift and timestamp + // inconsistencies, which can cause surprises later. 
+ S3AFileStatus remoteStatus = null; + if (!isDir && eTag != null) { + // we need to pass down the version ID/etag so on an update we + // can discard previous results + try { + remoteStatus = innerGetFileStatus(p, false, + StatusProbeEnum.HEAD_ONLY); + if (eTag.equals(remoteStatus.getETag())) { + status = new S3AFileStatus(status.getLen(), + remoteStatus.getModificationTime(), + status.getPath(), status.getBlockSize(), status.getOwner(), + eTag, status.getVersionId()); + } + } catch (IOException ignored) { + // we don't worry if the file isn't visible or some other + // failure occurs. + } + } S3Guard.putAndReturn(metadataStore, status, instrumentation, ttlTimeProvider, diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StatusProbeEnum.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StatusProbeEnum.java index ca2875c39f86d..c8497ee8a9609 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StatusProbeEnum.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StatusProbeEnum.java @@ -41,4 +41,16 @@ public enum StatusProbeEnum { public static final Set DIRECTORIES = EnumSet.of(DirMarker, List); + /** We only want the HEAD or dir marker. */ + public static final Set HEAD_OR_DIR_MARKER = + EnumSet.of(Head, DirMarker); + + /** We only want the HEAD. */ + public static final Set HEAD_ONLY = + EnumSet.of(Head); + + /** We only want the dir marker. */ + public static final Set DIR_MARKER_ONLY = + EnumSet.of(DirMarker); + }