diff --git a/presto-hive-common/src/main/java/com/facebook/presto/hive/HiveErrorCode.java b/presto-hive-common/src/main/java/com/facebook/presto/hive/HiveErrorCode.java index 739763dc1098e..e23270d65aad1 100644 --- a/presto-hive-common/src/main/java/com/facebook/presto/hive/HiveErrorCode.java +++ b/presto-hive-common/src/main/java/com/facebook/presto/hive/HiveErrorCode.java @@ -69,6 +69,7 @@ public enum HiveErrorCode // To be used for metadata inconsistencies and not for incorrect input from users HIVE_INVALID_ENCRYPTION_METADATA(42, EXTERNAL), HIVE_UNSUPPORTED_ENCRYPTION_OPERATION(43, USER_ERROR), + MALFORMED_HIVE_FILE_STATISTICS(44, INTERNAL_ERROR), /**/; private final ErrorCode errorCode; diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientModule.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientModule.java index 4945534cff046..aba8105ad7535 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientModule.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientModule.java @@ -128,6 +128,8 @@ public void configure(Binder binder) binder.bind(HivePartitionManager.class).in(Scopes.SINGLETON); binder.bind(LocationService.class).to(HiveLocationService.class).in(Scopes.SINGLETON); binder.bind(TableParameterCodec.class).in(Scopes.SINGLETON); + binder.bind(HivePartitionStats.class).in(Scopes.SINGLETON); + newExporter(binder).export(HivePartitionStats.class).as(generatedNameOf(HivePartitionStats.class, connectorId)); binder.bind(HiveMetadataFactory.class).in(Scopes.SINGLETON); binder.bind(new TypeLiteral>() {}).to(HiveMetadataFactory.class).in(Scopes.SINGLETON); binder.bind(StagingFileCommitter.class).to(HiveStagingFileCommitter.class).in(Scopes.SINGLETON); diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveFileWriter.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveFileWriter.java index 1f428b84cf404..bae799e27c5b3 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveFileWriter.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveFileWriter.java @@ -25,7 +25,8 @@ public interface HiveFileWriter void appendRows(Page dataPage); - void commit(); + // Page returned by commit should have fileSize as first channel + Optional commit(); void rollback(); diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveManifestUtils.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveManifestUtils.java new file mode 100644 index 0000000000000..61d98f54bd422 --- /dev/null +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveManifestUtils.java @@ -0,0 +1,88 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.hive; + +import com.facebook.presto.common.Page; +import com.facebook.presto.common.PageBuilder; +import com.facebook.presto.common.block.BlockBuilder; +import com.facebook.presto.spi.PrestoException; +import com.google.common.collect.ImmutableList; + +import java.util.Optional; + +import static com.facebook.presto.common.type.BigintType.BIGINT; +import static com.facebook.presto.common.type.VarcharType.VARCHAR; +import static com.facebook.presto.hive.HiveErrorCode.MALFORMED_HIVE_FILE_STATISTICS; +import static com.facebook.presto.hive.PartitionUpdate.FileWriteInfo; +import static io.airlift.slice.Slices.utf8Slice; +import static java.lang.String.format; + +public class HiveManifestUtils +{ + private static final int FILE_SIZE_CHANNEL = 0; + private static final int ROW_COUNT_CHANNEL = 1; + + private HiveManifestUtils() + { + } + + public static Page createFileStatisticsPage(long fileSize, long rowCount) + { + // FileStatistics page layout: + // + // fileSize rowCount + // X X + PageBuilder statsPageBuilder = new PageBuilder(ImmutableList.of(BIGINT, BIGINT)); + statsPageBuilder.declarePosition(); + BIGINT.writeLong(statsPageBuilder.getBlockBuilder(FILE_SIZE_CHANNEL), fileSize); + BIGINT.writeLong(statsPageBuilder.getBlockBuilder(ROW_COUNT_CHANNEL), rowCount); + + return statsPageBuilder.build(); + } + + public static long getFileSize(Page statisticsPage, int position) + { + // FileStatistics page layout: + // + // fileSize rowCount + // X X + + if (position < 0 || position >= statisticsPage.getPositionCount()) { + throw new PrestoException(MALFORMED_HIVE_FILE_STATISTICS, format("Invalid position: %d specified for FileStatistics page", position)); + } + return BIGINT.getLong(statisticsPage.getBlock(FILE_SIZE_CHANNEL), position); + } + + public static Optional createPartitionManifest(PartitionUpdate partitionUpdate) + { + // Manifest Page layout: + // fileName fileSize + // X X + // X X + // X X + // .... + PageBuilder manifestBuilder = new PageBuilder(ImmutableList.of(VARCHAR, BIGINT)); + BlockBuilder fileNameBuilder = manifestBuilder.getBlockBuilder(0); + BlockBuilder fileSizeBuilder = manifestBuilder.getBlockBuilder(1); + for (FileWriteInfo fileWriteInfo : partitionUpdate.getFileWriteInfos()) { + if (!fileWriteInfo.getFileSize().isPresent()) { + return Optional.empty(); + } + manifestBuilder.declarePosition(); + VARCHAR.writeSlice(fileNameBuilder, utf8Slice(fileWriteInfo.getWriteFileName())); + BIGINT.writeLong(fileSizeBuilder, fileWriteInfo.getFileSize().get()); + } + return Optional.of(manifestBuilder.build()); + } +} diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveMetadata.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveMetadata.java index d075f682624a9..fa33ed5644c0e 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveMetadata.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveMetadata.java @@ -14,6 +14,7 @@ package com.facebook.presto.hive; import com.facebook.airlift.json.JsonCodec; +import com.facebook.presto.common.Page; import com.facebook.presto.common.Subfield; import com.facebook.presto.common.block.Block; import com.facebook.presto.common.predicate.Domain; @@ -24,7 +25,6 @@ import com.facebook.presto.common.type.RowType; import com.facebook.presto.common.type.Type; import com.facebook.presto.common.type.TypeManager; -import com.facebook.presto.common.type.VarcharType; import com.facebook.presto.hive.LocationService.WriteInfo; import com.facebook.presto.hive.PartitionUpdate.FileWriteInfo; import com.facebook.presto.hive.metastore.Column; @@ -148,6 +148,7 @@ import static com.facebook.presto.common.type.DateType.DATE; import static com.facebook.presto.common.type.TimestampType.TIMESTAMP; import static com.facebook.presto.common.type.VarbinaryType.VARBINARY; +import static com.facebook.presto.common.type.VarcharType.VARCHAR; import static com.facebook.presto.common.type.Varchars.isVarcharType; import static com.facebook.presto.expressions.LogicalRowExpressions.TRUE_CONSTANT; import static com.facebook.presto.expressions.LogicalRowExpressions.binaryExpression; @@ -177,6 +178,7 @@ import static com.facebook.presto.hive.HiveErrorCode.HIVE_UNKNOWN_ERROR; import static com.facebook.presto.hive.HiveErrorCode.HIVE_UNSUPPORTED_ENCRYPTION_OPERATION; import static com.facebook.presto.hive.HiveErrorCode.HIVE_UNSUPPORTED_FORMAT; +import static com.facebook.presto.hive.HiveManifestUtils.createPartitionManifest; import static com.facebook.presto.hive.HivePartition.UNPARTITIONED_ID; import static com.facebook.presto.hive.HivePartitioningHandle.createHiveCompatiblePartitioningHandle; import static com.facebook.presto.hive.HivePartitioningHandle.createPrestoNativePartitioningHandle; @@ -351,6 +353,7 @@ public class HiveMetadata private final ZeroRowFileCreator zeroRowFileCreator; private final PartitionObjectBuilder partitionObjectBuilder; private final HiveEncryptionInformationProvider encryptionInformationProvider; + private final HivePartitionStats hivePartitionStats; public HiveMetadata( SemiTransactionalHiveMetastore metastore, @@ -374,7 +377,8 @@ public HiveMetadata( StagingFileCommitter stagingFileCommitter, ZeroRowFileCreator zeroRowFileCreator, PartitionObjectBuilder partitionObjectBuilder, - HiveEncryptionInformationProvider encryptionInformationProvider) + HiveEncryptionInformationProvider encryptionInformationProvider, + HivePartitionStats hivePartitionStats) { this.allowCorruptWritesForTesting = allowCorruptWritesForTesting; @@ -399,6 +403,7 @@ public HiveMetadata( this.zeroRowFileCreator = requireNonNull(zeroRowFileCreator, "zeroRowFileCreator is null"); this.partitionObjectBuilder = requireNonNull(partitionObjectBuilder, "partitionObjectBuilder is null"); this.encryptionInformationProvider = requireNonNull(encryptionInformationProvider, "encryptionInformationProvider is null"); + this.hivePartitionStats = requireNonNull(hivePartitionStats, "hivePartitionStats is null"); } public SemiTransactionalHiveMetastore getMetastore() @@ -472,7 +477,7 @@ private Optional getPropertiesSystemTable(SchemaTableName tableName } Map sortedTableParameters = ImmutableSortedMap.copyOf(table.get().getParameters()); List columns = sortedTableParameters.keySet().stream() - .map(key -> new ColumnMetadata(key, VarcharType.VARCHAR)) + .map(key -> new ColumnMetadata(key, VARCHAR)) .collect(toImmutableList()); List types = columns.stream() .map(ColumnMetadata::getType) @@ -1590,7 +1595,7 @@ private ImmutableList computePartitionUpdatesForMissingBuckets( locationHandle.getWritePath(), locationHandle.getTargetPath(), fileNamesForMissingBuckets.stream() - .map(fileName -> new FileWriteInfo(fileName, fileName)) + .map(fileName -> new FileWriteInfo(fileName, fileName, Optional.empty())) .collect(toImmutableList()), 0, 0, @@ -1613,7 +1618,7 @@ private ImmutableList computePartitionUpdatesForMissingBuckets( partitionUpdate.getWritePath(), partitionUpdate.getTargetPath(), fileNamesForMissingBuckets.stream() - .map(fileName -> new FileWriteInfo(fileName, fileName)) + .map(fileName -> new FileWriteInfo(fileName, fileName, Optional.empty())) .collect(toImmutableList()), 0, 0, @@ -1836,6 +1841,12 @@ else if (partitionUpdate.getUpdateMode() == NEW || partitionUpdate.getUpdateMode Map extraPartitionMetadata = handle.getEncryptionInformation() .map(encryptionInfo -> encryptionInfo.getDwrfEncryptionMetadata().map(DwrfEncryptionMetadata::getExtraMetadata).orElseGet(ImmutableMap::of)) .orElseGet(ImmutableMap::of); + + // TODO: Put the manifest blob in partition parameters + // Track the manifest blob size + Optional manifestBlob = createPartitionManifest(partitionUpdate); + manifestBlob.ifPresent(manifest -> hivePartitionStats.addManifestSizeInBytes(manifest.getRetainedSizeInBytes())); + // insert into new partition or overwrite existing partition Partition partition = partitionObjectBuilder.buildPartitionObject( session, diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveMetadataFactory.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveMetadataFactory.java index b143dd441a031..413fa295849b0 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveMetadataFactory.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveMetadataFactory.java @@ -62,6 +62,7 @@ public class HiveMetadataFactory private final String prestoVersion; private final PartitionObjectBuilder partitionObjectBuilder; private final HiveEncryptionInformationProvider encryptionInformationProvider; + private final HivePartitionStats hivePartitionStats; @Inject @SuppressWarnings("deprecation") @@ -84,7 +85,8 @@ public HiveMetadataFactory( ZeroRowFileCreator zeroRowFileCreator, NodeVersion nodeVersion, PartitionObjectBuilder partitionObjectBuilder, - HiveEncryptionInformationProvider encryptionInformationProvider) + HiveEncryptionInformationProvider encryptionInformationProvider, + HivePartitionStats hivePartitionStats) { this( metastore, @@ -111,7 +113,8 @@ public HiveMetadataFactory( zeroRowFileCreator, nodeVersion.toString(), partitionObjectBuilder, - encryptionInformationProvider); + encryptionInformationProvider, + hivePartitionStats); } public HiveMetadataFactory( @@ -139,7 +142,8 @@ public HiveMetadataFactory( ZeroRowFileCreator zeroRowFileCreator, String prestoVersion, PartitionObjectBuilder partitionObjectBuilder, - HiveEncryptionInformationProvider encryptionInformationProvider) + HiveEncryptionInformationProvider encryptionInformationProvider, + HivePartitionStats hivePartitionStats) { this.allowCorruptWritesForTesting = allowCorruptWritesForTesting; this.skipDeletionForAlter = skipDeletionForAlter; @@ -167,6 +171,7 @@ public HiveMetadataFactory( this.prestoVersion = requireNonNull(prestoVersion, "prestoVersion is null"); this.partitionObjectBuilder = requireNonNull(partitionObjectBuilder, "partitionObjectBuilder is null"); this.encryptionInformationProvider = requireNonNull(encryptionInformationProvider, "encryptionInformationProvider is null"); + this.hivePartitionStats = requireNonNull(hivePartitionStats, "hivePartitionStats is null"); if (!allowCorruptWritesForTesting && !timeZone.equals(DateTimeZone.getDefault())) { log.warn("Hive writes are disabled. " + @@ -208,6 +213,7 @@ public HiveMetadata get() stagingFileCommitter, zeroRowFileCreator, partitionObjectBuilder, - encryptionInformationProvider); + encryptionInformationProvider, + hivePartitionStats); } } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HivePartitionStats.java b/presto-hive/src/main/java/com/facebook/presto/hive/HivePartitionStats.java new file mode 100644 index 0000000000000..7aa0a7feebead --- /dev/null +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HivePartitionStats.java @@ -0,0 +1,35 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.hive; + +import com.facebook.airlift.stats.DistributionStat; +import org.weakref.jmx.Managed; +import org.weakref.jmx.Nested; + +public class HivePartitionStats +{ + private final DistributionStat manifestSizeInBytes = new DistributionStat(); + + public void addManifestSizeInBytes(long bytes) + { + manifestSizeInBytes.add(bytes); + } + + @Managed + @Nested + public DistributionStat getManifestSizeInBytes() + { + return manifestSizeInBytes; + } +} diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveWriter.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveWriter.java index 4df57c56e76c6..e66cb2ad792ae 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveWriter.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveWriter.java @@ -21,6 +21,7 @@ import java.util.Optional; import java.util.function.Consumer; +import static com.facebook.presto.hive.HiveManifestUtils.getFileSize; import static com.google.common.base.MoreObjects.toStringHelper; import static java.util.Objects.requireNonNull; @@ -37,6 +38,7 @@ public class HiveWriter private long rowCount; private long inputSizeInBytes; + private Optional fileStatistics = Optional.empty(); public HiveWriter( HiveFileWriter fileWriter, @@ -84,7 +86,7 @@ public void append(Page dataPage) public void commit() { - fileWriter.commit(); + fileStatistics = fileWriter.commit(); onCommit.accept(this); } @@ -110,7 +112,7 @@ public PartitionUpdate getPartitionUpdate() updateMode, writePath, targetPath, - ImmutableList.of(fileWriteInfo), + ImmutableList.of(new FileWriteInfo(fileWriteInfo.getWriteFileName(), fileWriteInfo.getTargetFileName(), fileStatistics.map(statisticsPage -> getFileSize(statisticsPage, 0)))), rowCount, inputSizeInBytes, fileWriter.getWrittenBytes()); diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveWriterFactory.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveWriterFactory.java index d748396536333..78303dcff686e 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveWriterFactory.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveWriterFactory.java @@ -387,7 +387,7 @@ public HiveWriter createWriter(Page partitionColumns, int position, OptionalInt hiveFileWriter, partitionName, writerParameters.getUpdateMode(), - new FileWriteInfo(writeFileName, targetFileName), + new FileWriteInfo(writeFileName, targetFileName, Optional.empty()), writerParameters.getWriteInfo().getWritePath().toString(), writerParameters.getWriteInfo().getTargetPath().toString(), createCommitEventListener(path, partitionName, hiveFileWriter, writerParameters), diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/OrcFileWriter.java b/presto-hive/src/main/java/com/facebook/presto/hive/OrcFileWriter.java index fd90fd731d00c..ebcc9badf2656 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/OrcFileWriter.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/OrcFileWriter.java @@ -47,6 +47,7 @@ import static com.facebook.presto.hive.HiveErrorCode.HIVE_WRITER_CLOSE_ERROR; import static com.facebook.presto.hive.HiveErrorCode.HIVE_WRITER_DATA_ERROR; import static com.facebook.presto.hive.HiveErrorCode.HIVE_WRITE_VALIDATION_FAILED; +import static com.facebook.presto.hive.HiveManifestUtils.createFileStatisticsPage; import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED; import static com.google.common.base.MoreObjects.toStringHelper; import static java.util.Objects.requireNonNull; @@ -64,6 +65,7 @@ public class OrcFileWriter private final Optional> validationInputFactory; private long validationCpuNanos; + private long rowCount; public OrcFileWriter( DataSink dataSink, @@ -146,6 +148,7 @@ public void appendRows(Page dataPage) Page page = new Page(dataPage.getPositionCount(), blocks); try { orcWriter.write(page); + rowCount += page.getPositionCount(); } catch (IOException | UncheckedIOException e) { throw new PrestoException(HIVE_WRITER_DATA_ERROR, e); @@ -153,7 +156,7 @@ public void appendRows(Page dataPage) } @Override - public void commit() + public Optional commit() { try { orcWriter.close(); @@ -180,6 +183,8 @@ public void commit() throw new PrestoException(HIVE_WRITE_VALIDATION_FAILED, e); } } + + return Optional.of(createFileStatisticsPage(getWrittenBytes(), rowCount)); } @Override diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/PartitionUpdate.java b/presto-hive/src/main/java/com/facebook/presto/hive/PartitionUpdate.java index 1061cb7d41762..0a8b6949dc5be 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/PartitionUpdate.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/PartitionUpdate.java @@ -23,6 +23,7 @@ import java.util.Collection; import java.util.List; import java.util.Objects; +import java.util.Optional; import static com.facebook.presto.hive.HiveErrorCode.HIVE_CONCURRENT_MODIFICATION_DETECTED; import static com.google.common.base.MoreObjects.toStringHelper; @@ -211,14 +212,17 @@ public static class FileWriteInfo { private final String writeFileName; private final String targetFileName; + private final Optional fileSize; @JsonCreator public FileWriteInfo( @JsonProperty("writeFileName") String writeFileName, - @JsonProperty("targetFileName") String targetFileName) + @JsonProperty("targetFileName") String targetFileName, + @JsonProperty("fileSize") Optional fileSize) { this.writeFileName = requireNonNull(writeFileName, "writeFileName is null"); this.targetFileName = requireNonNull(targetFileName, "targetFileName is null"); + this.fileSize = requireNonNull(fileSize, "fileSize is null"); } @JsonProperty @@ -233,6 +237,12 @@ public String getTargetFileName() return targetFileName; } + @JsonProperty + public Optional getFileSize() + { + return fileSize; + } + @Override public boolean equals(Object o) { @@ -244,13 +254,14 @@ public boolean equals(Object o) } FileWriteInfo that = (FileWriteInfo) o; return Objects.equals(writeFileName, that.writeFileName) && - Objects.equals(targetFileName, that.targetFileName); + Objects.equals(targetFileName, that.targetFileName) && + Objects.equals(fileSize, that.fileSize); } @Override public int hashCode() { - return Objects.hash(writeFileName, targetFileName); + return Objects.hash(writeFileName, targetFileName, fileSize); } @Override @@ -259,6 +270,7 @@ public String toString() return toStringHelper(this) .add("writeFileName", writeFileName) .add("targetFileName", targetFileName) + .add("fileSize", fileSize) .toString(); } } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/RcFileFileWriter.java b/presto-hive/src/main/java/com/facebook/presto/hive/RcFileFileWriter.java index f230b04bef476..4d2b4b8af0b09 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/RcFileFileWriter.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/RcFileFileWriter.java @@ -130,7 +130,7 @@ public void appendRows(Page dataPage) } @Override - public void commit() + public Optional commit() { try { rcFileWriter.close(); @@ -157,6 +157,7 @@ public void commit() throw new PrestoException(HIVE_WRITE_VALIDATION_FAILED, e); } } + return Optional.empty(); } @Override diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/RecordFileWriter.java b/presto-hive/src/main/java/com/facebook/presto/hive/RecordFileWriter.java index b94aa338a6b55..cfc2d6958c341 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/RecordFileWriter.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/RecordFileWriter.java @@ -39,6 +39,7 @@ import java.io.IOException; import java.io.UncheckedIOException; import java.util.List; +import java.util.Optional; import java.util.Properties; import static com.facebook.presto.hive.HiveErrorCode.HIVE_WRITER_CLOSE_ERROR; @@ -175,11 +176,12 @@ public void appendRow(Page dataPage, int position) } @Override - public void commit() + public Optional commit() { try { recordWriter.close(false); committed = true; + return Optional.empty(); } catch (IOException e) { throw new PrestoException(HIVE_WRITER_CLOSE_ERROR, "Error committing write to Hive", e); diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/SortingFileWriter.java b/presto-hive/src/main/java/com/facebook/presto/hive/SortingFileWriter.java index 958430d798b6d..4a6073f3c726e 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/SortingFileWriter.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/SortingFileWriter.java @@ -124,14 +124,13 @@ public void appendRows(Page page) } @Override - public void commit() + public Optional commit() { if (!sortBuffer.isEmpty()) { // skip temporary files entirely if the total output size is small if (tempFiles.isEmpty()) { sortBuffer.flushTo(outputWriter::appendRows); - outputWriter.commit(); - return; + return outputWriter.commit(); } flushToTempFile(); @@ -139,7 +138,7 @@ public void commit() try { writeSorted(); - outputWriter.commit(); + return outputWriter.commit(); } catch (UncheckedIOException e) { throw new PrestoException(HIVE_WRITER_CLOSE_ERROR, "Error committing write to Hive", e); diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/pagefile/PageFileWriter.java b/presto-hive/src/main/java/com/facebook/presto/hive/pagefile/PageFileWriter.java index ee5de42e53ebf..a2684a8bd721c 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/pagefile/PageFileWriter.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/pagefile/PageFileWriter.java @@ -24,6 +24,7 @@ import java.io.IOException; import java.io.UncheckedIOException; +import java.util.Optional; import java.util.concurrent.Callable; import static com.facebook.presto.hive.HiveErrorCode.HIVE_WRITER_CLOSE_ERROR; @@ -75,10 +76,11 @@ public void appendRows(Page dataPage) } @Override - public void commit() + public Optional commit() { try { pageWriter.close(); + return Optional.empty(); } catch (IOException | UncheckedIOException e) { try { diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetFileWriter.java b/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetFileWriter.java index 6a973a2ad0098..8234bdd415261 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetFileWriter.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetFileWriter.java @@ -112,10 +112,11 @@ public void appendRows(Page dataPage) } @Override - public void commit() + public Optional commit() { try { parquetWriter.close(); + return Optional.empty(); } catch (IOException | UncheckedIOException e) { try { diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveClient.java b/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveClient.java index 1c45d38994e73..88b189c4016ba 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveClient.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveClient.java @@ -943,7 +943,8 @@ protected final void setup(String databaseName, HiveClientConfig hiveClientConfi new HiveZeroRowFileCreator(hdfsEnvironment, new OutputStreamDataSinkFactory(), listeningDecorator(executor)), TEST_SERVER_VERSION, new HivePartitionObjectBuilder(), - new HiveEncryptionInformationProvider(ImmutableList.of())); + new HiveEncryptionInformationProvider(ImmutableList.of()), + new HivePartitionStats()); transactionManager = new HiveTransactionManager(); encryptionInformationProvider = new HiveEncryptionInformationProvider(ImmutableList.of()); splitManager = new HiveSplitManager( diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveFileFormats.java b/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveFileFormats.java index e40b8f6e4ca47..04862d43fbc35 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveFileFormats.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveFileFormats.java @@ -101,6 +101,9 @@ import static com.facebook.presto.common.type.Varchars.isVarcharType; import static com.facebook.presto.hive.HiveColumnHandle.ColumnType.PARTITION_KEY; import static com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR; +import static com.facebook.presto.hive.HiveManifestUtils.getFileSize; +import static com.facebook.presto.hive.HiveStorageFormat.DWRF; +import static com.facebook.presto.hive.HiveStorageFormat.ORC; import static com.facebook.presto.hive.HiveTestUtils.SESSION; import static com.facebook.presto.hive.HiveTestUtils.TYPE_MANAGER; import static com.facebook.presto.hive.HiveTestUtils.mapType; @@ -543,11 +546,23 @@ public static FileSplit createTestFile( HiveFileWriter hiveFileWriter = fileWriter.orElseThrow(() -> new IllegalArgumentException("fileWriterFactory")); hiveFileWriter.appendRows(page); - hiveFileWriter.commit(); + Optional fileStatistics = hiveFileWriter.commit(); + + assertFileStatistics(fileStatistics, hiveFileWriter.getWrittenBytes(), storageFormat); return new FileSplit(new Path(filePath), 0, new File(filePath).length(), new String[0]); } + private static void assertFileStatistics(Optional fileStatistics, long writtenBytes, HiveStorageFormat storageFormat) + { + if (storageFormat == ORC || storageFormat == DWRF) { + assertTrue(fileStatistics.isPresent()); + Page statisticsPage = fileStatistics.get(); + assertEquals(statisticsPage.getPositionCount(), 1); + assertEquals(writtenBytes, getFileSize(statisticsPage, 0)); + } + } + public static FileSplit createTestFile( String filePath, HiveStorageFormat storageFormat, diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveFileSystem.java b/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveFileSystem.java index 40a7e372d47d2..96b8a31aa1b7c 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveFileSystem.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveFileSystem.java @@ -206,7 +206,8 @@ protected void setup(String host, int port, String databaseName, BiFunction manifestPage = createPartitionManifest(partitionUpdate); + assertTrue(manifestPage.isPresent()); + assertEquals(manifestPage.get().getChannelCount(), 2); + assertEquals(manifestPage.get().getPositionCount(), 1); + } + + private Page createTestStatisticsPageWithOneRow(List types, List values) + { + assertEquals(types.size(), values.size()); + PageBuilder pageBuilder = new PageBuilder(ImmutableList.copyOf(types)); + pageBuilder.declarePosition(); + for (int i = 0; i < types.size(); i++) { + Type type = types.get(i); + BlockBuilder blockBuilder = pageBuilder.getBlockBuilder(i); + Object value = values.get(i); + switch (type.getTypeSignature().getBase()) { + case BOOLEAN: + type.writeBoolean(blockBuilder, (Boolean) value); + break; + case StandardTypes.BIGINT: + type.writeLong(blockBuilder, (Long) value); + break; + case DOUBLE: + type.writeDouble(blockBuilder, (Double) value); + break; + case VARCHAR: + type.writeSlice(blockBuilder, utf8Slice((String) value)); + break; + } + } + return pageBuilder.build(); + } +} diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveMetadataFileFormatEncryptionSettings.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveMetadataFileFormatEncryptionSettings.java index 3cd3082204a12..4fbfa1ac9f7d7 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveMetadataFileFormatEncryptionSettings.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveMetadataFileFormatEncryptionSettings.java @@ -125,7 +125,8 @@ public void setup() new HiveZeroRowFileCreator(HDFS_ENVIRONMENT, new OutputStreamDataSinkFactory(), listeningDecorator(executor)), TEST_SERVER_VERSION, new HivePartitionObjectBuilder(), - new HiveEncryptionInformationProvider(ImmutableList.of(new TestDwrfEncryptionInformationSource()))); + new HiveEncryptionInformationProvider(ImmutableList.of(new TestDwrfEncryptionInformationSource())), + new HivePartitionStats()); metastore.createDatabase(Database.builder() .setDatabaseName(TEST_DB_NAME) diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestPartitionUpdate.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestPartitionUpdate.java index b4b9edd124ebb..483c9160eb8a0 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/TestPartitionUpdate.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestPartitionUpdate.java @@ -20,6 +20,8 @@ import org.apache.hadoop.fs.Path; import org.testng.annotations.Test; +import java.util.Optional; + import static com.facebook.airlift.json.JsonCodec.jsonCodec; import static org.testng.Assert.assertEquals; @@ -35,7 +37,7 @@ public void testRoundTrip() UpdateMode.APPEND, "/writePath", "/targetPath", - ImmutableList.of(new PartitionUpdate.FileWriteInfo(".file1", "file1"), new FileWriteInfo(".file3", "file3")), + ImmutableList.of(new PartitionUpdate.FileWriteInfo(".file1", "file1", Optional.empty()), new FileWriteInfo(".file3", "file3", Optional.empty())), 123, 456, 789); @@ -46,7 +48,7 @@ public void testRoundTrip() assertEquals(actual.getUpdateMode(), UpdateMode.APPEND); assertEquals(actual.getWritePath(), new Path("/writePath")); assertEquals(actual.getTargetPath(), new Path("/targetPath")); - assertEquals(actual.getFileWriteInfos(), ImmutableList.of(new FileWriteInfo(".file1", "file1"), new FileWriteInfo(".file3", "file3"))); + assertEquals(actual.getFileWriteInfos(), ImmutableList.of(new FileWriteInfo(".file1", "file1", Optional.empty()), new FileWriteInfo(".file3", "file3", Optional.empty()))); assertEquals(actual.getRowCount(), 123); assertEquals(actual.getInMemoryDataSizeInBytes(), 456); assertEquals(actual.getOnDiskDataSizeInBytes(), 789);