diff --git a/presto-orc/src/main/java/io/prestosql/orc/OrcDataSourceUtils.java b/presto-orc/src/main/java/io/prestosql/orc/OrcDataSourceUtils.java index 96ee64c49589..7fa865741a37 100644 --- a/presto-orc/src/main/java/io/prestosql/orc/OrcDataSourceUtils.java +++ b/presto-orc/src/main/java/io/prestosql/orc/OrcDataSourceUtils.java @@ -20,13 +20,12 @@ import java.util.ArrayList; import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Map.Entry; import static java.lang.Math.toIntExact; +import static java.util.Comparator.comparingLong; public final class OrcDataSourceUtils { @@ -41,14 +40,7 @@ public static List mergeAdjacentDiskRanges(Collection disk { // sort ranges by start offset List ranges = new ArrayList<>(diskRanges); - Collections.sort(ranges, new Comparator() - { - @Override - public int compare(DiskRange o1, DiskRange o2) - { - return Long.compare(o1.getOffset(), o2.getOffset()); - } - }); + ranges.sort(comparingLong(DiskRange::getOffset)); // merge overlapping ranges long maxReadSizeBytes = maxReadSize.toBytes(); diff --git a/presto-orc/src/main/java/io/prestosql/orc/OrcRecordReader.java b/presto-orc/src/main/java/io/prestosql/orc/OrcRecordReader.java index 475fa12b0178..c3a2a9c6e526 100644 --- a/presto-orc/src/main/java/io/prestosql/orc/OrcRecordReader.java +++ b/presto-orc/src/main/java/io/prestosql/orc/OrcRecordReader.java @@ -14,7 +14,6 @@ package io.prestosql.orc; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Predicate; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; @@ -47,12 +46,12 @@ import java.io.Closeable; import java.io.IOException; import java.util.ArrayList; -import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.function.Predicate; import java.util.stream.Collectors; import static com.google.common.base.Preconditions.checkArgument; @@ -93,7 +92,6 @@ public class OrcRecordReader private final StripeReader stripeReader; private int currentStripe = -1; private AggregatedMemoryContext currentStripeSystemMemoryContext; - private AggregatedMemoryContext streamReadersSystemMemoryContext; private final long fileRowCount; private final List stripeFilePositions; @@ -186,7 +184,7 @@ public OrcRecordReader( } stripeInfos.add(new StripeInfo(fileStripes.get(i), stats)); } - Collections.sort(stripeInfos, comparingLong(info -> info.getStripe().getOffset())); + stripeInfos.sort(comparingLong(info -> info.getStripe().getOffset())); long totalRowCount = 0; long fileRowCount = 0; @@ -225,7 +223,7 @@ public OrcRecordReader( // doesn't have a local buffer. All non-leaf level StreamReaders' (e.g. MapStreamReader, LongStreamReader, // ListStreamReader and StructStreamReader) instance sizes were not counted, because calling setBytes() in // their constructors is confusing. - this.streamReadersSystemMemoryContext = this.systemMemoryUsage.newAggregatedMemoryContext(); + AggregatedMemoryContext streamReadersSystemMemoryContext = this.systemMemoryUsage.newAggregatedMemoryContext(); stripeReader = new StripeReader( orcDataSource, @@ -256,10 +254,11 @@ private static boolean isStripeIncluded( OrcPredicate predicate) { // if there are no stats, include the column - if (!stripeStats.isPresent()) { - return true; - } - return predicate.matches(stripe.getNumberOfRows(), getStatisticsByColumnOrdinal(rootStructType, stripeStats.get().getColumnStatistics())); + return stripeStats + .map(StripeStatistics::getColumnStatistics) + .map(columnStats -> getStatisticsByColumnOrdinal(rootStructType, columnStats)) + .map(statsByColumn -> predicate.matches(stripe.getNumberOfRows(), statsByColumn)) + .orElse(true); } @VisibleForTesting @@ -341,7 +340,7 @@ public void close() closer.register(orcDataSource); for (StreamReader column : streamReaders) { if (column != null) { - closer.register(() -> column.close()); + closer.register(column::close); } } } @@ -525,16 +524,14 @@ private void advanceToNextStripe() private void validateWrite(Predicate test, String messageFormat, Object... args) throws OrcCorruptionException { - if (writeValidation.isPresent() && !test.apply(writeValidation.get())) { + if (writeValidation.isPresent() && !test.test(writeValidation.get())) { throw new OrcCorruptionException(orcDataSource.getId(), "Write validation failed: " + messageFormat, args); } } private void validateWriteStripe(int rowCount) { - if (writeChecksumBuilder.isPresent()) { - writeChecksumBuilder.get().addStripe(rowCount); - } + writeChecksumBuilder.ifPresent(builder -> builder.addStripe(rowCount)); } private void validateWritePageChecksum() @@ -680,14 +677,13 @@ public Optional getStats() } } - @VisibleForTesting static class LinearProbeRangeFinder implements CachingOrcDataSource.RegionFinder { private final List diskRanges; private int index; - public LinearProbeRangeFinder(List diskRanges) + private LinearProbeRangeFinder(List diskRanges) { this.diskRanges = diskRanges; } @@ -697,19 +693,20 @@ public DiskRange getRangeFor(long desiredOffset) { // Assumption: range are always read in order // Assumption: bytes that are not part of any range are never read - for (; index < diskRanges.size(); index++) { + while (index < diskRanges.size()) { DiskRange range = diskRanges.get(index); if (range.getEnd() > desiredOffset) { checkArgument(range.getOffset() <= desiredOffset); return range; } + index++; } throw new IllegalArgumentException("Invalid desiredOffset " + desiredOffset); } public static LinearProbeRangeFinder createTinyStripesRangeFinder(List stripes, DataSize maxMergeDistance, DataSize tinyStripeThreshold) { - if (stripes.size() == 0) { + if (stripes.isEmpty()) { return new LinearProbeRangeFinder(ImmutableList.of()); } diff --git a/presto-orc/src/main/java/io/prestosql/orc/OrcWriter.java b/presto-orc/src/main/java/io/prestosql/orc/OrcWriter.java index 9cc2cca70173..954b9d5e95b5 100644 --- a/presto-orc/src/main/java/io/prestosql/orc/OrcWriter.java +++ b/presto-orc/src/main/java/io/prestosql/orc/OrcWriter.java @@ -16,7 +16,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; -import io.airlift.log.Logger; +import com.google.common.primitives.UnsignedBytes; import io.airlift.slice.Slice; import io.airlift.slice.Slices; import io.prestosql.orc.OrcWriteValidation.OrcWriteValidationBuilder; @@ -58,6 +58,7 @@ import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkState; import static com.google.common.base.Verify.verify; +import static com.google.common.collect.ImmutableList.toImmutableList; import static io.airlift.slice.Slices.utf8Slice; import static io.prestosql.orc.OrcReader.validateFile; import static io.prestosql.orc.OrcWriterStats.FlushReason.CLOSED; @@ -73,11 +74,10 @@ import static java.util.Objects.requireNonNull; import static java.util.stream.Collectors.toList; -public class OrcWriter +public final class OrcWriter implements Closeable { private static final int INSTANCE_SIZE = ClassLayout.parseClass(OrcWriter.class).instanceSize(); - private static final Logger log = Logger.get(OrcWriter.class); static final String PRESTO_ORC_WRITER_VERSION_METADATA_KEY = "presto.writer.version"; static final String PRESTO_ORC_WRITER_VERSION; @@ -116,7 +116,7 @@ public class OrcWriter private boolean closed; @Nullable - private final OrcWriteValidation.OrcWriteValidationBuilder validationBuilder; + private final OrcWriteValidationBuilder validationBuilder; public OrcWriter( OrcDataSink orcDataSink, @@ -131,7 +131,8 @@ public OrcWriter( OrcWriteValidationMode validationMode, OrcWriterStats stats) { - this.validationBuilder = validate ? new OrcWriteValidation.OrcWriteValidationBuilder(validationMode, types).setStringStatisticsLimitInBytes(toIntExact(options.getMaxStringStatisticsLimit().toBytes())) : null; + this.validationBuilder = validate ? new OrcWriteValidationBuilder(validationMode, types) + .setStringStatisticsLimitInBytes(toIntExact(options.getMaxStringStatisticsLimit().toBytes())) : null; this.orcDataSink = requireNonNull(orcDataSink, "orcDataSink is null"); this.types = ImmutableList.copyOf(requireNonNull(types, "types is null")); @@ -477,7 +478,7 @@ private List bufferFileFooter() rowGroupMaxRowCount, closedStripes.stream() .map(ClosedStripe::getStripeInformation) - .collect(toList()), + .collect(toImmutableList()), orcTypes, fileStats, userMetadata); @@ -491,7 +492,7 @@ private List bufferFileFooter() recordValidation(validation -> validation.setVersion(metadataWriter.getOrcMetadataVersion())); Slice postscriptSlice = metadataWriter.writePostscript(footerSlice.length(), metadataSlice.length(), compression, maxCompressionBufferSize); outputData.add(createDataOutput(postscriptSlice)); - outputData.add(createDataOutput(Slices.wrappedBuffer((byte) postscriptSlice.length()))); + outputData.add(createDataOutput(Slices.wrappedBuffer(UnsignedBytes.checkedCast(postscriptSlice.length())))); return outputData; } @@ -518,7 +519,7 @@ public void validate(OrcDataSource input) private static List toDenseList(Map data, int expectedSize) { checkArgument(data.size() == expectedSize); - ArrayList list = new ArrayList<>(expectedSize); + List list = new ArrayList<>(expectedSize); for (int i = 0; i < expectedSize; i++) { list.add(data.get(i)); } diff --git a/presto-orc/src/main/java/io/prestosql/orc/checkpoint/StreamCheckpoint.java b/presto-orc/src/main/java/io/prestosql/orc/checkpoint/StreamCheckpoint.java index cc798a5dd241..efe976ffb2a6 100644 --- a/presto-orc/src/main/java/io/prestosql/orc/checkpoint/StreamCheckpoint.java +++ b/presto-orc/src/main/java/io/prestosql/orc/checkpoint/StreamCheckpoint.java @@ -13,6 +13,7 @@ */ package io.prestosql.orc.checkpoint; +@SuppressWarnings("MarkerInterface") public interface StreamCheckpoint { } diff --git a/presto-orc/src/main/java/io/prestosql/orc/metadata/DwrfMetadataReader.java b/presto-orc/src/main/java/io/prestosql/orc/metadata/DwrfMetadataReader.java index ec41917ca596..18750014dccb 100644 --- a/presto-orc/src/main/java/io/prestosql/orc/metadata/DwrfMetadataReader.java +++ b/presto-orc/src/main/java/io/prestosql/orc/metadata/DwrfMetadataReader.java @@ -16,7 +16,6 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Iterables; import io.airlift.slice.Slice; import io.prestosql.orc.metadata.ColumnEncoding.ColumnEncodingKind; import io.prestosql.orc.metadata.OrcType.OrcTypeKind; @@ -40,7 +39,8 @@ import java.util.Map; import java.util.Optional; -import static com.google.common.base.Preconditions.checkState; +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.collect.ImmutableList.toImmutableList; import static io.prestosql.orc.metadata.CompressionKind.LZ4; import static io.prestosql.orc.metadata.CompressionKind.NONE; import static io.prestosql.orc.metadata.CompressionKind.SNAPPY; @@ -108,7 +108,9 @@ public Footer readFooter(HiveWriterVersion hiveWriterVersion, InputStream inputS private static List toStripeInformation(List types) { - return ImmutableList.copyOf(Iterables.transform(types, DwrfMetadataReader::toStripeInformation)); + return types.stream() + .map(DwrfMetadataReader::toStripeInformation) + .collect(toImmutableList()); } private static StripeInformation toStripeInformation(DwrfProto.StripeInformation stripeInformation) @@ -137,7 +139,9 @@ private static Stream toStream(DwrfProto.Stream stream) private static List toStream(List streams) { - return ImmutableList.copyOf(Iterables.transform(streams, DwrfMetadataReader::toStream)); + return streams.stream() + .map(DwrfMetadataReader::toStream) + .collect(toImmutableList()); } private static DwrfSequenceEncoding toSequenceEncoding(OrcType type, DwrfProto.ColumnEncoding columnEncoding) @@ -202,7 +206,9 @@ public List readRowIndexes(HiveWriterVersion hiveWriterVersion, I { CodedInputStream input = CodedInputStream.newInstance(inputStream); DwrfProto.RowIndex rowIndex = DwrfProto.RowIndex.parseFrom(input); - return ImmutableList.copyOf(Iterables.transform(rowIndex.getEntryList(), rowIndexEntry -> toRowGroupIndex(hiveWriterVersion, rowIndexEntry))); + return rowIndex.getEntryList().stream() + .map(entry -> toRowGroupIndex(hiveWriterVersion, entry)) + .collect(toImmutableList()); } @Override @@ -218,24 +224,17 @@ private static RowGroupIndex toRowGroupIndex(HiveWriterVersion hiveWriterVersion ImmutableList.Builder positions = ImmutableList.builder(); for (int index = 0; index < positionsList.size(); index++) { long longPosition = positionsList.get(index); - int intPosition = (int) longPosition; - checkState(intPosition == longPosition, "Expected checkpoint position %s, to be an integer", index); + @SuppressWarnings("NumericCastThatLosesPrecision") + int intPosition = (int) longPosition; + checkArgument(intPosition == longPosition, "Expected checkpoint position [%s] value [%s] to be an integer", index, longPosition); positions.add(intPosition); } return new RowGroupIndex(positions.build(), toColumnStatistics(hiveWriterVersion, rowIndexEntry.getStatistics(), true)); } - private static List toColumnStatistics(HiveWriterVersion hiveWriterVersion, List columnStatistics, boolean isRowGroup) - { - if (columnStatistics == null) { - return ImmutableList.of(); - } - return ImmutableList.copyOf(Iterables.transform(columnStatistics, statistics -> toColumnStatistics(hiveWriterVersion, statistics, isRowGroup))); - } - - private Map toUserMetadata(List metadataList) + private static Map toUserMetadata(List metadataList) { ImmutableMap.Builder mapBuilder = ImmutableMap.builder(); for (DwrfProto.UserMetadataItem item : metadataList) { @@ -350,7 +349,9 @@ private static OrcType toType(DwrfProto.Type type) private static List toType(List types) { - return ImmutableList.copyOf(Iterables.transform(types, DwrfMetadataReader::toType)); + return types.stream() + .map(DwrfMetadataReader::toType) + .collect(toImmutableList()); } private static OrcTypeKind toTypeKind(DwrfProto.Type.Kind kind) @@ -426,9 +427,7 @@ private static ColumnEncodingKind toColumnEncodingKind(OrcTypeKind type, DwrfPro if (type == OrcTypeKind.SHORT || type == OrcTypeKind.INT || type == OrcTypeKind.LONG) { return ColumnEncodingKind.DWRF_DIRECT; } - else { - return ColumnEncodingKind.DIRECT; - } + return ColumnEncodingKind.DIRECT; case DICTIONARY: return ColumnEncodingKind.DICTIONARY; case MAP_FLAT: diff --git a/presto-orc/src/main/java/io/prestosql/orc/metadata/OrcMetadataReader.java b/presto-orc/src/main/java/io/prestosql/orc/metadata/OrcMetadataReader.java index d1449366f7fd..d0a1ca888c91 100644 --- a/presto-orc/src/main/java/io/prestosql/orc/metadata/OrcMetadataReader.java +++ b/presto-orc/src/main/java/io/prestosql/orc/metadata/OrcMetadataReader.java @@ -45,7 +45,7 @@ import java.util.Map; import java.util.Optional; -import static com.google.common.base.Preconditions.checkState; +import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.collect.ImmutableList.toImmutableList; import static io.airlift.slice.SliceUtf8.lengthOfCodePoint; import static io.airlift.slice.SliceUtf8.tryGetCodePointAt; @@ -217,9 +217,10 @@ private static RowGroupIndex toRowGroupIndex(HiveWriterVersion hiveWriterVersion ImmutableList.Builder positions = ImmutableList.builder(); for (int index = 0; index < positionsList.size(); index++) { long longPosition = positionsList.get(index); - int intPosition = (int) longPosition; - checkState(intPosition == longPosition, "Expected checkpoint position %s, to be an integer", index); + @SuppressWarnings("NumericCastThatLosesPrecision") + int intPosition = (int) longPosition; + checkArgument(intPosition == longPosition, "Expected checkpoint position [%s] value [%s] to be an integer", index, longPosition); positions.add(intPosition); } diff --git a/presto-orc/src/main/java/io/prestosql/orc/metadata/OrcType.java b/presto-orc/src/main/java/io/prestosql/orc/metadata/OrcType.java index 46173faffd2e..69238d25cfa8 100644 --- a/presto-orc/src/main/java/io/prestosql/orc/metadata/OrcType.java +++ b/presto-orc/src/main/java/io/prestosql/orc/metadata/OrcType.java @@ -273,13 +273,13 @@ public static List createOrcRowType(int nextFieldTypeIndex, List orcTypes = new ArrayList<>(); + ImmutableList.Builder orcTypes = ImmutableList.builder(); orcTypes.add(new OrcType( OrcTypeKind.STRUCT, fieldTypeIndexes, fieldNames)); fieldTypesList.forEach(orcTypes::addAll); - return orcTypes; + return orcTypes.build(); } }