diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeModule.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeModule.java index 1d20aaecdaa5..51bae29cb931 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeModule.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeModule.java @@ -93,6 +93,7 @@ public void setup(Binder binder) binder.bind(Key.get(boolean.class, TranslateHiveViews.class)).toInstance(false); configBinder(binder).bindConfig(ParquetReaderConfig.class); configBinder(binder).bindConfig(ParquetWriterConfig.class); + configBinder(binder).bindConfigDefaults(ParquetWriterConfig.class, config -> config.setParquetOptimizedWriterEnabled(true)); install(new ConnectorAccessControlModule()); newOptionalBinder(binder, DeltaLakeAccessControlMetadataFactory.class) diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeCreateTableStatistics.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeCreateTableStatistics.java index b799b4b7b864..5ac4fe9d23e8 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeCreateTableStatistics.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeCreateTableStatistics.java @@ -13,29 +13,9 @@ */ package io.trino.plugin.deltalake; -import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import io.trino.plugin.deltalake.transactionlog.AddFileEntry; -import io.trino.plugin.deltalake.transactionlog.statistics.DeltaLakeFileStatistics; -import io.trino.plugin.hive.parquet.ParquetWriterConfig; -import io.trino.testing.QueryRunner; -import org.testng.annotations.Test; -import java.time.Instant; -import java.time.ZonedDateTime; -import java.util.List; import java.util.Map; -import java.util.Optional; -import java.util.function.Function; - -import static com.google.common.base.Verify.verify; -import static com.google.common.collect.Iterables.getOnlyElement; -import static io.trino.plugin.deltalake.DeltaLakeColumnType.REGULAR; -import static io.trino.spi.type.DateTimeEncoding.packDateTimeWithZone; -import static io.trino.spi.type.TimeZoneKey.UTC_KEY; -import static io.trino.spi.type.TimestampWithTimeZoneType.TIMESTAMP_TZ_MILLIS; -import static org.assertj.core.api.Assertions.assertThat; -import static org.testng.Assert.assertEquals; public class TestDeltaLakeCreateTableStatistics extends AbstractTestDeltaLakeCreateTableStatistics @@ -45,63 +25,4 @@ Map additionalProperties() { return ImmutableMap.of(); } - - @Override - protected QueryRunner createQueryRunner() - throws Exception - { - verify(!new ParquetWriterConfig().isParquetOptimizedWriterEnabled(), "This test assumes the optimized Parquet writer is disabled by default"); - return super.createQueryRunner(); - } - - @Override - @Test - public void testTimestampMilliRecords() - throws Exception - { - String columnName = "t_timestamp"; - DeltaLakeColumnHandle columnHandle = new DeltaLakeColumnHandle(columnName, TIMESTAMP_TZ_MILLIS, REGULAR); - try (TestTable table = new TestTable( - "test_timestamp_records_", - ImmutableList.of(columnName), - "VALUES timestamp '2012-10-31 01:00:00.123 America/New_York', timestamp '2012-10-31 01:00:00.123 America/Los_Angeles', timestamp '2012-10-31 01:00:00.123 UTC'")) { - List addFileEntries = getAddFileEntries(table.getName()); - AddFileEntry entry = getOnlyElement(addFileEntries); - assertThat(entry.getStats()).isPresent(); - DeltaLakeFileStatistics fileStatistics = entry.getStats().get(); - - assertEquals(fileStatistics.getNumRecords(), Optional.of(3L)); - assertEquals(fileStatistics.getMinColumnValue(columnHandle), Optional.empty()); - assertEquals(fileStatistics.getMaxColumnValue(columnHandle), Optional.empty()); - assertEquals(fileStatistics.getNullCount(columnName), Optional.empty()); - } - } - - // int96 timestamp Statistics are only populated if a row group contains a single value - @Test - public void testTimestampMilliSingleRecord() - throws Exception - { - String columnName = "t_timestamp"; - DeltaLakeColumnHandle columnHandle = new DeltaLakeColumnHandle(columnName, TIMESTAMP_TZ_MILLIS, REGULAR); - try (TestTable table = new TestTable( - "test_timestamp_single_record_", - ImmutableList.of(columnName), - "VALUES timestamp '2012-10-31 04:00:00.123 America/New_York', timestamp '2012-10-31 01:00:00.123 America/Los_Angeles', null")) { - List addFileEntries = getAddFileEntries(table.getName()); - AddFileEntry entry = getOnlyElement(addFileEntries); - assertThat(entry.getStats()).isPresent(); - DeltaLakeFileStatistics fileStatistics = entry.getStats().get(); - - assertEquals(fileStatistics.getNumRecords(), Optional.of(3L)); - Function timestampValueConverter = valueString -> { - ZonedDateTime zonedDateTime = ZonedDateTime.parse(valueString); - Instant instant = zonedDateTime.toInstant(); - return packDateTimeWithZone(instant.toEpochMilli(), UTC_KEY); - }; - assertEquals(fileStatistics.getMinColumnValue(columnHandle), Optional.of(timestampValueConverter.apply("2012-10-31T08:00:00.123Z"))); - assertEquals(fileStatistics.getMaxColumnValue(columnHandle), Optional.of(timestampValueConverter.apply("2012-10-31T08:00:00.123Z"))); - assertEquals(fileStatistics.getNullCount(columnName), Optional.of(1L)); - } - } } diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeCreateTableStatisticsLegacyWriter.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeCreateTableStatisticsLegacyWriter.java new file mode 100644 index 000000000000..7f8ff9c433e6 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeCreateTableStatisticsLegacyWriter.java @@ -0,0 +1,96 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.deltalake; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import io.trino.plugin.deltalake.transactionlog.AddFileEntry; +import io.trino.plugin.deltalake.transactionlog.statistics.DeltaLakeFileStatistics; +import org.testng.annotations.Test; + +import java.time.Instant; +import java.time.ZonedDateTime; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.function.Function; + +import static com.google.common.collect.Iterables.getOnlyElement; +import static io.trino.plugin.deltalake.DeltaLakeColumnType.REGULAR; +import static io.trino.spi.type.DateTimeEncoding.packDateTimeWithZone; +import static io.trino.spi.type.TimeZoneKey.UTC_KEY; +import static io.trino.spi.type.TimestampWithTimeZoneType.TIMESTAMP_TZ_MILLIS; +import static org.assertj.core.api.Assertions.assertThat; +import static org.testng.Assert.assertEquals; + +public class TestDeltaLakeCreateTableStatisticsLegacyWriter + extends AbstractTestDeltaLakeCreateTableStatistics +{ + @Override + Map additionalProperties() + { + return ImmutableMap.of("parquet.experimental-optimized-writer.enabled", "false"); + } + + @Override + @Test + public void testTimestampMilliRecords() + throws Exception + { + String columnName = "t_timestamp"; + DeltaLakeColumnHandle columnHandle = new DeltaLakeColumnHandle(columnName, TIMESTAMP_TZ_MILLIS, REGULAR); + try (TestTable table = new TestTable( + "test_timestamp_records_", + ImmutableList.of(columnName), + "VALUES timestamp '2012-10-31 01:00:00.123 America/New_York', timestamp '2012-10-31 01:00:00.123 America/Los_Angeles', timestamp '2012-10-31 01:00:00.123 UTC'")) { + List addFileEntries = getAddFileEntries(table.getName()); + AddFileEntry entry = getOnlyElement(addFileEntries); + assertThat(entry.getStats()).isPresent(); + DeltaLakeFileStatistics fileStatistics = entry.getStats().get(); + + assertEquals(fileStatistics.getNumRecords(), Optional.of(3L)); + assertEquals(fileStatistics.getMinColumnValue(columnHandle), Optional.empty()); + assertEquals(fileStatistics.getMaxColumnValue(columnHandle), Optional.empty()); + assertEquals(fileStatistics.getNullCount(columnName), Optional.empty()); + } + } + + // int96 timestamp Statistics are only populated if a row group contains a single value + @Test + public void testTimestampMilliSingleRecord() + throws Exception + { + String columnName = "t_timestamp"; + DeltaLakeColumnHandle columnHandle = new DeltaLakeColumnHandle(columnName, TIMESTAMP_TZ_MILLIS, REGULAR); + try (TestTable table = new TestTable( + "test_timestamp_single_record_", + ImmutableList.of(columnName), + "VALUES timestamp '2012-10-31 04:00:00.123 America/New_York', timestamp '2012-10-31 01:00:00.123 America/Los_Angeles', null")) { + List addFileEntries = getAddFileEntries(table.getName()); + AddFileEntry entry = getOnlyElement(addFileEntries); + assertThat(entry.getStats()).isPresent(); + DeltaLakeFileStatistics fileStatistics = entry.getStats().get(); + + assertEquals(fileStatistics.getNumRecords(), Optional.of(3L)); + Function timestampValueConverter = valueString -> { + ZonedDateTime zonedDateTime = ZonedDateTime.parse(valueString); + Instant instant = zonedDateTime.toInstant(); + return packDateTimeWithZone(instant.toEpochMilli(), UTC_KEY); + }; + assertEquals(fileStatistics.getMinColumnValue(columnHandle), Optional.of(timestampValueConverter.apply("2012-10-31T08:00:00.123Z"))); + assertEquals(fileStatistics.getMaxColumnValue(columnHandle), Optional.of(timestampValueConverter.apply("2012-10-31T08:00:00.123Z"))); + assertEquals(fileStatistics.getNullCount(columnName), Optional.of(1L)); + } + } +} diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeCreateTableStatisticsOptimizedWriter.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeCreateTableStatisticsOptimizedWriter.java deleted file mode 100644 index da3f4ee9fbb2..000000000000 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeCreateTableStatisticsOptimizedWriter.java +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.deltalake; - -import com.google.common.collect.ImmutableMap; - -import java.util.Map; - -public class TestDeltaLakeCreateTableStatisticsOptimizedWriter - extends AbstractTestDeltaLakeCreateTableStatistics -{ - @Override - Map additionalProperties() - { - return ImmutableMap.of("parquet.experimental-optimized-writer.enabled", "true"); - } -} diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeOptimizedWriterConnectorSmokeTest.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeLegacyWriterConnectorSmokeTest.java similarity index 95% rename from plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeOptimizedWriterConnectorSmokeTest.java rename to plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeLegacyWriterConnectorSmokeTest.java index d3c85a4bd497..935fb428bc00 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeOptimizedWriterConnectorSmokeTest.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeLegacyWriterConnectorSmokeTest.java @@ -20,7 +20,7 @@ import static io.trino.plugin.deltalake.DeltaLakeQueryRunner.DELTA_CATALOG; -public class TestDeltaLakeOptimizedWriterConnectorSmokeTest +public class TestDeltaLakeLegacyWriterConnectorSmokeTest extends BaseDeltaLakeAwsConnectorSmokeTest { @Override @@ -32,7 +32,7 @@ protected QueryRunner createDeltaLakeQueryRunner(Map connectorPr SCHEMA, ImmutableMap.builder() .putAll(connectorProperties) - .put("parquet.experimental-optimized-writer.enabled", "true") + .put("parquet.experimental-optimized-writer.enabled", "false") .put("delta.enable-non-concurrent-writes", "true") .put("hive.s3.max-connections", "2") .buildOrThrow(),