Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -885,6 +885,7 @@ jobs:
- suite-delta-lake-databricks164
- suite-ranger
- suite-gcs
- suite-hive4
- suite-clients
- suite-functions
- suite-tpch
Expand Down
4 changes: 3 additions & 1 deletion lib/trino-orc/src/main/java/io/trino/orc/OrcWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
import static io.trino.orc.OrcWriterStats.FlushReason.DICTIONARY_FULL;
import static io.trino.orc.OrcWriterStats.FlushReason.MAX_BYTES;
import static io.trino.orc.OrcWriterStats.FlushReason.MAX_ROWS;
import static io.trino.orc.metadata.CalendarKind.PROLEPTIC_GREGORIAN;
import static io.trino.orc.metadata.ColumnEncoding.ColumnEncodingKind.DIRECT;
import static io.trino.orc.metadata.OrcColumnId.ROOT_COLUMN;
import static io.trino.orc.metadata.PostScript.MAGIC;
Expand Down Expand Up @@ -529,7 +530,8 @@ private List<OrcDataOutput> bufferFileFooter()
orcTypes,
fileStats,
userMetadata,
Optional.empty()); // writer id will be set by MetadataWriter
Optional.empty(), // writer id will be set by MetadataWriter
PROLEPTIC_GREGORIAN);

closedStripes.clear();
closedStripesRetainedBytes = 0;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.orc.metadata;

/**
 * Calendar kind carried by an ORC file {@link Footer}.
 * <p>
 * The constants are translated one-to-one to and from the identically named
 * constants of {@code OrcProto.CalendarKind} when a footer is read or written.
 */
public enum CalendarKind
{
// Also used by the metadata reader when the protobuf footer yields no calendar value
UNKNOWN_CALENDAR,
// NOTE(review): presumably the hybrid Julian/Gregorian calendar — confirm against the ORC specification
JULIAN_GREGORIAN,
PROLEPTIC_GREGORIAN
}
11 changes: 10 additions & 1 deletion lib/trino-orc/src/main/java/io/trino/orc/metadata/Footer.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ public class Footer
private final Optional<ColumnMetadata<ColumnStatistics>> fileStats;
private final Map<String, Slice> userMetadata;
private final Optional<Integer> writerId;
private final CalendarKind calendar;

public Footer(
long numberOfRows,
Expand All @@ -45,7 +46,8 @@ public Footer(
ColumnMetadata<OrcType> types,
Optional<ColumnMetadata<ColumnStatistics>> fileStats,
Map<String, Slice> userMetadata,
Optional<Integer> writerId)
Optional<Integer> writerId,
CalendarKind calendar)
{
this.numberOfRows = numberOfRows;
rowsInRowGroup.ifPresent(value -> checkArgument(value > 0, "rowsInRowGroup must be at least 1"));
Expand All @@ -56,6 +58,7 @@ public Footer(
requireNonNull(userMetadata, "userMetadata is null");
this.userMetadata = ImmutableMap.copyOf(transformValues(userMetadata, Slice::copy));
this.writerId = requireNonNull(writerId, "writerId is null");
this.calendar = requireNonNull(calendar, "calendar is null");
}

public long getNumberOfRows()
Expand Down Expand Up @@ -93,6 +96,11 @@ public Optional<Integer> getWriterId()
return writerId;
}

/**
 * Returns the calendar kind this footer was constructed with.
 * The constructor rejects {@code null}, so the result is never {@code null}.
 */
public CalendarKind getCalendar()
{
return calendar;
}

@Override
public String toString()
{
Expand All @@ -104,6 +112,7 @@ public String toString()
.add("columnStatistics", fileStats)
.add("userMetadata", userMetadata.keySet())
.add("writerId", writerId)
.add("calendar", calendar)
.toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@
import static io.airlift.slice.SliceUtf8.lengthOfCodePoint;
import static io.airlift.slice.SliceUtf8.tryGetCodePointAt;
import static io.airlift.units.DataSize.Unit.GIGABYTE;
import static io.trino.orc.metadata.CalendarKind.JULIAN_GREGORIAN;
import static io.trino.orc.metadata.CalendarKind.PROLEPTIC_GREGORIAN;
import static io.trino.orc.metadata.CalendarKind.UNKNOWN_CALENDAR;
import static io.trino.orc.metadata.CompressionKind.LZ4;
import static io.trino.orc.metadata.CompressionKind.NONE;
import static io.trino.orc.metadata.CompressionKind.SNAPPY;
Expand Down Expand Up @@ -150,7 +153,8 @@ public Footer readFooter(HiveWriterVersion hiveWriterVersion, InputStream inputS
toType(footer.getTypesList()),
toColumnStatistics(hiveWriterVersion, footer.getStatisticsList(), false),
toUserMetadata(footer.getMetadataList()),
Optional.of(footer.getWriter()));
Optional.of(footer.getWriter()),
toTrinoOrcCalendarKind(footer.getCalendar()));
}

private static List<StripeInformation> toStripeInformation(List<OrcProto.StripeInformation> types)
Expand Down Expand Up @@ -409,6 +413,16 @@ private static BinaryStatistics toBinaryStatistics(OrcProto.BinaryStatistics bin
return new BinaryStatistics(binaryStatistics.getSum());
}

/**
 * Translates the protobuf calendar kind into the Trino ORC {@link CalendarKind}.
 *
 * @param calendarKind calendar kind taken from the protobuf footer, may be {@code null}
 * @return the matching Trino constant, or {@code UNKNOWN_CALENDAR} when the input is {@code null}
 */
private static CalendarKind toTrinoOrcCalendarKind(OrcProto.CalendarKind calendarKind)
{
    // A missing value is reported the same way as an explicitly unknown calendar
    if (calendarKind == null) {
        return UNKNOWN_CALENDAR;
    }

    return switch (calendarKind) {
        case OrcProto.CalendarKind.UNKNOWN_CALENDAR -> UNKNOWN_CALENDAR;
        case OrcProto.CalendarKind.JULIAN_GREGORIAN -> JULIAN_GREGORIAN;
        case OrcProto.CalendarKind.PROLEPTIC_GREGORIAN -> PROLEPTIC_GREGORIAN;
    };
}

private static Slice byteStringToSlice(ByteString value)
{
return Slices.wrappedBuffer(value.toByteArray());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,8 @@ public int writeFooter(SliceOutput output, Footer footer)
.collect(toList()))
.addAllMetadata(footer.getUserMetadata().entrySet().stream()
.map(OrcMetadataWriter::toUserMetadata)
.collect(toList()));
.collect(toList()))
.setCalendar(toOrcCalendarKind(footer.getCalendar()));

setWriter(builder);

Expand Down Expand Up @@ -361,6 +362,15 @@ private static OrcProto.Stream.Kind toStreamKind(StreamKind streamKind)
throw new IllegalArgumentException("Unsupported stream kind: " + streamKind);
}

/**
 * Translates the Trino ORC {@link CalendarKind} into its protobuf counterpart
 * so it can be stored in the serialized footer.
 *
 * @param calendarKind calendar kind held by the in-memory footer
 * @return the identically named {@code OrcProto.CalendarKind} constant
 */
private static OrcProto.CalendarKind toOrcCalendarKind(CalendarKind calendarKind)
{
    // Exhaustive switch expression: adding a CalendarKind constant without a mapping fails compilation
    OrcProto.CalendarKind protoCalendarKind = switch (calendarKind) {
        case UNKNOWN_CALENDAR -> OrcProto.CalendarKind.UNKNOWN_CALENDAR;
        case JULIAN_GREGORIAN -> OrcProto.CalendarKind.JULIAN_GREGORIAN;
        case PROLEPTIC_GREGORIAN -> OrcProto.CalendarKind.PROLEPTIC_GREGORIAN;
    };
    return protoCalendarKind;
}

private static OrcProto.ColumnEncoding toColumnEncoding(ColumnEncoding columnEncodings)
{
return OrcProto.ColumnEncoding.newBuilder()
Expand Down
43 changes: 43 additions & 0 deletions lib/trino-orc/src/test/java/io/trino/orc/TestOrcWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import io.airlift.units.DataSize;
import io.trino.filesystem.local.LocalOutputFile;
import io.trino.orc.OrcWriteValidation.OrcWriteValidationMode;
import io.trino.orc.metadata.CompressionKind;
import io.trino.orc.metadata.Footer;
import io.trino.orc.metadata.OrcMetadataReader;
import io.trino.orc.metadata.OrcType;
Expand All @@ -32,23 +33,33 @@
import io.trino.spi.Page;
import io.trino.spi.block.Block;
import io.trino.spi.block.VariableWidthBlockBuilder;
import io.trino.spi.type.SqlDate;
import io.trino.spi.type.SqlTimestamp;
import io.trino.spi.type.Type;
import org.junit.jupiter.api.Test;

import java.io.IOException;
import java.io.InputStream;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.util.List;
import java.util.Optional;

import static com.google.common.collect.ImmutableList.toImmutableList;
import static io.airlift.units.DataSize.Unit.MEGABYTE;
import static io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext;
import static io.trino.orc.OrcTester.READER_OPTIONS;
import static io.trino.orc.StripeReader.isIndexStream;
import static io.trino.orc.TestingOrcPredicate.ORC_ROW_GROUP_SIZE;
import static io.trino.orc.TestingOrcPredicate.ORC_STRIPE_SIZE;
import static io.trino.orc.metadata.CalendarKind.PROLEPTIC_GREGORIAN;
import static io.trino.orc.metadata.CompressionKind.NONE;
import static io.trino.spi.type.DateType.DATE;
import static io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS;
import static io.trino.spi.type.VarcharType.VARCHAR;
import static io.trino.testing.DateTimeTestingUtils.sqlDateOf;
import static io.trino.testing.DateTimeTestingUtils.sqlTimestampOf;
import static java.lang.Math.toIntExact;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.assertj.core.api.Assertions.assertThat;
Expand Down Expand Up @@ -78,6 +89,38 @@ public void testWriteHugeChunk()
testWriteOutput(columnNameBuilder.build(), data);
}

/**
 * Checks that the footer written by the Trino ORC writer carries the
 * proleptic Gregorian calendar for varchar, date and timestamp columns.
 */
@Test
public void testCalendarEntryInFooter()
{
    // varchar column
    List<String> varcharValues = ImmutableList.of("aaa1", "qwerty", "asdf", "zxcvb", "1234");
    assertFooterHasProlepticGregorianCalendar(VARCHAR, varcharValues);

    // date column
    List<String> dateTexts = ImmutableList.of("2020-01-01", "2021-02-02", "2022-03-03", "2023-04-04", "2024-05-05");
    List<SqlDate> dateValues = dateTexts.stream()
            .map(LocalDate::parse)
            .map(date -> sqlDateOf(date))
            .collect(toImmutableList());
    assertFooterHasProlepticGregorianCalendar(DATE, dateValues);

    // timestamp (millisecond precision) column
    List<String> timestampTexts = ImmutableList.of("2023-04-11T05:16:12.123", "2021-04-11T05:16:12.123", "1999-04-11T05:16:12.123");
    List<SqlTimestamp> timestampValues = timestampTexts.stream()
            .map(LocalDateTime::parse)
            .map(dateTime -> sqlTimestampOf(TIMESTAMP_MILLIS.getPrecision(), dateTime))
            .collect(toImmutableList());
    assertFooterHasProlepticGregorianCalendar(TIMESTAMP_MILLIS, timestampValues);
}

/**
 * Writes a single-column ORC file with the Trino writer, reads its footer back
 * and asserts that the recorded calendar is {@code PROLEPTIC_GREGORIAN}.
 *
 * @param type Trino type of the single column
 * @param values column values to write
 * @throws RuntimeException wrapping any checked or unchecked exception raised while writing or reading the file
 */
private static void assertFooterHasProlepticGregorianCalendar(Type type, List<?> values)
{
    try (TempFile tempFile = new TempFile()) {
        OrcTester.writeOrcColumnTrino(tempFile.getFile(), CompressionKind.NONE, type, values.iterator(), new OrcWriterStats());

        // Open the data source only after the file has been written, and close it
        // before the enclosing try removes the temp file so no file handle leaks.
        try (OrcDataSource orcDataSource = new FileOrcDataSource(tempFile.getFile(), READER_OPTIONS)) {
            Footer footer = OrcReader.createOrcReader(orcDataSource, READER_OPTIONS)
                    .orElseThrow(() -> new RuntimeException("File is empty"))
                    .getFooter();

            assertThat(footer.getCalendar()).isEqualTo(PROLEPTIC_GREGORIAN);
        }
    }
    catch (Exception e) {
        throw new RuntimeException(e);
    }
}

private void testWriteOutput(List<String> columnNames, String[] data)
throws IOException
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ public final class TestGroups
public static final String HDFS_IMPERSONATION = "hdfs_impersonation";
public static final String HDFS_NO_IMPERSONATION = "hdfs_no_impersonation";
public static final String HIVE_GCS = "hive_gcs";
public static final String HIVE4 = "hive4";
public static final String HIVE_SPARK = "hive_spark";
public static final String HIVE_SPARK_NO_STATS_FALLBACK = "hive_spark_no_stats_fallback";
public static final String HIVE_COMPRESSION = "hive_compression";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import io.trino.tests.product.launcher.env.common.HadoopKerberos;
import io.trino.tests.product.launcher.env.common.HadoopKerberosKms;
import io.trino.tests.product.launcher.env.common.HadoopKerberosKmsWithImpersonation;
import io.trino.tests.product.launcher.env.common.Hive4WithMinio;
import io.trino.tests.product.launcher.env.common.HttpProxy;
import io.trino.tests.product.launcher.env.common.HttpsProxy;
import io.trino.tests.product.launcher.env.common.HydraIdentityProvider;
Expand Down Expand Up @@ -99,6 +100,7 @@ public void configure(Binder binder)
binder.bind(OpenLdapReferral.class).in(SINGLETON);
binder.bind(HttpProxy.class).in(SINGLETON);
binder.bind(HttpsProxy.class).in(SINGLETON);
binder.bind(Hive4WithMinio.class).in(SINGLETON);

MapBinder<String, EnvironmentProvider> environments = newMapBinder(binder, String.class, EnvironmentProvider.class);
findEnvironmentsByBasePackage(ENVIRONMENT_PACKAGE).forEach(clazz -> environments.addBinding(nameForEnvironmentClass(clazz)).to(clazz).in(SINGLETON));
Expand Down
Loading