Skip to content

Commit f733ee2

Browse files
committed
Set Calendar type used for temporal types in ORC footer
1 parent dc5d602 commit f733ee2

File tree

7 files changed

+117
-4
lines changed

7 files changed

+117
-4
lines changed

lib/trino-orc/src/main/java/io/trino/orc/OrcWriter.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,10 @@
7272
import static io.trino.orc.OrcWriterStats.FlushReason.DICTIONARY_FULL;
7373
import static io.trino.orc.OrcWriterStats.FlushReason.MAX_BYTES;
7474
import static io.trino.orc.OrcWriterStats.FlushReason.MAX_ROWS;
75+
import static io.trino.orc.metadata.CalendarKind.PROLEPTIC_GREGORIAN;
7576
import static io.trino.orc.metadata.ColumnEncoding.ColumnEncodingKind.DIRECT;
7677
import static io.trino.orc.metadata.OrcColumnId.ROOT_COLUMN;
78+
import static io.trino.orc.metadata.OrcType.TEMPORAL_TYPES;
7779
import static io.trino.orc.metadata.PostScript.MAGIC;
7880
import static io.trino.orc.stream.OrcDataOutput.createDataOutput;
7981
import static io.trino.orc.writer.ColumnWriters.createColumnWriter;
@@ -529,7 +531,8 @@ private List<OrcDataOutput> bufferFileFooter()
529531
orcTypes,
530532
fileStats,
531533
userMetadata,
532-
Optional.empty()); // writer id will be set by MetadataWriter
534+
Optional.empty(), // writer id will be set by MetadataWriter
535+
containsTemporalType(orcTypes) ? Optional.of(PROLEPTIC_GREGORIAN) : Optional.empty());
533536

534537
closedStripes.clear();
535538
closedStripesRetainedBytes = 0;
@@ -544,6 +547,11 @@ private List<OrcDataOutput> bufferFileFooter()
544547
return outputData;
545548
}
546549

550+
private boolean containsTemporalType(ColumnMetadata<OrcType> orcTypes)
551+
{
552+
return orcTypes.stream().map(OrcType::getOrcTypeKind).anyMatch(TEMPORAL_TYPES::contains);
553+
}
554+
547555
private void recordValidation(Consumer<OrcWriteValidationBuilder> task)
548556
{
549557
if (validationBuilder != null) {
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
/*
2+
* Licensed under the Apache License, Version 2.0 (the "License");
3+
* you may not use this file except in compliance with the License.
4+
* You may obtain a copy of the License at
5+
*
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS,
10+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the License for the specific language governing permissions and
12+
* limitations under the License.
13+
*/
14+
package io.trino.orc.metadata;
15+
16+
/**
 * Calendar kinds that a {@code Footer} can carry. The constants correspond
 * one-to-one, by name, to the constants of {@code OrcProto.CalendarKind}.
 */
public enum CalendarKind
{
    UNKNOWN_CALENDAR,
    JULIAN_GREGORIAN,
    PROLEPTIC_GREGORIAN,
}

lib/trino-orc/src/main/java/io/trino/orc/metadata/Footer.java

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ public class Footer
3737
private final Optional<ColumnMetadata<ColumnStatistics>> fileStats;
3838
private final Map<String, Slice> userMetadata;
3939
private final Optional<Integer> writerId;
40+
private final Optional<CalendarKind> calendar;
4041

4142
public Footer(
4243
long numberOfRows,
@@ -45,7 +46,8 @@ public Footer(
4546
ColumnMetadata<OrcType> types,
4647
Optional<ColumnMetadata<ColumnStatistics>> fileStats,
4748
Map<String, Slice> userMetadata,
48-
Optional<Integer> writerId)
49+
Optional<Integer> writerId,
50+
Optional<CalendarKind> calendar)
4951
{
5052
this.numberOfRows = numberOfRows;
5153
rowsInRowGroup.ifPresent(value -> checkArgument(value > 0, "rowsInRowGroup must be at least 1"));
@@ -56,6 +58,7 @@ public Footer(
5658
requireNonNull(userMetadata, "userMetadata is null");
5759
this.userMetadata = ImmutableMap.copyOf(transformValues(userMetadata, Slice::copy));
5860
this.writerId = requireNonNull(writerId, "writerId is null");
61+
this.calendar = requireNonNull(calendar, "calendar is null");
5962
}
6063

6164
public long getNumberOfRows()
@@ -85,14 +88,19 @@ public Optional<ColumnMetadata<ColumnStatistics>> getFileStats()
8588

8689
public Map<String, Slice> getUserMetadata()
8790
{
88-
return ImmutableMap.copyOf(transformValues(userMetadata, Slice::copy));
91+
return userMetadata;
8992
}
9093

9194
public Optional<Integer> getWriterId()
9295
{
9396
return writerId;
9497
}
9598

99+
public Optional<CalendarKind> getCalendar()
100+
{
101+
return calendar;
102+
}
103+
96104
@Override
97105
public String toString()
98106
{
@@ -104,6 +112,7 @@ public String toString()
104112
.add("columnStatistics", fileStats)
105113
.add("userMetadata", userMetadata.keySet())
106114
.add("writerId", writerId)
115+
.add("calendar", calendar)
107116
.toString();
108117
}
109118
}

lib/trino-orc/src/main/java/io/trino/orc/metadata/OrcMetadataReader.java

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@
5656
import static io.airlift.slice.SliceUtf8.lengthOfCodePoint;
5757
import static io.airlift.slice.SliceUtf8.tryGetCodePointAt;
5858
import static io.airlift.units.DataSize.Unit.GIGABYTE;
59+
import static io.trino.orc.metadata.CalendarKind.JULIAN_GREGORIAN;
60+
import static io.trino.orc.metadata.CalendarKind.PROLEPTIC_GREGORIAN;
61+
import static io.trino.orc.metadata.CalendarKind.UNKNOWN_CALENDAR;
5962
import static io.trino.orc.metadata.CompressionKind.LZ4;
6063
import static io.trino.orc.metadata.CompressionKind.NONE;
6164
import static io.trino.orc.metadata.CompressionKind.SNAPPY;
@@ -150,7 +153,8 @@ public Footer readFooter(HiveWriterVersion hiveWriterVersion, InputStream inputS
150153
toType(footer.getTypesList()),
151154
toColumnStatistics(hiveWriterVersion, footer.getStatisticsList(), false),
152155
toUserMetadata(footer.getMetadataList()),
153-
Optional.of(footer.getWriter()));
156+
Optional.of(footer.getWriter()),
157+
Optional.of(toTrinoOrcCalendarKind(footer.getCalendar())));
154158
}
155159

156160
private static List<StripeInformation> toStripeInformation(List<OrcProto.StripeInformation> types)
@@ -409,6 +413,15 @@ private static BinaryStatistics toBinaryStatistics(OrcProto.BinaryStatistics bin
409413
return new BinaryStatistics(binaryStatistics.getSum());
410414
}
411415

416+
private static CalendarKind toTrinoOrcCalendarKind(OrcProto.CalendarKind calendarKind)
417+
{
418+
return switch (calendarKind) {
419+
case OrcProto.CalendarKind.UNKNOWN_CALENDAR -> UNKNOWN_CALENDAR;
420+
case OrcProto.CalendarKind.JULIAN_GREGORIAN -> JULIAN_GREGORIAN;
421+
case OrcProto.CalendarKind.PROLEPTIC_GREGORIAN -> PROLEPTIC_GREGORIAN;
422+
};
423+
}
424+
412425
private static Slice byteStringToSlice(ByteString value)
413426
{
414427
return Slices.wrappedBuffer(value.toByteArray());

lib/trino-orc/src/main/java/io/trino/orc/metadata/OrcMetadataWriter.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,8 @@ public int writeFooter(SliceOutput output, Footer footer)
143143
.map(OrcMetadataWriter::toUserMetadata)
144144
.collect(toList()));
145145

146+
footer.getCalendar().ifPresent(calendar -> builder.setCalendar(toOrcCalendarKind(calendar)));
147+
146148
setWriter(builder);
147149

148150
return writeProtobufObject(output, builder.build());
@@ -361,6 +363,15 @@ private static OrcProto.Stream.Kind toStreamKind(StreamKind streamKind)
361363
throw new IllegalArgumentException("Unsupported stream kind: " + streamKind);
362364
}
363365

366+
private static OrcProto.CalendarKind toOrcCalendarKind(CalendarKind calendarKind)
367+
{
368+
return switch (calendarKind) {
369+
case UNKNOWN_CALENDAR -> OrcProto.CalendarKind.UNKNOWN_CALENDAR;
370+
case JULIAN_GREGORIAN -> OrcProto.CalendarKind.JULIAN_GREGORIAN;
371+
case PROLEPTIC_GREGORIAN -> OrcProto.CalendarKind.PROLEPTIC_GREGORIAN;
372+
};
373+
}
374+
364375
private static OrcProto.ColumnEncoding toColumnEncoding(ColumnEncoding columnEncodings)
365376
{
366377
return OrcProto.ColumnEncoding.newBuilder()

lib/trino-orc/src/main/java/io/trino/orc/metadata/OrcType.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
import com.google.common.collect.ImmutableList;
1717
import com.google.common.collect.ImmutableMap;
18+
import com.google.common.collect.ImmutableSet;
1819
import io.trino.spi.TrinoException;
1920
import io.trino.spi.type.ArrayType;
2021
import io.trino.spi.type.CharType;
@@ -29,6 +30,7 @@
2930
import java.util.List;
3031
import java.util.Map;
3132
import java.util.Optional;
33+
import java.util.Set;
3234
import java.util.function.Function;
3335

3436
import static com.google.common.base.MoreObjects.toStringHelper;
@@ -83,6 +85,8 @@ public enum OrcTypeKind
8385
UNION,
8486
}
8587

88+
/**
 * The type kinds treated as temporal: {@code DATE}, {@code TIMESTAMP} and
 * {@code TIMESTAMP_INSTANT}.
 */
public static final Set<OrcTypeKind> TEMPORAL_TYPES = ImmutableSet.of(
        OrcTypeKind.DATE,
        OrcTypeKind.TIMESTAMP,
        OrcTypeKind.TIMESTAMP_INSTANT);
89+
8690
private final OrcTypeKind orcTypeKind;
8791
private final List<OrcColumnId> fieldTypeIndexes;
8892
private final List<String> fieldNames;

lib/trino-orc/src/test/java/io/trino/orc/TestOrcWriter.java

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
import io.airlift.units.DataSize;
2222
import io.trino.filesystem.local.LocalOutputFile;
2323
import io.trino.orc.OrcWriteValidation.OrcWriteValidationMode;
24+
import io.trino.orc.metadata.CalendarKind;
25+
import io.trino.orc.metadata.CompressionKind;
2426
import io.trino.orc.metadata.Footer;
2527
import io.trino.orc.metadata.OrcMetadataReader;
2628
import io.trino.orc.metadata.OrcType;
@@ -32,12 +34,17 @@
3234
import io.trino.spi.Page;
3335
import io.trino.spi.block.Block;
3436
import io.trino.spi.block.VariableWidthBlockBuilder;
37+
import io.trino.spi.type.SqlDate;
38+
import io.trino.spi.type.SqlTimestamp;
3539
import io.trino.spi.type.Type;
3640
import org.junit.jupiter.api.Test;
3741

3842
import java.io.IOException;
3943
import java.io.InputStream;
44+
import java.time.LocalDate;
45+
import java.time.LocalDateTime;
4046
import java.time.ZoneId;
47+
import java.util.Iterator;
4148
import java.util.List;
4249
import java.util.Optional;
4350

@@ -48,9 +55,12 @@
4855
import static io.trino.orc.TestingOrcPredicate.ORC_ROW_GROUP_SIZE;
4956
import static io.trino.orc.TestingOrcPredicate.ORC_STRIPE_SIZE;
5057
import static io.trino.orc.metadata.CompressionKind.NONE;
58+
import static io.trino.spi.type.DateType.DATE;
59+
import static io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS;
5160
import static io.trino.spi.type.VarcharType.VARCHAR;
5261
import static java.lang.Math.toIntExact;
5362
import static java.nio.charset.StandardCharsets.UTF_8;
63+
import static java.time.ZoneOffset.UTC;
5464
import static org.assertj.core.api.Assertions.assertThat;
5565

5666
public class TestOrcWriter
@@ -78,6 +88,43 @@ public void testWriteHugeChunk()
7888
testWriteOutput(columnNameBuilder.build(), data);
7989
}
8090

91+
@Test
92+
public void testWritingFooter()
93+
{
94+
List<String> strings = ImmutableList.of("aaa1", "qwerty", "asdf", "zxcvb", "1234");
95+
testWritingFooter(VARCHAR, strings.iterator(), CalendarKind.UNKNOWN_CALENDAR);
96+
97+
List<String> dates = ImmutableList.of("2020-01-01", "2021-02-02", "2022-03-03", "2023-04-04", "2024-05-05");
98+
Iterator<?> values = dates.stream().map(LocalDate::parse).map(LocalDate::toEpochDay).map(Math::toIntExact).map(SqlDate::new).toList().iterator();
99+
testWritingFooter(DATE, values, CalendarKind.PROLEPTIC_GREGORIAN);
100+
101+
List<String> printedTimestamp = ImmutableList.of("2023-04-11T05:16:12.123", "2021-04-11T05:16:12.123", "1999-04-11T05:16:12.123");
102+
Iterator<?> timestamps = printedTimestamp.stream()
103+
.map(LocalDateTime::parse)
104+
.map(ldt -> SqlTimestamp.fromSeconds(TIMESTAMP_MILLIS.getPrecision(), ldt.toEpochSecond(UTC), ldt.getNano()))
105+
.iterator();
106+
107+
testWritingFooter(TIMESTAMP_MILLIS, timestamps, CalendarKind.PROLEPTIC_GREGORIAN);
108+
}
109+
110+
private static void testWritingFooter(Type type, Iterator<?> values, CalendarKind calendarKind)
111+
{
112+
try (TempFile tempFile = new TempFile()) {
113+
OrcTester.writeOrcColumnTrino(tempFile.getFile(), CompressionKind.NONE, type, values, new OrcWriterStats());
114+
115+
OrcDataSource orcDataSource = new FileOrcDataSource(tempFile.getFile(), READER_OPTIONS);
116+
Footer footer = OrcReader.createOrcReader(orcDataSource, READER_OPTIONS)
117+
.orElseThrow(() -> new RuntimeException("File is empty"))
118+
.getFooter();
119+
120+
assertThat(footer.getCalendar().isPresent());
121+
assertThat(footer.getCalendar().get()).isEqualTo(calendarKind);
122+
}
123+
catch (Exception e) {
124+
throw new RuntimeException(e);
125+
}
126+
}
127+
81128
private void testWriteOutput(List<String> columnNames, String[] data)
82129
throws IOException
83130
{

0 commit comments

Comments
 (0)