Skip to content

Commit f936740

Browse files
committed
ORC-546. Fix reading timestamps with duplicated millis within a second.
This caused SPARK-27594. Fixes #420.
Signed-off-by: Owen O'Malley <[email protected]>
1 parent 8b500dd commit f936740

File tree

2 files changed: 10 additions and 8 deletions.

java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1013,9 +1013,9 @@ public void nextVector(ColumnVector previousVector,
10131013

10141014
for (int i = 0; i < batchSize; i++) {
10151015
if (result.noNulls || !result.isNull[i]) {
1016-
final int newNanos = parseNanos(nanos.next());
1016+
int newNanos = parseNanos(nanos.next());
10171017
long millis = (data.next() + base_timestamp)
1018-
* TimestampTreeWriter.MILLIS_PER_SECOND + newNanos / 1_000_000;
1018+
* TimestampTreeWriter.MILLIS_PER_SECOND;
10191019
if (millis < 0 && newNanos > 999_999) {
10201020
millis -= TimestampTreeWriter.MILLIS_PER_SECOND;
10211021
}

java/core/src/test/org/apache/orc/TestVectorOrcFile.java

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474
import java.nio.charset.StandardCharsets;
7575
import java.sql.Date;
7676
import java.sql.Timestamp;
77+
import java.time.format.DateTimeFormatter;
7778
import java.util.ArrayList;
7879
import java.util.Arrays;
7980
import java.util.Collection;
@@ -1442,16 +1443,17 @@ public void createOrcDateFile(Path file, int minYear, int maxYear
14421443
batch = reader.getSchema().createRowBatch(1000);
14431444
TimestampColumnVector times = (TimestampColumnVector) batch.cols[0];
14441445
LongColumnVector dates = (LongColumnVector) batch.cols[1];
1446+
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSSS");
14451447
for (int year = minYear; year < maxYear; ++year) {
14461448
rows.nextBatch(batch);
14471449
assertEquals(1000, batch.size);
14481450
for(int row = 0; row < 1000; ++row) {
1449-
Timestamp expected = Timestamp.valueOf(
1450-
String.format("%04d-05-05 12:34:56.%04d", year, 2*row));
1451-
assertEquals("ms row " + row + " " + expected, expected.getTime(),
1452-
times.time[row]);
1453-
assertEquals("nanos row " + row + " " + expected, expected.getNanos(),
1454-
times.nanos[row]);
1451+
String expectedStr = String.format("%04d-05-05 12:34:56.%04d", year, 2*row);
1452+
assertEquals("row " + row, expectedStr,
1453+
formatter.format(times.asScratchTimestamp(row).toLocalDateTime()));
1454+
assertEquals(0, times.time[row] % 1000);
1455+
assertTrue("nano " + row + " = " + times.nanos[row],
1456+
times.nanos[row] >= 0 && times.nanos[row] < 1_000_000_000);
14551457
assertEquals("year " + year + " row " + row,
14561458
Integer.toString(year) + "-12-25",
14571459
new DateWritable((int) dates.vector[row]).toString());

0 commit comments

Comments
 (0)