Skip to content

Commit 2695f25

Browse files
committed
fix comments
1 parent ef7a744 commit 2695f25

File tree

6 files changed

+20
-39
lines changed

6 files changed

+20
-39
lines changed

arrow/src/main/java/org/apache/iceberg/arrow/vectorized/VectorizedArrowReader.java

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -167,11 +167,6 @@ public VectorHolder read(VectorHolder reuse, int numValsToRead) {
167167
.fixedWidthTypeBinaryBatchReader()
168168
.nextBatch(vec, typeWidth, nullabilityHolder);
169169
break;
170-
case TIMESTAMP_INT96:
171-
vectorizedColumnIterator
172-
.timestampInt96BatchReader()
173-
.nextBatch(vec, typeWidth, nullabilityHolder);
174-
break;
175170
case BOOLEAN:
176171
vectorizedColumnIterator.booleanBatchReader().nextBatch(vec, -1, nullabilityHolder);
177172
break;
@@ -198,6 +193,11 @@ public VectorHolder read(VectorHolder reuse, int numValsToRead) {
198193
.timestampMillisBatchReader()
199194
.nextBatch(vec, typeWidth, nullabilityHolder);
200195
break;
196+
case TIMESTAMP_INT96:
197+
vectorizedColumnIterator
198+
.timestampInt96BatchReader()
199+
.nextBatch(vec, typeWidth, nullabilityHolder);
200+
break;
201201
case UUID:
202202
vectorizedColumnIterator
203203
.fixedSizeBinaryBatchReader()
@@ -340,14 +340,6 @@ private void allocateVectorBasedOnTypeName(PrimitiveType primitive, Field arrowF
340340
vec.allocateNew();
341341
this.typeWidth = len;
342342
break;
343-
case INT96:
344-
int length = BigIntVector.TYPE_WIDTH;
345-
this.readType = ReadType.TIMESTAMP_INT96;
346-
this.vec = arrowField.createVector(rootAlloc);
347-
vec.setInitialCapacity(batchSize * length);
348-
vec.allocateNew();
349-
this.typeWidth = length;
350-
break;
351343
case BINARY:
352344
this.vec = arrowField.createVector(rootAlloc);
353345
// TODO: Possibly use the uncompressed page size info to set the initial capacity
@@ -368,6 +360,14 @@ private void allocateVectorBasedOnTypeName(PrimitiveType primitive, Field arrowF
368360
this.readType = ReadType.INT;
369361
this.typeWidth = (int) IntVector.TYPE_WIDTH;
370362
break;
363+
case INT96:
364+
int length = BigIntVector.TYPE_WIDTH;
365+
this.readType = ReadType.TIMESTAMP_INT96;
366+
this.vec = arrowField.createVector(rootAlloc);
367+
vec.setInitialCapacity(batchSize * length);
368+
vec.allocateNew();
369+
this.typeWidth = length;
370+
break;
371371
case FLOAT:
372372
Field floatField =
373373
new Field(

arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedParquetDefinitionLevelReader.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -456,7 +456,8 @@ protected void nextVal(
456456
ValuesAsBytesReader valuesReader,
457457
int typeWidth,
458458
byte[] byteArray) {
459-
ByteBuffer buffer = valuesReader.getBuffer(12);
459+
// 8 bytes (time-of-day nanos) + 4 bytes (Julian day) = 12 bytes
460+
ByteBuffer buffer = valuesReader.getBuffer(12).order(ByteOrder.LITTLE_ENDIAN);
460461
long timestampInt96 = TimestampUtil.extractTimestampInt96(buffer);
461462
vector.getDataBuffer().setLong((long) idx * typeWidth, timestampInt96);
462463
}

spark/v2.4/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -377,7 +377,6 @@ public long readLong() {
377377
}
378378

379379
private static class TimestampInt96Reader extends UnboxedReader<Long> {
380-
private static final long UNIX_EPOCH_JULIAN = 2_440_588L;
381380

382381
TimestampInt96Reader(ColumnDescriptor desc) {
383382
super(desc);
@@ -392,11 +391,7 @@ public Long read(Long ignored) {
392391
public long readLong() {
393392
final ByteBuffer byteBuffer =
394393
column.nextBinary().toByteBuffer().order(ByteOrder.LITTLE_ENDIAN);
395-
final long timeOfDayNanos = byteBuffer.getLong();
396-
final int julianDay = byteBuffer.getInt();
397-
398-
return TimeUnit.DAYS.toMicros(julianDay - UNIX_EPOCH_JULIAN)
399-
+ TimeUnit.NANOSECONDS.toMicros(timeOfDayNanos);
394+
return TimestampUtil.extractTimestampInt96(byteBuffer);
400395
}
401396
}
402397

spark/v3.1/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,6 @@ public long readLong() {
378378
}
379379

380380
private static class TimestampInt96Reader extends UnboxedReader<Long> {
381-
private static final long UNIX_EPOCH_JULIAN = 2_440_588L;
382381

383382
TimestampInt96Reader(ColumnDescriptor desc) {
384383
super(desc);
@@ -393,11 +392,7 @@ public Long read(Long ignored) {
393392
public long readLong() {
394393
final ByteBuffer byteBuffer =
395394
column.nextBinary().toByteBuffer().order(ByteOrder.LITTLE_ENDIAN);
396-
final long timeOfDayNanos = byteBuffer.getLong();
397-
final int julianDay = byteBuffer.getInt();
398-
399-
return TimeUnit.DAYS.toMicros(julianDay - UNIX_EPOCH_JULIAN)
400-
+ TimeUnit.NANOSECONDS.toMicros(timeOfDayNanos);
395+
return TimestampUtil.extractTimestampInt96(byteBuffer);
401396
}
402397
}
403398

spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -377,7 +377,6 @@ public long readLong() {
377377
}
378378

379379
private static class TimestampInt96Reader extends UnboxedReader<Long> {
380-
private static final long UNIX_EPOCH_JULIAN = 2_440_588L;
381380

382381
TimestampInt96Reader(ColumnDescriptor desc) {
383382
super(desc);
@@ -392,11 +391,7 @@ public Long read(Long ignored) {
392391
public long readLong() {
393392
final ByteBuffer byteBuffer =
394393
column.nextBinary().toByteBuffer().order(ByteOrder.LITTLE_ENDIAN);
395-
final long timeOfDayNanos = byteBuffer.getLong();
396-
final int julianDay = byteBuffer.getInt();
397-
398-
return TimeUnit.DAYS.toMicros(julianDay - UNIX_EPOCH_JULIAN)
399-
+ TimeUnit.NANOSECONDS.toMicros(timeOfDayNanos);
394+
return TimestampUtil.extractTimestampInt96(byteBuffer);
400395
}
401396
}
402397

spark/v3.3/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,9 @@
2525
import java.util.Arrays;
2626
import java.util.List;
2727
import java.util.Map;
28-
import java.util.concurrent.TimeUnit;
2928
import org.apache.iceberg.MetadataColumns;
3029
import org.apache.iceberg.Schema;
30+
import org.apache.iceberg.arrow.vectorized.parquet.TimestampUtil;
3131
import org.apache.iceberg.parquet.ParquetSchemaUtil;
3232
import org.apache.iceberg.parquet.ParquetValueReader;
3333
import org.apache.iceberg.parquet.ParquetValueReaders;
@@ -377,7 +377,6 @@ public long readLong() {
377377
}
378378

379379
private static class TimestampInt96Reader extends UnboxedReader<Long> {
380-
private static final long UNIX_EPOCH_JULIAN = 2_440_588L;
381380

382381
TimestampInt96Reader(ColumnDescriptor desc) {
383382
super(desc);
@@ -392,11 +391,7 @@ public Long read(Long ignored) {
392391
public long readLong() {
393392
final ByteBuffer byteBuffer =
394393
column.nextBinary().toByteBuffer().order(ByteOrder.LITTLE_ENDIAN);
395-
final long timeOfDayNanos = byteBuffer.getLong();
396-
final int julianDay = byteBuffer.getInt();
397-
398-
return TimeUnit.DAYS.toMicros(julianDay - UNIX_EPOCH_JULIAN)
399-
+ TimeUnit.NANOSECONDS.toMicros(timeOfDayNanos);
394+
return TimestampUtil.extractTimestampInt96(byteBuffer);
400395
}
401396
}
402397

0 commit comments

Comments
 (0)