diff --git a/presto-lance/src/main/java/com/facebook/presto/lance/LanceArrowToPageScanner.java b/presto-lance/src/main/java/com/facebook/presto/lance/LanceArrowToPageScanner.java index 22d25a3adedc6..18a85e7eb256d 100644 --- a/presto-lance/src/main/java/com/facebook/presto/lance/LanceArrowToPageScanner.java +++ b/presto-lance/src/main/java/com/facebook/presto/lance/LanceArrowToPageScanner.java @@ -20,12 +20,19 @@ import com.facebook.presto.spi.PrestoException; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.Float2Vector; +import org.apache.arrow.vector.Float4Vector; +import org.apache.arrow.vector.UInt8Vector; import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.complex.FixedSizeListVector; +import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.ipc.ArrowReader; import org.lance.ipc.LanceScanner; import java.io.IOException; +import java.util.ArrayList; import java.util.List; import static com.google.common.collect.ImmutableList.toImmutableList; @@ -36,6 +43,7 @@ public class LanceArrowToPageScanner { private final ScannerFactory scannerFactory; private final ArrowReader arrowReader; + private final BufferAllocator allocator; private final List columns; private final ArrowBlockBuilder arrowBlockBuilder; private long lastBatchBytes; @@ -46,6 +54,7 @@ public LanceArrowToPageScanner( ScannerFactory scannerFactory, ArrowBlockBuilder arrowBlockBuilder) { + this.allocator = requireNonNull(allocator, "allocator is null"); this.columns = requireNonNull(columns, "columns is null"); this.scannerFactory = requireNonNull(scannerFactory, "scannerFactory is null"); this.arrowBlockBuilder = requireNonNull(arrowBlockBuilder, "arrowBlockBuilder is null"); @@ -95,15 +104,191 @@ public Page convert() int rowCount = root.getRowCount(); Block[] blocks = new 
Block[columns.size()]; + List coercedVectors = new ArrayList<>(); - for (int col = 0; col < columns.size(); col++) { - LanceColumnHandle column = columns.get(col); - FieldVector vector = root.getVector(column.getColumnName()); - Type type = column.getColumnType(); - blocks[col] = arrowBlockBuilder.buildBlockFromFieldVector(vector, type, null); + try { + for (int col = 0; col < columns.size(); col++) { + LanceColumnHandle column = columns.get(col); + FieldVector vector = root.getVector(column.getColumnName()); + Type type = column.getColumnType(); + vector = coerceVector(vector, coercedVectors); + blocks[col] = arrowBlockBuilder.buildBlockFromFieldVector(vector, type, null); + } + return new Page(rowCount, blocks); + } + finally { + for (FieldVector v : coercedVectors) { + v.close(); + } + } + } + + /** + * Coerce unsupported Arrow vector types to types that ArrowBlockBuilder can handle. + * Tracks newly allocated vectors in coercedVectors for cleanup. + * + * - Float2Vector (float16) -> Float4Vector (float32) + * - UInt8Vector (uint64) -> BigIntVector (int64, treats as signed) + * - List/FixedSizeList containing Float2Vector -> widen inner data vector + */ + private FieldVector coerceVector(FieldVector vector, List coercedVectors) + { + if (vector instanceof Float2Vector) { + Float4Vector widened = widenFloat2ToFloat4((Float2Vector) vector, allocator); + coercedVectors.add(widened); + return widened; + } + if (vector instanceof UInt8Vector) { + BigIntVector converted = convertUInt8ToBigInt((UInt8Vector) vector, allocator); + coercedVectors.add(converted); + return converted; + } + if (vector instanceof FixedSizeListVector) { + FixedSizeListVector fslVector = (FixedSizeListVector) vector; + FieldVector dataVector = fslVector.getDataVector(); + if (dataVector instanceof Float2Vector) { + Float4Vector widened = widenFloat2ToFloat4((Float2Vector) dataVector, allocator); + coercedVectors.add(widened); + // Build a new FixedSizeListVector backed by the widened 
Float4Vector + FixedSizeListVector newFsl = buildFixedSizeListWithData( + fslVector.getName(), fslVector.getListSize(), + fslVector.getValueCount(), fslVector, widened); + coercedVectors.add(newFsl); + return newFsl; + } + } + if (vector instanceof ListVector) { + ListVector listVector = (ListVector) vector; + FieldVector dataVector = listVector.getDataVector(); + if (dataVector instanceof Float2Vector) { + Float4Vector widened = widenFloat2ToFloat4((Float2Vector) dataVector, allocator); + coercedVectors.add(widened); + // Build a new ListVector backed by the widened Float4Vector + ListVector newList = buildListWithData( + listVector.getName(), listVector, widened); + coercedVectors.add(newList); + return newList; + } + } + return vector; + } + + static Float4Vector widenFloat2ToFloat4(Float2Vector f2v, BufferAllocator allocator) + { + int valueCount = f2v.getValueCount(); + Float4Vector f4v = new Float4Vector(f2v.getName(), allocator); + f4v.allocateNew(valueCount); + for (int i = 0; i < valueCount; i++) { + if (f2v.isNull(i)) { + f4v.setNull(i); + } + else { + f4v.set(i, f2v.getValueAsFloat(i)); + } } + f4v.setValueCount(valueCount); + return f4v; + } - return new Page(rowCount, blocks); + static BigIntVector convertUInt8ToBigInt(UInt8Vector uint8v, BufferAllocator allocator) + { + int valueCount = uint8v.getValueCount(); + BigIntVector bigIntVector = new BigIntVector(uint8v.getName(), allocator); + bigIntVector.allocateNew(valueCount); + for (int i = 0; i < valueCount; i++) { + if (uint8v.isNull(i)) { + bigIntVector.setNull(i); + } + else { + bigIntVector.set(i, uint8v.get(i)); + } + } + bigIntVector.setValueCount(valueCount); + return bigIntVector; + } + + /** + * Build a new FixedSizeListVector using the validity from the original + * and the widened data vector as the inner data. 
+ */ + private FixedSizeListVector buildFixedSizeListWithData( + String name, int listSize, int valueCount, + FixedSizeListVector original, Float4Vector widenedData) + { + FixedSizeListVector newFsl = FixedSizeListVector.empty(name, listSize, allocator); + newFsl.addOrGetVector(widenedData.getField().getFieldType()); + newFsl.setInitialCapacity(valueCount); + newFsl.allocateNew(); + + // Copy validity bits from original + for (int i = 0; i < valueCount; i++) { + if (original.isNull(i)) { + newFsl.setNull(i); + } + else { + newFsl.setNotNull(i); + } + } + newFsl.setValueCount(valueCount); + + // Copy widened float data into the new inner data vector + Float4Vector newData = (Float4Vector) newFsl.getDataVector(); + newData.allocateNew(widenedData.getValueCount()); + for (int i = 0; i < widenedData.getValueCount(); i++) { + if (widenedData.isNull(i)) { + newData.setNull(i); + } + else { + newData.set(i, widenedData.get(i)); + } + } + newData.setValueCount(widenedData.getValueCount()); + return newFsl; + } + + /** + * Build a new ListVector using the offset buffer from the original + * and the widened data vector as the inner data. 
+ */ + private ListVector buildListWithData( + String name, ListVector original, Float4Vector widenedData) + { + ListVector newList = ListVector.empty(name, allocator); + newList.addOrGetVector(widenedData.getField().getFieldType()); + int valueCount = original.getValueCount(); + newList.setInitialCapacity(valueCount); + newList.allocateNew(); + + // Copy offset buffer from original + ArrowBuf originalOffsets = original.getOffsetBuffer(); + ArrowBuf newOffsets = newList.getOffsetBuffer(); + newOffsets.setBytes(0, originalOffsets, 0, (long) (valueCount + 1) * ListVector.OFFSET_WIDTH); + + // Copy validity bits + for (int i = 0; i < valueCount; i++) { + if (original.isNull(i)) { + newList.setNull(i); + } + else { + newList.setNotNull(i); + } + } + newList.setValueCount(valueCount); + newList.setLastSet(valueCount - 1); + + // Copy widened data + Float4Vector newData = (Float4Vector) newList.getDataVector(); + newData.allocateNew(widenedData.getValueCount()); + for (int i = 0; i < widenedData.getValueCount(); i++) { + if (widenedData.isNull(i)) { + newData.setNull(i); + } + else { + newData.set(i, widenedData.get(i)); + } + } + newData.setValueCount(widenedData.getValueCount()); + return newList; } @Override diff --git a/presto-lance/src/main/java/com/facebook/presto/lance/LanceColumnHandle.java b/presto-lance/src/main/java/com/facebook/presto/lance/LanceColumnHandle.java index 9ae1281e52048..9381dc978c92d 100644 --- a/presto-lance/src/main/java/com/facebook/presto/lance/LanceColumnHandle.java +++ b/presto-lance/src/main/java/com/facebook/presto/lance/LanceColumnHandle.java @@ -122,7 +122,8 @@ else if (type instanceof ArrowType.Int) { } else if (type instanceof ArrowType.FloatingPoint) { ArrowType.FloatingPoint fpType = (ArrowType.FloatingPoint) type; - if (fpType.getPrecision() == FloatingPointPrecision.SINGLE) { + if (fpType.getPrecision() == FloatingPointPrecision.HALF + || fpType.getPrecision() == FloatingPointPrecision.SINGLE) { return RealType.REAL; } return 
DoubleType.DOUBLE; diff --git a/presto-lance/src/test/java/com/facebook/presto/lance/TestFloat16Widening.java b/presto-lance/src/test/java/com/facebook/presto/lance/TestFloat16Widening.java new file mode 100644 index 0000000000000..e3b2dcc92d45a --- /dev/null +++ b/presto-lance/src/test/java/com/facebook/presto/lance/TestFloat16Widening.java @@ -0,0 +1,130 @@
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.lance;

import com.facebook.plugin.arrow.ArrowBlockBuilder;
import com.facebook.presto.common.block.Block;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.Float2Vector;
import org.apache.arrow.vector.Float4Vector;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;

import static com.facebook.presto.common.type.RealType.REAL;
import static com.facebook.presto.metadata.FunctionAndTypeManager.createTestFunctionAndTypeManager;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;

/**
 * Unit tests for {@link LanceArrowToPageScanner#widenFloat2ToFloat4}: values, null
 * positions, the vector name and the value count must carry over to the float32 copy.
 */
@Test(singleThreaded = true)
public class TestFloat16Widening
{
    private BufferAllocator allocator;
    private ArrowBlockBuilder arrowBlockBuilder;

    @BeforeMethod
    public void setUp()
    {
        allocator = new RootAllocator(Long.MAX_VALUE);
        arrowBlockBuilder = new ArrowBlockBuilder(createTestFunctionAndTypeManager());
    }

    @AfterMethod
    public void tearDown()
    {
        allocator.close();
    }

    @Test
    public void testWidenFloat2ToFloat4()
    {
        try (Float2Vector halves = new Float2Vector("f16_col", allocator)) {
            populate(halves, 1.5f, -2.25f, null, 0.0f);

            try (Float4Vector singles = LanceArrowToPageScanner.widenFloat2ToFloat4(halves, allocator)) {
                assertEquals(singles.getValueCount(), 4);
                assertEquals(singles.getName(), "f16_col");

                assertEquals(singles.get(0), 1.5f, 0.01f);
                assertEquals(singles.get(1), -2.25f, 0.01f);
                assertTrue(singles.isNull(2));
                assertEquals(singles.get(3), 0.0f, 0.01f);
            }
        }
    }

    @Test
    public void testWidenedFloat4VectorProducesRealBlock()
    {
        // The widened vector must be consumable by ArrowBlockBuilder as a REAL block
        try (Float2Vector halves = new Float2Vector("f16_col", allocator)) {
            populate(halves, 1.5f, -2.25f, null);

            try (Float4Vector singles = LanceArrowToPageScanner.widenFloat2ToFloat4(halves, allocator)) {
                Block realBlock = arrowBlockBuilder.buildBlockFromFieldVector(singles, REAL, null);
                assertEquals(realBlock.getPositionCount(), 3);

                float first = Float.intBitsToFloat((int) REAL.getLong(realBlock, 0));
                float second = Float.intBitsToFloat((int) REAL.getLong(realBlock, 1));
                assertEquals(first, 1.5f, 0.01f);
                assertEquals(second, -2.25f, 0.01f);
                assertTrue(realBlock.isNull(2));
            }
        }
    }

    @Test
    public void testWidenEmptyVector()
    {
        try (Float2Vector halves = new Float2Vector("empty", allocator)) {
            populate(halves);

            try (Float4Vector singles = LanceArrowToPageScanner.widenFloat2ToFloat4(halves, allocator)) {
                assertEquals(singles.getValueCount(), 0);
            }
        }
    }

    @Test
    public void testWidenAllNulls()
    {
        try (Float2Vector halves = new Float2Vector("nulls", allocator)) {
            populate(halves, null, null, null);

            try (Float4Vector singles = LanceArrowToPageScanner.widenFloat2ToFloat4(halves, allocator)) {
                assertEquals(singles.getValueCount(), 3);
                for (int position = 0; position < 3; position++) {
                    assertTrue(singles.isNull(position));
                }
            }
        }
    }

    /**
     * Allocates {@code vector} for the given values and stores them in order; a
     * {@code null} element marks the position as null.
     */
    private static void populate(Float2Vector vector, Float... values)
    {
        vector.allocateNew(values.length);
        for (int position = 0; position < values.length; position++) {
            Float value = values[position];
            if (value == null) {
                vector.setNull(position);
            }
            else {
                vector.setWithPossibleTruncate(position, value);
            }
        }
        vector.setValueCount(values.length);
    }
}
diff --git a/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceColumnHandle.java b/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceColumnHandle.java index 072d1f2f9b94d..28c830024adf1 100644 --- a/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceColumnHandle.java +++ b/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceColumnHandle.java @@ -59,6 +59,7 @@ public void testArrowToPrestoType() assertEquals(LanceColumnHandle.toPrestoType(field("b", new ArrowType.Int(32, true))), INTEGER); assertEquals(LanceColumnHandle.toPrestoType(field("c", new ArrowType.Int(64, true))), BIGINT); assertEquals(LanceColumnHandle.toPrestoType(field("d", new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE))), REAL); + assertEquals(LanceColumnHandle.toPrestoType(field("d2", new ArrowType.FloatingPoint(FloatingPointPrecision.HALF))), REAL); assertEquals(LanceColumnHandle.toPrestoType(field("e", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE))), DOUBLE); assertEquals(LanceColumnHandle.toPrestoType(field("f", ArrowType.Utf8.INSTANCE)), VARCHAR); } diff --git a/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceMetadata.java b/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceMetadata.java index cbb93d7d18d7a..210ebe8824741 100644 --- a/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceMetadata.java +++ b/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceMetadata.java @@ -122,7 +122,8 @@ public void testListTables() new SchemaTableName("default", "test_table1"), new SchemaTableName("default", "test_table2"), new SchemaTableName("default",
"test_table3"), - new SchemaTableName("default", "test_table4"))); + new SchemaTableName("default", "test_table4"), + new SchemaTableName("default", "wide_types_table"))); // no schema filter List allTables = metadata.listTables(null, Optional.empty()); diff --git a/presto-lance/src/test/java/com/facebook/presto/lance/TestWideTypesTable.java b/presto-lance/src/test/java/com/facebook/presto/lance/TestWideTypesTable.java new file mode 100644 index 0000000000000..abeaaf830a510 --- /dev/null +++ b/presto-lance/src/test/java/com/facebook/presto/lance/TestWideTypesTable.java @@ -0,0 +1,277 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.lance; + +import com.facebook.plugin.arrow.ArrowBlockBuilder; +import com.facebook.presto.common.Page; +import com.facebook.presto.common.block.Block; +import com.facebook.presto.common.type.ArrayType; +import com.facebook.presto.spi.ColumnHandle; +import com.google.common.collect.ImmutableList; +import com.google.common.io.Resources; +import org.lance.Fragment; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.net.URL; +import java.nio.file.Paths; +import java.util.List; +import java.util.Map; + +import static com.facebook.airlift.json.JsonCodec.jsonCodec; +import static com.facebook.presto.common.type.BigintType.BIGINT; +import static com.facebook.presto.common.type.BooleanType.BOOLEAN; +import static com.facebook.presto.common.type.DateType.DATE; +import static com.facebook.presto.common.type.DoubleType.DOUBLE; +import static com.facebook.presto.common.type.IntegerType.INTEGER; +import static com.facebook.presto.common.type.RealType.REAL; +import static com.facebook.presto.common.type.VarcharType.VARCHAR; +import static com.facebook.presto.metadata.FunctionAndTypeManager.createTestFunctionAndTypeManager; +import static com.google.common.collect.ImmutableList.toImmutableList; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; + +/** + * Tests reading from wide_types_table.lance which contains many Arrow types + * including Float16 columns. Verifies the full read path through + * LanceFragmentPageSource including Float16-to-Float32 widening. 
+ * + * Dataset schema (2 rows): + * id int64 -> BIGINT [1, 2] + * col_bool bool -> BOOLEAN [true, false] + * col_int32 int32 -> INTEGER [10, -10] + * col_int64 int64 -> BIGINT [100, -100] + * col_uint64 uint64 -> BIGINT [42, 99] + * col_float16 float16 -> REAL [3.5, -3.5] + * col_float32 float32 -> REAL [1.5, -1.5] + * col_float64 float64 -> DOUBLE [2.5, -2.5] + * col_string utf8 -> VARCHAR ["hello", "world"] + * col_binary binary -> VARBINARY [[0x01,0x02], [0x03,0x04]] + * col_date date32 -> DATE [2024-01-15, 2024-06-30] + * col_ts timestamp[us] -> TIMESTAMP + * col_ts_tz timestamp[us,UTC] -> TIMESTAMP + * col_list_f32 list(float32) -> ARRAY(REAL) [[1.0,2.0], [3.0,4.0,5.0]] + * col_fsl_f32 fsl(float32)[3] -> ARRAY(REAL) [[1.0,2.0,3.0], [4.0,5.0,6.0]] + * col_fsl_f16 fsl(float16)[3] -> ARRAY(REAL) [[7.0,8.0,9.0], [10.0,11.0,12.0]] + */ +@Test(singleThreaded = true) +public class TestWideTypesTable +{ + private LanceNamespaceHolder namespaceHolder; + private LanceTableHandle tableHandle; + private String tablePath; + private List fragments; + private ArrowBlockBuilder arrowBlockBuilder; + private Map columnHandles; + + @BeforeMethod + public void setUp() + throws Exception + { + URL dbUrl = Resources.getResource(TestWideTypesTable.class, "/example_db"); + assertNotNull(dbUrl, "example_db resource not found"); + String rootPath = Paths.get(dbUrl.toURI()).toString(); + LanceConfig config = new LanceConfig() + .setRootUrl(rootPath) + .setSingleLevelNs(true); + namespaceHolder = new LanceNamespaceHolder(config); + arrowBlockBuilder = new ArrowBlockBuilder(createTestFunctionAndTypeManager()); + tableHandle = new LanceTableHandle("default", "wide_types_table"); + tablePath = namespaceHolder.getTablePath("wide_types_table"); + fragments = namespaceHolder.getFragments("wide_types_table"); + LanceMetadata metadata = new LanceMetadata(namespaceHolder, jsonCodec(LanceCommitTaskData.class)); + columnHandles = metadata.getColumnHandles(null, tableHandle); + } + + @Test + public 
void testReadAllColumns() + throws Exception + { + // Project all columns (uint64 and nested float16 are now coerced) + List columns = columnHandles.values().stream() + .map(LanceColumnHandle.class::cast) + .collect(toImmutableList()); + + Page page = readColumns(columns); + assertEquals(page.getPositionCount(), 2); + assertTrue(page.getChannelCount() > 0); + } + + @Test + public void testFloat16Column() + { + LanceColumnHandle col = (LanceColumnHandle) columnHandles.get("col_float16"); + assertNotNull(col, "col_float16 not found in schema"); + assertEquals(col.getColumnType(), REAL); + + Page page = readColumns(ImmutableList.of(col)); + assertEquals(page.getPositionCount(), 2); + + Block block = page.getBlock(0); + float val0 = Float.intBitsToFloat((int) REAL.getLong(block, 0)); + float val1 = Float.intBitsToFloat((int) REAL.getLong(block, 1)); + assertEquals(val0, 3.5f, 0.01f); + assertEquals(val1, -3.5f, 0.01f); + } + + @Test + public void testFloat32Column() + { + LanceColumnHandle col = (LanceColumnHandle) columnHandles.get("col_float32"); + Page page = readColumns(ImmutableList.of(col)); + + Block block = page.getBlock(0); + float val0 = Float.intBitsToFloat((int) REAL.getLong(block, 0)); + float val1 = Float.intBitsToFloat((int) REAL.getLong(block, 1)); + assertEquals(val0, 1.5f, 0.01f); + assertEquals(val1, -1.5f, 0.01f); + } + + @Test + public void testFloat64Column() + { + LanceColumnHandle col = (LanceColumnHandle) columnHandles.get("col_float64"); + Page page = readColumns(ImmutableList.of(col)); + + Block block = page.getBlock(0); + assertEquals(DOUBLE.getDouble(block, 0), 2.5, 0.01); + assertEquals(DOUBLE.getDouble(block, 1), -2.5, 0.01); + } + + @Test + public void testIntegerColumns() + { + LanceColumnHandle colId = (LanceColumnHandle) columnHandles.get("id"); + LanceColumnHandle colInt32 = (LanceColumnHandle) columnHandles.get("col_int32"); + LanceColumnHandle colInt64 = (LanceColumnHandle) columnHandles.get("col_int64"); + Page page = 
readColumns(ImmutableList.of(colId, colInt32, colInt64)); + + // id + assertEquals(BIGINT.getLong(page.getBlock(0), 0), 1L); + assertEquals(BIGINT.getLong(page.getBlock(0), 1), 2L); + // col_int32 + assertEquals(INTEGER.getLong(page.getBlock(1), 0), 10L); + assertEquals(INTEGER.getLong(page.getBlock(1), 1), -10L); + // col_int64 + assertEquals(BIGINT.getLong(page.getBlock(2), 0), 100L); + assertEquals(BIGINT.getLong(page.getBlock(2), 1), -100L); + } + + @Test + public void testBooleanColumn() + { + LanceColumnHandle col = (LanceColumnHandle) columnHandles.get("col_bool"); + Page page = readColumns(ImmutableList.of(col)); + + assertTrue(BOOLEAN.getBoolean(page.getBlock(0), 0)); + assertFalse(BOOLEAN.getBoolean(page.getBlock(0), 1)); + } + + @Test + public void testVarcharColumn() + { + LanceColumnHandle col = (LanceColumnHandle) columnHandles.get("col_string"); + Page page = readColumns(ImmutableList.of(col)); + + assertEquals(VARCHAR.getSlice(page.getBlock(0), 0).toStringUtf8(), "hello"); + assertEquals(VARCHAR.getSlice(page.getBlock(0), 1).toStringUtf8(), "world"); + } + + @Test + public void testDateColumn() + { + LanceColumnHandle col = (LanceColumnHandle) columnHandles.get("col_date"); + Page page = readColumns(ImmutableList.of(col)); + + // 2024-01-15 = 19737 days since epoch + assertEquals(DATE.getLong(page.getBlock(0), 0), 19737L); + // 2024-06-30 = 19904 days since epoch + assertEquals(DATE.getLong(page.getBlock(0), 1), 19904L); + } + + @Test + public void testFixedSizeListFloat32() + { + LanceColumnHandle col = (LanceColumnHandle) columnHandles.get("col_fsl_f32"); + assertNotNull(col, "col_fsl_f32 not found"); + Page page = readColumns(ImmutableList.of(col)); + + ArrayType arrayType = (ArrayType) col.getColumnType(); + Block inner0 = (Block) arrayType.getObject(page.getBlock(0), 0); + assertEquals(inner0.getPositionCount(), 3); + assertEquals(Float.intBitsToFloat((int) REAL.getLong(inner0, 0)), 1.0f, 0.01f); + assertEquals(Float.intBitsToFloat((int) 
REAL.getLong(inner0, 1)), 2.0f, 0.01f); + assertEquals(Float.intBitsToFloat((int) REAL.getLong(inner0, 2)), 3.0f, 0.01f); + } + + @Test + public void testUint64Column() + { + LanceColumnHandle col = (LanceColumnHandle) columnHandles.get("col_uint64"); + assertNotNull(col, "col_uint64 not found in schema"); + assertEquals(col.getColumnType(), BIGINT); + + Page page = readColumns(ImmutableList.of(col)); + assertEquals(BIGINT.getLong(page.getBlock(0), 0), 42L); + assertEquals(BIGINT.getLong(page.getBlock(0), 1), 99L); + } + + @Test + public void testFixedSizeListFloat16() + { + LanceColumnHandle col = (LanceColumnHandle) columnHandles.get("col_fsl_f16"); + assertNotNull(col, "col_fsl_f16 not found"); + Page page = readColumns(ImmutableList.of(col)); + + ArrayType arrayType = (ArrayType) col.getColumnType(); + Block inner0 = (Block) arrayType.getObject(page.getBlock(0), 0); + assertEquals(inner0.getPositionCount(), 3); + assertEquals(Float.intBitsToFloat((int) REAL.getLong(inner0, 0)), 7.0f, 0.01f); + assertEquals(Float.intBitsToFloat((int) REAL.getLong(inner0, 1)), 8.0f, 0.01f); + assertEquals(Float.intBitsToFloat((int) REAL.getLong(inner0, 2)), 9.0f, 0.01f); + + Block inner1 = (Block) arrayType.getObject(page.getBlock(0), 1); + assertEquals(inner1.getPositionCount(), 3); + assertEquals(Float.intBitsToFloat((int) REAL.getLong(inner1, 0)), 10.0f, 0.01f); + assertEquals(Float.intBitsToFloat((int) REAL.getLong(inner1, 1)), 11.0f, 0.01f); + assertEquals(Float.intBitsToFloat((int) REAL.getLong(inner1, 2)), 12.0f, 0.01f); + } + + private Page readColumns(List columns) + { + try { + LanceFragmentPageSource pageSource = new LanceFragmentPageSource( + tableHandle, + columns, + ImmutableList.of(fragments.get(0).getId()), + tablePath, + 8192, + arrowBlockBuilder, + namespaceHolder.getAllocator()); + try { + Page page = pageSource.getNextPage(); + assertNotNull(page); + return page; + } + finally { + pageSource.close(); + } + } + catch (Exception e) { + throw new 
RuntimeException(e); + } + } +} diff --git a/presto-lance/src/test/resources/example_db/wide_types_table.lance/_transactions/0-808a0cce-8541-4513-908b-6751199a5021.txn b/presto-lance/src/test/resources/example_db/wide_types_table.lance/_transactions/0-808a0cce-8541-4513-908b-6751199a5021.txn new file mode 100644 index 0000000000000..02640a637fd95 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/wide_types_table.lance/_transactions/0-808a0cce-8541-4513-908b-6751199a5021.txn differ diff --git a/presto-lance/src/test/resources/example_db/wide_types_table.lance/_versions/18446744073709551614.manifest b/presto-lance/src/test/resources/example_db/wide_types_table.lance/_versions/18446744073709551614.manifest new file mode 100644 index 0000000000000..456ad9a4b8ddf Binary files /dev/null and b/presto-lance/src/test/resources/example_db/wide_types_table.lance/_versions/18446744073709551614.manifest differ diff --git a/presto-lance/src/test/resources/example_db/wide_types_table.lance/data/1101011001011011001010008e9f4c4a7d866a5325e404288a.lance b/presto-lance/src/test/resources/example_db/wide_types_table.lance/data/1101011001011011001010008e9f4c4a7d866a5325e404288a.lance new file mode 100644 index 0000000000000..6a48434a7e600 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/wide_types_table.lance/data/1101011001011011001010008e9f4c4a7d866a5325e404288a.lance differ