Skip to content
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,19 @@
import com.facebook.presto.spi.PrestoException;
import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.Float2Vector;
import org.apache.arrow.vector.Float4Vector;
import org.apache.arrow.vector.UInt8Vector;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.complex.FixedSizeListVector;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.ipc.ArrowReader;
import org.lance.ipc.LanceScanner;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import static com.google.common.collect.ImmutableList.toImmutableList;
Expand All @@ -36,6 +43,7 @@ public class LanceArrowToPageScanner
{
private final ScannerFactory scannerFactory;
private final ArrowReader arrowReader;
private final BufferAllocator allocator;
private final List<LanceColumnHandle> columns;
private final ArrowBlockBuilder arrowBlockBuilder;
private long lastBatchBytes;
Expand All @@ -46,6 +54,7 @@ public LanceArrowToPageScanner(
ScannerFactory scannerFactory,
ArrowBlockBuilder arrowBlockBuilder)
{
this.allocator = requireNonNull(allocator, "allocator is null");
this.columns = requireNonNull(columns, "columns is null");
this.scannerFactory = requireNonNull(scannerFactory, "scannerFactory is null");
this.arrowBlockBuilder = requireNonNull(arrowBlockBuilder, "arrowBlockBuilder is null");
Expand Down Expand Up @@ -95,15 +104,191 @@ public Page convert()

int rowCount = root.getRowCount();
Block[] blocks = new Block[columns.size()];
List<FieldVector> coercedVectors = new ArrayList<>();

for (int col = 0; col < columns.size(); col++) {
LanceColumnHandle column = columns.get(col);
FieldVector vector = root.getVector(column.getColumnName());
Type type = column.getColumnType();
blocks[col] = arrowBlockBuilder.buildBlockFromFieldVector(vector, type, null);
try {
for (int col = 0; col < columns.size(); col++) {
LanceColumnHandle column = columns.get(col);
FieldVector vector = root.getVector(column.getColumnName());
Type type = column.getColumnType();
vector = coerceVector(vector, coercedVectors);
blocks[col] = arrowBlockBuilder.buildBlockFromFieldVector(vector, type, null);
}
return new Page(rowCount, blocks);
}
finally {
for (FieldVector v : coercedVectors) {
v.close();
}
}
}

/**
 * Replaces an Arrow vector that ArrowBlockBuilder cannot consume with an equivalent
 * vector of a supported type. Vectors that need no conversion are returned as-is.
 * Every vector allocated here is appended to {@code coercedVectors} so the caller
 * can close it once the page has been built.
 *
 * Conversions performed:
 * - Float2Vector (float16) becomes a Float4Vector (float32)
 * - UInt8Vector (unsigned 64-bit) becomes a BigIntVector (the raw long is stored, so it is read as signed)
 * - FixedSizeListVector / ListVector whose data vector is a Float2Vector is rebuilt
 *   around a widened Float4Vector; lists with any other data vector are left untouched
 */
private FieldVector coerceVector(FieldVector vector, List<FieldVector> coercedVectors)
{
    FieldVector replacement = null;

    if (vector instanceof Float2Vector) {
        replacement = widenFloat2ToFloat4((Float2Vector) vector, allocator);
    }
    else if (vector instanceof UInt8Vector) {
        replacement = convertUInt8ToBigInt((UInt8Vector) vector, allocator);
    }
    else if (vector instanceof FixedSizeListVector) {
        FixedSizeListVector fixedList = (FixedSizeListVector) vector;
        FieldVector inner = fixedList.getDataVector();
        if (inner instanceof Float2Vector) {
            // Register the temporary widened child before building the wrapper so it
            // is released even if building the wrapper fails.
            Float4Vector widenedInner = widenFloat2ToFloat4((Float2Vector) inner, allocator);
            coercedVectors.add(widenedInner);
            replacement = buildFixedSizeListWithData(
                    fixedList.getName(),
                    fixedList.getListSize(),
                    fixedList.getValueCount(),
                    fixedList,
                    widenedInner);
        }
    }
    else if (vector instanceof ListVector) {
        ListVector variableList = (ListVector) vector;
        FieldVector inner = variableList.getDataVector();
        if (inner instanceof Float2Vector) {
            Float4Vector widenedInner = widenFloat2ToFloat4((Float2Vector) inner, allocator);
            coercedVectors.add(widenedInner);
            replacement = buildListWithData(variableList.getName(), variableList, widenedInner);
        }
    }

    if (replacement == null) {
        // Nothing to coerce: hand back the caller's vector, which the caller does not own via coercedVectors.
        return vector;
    }
    coercedVectors.add(replacement);
    return replacement;
}

/**
 * Creates a float32 copy of a float16 vector, keeping the name, value count and
 * null positions of the source.
 *
 * @param f2v source float16 vector; it is only read
 * @param allocator allocator backing the returned vector
 * @return a newly allocated Float4Vector; callers in this file close it when done
 */
static Float4Vector widenFloat2ToFloat4(Float2Vector f2v, BufferAllocator allocator)
{
    final int count = f2v.getValueCount();
    Float4Vector result = new Float4Vector(f2v.getName(), allocator);
    result.allocateNew(count);

    for (int row = 0; row < count; row++) {
        if (!f2v.isNull(row)) {
            result.set(row, f2v.getValueAsFloat(row));
            continue;
        }
        result.setNull(row);
    }

    result.setValueCount(count);
    return result;
}

return new Page(rowCount, blocks);
/**
 * Creates a BigIntVector copy of a UInt8Vector, keeping the name, value count and
 * null positions of the source. Each value is stored as the long returned by
 * {@code UInt8Vector.get}, with no range adjustment.
 *
 * @param uint8v source vector; it is only read
 * @param allocator allocator backing the returned vector
 * @return a newly allocated BigIntVector; callers in this file close it when done
 */
static BigIntVector convertUInt8ToBigInt(UInt8Vector uint8v, BufferAllocator allocator)
{
    final int count = uint8v.getValueCount();
    BigIntVector result = new BigIntVector(uint8v.getName(), allocator);
    result.allocateNew(count);

    for (int row = 0; row < count; row++) {
        if (!uint8v.isNull(row)) {
            result.set(row, uint8v.get(row));
            continue;
        }
        result.setNull(row);
    }

    result.setValueCount(count);
    return result;
}

/**
 * Builds a new FixedSizeListVector that takes its per-row validity from
 * {@code original} and its element values from {@code widenedData}.
 *
 * The element values are copied into the new vector's own data vector, so
 * {@code widenedData} stays independently owned by the caller. If anything fails
 * part-way, the partially built vector is closed before the exception propagates,
 * because the caller only starts tracking it after a successful return.
 *
 * @param name name given to the new vector
 * @param listSize number of elements per list
 * @param valueCount number of lists (rows) to copy validity for
 * @param original source of the validity bits
 * @param widenedData source of the element values
 * @return a newly allocated FixedSizeListVector that the caller must close
 */
private FixedSizeListVector buildFixedSizeListWithData(
        String name, int listSize, int valueCount,
        FixedSizeListVector original, Float4Vector widenedData)
{
    FixedSizeListVector newFsl = FixedSizeListVector.empty(name, listSize, allocator);
    boolean success = false;
    try {
        newFsl.addOrGetVector(widenedData.getField().getFieldType());
        newFsl.setInitialCapacity(valueCount);
        newFsl.allocateNew();

        // Copy validity bits from original
        for (int i = 0; i < valueCount; i++) {
            if (original.isNull(i)) {
                newFsl.setNull(i);
            }
            else {
                newFsl.setNotNull(i);
            }
        }
        newFsl.setValueCount(valueCount);

        // Copy widened float data into the new inner data vector
        int dataCount = widenedData.getValueCount();
        Float4Vector newData = (Float4Vector) newFsl.getDataVector();
        newData.allocateNew(dataCount);
        for (int i = 0; i < dataCount; i++) {
            if (widenedData.isNull(i)) {
                newData.setNull(i);
            }
            else {
                newData.set(i, widenedData.get(i));
            }
        }
        newData.setValueCount(dataCount);

        success = true;
        return newFsl;
    }
    finally {
        if (!success) {
            // Nobody else holds a reference yet, so release the buffers here to avoid a leak.
            newFsl.close();
        }
    }
}

/**
 * Builds a new ListVector that takes its validity and offsets from {@code original}
 * and its element values from {@code widenedData}.
 *
 * Order of operations matters here:
 * 1. Validity is written first. ListVector.setNull / setNotNull manage the vector's
 *    internal "last set" position and may back-fill offset entries, so they must not
 *    run after the real offsets are in place.
 * 2. The offsets are then copied verbatim from the original.
 * 3. lastSet is moved to the final row before setValueCount, so that setValueCount
 *    does not treat any row as an unfilled hole and rewrite its offset.
 *
 * The element values are copied into the new vector's own data vector, so
 * {@code widenedData} stays independently owned by the caller. If anything fails
 * part-way, the partially built vector is closed before the exception propagates,
 * because the caller only starts tracking it after a successful return.
 *
 * @param name name given to the new vector
 * @param original source of validity bits and offsets
 * @param widenedData source of the element values
 * @return a newly allocated ListVector that the caller must close
 */
private ListVector buildListWithData(
        String name, ListVector original, Float4Vector widenedData)
{
    int valueCount = original.getValueCount();
    ListVector newList = ListVector.empty(name, allocator);
    boolean success = false;
    try {
        newList.addOrGetVector(widenedData.getField().getFieldType());
        newList.setInitialCapacity(valueCount);
        newList.allocateNew();

        // Copy validity bits (before the offsets, see method comment)
        for (int i = 0; i < valueCount; i++) {
            if (original.isNull(i)) {
                newList.setNull(i);
            }
            else {
                newList.setNotNull(i);
            }
        }

        // Copy offset buffer from original. Skipped for an empty vector: there are no
        // rows to describe and the source offset buffer may not hold even one entry.
        // The buffer is fetched only now because the validity calls above may have
        // reallocated it.
        if (valueCount > 0) {
            ArrowBuf originalOffsets = original.getOffsetBuffer();
            ArrowBuf newOffsets = newList.getOffsetBuffer();
            newOffsets.setBytes(0, originalOffsets, 0, (long) (valueCount + 1) * ListVector.OFFSET_WIDTH);
        }

        // Mark every row as written before fixing the value count
        newList.setLastSet(valueCount - 1);
        newList.setValueCount(valueCount);

        // Copy widened data
        int dataCount = widenedData.getValueCount();
        Float4Vector newData = (Float4Vector) newList.getDataVector();
        newData.allocateNew(dataCount);
        for (int i = 0; i < dataCount; i++) {
            if (widenedData.isNull(i)) {
                newData.setNull(i);
            }
            else {
                newData.set(i, widenedData.get(i));
            }
        }
        newData.setValueCount(dataCount);

        success = true;
        return newList;
    }
    finally {
        if (!success) {
            // Nobody else holds a reference yet, so release the buffers here to avoid a leak.
            newList.close();
        }
    }
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ else if (type instanceof ArrowType.Int) {
}
else if (type instanceof ArrowType.FloatingPoint) {
ArrowType.FloatingPoint fpType = (ArrowType.FloatingPoint) type;
if (fpType.getPrecision() == FloatingPointPrecision.SINGLE) {
if (fpType.getPrecision() == FloatingPointPrecision.HALF
|| fpType.getPrecision() == FloatingPointPrecision.SINGLE) {
return RealType.REAL;
}
return DoubleType.DOUBLE;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.lance;

import com.facebook.plugin.arrow.ArrowBlockBuilder;
import com.facebook.presto.common.block.Block;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.Float2Vector;
import org.apache.arrow.vector.Float4Vector;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;

import static com.facebook.presto.common.type.RealType.REAL;
import static com.facebook.presto.metadata.FunctionAndTypeManager.createTestFunctionAndTypeManager;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;

/**
 * Unit tests for LanceArrowToPageScanner.widenFloat2ToFloat4: value and null
 * preservation, compatibility of the widened vector with ArrowBlockBuilder,
 * and the empty / all-null edge cases.
 */
@Test(singleThreaded = true)
public class TestFloat16Widening
{
    private BufferAllocator allocator;
    private ArrowBlockBuilder arrowBlockBuilder;

    @BeforeMethod
    public void setUp()
    {
        allocator = new RootAllocator(Long.MAX_VALUE);
        arrowBlockBuilder = new ArrowBlockBuilder(createTestFunctionAndTypeManager());
    }

    @AfterMethod
    public void tearDown()
    {
        allocator.close();
    }

    @Test
    public void testWidenFloat2ToFloat4()
    {
        try (Float2Vector source = new Float2Vector("f16_col", allocator)) {
            populate(source, 1.5f, -2.25f, null, 0.0f);

            try (Float4Vector widened = LanceArrowToPageScanner.widenFloat2ToFloat4(source, allocator)) {
                // Shape and name carry over from the source vector
                assertEquals(widened.getValueCount(), 4);
                assertEquals(widened.getName(), "f16_col");

                // Values and the null slot carry over position by position
                assertEquals(widened.get(0), 1.5f, 0.01f);
                assertEquals(widened.get(1), -2.25f, 0.01f);
                assertTrue(widened.isNull(2));
                assertEquals(widened.get(3), 0.0f, 0.01f);
            }
        }
    }

    @Test
    public void testWidenedFloat4VectorProducesRealBlock()
    {
        // The widened vector must be consumable by ArrowBlockBuilder as REAL
        try (Float2Vector source = new Float2Vector("f16_col", allocator)) {
            populate(source, 1.5f, -2.25f, null);

            try (Float4Vector widened = LanceArrowToPageScanner.widenFloat2ToFloat4(source, allocator)) {
                Block realBlock = arrowBlockBuilder.buildBlockFromFieldVector(widened, REAL, null);
                assertEquals(realBlock.getPositionCount(), 3);

                // REAL stores the float's raw int bits in a long
                float first = Float.intBitsToFloat((int) REAL.getLong(realBlock, 0));
                float second = Float.intBitsToFloat((int) REAL.getLong(realBlock, 1));
                assertEquals(first, 1.5f, 0.01f);
                assertEquals(second, -2.25f, 0.01f);
                assertTrue(realBlock.isNull(2));
            }
        }
    }

    @Test
    public void testWidenEmptyVector()
    {
        try (Float2Vector source = new Float2Vector("empty", allocator)) {
            populate(source);

            try (Float4Vector widened = LanceArrowToPageScanner.widenFloat2ToFloat4(source, allocator)) {
                assertEquals(widened.getValueCount(), 0);
            }
        }
    }

    @Test
    public void testWidenAllNulls()
    {
        try (Float2Vector source = new Float2Vector("nulls", allocator)) {
            populate(source, null, null, null);

            try (Float4Vector widened = LanceArrowToPageScanner.widenFloat2ToFloat4(source, allocator)) {
                assertEquals(widened.getValueCount(), 3);
                for (int row = 0; row < 3; row++) {
                    assertTrue(widened.isNull(row));
                }
            }
        }
    }

    /**
     * Allocates {@code target} for exactly {@code values.length} slots and fills them
     * in order; a {@code null} entry marks that slot as null.
     */
    private static void populate(Float2Vector target, Float... values)
    {
        target.allocateNew(values.length);
        for (int slot = 0; slot < values.length; slot++) {
            if (values[slot] == null) {
                target.setNull(slot);
            }
            else {
                target.setWithPossibleTruncate(slot, values[slot]);
            }
        }
        target.setValueCount(values.length);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ public void testArrowToPrestoType()
assertEquals(LanceColumnHandle.toPrestoType(field("b", new ArrowType.Int(32, true))), INTEGER);
assertEquals(LanceColumnHandle.toPrestoType(field("c", new ArrowType.Int(64, true))), BIGINT);
assertEquals(LanceColumnHandle.toPrestoType(field("d", new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE))), REAL);
assertEquals(LanceColumnHandle.toPrestoType(field("d2", new ArrowType.FloatingPoint(FloatingPointPrecision.HALF))), REAL);
assertEquals(LanceColumnHandle.toPrestoType(field("e", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE))), DOUBLE);
assertEquals(LanceColumnHandle.toPrestoType(field("f", ArrowType.Utf8.INSTANCE)), VARCHAR);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ public void testListTables()
new SchemaTableName("default", "test_table1"),
new SchemaTableName("default", "test_table2"),
new SchemaTableName("default", "test_table3"),
new SchemaTableName("default", "test_table4")));
new SchemaTableName("default", "test_table4"),
new SchemaTableName("default", "wide_types_table")));

// no schema filter
List<SchemaTableName> allTables = metadata.listTables(null, Optional.empty());
Expand Down
Loading
Loading