Skip to content
Merged
37 changes: 37 additions & 0 deletions arrow/src/main/java/org/apache/iceberg/arrow/ArrowAllocation.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iceberg.arrow;

import org.apache.arrow.memory.RootAllocator;

/**
 * Static holder for the single Arrow {@link RootAllocator} handed out by {@link #rootAllocator()}.
 */
public class ArrowAllocation {
  // Created once, when this class is initialized, with a limit of Long.MAX_VALUE.
  private static final RootAllocator ROOT_ALLOCATOR = new RootAllocator(Long.MAX_VALUE);

  // Static access only; no instances are ever created.
  private ArrowAllocation() {
  }

  /**
   * Returns the allocator that was created during class initialization.
   *
   * @return the same {@link RootAllocator} instance on every call
   */
  public static RootAllocator rootAllocator() {
    return ROOT_ALLOCATOR;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.vector.DecimalVector;
import org.apache.arrow.vector.VarBinaryVector;
import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.holders.NullableVarCharHolder;

/**
* The general way of getting a value at an index in the Arrow vector
Expand Down Expand Up @@ -64,38 +64,9 @@ public void setNullabilityHolder(NullabilityHolder nullabilityHolder) {
}
}

/**
 * Extension of Arrow's {@link VarBinaryVector}. It exists only to override the expensive
 * {@link VarBinaryVector#isSet(int)} method with a lookup in a {@link NullabilityHolder}.
 */
public static class VarBinaryArrowVector extends VarBinaryVector {
  // Not assigned by the constructor; must be supplied through setNullabilityHolder before isSet is used.
  private NullabilityHolder nullabilityHolder;

  public VarBinaryArrowVector(String name, BufferAllocator allocator) {
    super(name, allocator);
  }

  /**
   * Installs the holder that {@link #isSet(int)} consults.
   *
   * @param nullabilityHolder holder queried for the null flag of each position
   */
  public void setNullabilityHolder(NullabilityHolder nullabilityHolder) {
    this.nullabilityHolder = nullabilityHolder;
  }

  /**
   * Answers from the {@link NullabilityHolder} instead of the vector itself: the holder's null flag
   * for the position is XOR-ed with 1.
   *
   * @param index position of element
   * @return 1 if element at given index is not null, 0 otherwise
   */
  @Override
  public int isSet(int index) {
    int nullFlag = nullabilityHolder.isNullAt(index);
    return nullFlag ^ 1;
  }
}

/**
* Extension of Arrow's {@link VarCharVector}. The reason for having this implementation is to override the expensive
* {@link VarCharVector#isSet(int)} method.
* {@link VarCharVector#isSet(int)} method called by {@link VarCharVector#get(int, NullableVarCharHolder)}
*/
public static class VarcharArrowVector extends VarCharVector {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,35 +19,48 @@

package org.apache.iceberg.arrow.vectorized;

import org.apache.arrow.util.Preconditions;
import org.apache.arrow.vector.FieldVector;
import org.apache.iceberg.types.Type;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.column.Dictionary;

/**
* Container class for holding the Arrow vector holding a batch of values along with other state needed for reading
* Container class for holding the Arrow vector storing a batch of values along with other state needed for reading
* values out of it.
*/
public class VectorHolder {
// Column descriptor supplied to the public constructor; null on a dummy holder.
private final ColumnDescriptor columnDescriptor;
// Arrow vector supplied to the public constructor; null only on a dummy holder (this is what isDummy() tests).
private final FieldVector vector;
// Flag passed in by the creator of the holder; false on a dummy holder.
private final boolean isDictionaryEncoded;

// The one reference the public constructor does not null-check, so it may be null.
private final Dictionary dictionary;
// Null-checked by the public constructor; null on a dummy holder.
private final NullabilityHolder nullabilityHolder;

// NOTE(review): this call passes five arguments, several of them null, while the visible public
// constructor body null-checks columnDescriptor, vector and holder and also reads a `type` argument.
// It looks like a leftover of the pre-change version that dummyHolder(int) replaces -- confirm.
public static final VectorHolder NULL_VECTOR_HOLDER = new VectorHolder(null, null, false, null, null);
// Iceberg type supplied to the public constructor; null on a dummy holder.
private final Type icebergType;

/**
 * Creates a holder around an Arrow vector and the state needed to read values out of it.
 *
 * @param columnDescriptor descriptor of the column the vector was read from; must not be null
 * @param vector Arrow vector storing the batch of values; must not be null
 * @param isDictionaryEncoded value later reported as the holder's dictionary-encoded flag
 * @param dictionary dictionary associated with the vector; the only reference allowed to be null
 * @param holder nullability information for the vector; must not be null
 * @param type Iceberg type of the column; must not be null
 * @throws NullPointerException if any argument other than {@code dictionary} is null
 */
public VectorHolder(
    ColumnDescriptor columnDescriptor, FieldVector vector, boolean isDictionaryEncoded,
    Dictionary dictionary, NullabilityHolder holder, Type type) {
  // All the fields except dictionary are not nullable unless it is a dummy holder
  Preconditions.checkNotNull(columnDescriptor, "ColumnDescriptor cannot be null");
  Preconditions.checkNotNull(vector, "Vector cannot be null");
  Preconditions.checkNotNull(holder, "NullabilityHolder cannot be null");
  Preconditions.checkNotNull(type, "IcebergType cannot be null");
  this.columnDescriptor = columnDescriptor;
  this.vector = vector;
  this.isDictionaryEncoded = isDictionaryEncoded;
  this.dictionary = dictionary;
  this.nullabilityHolder = holder;
  this.icebergType = type;
}

// Reserved for dummyHolder(int): builds a holder with no vector or metadata behind it.
private VectorHolder() {
  // Every reference stays null; a null vector is what marks the holder as a dummy.
  this.vector = null;
  this.columnDescriptor = null;
  this.dictionary = null;
  this.nullabilityHolder = null;
  this.icebergType = null;
  this.isDictionaryEncoded = false;
}

public ColumnDescriptor descriptor() {
Expand All @@ -69,4 +82,26 @@ public Dictionary dictionary() {
/**
 * Returns the nullability information stored in this holder.
 *
 * @return the {@link NullabilityHolder} given at construction, or null for a dummy holder
 */
public NullabilityHolder nullabilityHolder() {
  return this.nullabilityHolder;
}

/**
 * Returns the Iceberg type stored in this holder.
 *
 * @return the {@link Type} given at construction, or null for a dummy holder
 */
public Type icebergType() {
  return this.icebergType;
}

/**
 * Returns the number of values in this holder, as reported by the underlying Arrow vector.
 *
 * @return the vector's value count
 */
public int numValues() {
  final int valueCount = vector.getValueCount();
  return valueCount;
}

/**
 * Creates a holder that carries no vector and reports a fixed number of values.
 *
 * @param numRows value that {@link #numValues()} returns for the created holder
 * @return a holder for which {@link #isDummy()} is true
 */
public static VectorHolder dummyHolder(int numRows) {
  // numValues() is overridden because the base implementation reads the (null) vector.
  VectorHolder dummy = new VectorHolder() {
    @Override
    public int numValues() {
      return numRows;
    }
  };
  return dummy;
}

/**
 * Tells whether this holder has no Arrow vector behind it.
 *
 * @return true if the vector is null, false otherwise
 */
public boolean isDummy() {
  return this.vector == null;
}

}
Loading