Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions build_release.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/usr/bin/env bash
#
# Release build of Arrow for x86_64 (every native step runs under
# `arch -x86_64`):
#   1. C++ libraries with Gandiva, configured and built in cpp/debug
#   2. static C++ libraries for JNI, installed into java-dist
#   3. the Java JNI bindings, installed into java-dist
#   4. the Java jars; the Gandiva jar is then copied into a Dremio distribution
#
# Run from the root of the Arrow checkout: every build directory below is
# relative to the directory the script is started in.
#
# Optional environment overrides:
#   MVN              Maven executable (default: /opt/homebrew/bin/mvn)
#   ARROW_VERSION    version in the Gandiva jar file name (default: 12.0.1)
#   DREMIO_JARS_DIR  directory the Gandiva jar is copied into

# Report a failure the same way for every step and stop the build.
fail() {
  echo "failed"
  exit 1
}

ROOT="$PWD"
# Steps 2 and 3 install the JNI libraries under $ROOT/java-dist, so Maven is
# pointed at that same tree rather than at a fixed absolute path.
DIST_LIB="$ROOT/java-dist/lib"
MVN="${MVN:-/opt/homebrew/bin/mvn}"
ARROW_VERSION="${ARROW_VERSION:-12.0.1}"
DREMIO_JARS_DIR="${DREMIO_JARS_DIR:-/Users/logan.riggs/github/dremio/enterprise/distribution/server/target/dremio-enterprise-24.3.0-SNAPSHOT/dremio-enterprise-24.3.0-SNAPSHOT/jars/3rdparty/}"

# Start from clean build trees. `mkdir` is deliberately not `-p`: it fails fast
# when the script is started outside the checkout root (no cpp/ directory).
rm -rf cpp-jni java-dist java-jni cpp/debug || fail
mkdir cpp/debug || fail
cd cpp/debug || fail

# 1. C++ libraries with Gandiva.
arch -x86_64 cmake \
  -DCMAKE_BUILD_TYPE=RELEASE \
  -DARROW_GANDIVA=ON \
  -DARROW_JEMALLOC=OFF \
  -DARROW_GANDIVA_JAVA=ON \
  -DARROW_BUILD_TESTS=OFF \
  .. || fail
arch -x86_64 make -j 8 || fail

cd "$ROOT" || fail
mkdir -p java-jni cpp-jni || fail

# 2. Static C++ libraries used by the JNI bindings.
arch -x86_64 cmake -S cpp -B cpp-jni \
  -DARROW_BUILD_SHARED=OFF \
  -DARROW_JEMALLOC=OFF \
  -DARROW_CSV=ON \
  -DARROW_DATASET=ON \
  -DARROW_DEPENDENCY_SOURCE=BUNDLED \
  -DARROW_DEPENDENCY_USE_SHARED=OFF \
  -DARROW_FILESYSTEM=ON \
  -DARROW_GANDIVA=ON \
  -DARROW_GANDIVA_STATIC_LIBSTDCPP=ON \
  -DARROW_ORC=ON \
  -DARROW_PARQUET=ON \
  -DARROW_S3=ON \
  -DARROW_USE_CCACHE=ON \
  -DCMAKE_BUILD_TYPE=RELEASE \
  -DCMAKE_INSTALL_LIBDIR=lib/x86_64 \
  -DCMAKE_INSTALL_PREFIX=java-dist \
  -DCMAKE_UNITY_BUILD=ON || fail
arch -x86_64 cmake --build cpp-jni --target install --config Release || fail

# 3. Java JNI bindings, located through the CMake package files from step 2.
arch -x86_64 cmake -S java -B java-jni \
  -DARROW_JAVA_JNI_ENABLE_C=OFF \
  -DARROW_JEMALLOC=OFF \
  -DARROW_JAVA_JNI_ENABLE_DEFAULT=ON \
  -DBUILD_TESTING=OFF \
  -DCMAKE_BUILD_TYPE=RELEASE \
  -DCMAKE_INSTALL_LIBDIR=lib/x86_64 \
  -DCMAKE_INSTALL_PREFIX=java-dist \
  -DCMAKE_PREFIX_PATH="$ROOT/java-dist/lib/x86_64/cmake" || fail
arch -x86_64 cmake --build java-jni --target install --config Release || fail

# 4. Java jars. The copy only happens when Maven succeeded, so a stale jar from
# an earlier build is never shipped.
cd java || fail
"$MVN" -DskipTests \
  -Darrow.c.jni.dist.dir="$DIST_LIB" \
  -Darrow.cpp.build.dir="$DIST_LIB" \
  -Parrow-jni clean install || fail
cp "gandiva/target/arrow-gandiva-${ARROW_VERSION}.jar" "$DREMIO_JARS_DIR" || fail
38 changes: 38 additions & 0 deletions build_testing.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/usr/bin/env bash
#
# Debug build of Arrow (C++ tests enabled) for x86_64; every native step runs
# under `arch -x86_64`:
#   CPP       C++ libraries with Gandiva and tests, built in cpp/debug
#   CPP-JNI   static C++ libraries for JNI, installed into java-dist
#   JAVA-JNI  the Java JNI bindings, installed into java-dist
#   JARS      the Java jars; the Gandiva jar is then copied into a Dremio
#             distribution
#
# Run from the root of the Arrow checkout: every build directory below is
# relative to the directory the script is started in.
#
# Optional environment overrides:
#   MVN              Maven executable (default: /opt/homebrew/bin/mvn)
#   ARROW_VERSION    version in the Gandiva jar file name (default: 12.0.1)
#   DREMIO_JARS_DIR  directory the Gandiva jar is copied into

# Report a failure the same way for every step and stop the build.
fail() {
  echo "failed"
  exit 1
}

ROOT="$PWD"
# The JNI steps install under $ROOT/java-dist, so Maven is pointed at that same
# tree rather than at a fixed absolute path of some other checkout.
DIST_LIB="$ROOT/java-dist/lib"
MVN="${MVN:-/opt/homebrew/bin/mvn}"
ARROW_VERSION="${ARROW_VERSION:-12.0.1}"
DREMIO_JARS_DIR="${DREMIO_JARS_DIR:-/Users/logan.riggs/github/dremio/enterprise/distribution/server/target/dremio-enterprise-24.3.0-SNAPSHOT/dremio-enterprise-24.3.0-SNAPSHOT/jars/3rdparty/}"

# Start from clean build trees. `mkdir` is deliberately not `-p`: it fails fast
# when the script is started outside the checkout root (no cpp/ directory).
rm -rf cpp-jni java-dist java-jni cpp/debug || fail
mkdir cpp/debug || fail
cd cpp/debug || fail

echo "====CPP===="
arch -x86_64 cmake \
  -DCMAKE_BUILD_TYPE=DEBUG \
  -DARROW_GANDIVA=ON \
  -DARROW_JEMALLOC=OFF \
  -DARROW_GANDIVA_JAVA=ON \
  -DARROW_BUILD_TESTS=ON \
  .. || fail
arch -x86_64 make -j 8 || fail

cd "$ROOT" || fail
mkdir -p java-jni cpp-jni || fail

echo "====CPP-JNI===="
arch -x86_64 cmake -S cpp -B cpp-jni \
  -DARROW_BUILD_SHARED=OFF \
  -DARROW_JEMALLOC=OFF \
  -DARROW_CSV=ON \
  -DARROW_DATASET=ON \
  -DARROW_DEPENDENCY_SOURCE=BUNDLED \
  -DARROW_DEPENDENCY_USE_SHARED=OFF \
  -DARROW_FILESYSTEM=ON \
  -DARROW_GANDIVA=ON \
  -DARROW_GANDIVA_STATIC_LIBSTDCPP=ON \
  -DARROW_ORC=ON \
  -DARROW_PARQUET=ON \
  -DARROW_S3=ON \
  -DARROW_USE_CCACHE=ON \
  -DCMAKE_BUILD_TYPE=Debug \
  -DCMAKE_INSTALL_LIBDIR=lib/x86_64 \
  -DCMAKE_INSTALL_PREFIX=java-dist \
  -DCMAKE_UNITY_BUILD=ON || fail
arch -x86_64 cmake --build cpp-jni --target install --config Debug || fail

echo "====JAVA-JNI===="
arch -x86_64 cmake -S java -B java-jni \
  -DARROW_JAVA_JNI_ENABLE_C=OFF \
  -DARROW_JEMALLOC=OFF \
  -DARROW_JAVA_JNI_ENABLE_DEFAULT=ON \
  -DBUILD_TESTING=OFF \
  -DCMAKE_BUILD_TYPE=Debug \
  -DCMAKE_INSTALL_LIBDIR=lib/x86_64 \
  -DCMAKE_INSTALL_PREFIX=java-dist \
  -DCMAKE_PREFIX_PATH="$ROOT/java-dist/lib/x86_64/cmake" \
  -DArrowTesting_DIR="$ROOT/cpp/debug/src/arrow" || fail
arch -x86_64 cmake --build java-jni --target install --config Debug || fail

echo "====JARS===="
cd java || fail
"$MVN" -DskipTests \
  -Darrow.c.jni.dist.dir="$DIST_LIB" \
  -Darrow.cpp.build.dir="$DIST_LIB" \
  -Parrow-jni clean install || fail
# The working directory is already java/, so the jar path must not repeat the
# java/ prefix.
cp "gandiva/target/arrow-gandiva-${ARROW_VERSION}.jar" "$DREMIO_JARS_DIR" || fail
15 changes: 13 additions & 2 deletions cpp/src/arrow/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -444,10 +444,21 @@ class ARROW_EXPORT ResizableBuffer : public MutableBuffer {
return Reserve(sizeof(T) * new_nb_elements);
}

public:
uint8_t* offsetBuffer;
int64_t offsetCapacity;

protected:
ResizableBuffer(uint8_t* data, int64_t size) : MutableBuffer(data, size) {}
/// Construct over `data`/`size` via the MutableBuffer base, starting with no
/// offset buffer attached (null pointer, zero capacity).
ResizableBuffer(uint8_t* data, int64_t size)
    : MutableBuffer(data, size), offsetBuffer(nullptr), offsetCapacity(0) {}
ResizableBuffer(uint8_t* data, int64_t size, std::shared_ptr<MemoryManager> mm)
: MutableBuffer(data, size, std::move(mm)) {}
: MutableBuffer(data, size, std::move(mm)) {
offsetBuffer = nullptr;
offsetCapacity = 0;
}
};

/// \defgroup buffer-allocation-functions Functions for allocating buffers
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/c/bridge.cc
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ struct SchemaExporter {
}

Status ExportSchema(const Schema& schema) {
static const StructType dummy_struct_type({});
static const StructType dummy_struct_type = StructType();
flags_ = 0;

RETURN_NOT_OK(ExportFormat(dummy_struct_type));
Expand Down
5 changes: 5 additions & 0 deletions cpp/src/arrow/type.cc
Original file line number Diff line number Diff line change
Expand Up @@ -856,6 +856,10 @@ StructType::StructType(const std::vector<std::shared_ptr<Field>>& fields)
children_ = fields;
}

/// Default constructor: initializes the NestedType base with Type::STRUCT and
/// assigns no child fields.
StructType::StructType() : NestedType(Type::STRUCT) {}

StructType::~StructType() {}

std::string StructType::ToString() const {
Expand Down Expand Up @@ -2527,6 +2531,7 @@ TYPE_FACTORY(float16, HalfFloatType)
TYPE_FACTORY(float32, FloatType)
TYPE_FACTORY(float64, DoubleType)
TYPE_FACTORY(utf8, StringType)
TYPE_FACTORY(structType, StructType)
TYPE_FACTORY(large_utf8, LargeStringType)
TYPE_FACTORY(binary, BinaryType)
TYPE_FACTORY(large_binary, LargeBinaryType)
Expand Down
3 changes: 3 additions & 0 deletions cpp/src/arrow/type.h
Original file line number Diff line number Diff line change
Expand Up @@ -1079,6 +1079,9 @@ class ARROW_EXPORT StructType : public NestedType {
static constexpr const char* type_name() { return "struct"; }

explicit StructType(const std::vector<std::shared_ptr<Field>>& fields);
explicit StructType();
StructType(const StructType& rhs) = delete;
StructType& operator=(const StructType& rhs) = delete;

~StructType() override;

Expand Down
6 changes: 6 additions & 0 deletions cpp/src/arrow/type_fwd.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,11 @@ class StringArray;
class StringBuilder;
struct StringScalar;

class StructType;
class StructArray;
class StructBuilder;
struct StructScalar;

class LargeStringType;
class LargeStringArray;
class LargeStringBuilder;
Expand Down Expand Up @@ -454,6 +459,7 @@ ARROW_EXPORT const std::shared_ptr<DataType>& float32();
ARROW_EXPORT const std::shared_ptr<DataType>& float64();
/// \brief Return a StringType instance
ARROW_EXPORT const std::shared_ptr<DataType>& utf8();
/// \brief Return a StructType instance
ARROW_EXPORT const std::shared_ptr<DataType>& structType();
/// \brief Return a LargeStringType instance
ARROW_EXPORT const std::shared_ptr<DataType>& large_utf8();
/// \brief Return a BinaryType instance
Expand Down
5 changes: 4 additions & 1 deletion cpp/src/gandiva/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,11 @@ set(SRC_FILES
expression_registry.cc
exported_funcs_registry.cc
filter.cc
array_ops.cc
function_ir_builder.cc
function_registry.cc
function_registry_arithmetic.cc
function_registry_array.cc
function_registry_datetime.cc
function_registry_hash.cc
function_registry_math_ops.cc
Expand Down Expand Up @@ -249,7 +251,8 @@ add_gandiva_test(internals-test
random_generator_holder_test.cc
hash_utils_test.cc
gdv_function_stubs_test.cc
interval_holder_test.cc)
interval_holder_test.cc
array_ops_test.cc)

add_subdirectory(precompiled)
add_subdirectory(tests)
99 changes: 92 additions & 7 deletions cpp/src/gandiva/annotator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include "gandiva/annotator.h"

#include <iostream>
#include <memory>
#include <string>

Expand Down Expand Up @@ -46,15 +47,24 @@ FieldDescriptorPtr Annotator::MakeDesc(FieldPtr field, bool is_output) {
int data_idx = buffer_count_++;
int validity_idx = buffer_count_++;
int offsets_idx = FieldDescriptor::kInvalidIdx;
int child_offsets_idx = FieldDescriptor::kInvalidIdx;
if (arrow::is_binary_like(field->type()->id())) {
offsets_idx = buffer_count_++;
}

if (field->type()->id() == arrow::Type::LIST) {
std::cout << "LR Annotator::MakeDesc 1" << std::endl;
offsets_idx = buffer_count_++;
if (arrow::is_binary_like(field->type()->field(0)->type()->id())) {
child_offsets_idx = buffer_count_++;
}
}
int data_buffer_ptr_idx = FieldDescriptor::kInvalidIdx;
if (is_output) {
data_buffer_ptr_idx = buffer_count_++;
}
return std::make_shared<FieldDescriptor>(field, data_idx, validity_idx, offsets_idx,
data_buffer_ptr_idx);
data_buffer_ptr_idx, child_offsets_idx);
}

int Annotator::AddHolderPointer(void* holder) {
Expand All @@ -71,33 +81,98 @@ void Annotator::PrepareBuffersForField(const FieldDescriptor& desc,
// The validity buffer is optional. Use nullptr if it does not have one.
if (array_data.buffers[buffer_idx]) {
uint8_t* validity_buf = const_cast<uint8_t*>(array_data.buffers[buffer_idx]->data());
std::cout << "LR Annotator::PrepareBuffersForField setting eval buffer -6 " << &validity_buf << std::endl;
eval_batch->SetBuffer(desc.validity_idx(), validity_buf, array_data.offset);
} else {
std::cout << "LR Annotator::PrepareBuffersForField setting eval buffer -5 null " << std::endl;
eval_batch->SetBuffer(desc.validity_idx(), nullptr, array_data.offset);
}
++buffer_idx;

if (desc.HasOffsetsIdx()) {
uint8_t* offsets_buf = const_cast<uint8_t*>(array_data.buffers[buffer_idx]->data());
std::cout << "LR Annotator::PrepareBuffersForField setting eval buffer -4 " << &offsets_buf << " using idx=" << buffer_idx << std::endl;
eval_batch->SetBuffer(desc.offsets_idx(), offsets_buf, array_data.offset);
++buffer_idx;

if (desc.HasChildOffsetsIdx()) {
//std::cout << "LR Annotator::PrepareBuffersForField 1 for field " << desc.Name() << " type is " << array_data.type->id() << std::endl;
if (is_output) {
// if list field is output field, we should put buffer pointer into eval batch
// for resizing
uint8_t* child_offsets_buf = reinterpret_cast<uint8_t*>(
array_data.child_data.at(0)->buffers[buffer_idx].get());
std::cout << "LR Annotator::PrepareBuffersForField setting eval buffer -3 " << &child_offsets_buf << std::endl;
eval_batch->SetBuffer(desc.child_data_offsets_idx(), child_offsets_buf,
array_data.child_data.at(0)->offset);
} else {
//std::cout << "LR Annotator::PrepareBuffersForField 2" << std::endl;
// if list field is input field, just put buffer data into eval batch
uint8_t* child_offsets_buf = const_cast<uint8_t*>(
array_data.child_data.at(0)->buffers[buffer_idx]->data());
std::cout << "LR Annotator::PrepareBuffersForField setting eval buffer -2 " << &child_offsets_buf << std::endl;
eval_batch->SetBuffer(desc.child_data_offsets_idx(), child_offsets_buf,
array_data.child_data.at(0)->offset);
}
}
if (array_data.type->id() != arrow::Type::LIST ||
arrow::is_binary_like(array_data.type->field(0)->type()->id())) {
//std::cout << "LR Annotator::PrepareBuffersForField 3" << std::endl;

// primitive type list data buffer index is 1
// binary like type list data buffer index is 2
++buffer_idx;
}
}

if (array_data.type->id() != arrow::Type::LIST) {
//std::cout << "LR Annotator::PrepareBuffersForField 4" << std::endl;

//std::cout << "LR Annotator::PrepareBuffersForField 4 buffer_idx " << buffer_idx << std::endl;
uint8_t* data_buf = const_cast<uint8_t*>(array_data.buffers[buffer_idx]->data());
//std::cout << "LR Annotator::PrepareBuffersForField 4a" << std::endl;
std::cout << "LR Annotator::PrepareBuffersForField setting eval buffer -1 " << &data_buf << std::endl;
eval_batch->SetBuffer(desc.data_idx(), data_buf, array_data.offset);
//std::cout << "LR Annotator::PrepareBuffersForField 4b" << std::endl;
} else {
//std::cout << "LR Annotator::PrepareBuffersForField 5 " << desc.Name() << " buffer_idx " << buffer_idx << std::endl;
//std::cout << "LR Annotator::PrepareBuffersForField 5 array_data child size " << array_data.child_data.size() << std::endl;

uint8_t* data_buf =
const_cast<uint8_t*>(array_data.child_data.at(0)->buffers[buffer_idx]->data());
std::cout << "LR Annotator::PrepareBuffersForField setting offset eval buffer idx=" << buffer_idx << " data=" << &data_buf << std::endl;
eval_batch->SetBuffer(desc.data_idx(), data_buf, array_data.child_data.at(0)->offset);
//std::cout << "LR Annotator::PrepareBuffersForField 5a" << std::endl;
}

uint8_t* data_buf = const_cast<uint8_t*>(array_data.buffers[buffer_idx]->data());
eval_batch->SetBuffer(desc.data_idx(), data_buf, array_data.offset);
if (is_output) {
// pass in the Buffer object for output data buffers. Can be used for resizing.
uint8_t* data_buf_ptr =
reinterpret_cast<uint8_t*>(array_data.buffers[buffer_idx].get());
eval_batch->SetBuffer(desc.data_buffer_ptr_idx(), data_buf_ptr, array_data.offset);

if (array_data.type->id() != arrow::Type::LIST) {
uint8_t* data_buf_ptr =
reinterpret_cast<uint8_t*>(array_data.buffers[buffer_idx].get());
std::cout << "LR Annotator::PrepareBuffersForField setting eval buffer 1 " << &data_buf_ptr << std::endl;
eval_batch->SetBuffer(desc.data_buffer_ptr_idx(), data_buf_ptr, array_data.offset);
} else {
//std::cout << "LR Annotator::PrepareBuffersForField is_output index " << desc.data_buffer_ptr_idx() << std::endl;

// list data buffer is in child data buffer
uint8_t* data_buf_ptr = reinterpret_cast<uint8_t*>(
array_data.child_data.at(0)->buffers[buffer_idx].get());
std::cout << "LR Annotator::PrepareBuffersForField setting eval data buffer " << buffer_idx << " data=" << &data_buf_ptr << std::endl;

eval_batch->SetBuffer(desc.data_buffer_ptr_idx(), data_buf_ptr,
array_data.child_data.at(0)->offset);
}
}

}

EvalBatchPtr Annotator::PrepareEvalBatch(const arrow::RecordBatch& record_batch,
const ArrayDataVector& out_vector) const {
EvalBatchPtr eval_batch = std::make_shared<EvalBatch>(
record_batch.num_rows(), buffer_count_, local_bitmap_count_);

//std::cout << "LR PrepareEvalBatch 1" << std::endl;
// Fill in the entries for the input fields.
for (int i = 0; i < record_batch.num_columns(); ++i) {
const std::string& name = record_batch.column_name(i);
Expand All @@ -107,17 +182,27 @@ EvalBatchPtr Annotator::PrepareEvalBatch(const arrow::RecordBatch& record_batch,
continue;
}

/*std::cout << "LR PrepareEvalBatch 1a i=" << i << " record batch schema " << record_batch.schema()->ToString()
<< " num rows " << record_batch.num_rows()
<< " num columns " << record_batch.num_columns()
<< " data size " << record_batch.column_data().size()
<< " col 1 " << record_batch.column(0)->ToString()
<< std::endl;*/

//std::cout << "LR PrepareEvalBatch 1a i=" << i << " record batch data " << record_batch.ToString() << std::endl;
PrepareBuffersForField(*(found->second), *(record_batch.column_data(i)),
eval_batch.get(), false /*is_output*/);
}

// Fill in the entries for the output fields.
//std::cout << "LR PrepareEvalBatch preparing output fields" << std::endl;
int idx = 0;
for (auto& arraydata : out_vector) {
const FieldDescriptorPtr& desc = out_descs_.at(idx);
PrepareBuffersForField(*desc, *arraydata, eval_batch.get(), true /*is_output*/);
++idx;
}
//std::cout << "LR PrepareEvalBatch 2" << std::endl;
return eval_batch;
}

Expand Down
Loading