diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 9b9faeb4495..5f494f3bc7b 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -36,6 +36,7 @@ #include #include +#include template typename ArrowType::c_type @@ -160,6 +161,9 @@ G_BEGIN_DECLS * @title: Computation on data * @include: arrow-glib/arrow-glib.h * + * You must call garrow_compute_initialize() explicitly before you use + * computation related features. + * * #GArrowExecuteContext is a class to customize how to execute a * function. * @@ -250,6 +254,25 @@ G_BEGIN_DECLS * There are many functions to compute data on an array. */ +/** + * garrow_compute_initialize: + * @error: (nullable): Return location for a #GError or %NULL. + * + * You must call this explicitly before you use computation related + * features. + * + * Returns: %TRUE if initializing the compute module completed successfully, + * %FALSE otherwise. + * + * Since: 21.0.0 + */ +gboolean +garrow_compute_initialize(GError **error) +{ + auto status = arrow::compute::Initialize(); + return garrow::check(error, status, "[compute][initialize]"); +} + typedef struct GArrowExecuteContextPrivate_ { arrow::compute::ExecContext context; diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index 54b0ddb014f..0f689d147e3 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -25,6 +25,10 @@ G_BEGIN_DECLS +GARROW_AVAILABLE_IN_21_0 +gboolean +garrow_compute_initialize(GError **error); + #define GARROW_TYPE_EXECUTE_CONTEXT (garrow_execute_context_get_type()) GARROW_AVAILABLE_IN_1_0 G_DECLARE_DERIVABLE_TYPE( diff --git a/c_glib/arrow-glib/meson.build b/c_glib/arrow-glib/meson.build index ff52aedf003..b755ffb56ac 100644 --- a/c_glib/arrow-glib/meson.build +++ b/c_glib/arrow-glib/meson.build @@ -223,7 +223,7 @@ gio = cxx.find_library('gio-2.0', dirs: [gobject_libdir], required: false) if not gio.found() gio = dependency('gio-2.0') endif -dependencies = [arrow, 
arrow_acero, gobject, gio] +dependencies = [arrow_acero, arrow_compute, arrow, gobject, gio] libarrow_glib = library( 'arrow-glib', sources: sources + enums, diff --git a/c_glib/meson.build b/c_glib/meson.build index d783cb399e8..b3f62ab3010 100644 --- a/c_glib/meson.build +++ b/c_glib/meson.build @@ -147,7 +147,13 @@ if arrow_cpp_build_lib_dir == '' modules: ['ArrowCUDA::arrow_cuda_shared'], required: false, ) - # we do not support compiling GLib without Acero engine + # we do not support compiling GLib without Compute and Acero engine + arrow_compute = dependency( + 'arrow-compute', + 'ArrowCompute', + kwargs: common_args, + modules: ['ArrowCompute::arrow_compute_shared'], + ) arrow_acero = dependency( 'arrow-acero', 'ArrowAcero', @@ -215,6 +221,11 @@ main(void) dirs: [arrow_cpp_build_lib_dir], required: false, ) + arrow_compute = cpp_compiler.find_library( + 'arrow_compute', + dirs: [arrow_cpp_build_lib_dir], + required: true, + ) arrow_acero = cpp_compiler.find_library( 'arrow_acero', dirs: [arrow_cpp_build_lib_dir], diff --git a/c_glib/test/run-test.rb b/c_glib/test/run-test.rb index 46d2ebe3f6e..9fdcdcdce0e 100755 --- a/c_glib/test/run-test.rb +++ b/c_glib/test/run-test.rb @@ -31,6 +31,7 @@ Gio = GI.load("Gio") Arrow = GI.load("Arrow") +Arrow.compute_initialize module Arrow class Buffer alias_method :initialize_raw, :initialize diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 9854ac4a32c..228316006c0 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -553,6 +553,13 @@ if(ARROW_BUILD_STATIC) string(APPEND ARROW_ACERO_PC_CFLAGS_PRIVATE " -DARROW_ACERO_STATIC") endif() +# For arrow-compute.pc. +set(ARROW_COMPUTE_PC_CFLAGS "") +set(ARROW_COMPUTE_PC_CFLAGS_PRIVATE "") +if(ARROW_BUILD_STATIC) + string(APPEND ARROW_COMPUTE_PC_CFLAGS_PRIVATE " -DARROW_COMPUTE_STATIC") +endif() + # For arrow-cuda.pc. 
set(ARROW_CUDA_PC_CFLAGS "") set(ARROW_CUDA_PC_CFLAGS_PRIVATE "") diff --git a/cpp/examples/arrow/CMakeLists.txt b/cpp/examples/arrow/CMakeLists.txt index bf0748f5501..87b677f3b57 100644 --- a/cpp/examples/arrow/CMakeLists.txt +++ b/cpp/examples/arrow/CMakeLists.txt @@ -43,7 +43,13 @@ if(ARROW_SUBSTRAIT) endif() if(ARROW_COMPUTE AND ARROW_CSV) - add_arrow_example(compute_and_write_csv_example) + if(ARROW_BUILD_SHARED) + set(COMPUTE_KERNELS_LINK_LIBS arrow_compute_shared) + else() + set(COMPUTE_KERNELS_LINK_LIBS arrow_compute_static) + endif() + add_arrow_example(compute_and_write_csv_example EXTRA_LINK_LIBS + ${COMPUTE_KERNELS_LINK_LIBS}) endif() if(ARROW_FLIGHT) diff --git a/cpp/examples/arrow/compute_and_write_csv_example.cc b/cpp/examples/arrow/compute_and_write_csv_example.cc index 7e0f6cdf1ce..234d6abf570 100644 --- a/cpp/examples/arrow/compute_and_write_csv_example.cc +++ b/cpp/examples/arrow/compute_and_write_csv_example.cc @@ -41,6 +41,7 @@ // in the current directory arrow::Status RunMain(int argc, char** argv) { + ARROW_RETURN_NOT_OK(arrow::compute::Initialize()); // Make Arrays arrow::NumericBuilder int64_builder; arrow::BooleanBuilder boolean_builder; diff --git a/cpp/examples/arrow/join_example.cc b/cpp/examples/arrow/join_example.cc index c1c6e5e82ff..738420d48e1 100644 --- a/cpp/examples/arrow/join_example.cc +++ b/cpp/examples/arrow/join_example.cc @@ -82,6 +82,7 @@ arrow::Result> CreateDataSetFromCSVData } arrow::Status DoHashJoin() { + ARROW_RETURN_NOT_OK(arrow::compute::Initialize()); arrow::dataset::internal::Initialize(); ARROW_ASSIGN_OR_RAISE(auto l_dataset, CreateDataSetFromCSVData(true)); diff --git a/cpp/examples/tutorial_examples/CMakeLists.txt b/cpp/examples/tutorial_examples/CMakeLists.txt index a6f8350c41d..1466bce48af 100644 --- a/cpp/examples/tutorial_examples/CMakeLists.txt +++ b/cpp/examples/tutorial_examples/CMakeLists.txt @@ -37,7 +37,7 @@ target_link_libraries(file_access_example PRIVATE Arrow::arrow_shared 
Parquet::parquet_shared) add_executable(compute_example compute_example.cc) -target_link_libraries(compute_example PRIVATE Arrow::arrow_shared) +target_link_libraries(compute_example PRIVATE ArrowCompute::arrow_compute_shared) add_executable(dataset_example dataset_example.cc) target_link_libraries(dataset_example PRIVATE ArrowDataset::arrow_dataset_shared) diff --git a/cpp/examples/tutorial_examples/compute_example.cc b/cpp/examples/tutorial_examples/compute_example.cc index 3a65214c0ef..767719c52b0 100644 --- a/cpp/examples/tutorial_examples/compute_example.cc +++ b/cpp/examples/tutorial_examples/compute_example.cc @@ -49,6 +49,9 @@ arrow::Status RunMain() { schema = arrow::schema({field_a, field_b}); + // Initialize the compute module to register the required compute kernels. + ARROW_RETURN_NOT_OK(arrow::compute::Initialize()); + std::shared_ptr table; table = arrow::Table::Make(schema, {some_nums, more_nums}, 5); // (Doc section: Create Tables) diff --git a/cpp/examples/tutorial_examples/dataset_example.cc b/cpp/examples/tutorial_examples/dataset_example.cc index a980fa54939..c32cf6ec4c6 100644 --- a/cpp/examples/tutorial_examples/dataset_example.cc +++ b/cpp/examples/tutorial_examples/dataset_example.cc @@ -19,6 +19,7 @@ // (Doc section: Includes) #include +#include #include // We use Parquet headers for setting up examples; they are not required for using // datasets. @@ -75,6 +76,8 @@ arrow::Result CreateExampleParquetDataset( } arrow::Status PrepareEnv() { + // Initialize the compute module to register the required kernels for Dataset + ARROW_RETURN_NOT_OK(arrow::compute::Initialize()); // Get our environment prepared for reading, by setting up some quick writing. 
ARROW_ASSIGN_OR_RAISE(auto src_table, CreateTable()) std::shared_ptr setup_fs; diff --git a/cpp/src/arrow/ArrowComputeConfig.cmake.in b/cpp/src/arrow/ArrowComputeConfig.cmake.in new file mode 100644 index 00000000000..f38c776c8c8 --- /dev/null +++ b/cpp/src/arrow/ArrowComputeConfig.cmake.in @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +# This config sets the following variables in your project:: +# +# ArrowCompute_FOUND - true if Arrow Compute found on the system +# +# This config sets the following targets in your project:: +# +# ArrowCompute::arrow_compute_shared - for linked as shared library if shared library is built +# ArrowCompute::arrow_compute_static - for linked as static library if static library is built + +@PACKAGE_INIT@ + +include(CMakeFindDependencyMacro) +find_dependency(Arrow CONFIG) + +include("${CMAKE_CURRENT_LIST_DIR}/ArrowComputeTargets.cmake") + +arrow_keep_backward_compatibility(ArrowCompute arrow_compute) + +check_required_components(ArrowCompute) + +arrow_show_details(ArrowCompute ARROW_COMPUTE) diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 77558726986..44df72cea3e 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -718,7 +718,6 @@ set(ARROW_COMPUTE_SRCS compute/registry.cc compute/kernels/chunked_internal.cc compute/kernels/codegen_internal.cc - compute/kernels/ree_util_internal.cc compute/kernels/scalar_cast_boolean.cc compute/kernels/scalar_cast_dictionary.cc compute/kernels/scalar_cast_extension.cc @@ -727,7 +726,6 @@ set(ARROW_COMPUTE_SRCS compute/kernels/scalar_cast_numeric.cc compute/kernels/scalar_cast_string.cc compute/kernels/scalar_cast_temporal.cc - compute/kernels/util_internal.cc compute/kernels/vector_hash.cc compute/kernels/vector_selection.cc compute/kernels/vector_selection_filter_internal.cc @@ -735,9 +733,18 @@ set(ARROW_COMPUTE_SRCS compute/kernels/vector_selection_take_internal.cc) if(ARROW_COMPUTE) + # If libarrow_compute.a is only built, "pkg-config --cflags --libs + # arrow-compute" outputs build flags for static linking not shared + # linking. ARROW_COMPUTE_PC_* except ARROW_COMPUTE_PC_*_PRIVATE are for + # the static linking case. 
+ if(NOT ARROW_BUILD_SHARED AND ARROW_BUILD_STATIC) + string(APPEND ARROW_COMPUTE_PC_CFLAGS "${ARROW_COMPUTE_PC_CFLAGS_PRIVATE}") + set(ARROW_COMPUTE_PC_CFLAGS_PRIVATE "") + endif() # Include the remaining kernels list(APPEND - ARROW_COMPUTE_SRCS + ARROW_COMPUTE_LIB_SRCS + compute/initialize.cc compute/kernels/aggregate_basic.cc compute/kernels/aggregate_mode.cc compute/kernels/aggregate_pivot.cc @@ -748,6 +755,7 @@ if(ARROW_COMPUTE) compute/kernels/hash_aggregate_numeric.cc compute/kernels/hash_aggregate_pivot.cc compute/kernels/pivot_internal.cc + compute/kernels/ree_util_internal.cc compute/kernels/scalar_arithmetic.cc compute/kernels/scalar_boolean.cc compute/kernels/scalar_compare.cc @@ -761,6 +769,7 @@ if(ARROW_COMPUTE) compute/kernels/scalar_temporal_binary.cc compute/kernels/scalar_temporal_unary.cc compute/kernels/scalar_validity.cc + compute/kernels/util_internal.cc compute/kernels/vector_array_sort.cc compute/kernels/vector_cumulative_ops.cc compute/kernels/vector_nested.cc @@ -783,40 +792,89 @@ if(ARROW_COMPUTE) compute/util.cc compute/util_internal.cc) - append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/kernels/aggregate_basic_avx2.cc) - append_runtime_avx512_src(ARROW_COMPUTE_SRCS compute/kernels/aggregate_basic_avx512.cc) - append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/key_hash_internal_avx2.cc) - append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/key_map_internal_avx2.cc) - append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/row/compare_internal_avx2.cc) - append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/row/encode_internal_avx2.cc) - append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/util_avx2.cc) - -endif() + append_runtime_avx2_src(ARROW_COMPUTE_LIB_SRCS compute/kernels/aggregate_basic_avx2.cc) + append_runtime_avx512_src(ARROW_COMPUTE_LIB_SRCS + compute/kernels/aggregate_basic_avx512.cc) + append_runtime_avx2_src(ARROW_COMPUTE_LIB_SRCS compute/key_hash_internal_avx2.cc) + append_runtime_avx2_bmi2_src(ARROW_COMPUTE_LIB_SRCS 
compute/key_map_internal_avx2.cc) + append_runtime_avx2_src(ARROW_COMPUTE_LIB_SRCS compute/row/compare_internal_avx2.cc) + append_runtime_avx2_src(ARROW_COMPUTE_LIB_SRCS compute/row/encode_internal_avx2.cc) + append_runtime_avx2_bmi2_src(ARROW_COMPUTE_LIB_SRCS compute/util_avx2.cc) + + set(ARROW_COMPUTE_SHARED_PRIVATE_LINK_LIBS) + set(ARROW_COMPUTE_SHARED_LINK_LIBS) + set(ARROW_COMPUTE_STATIC_LINK_LIBS) + set(ARROW_COMPUTE_STATIC_INSTALL_INTERFACE_LIBS) + set(ARROW_COMPUTE_SHARED_INSTALL_INTERFACE_LIBS) + + list(APPEND ARROW_COMPUTE_STATIC_INSTALL_INTERFACE_LIBS Arrow::arrow_static) + list(APPEND ARROW_COMPUTE_SHARED_INSTALL_INTERFACE_LIBS Arrow::arrow_shared) + list(APPEND ARROW_COMPUTE_STATIC_LINK_LIBS arrow_static) + list(APPEND ARROW_COMPUTE_SHARED_LINK_LIBS arrow_shared) + + if(ARROW_USE_BOOST) + list(APPEND ARROW_COMPUTE_STATIC_LINK_LIBS Boost::headers) + list(APPEND ARROW_COMPUTE_SHARED_PRIVATE_LINK_LIBS Boost::headers) + endif() + if(ARROW_USE_XSIMD) + list(APPEND ARROW_COMPUTE_STATIC_LINK_LIBS ${ARROW_XSIMD}) + list(APPEND ARROW_COMPUTE_SHARED_PRIVATE_LINK_LIBS ${ARROW_XSIMD}) + endif() + if(ARROW_WITH_OPENTELEMETRY) + list(APPEND ARROW_COMPUTE_STATIC_LINK_LIBS ${ARROW_OPENTELEMETRY_LIBS}) + list(APPEND ARROW_COMPUTE_SHARED_PRIVATE_LINK_LIBS ${ARROW_OPENTELEMETRY_LIBS}) + endif() + if(ARROW_WITH_RE2) + list(APPEND ARROW_COMPUTE_STATIC_LINK_LIBS re2::re2) + list(APPEND ARROW_COMPUTE_SHARED_PRIVATE_LINK_LIBS re2::re2) + endif() + if(ARROW_WITH_UTF8PROC) + list(APPEND ARROW_COMPUTE_STATIC_LINK_LIBS utf8proc::utf8proc) + list(APPEND ARROW_COMPUTE_SHARED_PRIVATE_LINK_LIBS utf8proc::utf8proc) + endif() -arrow_add_object_library(ARROW_COMPUTE ${ARROW_COMPUTE_SRCS}) -if(ARROW_USE_BOOST) - foreach(ARROW_COMPUTE_TARGET ${ARROW_COMPUTE_TARGETS}) - target_link_libraries(${ARROW_COMPUTE_TARGET} PRIVATE Boost::headers) + add_arrow_lib(arrow_compute + CMAKE_PACKAGE_NAME + ArrowCompute + PKG_CONFIG_NAME + arrow-compute + SHARED_LINK_LIBS + 
${ARROW_COMPUTE_SHARED_LINK_LIBS} + SHARED_PRIVATE_LINK_LIBS + ${ARROW_COMPUTE_SHARED_PRIVATE_LINK_LIBS} + SHARED_INSTALL_INTERFACE_LIBS + ${ARROW_COMPUTE_SHARED_INSTALL_INTERFACE_LIBS} + STATIC_LINK_LIBS + ${ARROW_COMPUTE_STATIC_LINK_LIBS} + STATIC_INSTALL_INTERFACE_LIBS + ${ARROW_COMPUTE_STATIC_INSTALL_INTERFACE_LIBS} + OUTPUTS + ARROW_COMPUTE_LIBRARIES + SOURCES + ${ARROW_COMPUTE_LIB_SRCS} + SHARED_LINK_FLAGS + ${ARROW_VERSION_SCRIPT_FLAGS} # Defined in cpp/arrow/CMakeLists.txt + ) + foreach(LIB_TARGET ${ARROW_COMPUTE_LIBRARIES}) + target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_COMPUTE_EXPORTING) endforeach() + + if(ARROW_BUILD_STATIC AND WIN32) + target_compile_definitions(arrow_compute_static PUBLIC ARROW_COMPUTE_STATIC) + endif() endif() + +arrow_add_object_library(ARROW_COMPUTE_CORE ${ARROW_COMPUTE_SRCS}) + if(ARROW_USE_XSIMD) - foreach(ARROW_COMPUTE_TARGET ${ARROW_COMPUTE_TARGETS}) - target_link_libraries(${ARROW_COMPUTE_TARGET} PRIVATE ${ARROW_XSIMD}) + foreach(ARROW_COMPUTE_CORE_TARGET ${ARROW_COMPUTE_CORE_TARGETS}) + target_link_libraries(${ARROW_COMPUTE_CORE_TARGET} PRIVATE ${ARROW_XSIMD}) endforeach() endif() if(ARROW_WITH_OPENTELEMETRY) - foreach(ARROW_COMPUTE_TARGET ${ARROW_COMPUTE_TARGETS}) - target_link_libraries(${ARROW_COMPUTE_TARGET} PRIVATE ${ARROW_OPENTELEMETRY_LIBS}) - endforeach() -endif() -if(ARROW_WITH_RE2) - foreach(ARROW_COMPUTE_TARGET ${ARROW_COMPUTE_TARGETS}) - target_link_libraries(${ARROW_COMPUTE_TARGET} PRIVATE re2::re2) - endforeach() -endif() -if(ARROW_WITH_UTF8PROC) - foreach(ARROW_COMPUTE_TARGET ${ARROW_COMPUTE_TARGETS}) - target_link_libraries(${ARROW_COMPUTE_TARGET} PRIVATE utf8proc::utf8proc) + foreach(ARROW_COMPUTE_CORE_TARGET ${ARROW_COMPUTE_CORE_TARGETS}) + target_link_libraries(${ARROW_COMPUTE_CORE_TARGET} + PRIVATE ${ARROW_OPENTELEMETRY_LIBS}) endforeach() endif() @@ -1025,7 +1083,7 @@ add_arrow_lib(arrow ${ARROW_SHARED_LINK_FLAGS} SHARED_PRIVATE_LINK_LIBS ${ARROW_ARRAY_TARGET_SHARED} - 
${ARROW_COMPUTE_TARGET_SHARED} + ${ARROW_COMPUTE_CORE_TARGET_SHARED} ${ARROW_CSV_TARGET_SHARED} ${ARROW_FILESYSTEM_TARGET_SHARED} ${ARROW_INTEGRATION_TARGET_SHARED} @@ -1041,7 +1099,7 @@ add_arrow_lib(arrow ${ARROW_SYSTEM_LINK_LIBS} STATIC_LINK_LIBS ${ARROW_ARRAY_TARGET_STATIC} - ${ARROW_COMPUTE_TARGET_STATIC} + ${ARROW_COMPUTE_CORE_TARGET_STATIC} ${ARROW_CSV_TARGET_STATIC} ${ARROW_FILESYSTEM_TARGET_STATIC} ${ARROW_INTEGRATION_TARGET_STATIC} diff --git a/cpp/src/arrow/acero/ArrowAceroConfig.cmake.in b/cpp/src/arrow/acero/ArrowAceroConfig.cmake.in index fbb7a2bcafa..47488e8ac86 100644 --- a/cpp/src/arrow/acero/ArrowAceroConfig.cmake.in +++ b/cpp/src/arrow/acero/ArrowAceroConfig.cmake.in @@ -26,8 +26,12 @@ @PACKAGE_INIT@ +set(ARROW_ACERO_REQUIRED_DEPENDENCIES "@ARROW_ACERO_REQUIRED_DEPENDENCIES@") + include(CMakeFindDependencyMacro) -find_dependency(Arrow CONFIG) +foreach(dependency ${ARROW_ACERO_REQUIRED_DEPENDENCIES}) + find_dependency(${dependency} CONFIG) +endforeach() include("${CMAKE_CURRENT_LIST_DIR}/ArrowAceroTargets.cmake") diff --git a/cpp/src/arrow/acero/CMakeLists.txt b/cpp/src/arrow/acero/CMakeLists.txt index c3b08af84e0..37e00fd2566 100644 --- a/cpp/src/arrow/acero/CMakeLists.txt +++ b/cpp/src/arrow/acero/CMakeLists.txt @@ -28,6 +28,9 @@ if(NOT ARROW_BUILD_SHARED AND ARROW_BUILD_STATIC) set(ARROW_ACERO_PC_CFLAGS_PRIVATE "") endif() +set(ARROW_ACERO_PKG_CONFIG_REQUIRES "arrow-compute") +set(ARROW_ACERO_REQUIRED_DEPENDENCIES Arrow ArrowCompute) + set(ARROW_ACERO_SRCS accumulation_queue.cc scalar_aggregate_node.cc @@ -73,10 +76,12 @@ if(ARROW_WITH_OPENTELEMETRY) list(APPEND ARROW_ACERO_STATIC_LINK_LIBS ${ARROW_OPENTELEMETRY_LIBS}) endif() -list(APPEND ARROW_ACERO_STATIC_INSTALL_INTERFACE_LIBS Arrow::arrow_static) -list(APPEND ARROW_ACERO_SHARED_INSTALL_INTERFACE_LIBS Arrow::arrow_shared) -list(APPEND ARROW_ACERO_STATIC_LINK_LIBS arrow_static) -list(APPEND ARROW_ACERO_SHARED_LINK_LIBS arrow_shared) +list(APPEND ARROW_ACERO_STATIC_INSTALL_INTERFACE_LIBS 
Arrow::arrow_static + ArrowCompute::arrow_compute_static) +list(APPEND ARROW_ACERO_SHARED_INSTALL_INTERFACE_LIBS Arrow::arrow_shared + ArrowCompute::arrow_compute_shared) +list(APPEND ARROW_ACERO_STATIC_LINK_LIBS arrow_static arrow_compute_static) +list(APPEND ARROW_ACERO_SHARED_LINK_LIBS arrow_shared arrow_compute_shared) add_arrow_lib(arrow_acero CMAKE_PACKAGE_NAME diff --git a/cpp/src/arrow/acero/arrow-acero.pc.in b/cpp/src/arrow/acero/arrow-acero.pc.in index ddddd52c4dd..94249cd78bd 100644 --- a/cpp/src/arrow/acero/arrow-acero.pc.in +++ b/cpp/src/arrow/acero/arrow-acero.pc.in @@ -22,7 +22,7 @@ libdir=@ARROW_PKG_CONFIG_LIBDIR@ Name: Apache Arrow Acero Engine Description: Apache Arrow's Acero Engine. Version: @ARROW_VERSION@ -Requires: arrow +Requires: @ARROW_ACERO_PKG_CONFIG_REQUIRES@ Libs: -L${libdir} -larrow_acero Cflags:@ARROW_ACERO_PC_CFLAGS@ Cflags.private:@ARROW_ACERO_PC_CFLAGS_PRIVATE@ diff --git a/cpp/src/arrow/arrow-compute.pc.in b/cpp/src/arrow/arrow-compute.pc.in new file mode 100644 index 00000000000..2da0986d612 --- /dev/null +++ b/cpp/src/arrow/arrow-compute.pc.in @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +prefix=@CMAKE_INSTALL_PREFIX@ +includedir=@ARROW_PKG_CONFIG_INCLUDEDIR@ +libdir=@ARROW_PKG_CONFIG_LIBDIR@ + +Name: Apache Arrow Compute Kernels +Description: Apache Arrow's Compute Kernels. +Version: @ARROW_VERSION@ +Requires: arrow +Libs: -L${libdir} -larrow_compute +Cflags:@ARROW_COMPUTE_PC_CFLAGS@ +Cflags.private:@ARROW_COMPUTE_PC_CFLAGS_PRIVATE@ diff --git a/cpp/src/arrow/c/CMakeLists.txt b/cpp/src/arrow/c/CMakeLists.txt index 81a81cd3f11..a7f722aacc9 100644 --- a/cpp/src/arrow/c/CMakeLists.txt +++ b/cpp/src/arrow/c/CMakeLists.txt @@ -15,7 +15,28 @@ # specific language governing permissions and limitations # under the License. -add_arrow_test(bridge_test PREFIX "arrow-c") +# TODO(GH-37221): Remove compute dependency for REE requirements on bridge_test +set(ARROW_TEST_LINK_LIBS "") + +if(ARROW_TEST_LINKAGE STREQUAL "static") + list(APPEND ARROW_TEST_LINK_LIBS ${ARROW_TEST_STATIC_LINK_LIBS}) +else() + list(APPEND ARROW_TEST_LINK_LIBS ${ARROW_TEST_SHARED_LINK_LIBS}) +endif() + +if(ARROW_COMPUTE) + if(ARROW_TEST_LINKAGE STREQUAL "static") + list(APPEND ARROW_TEST_LINK_LIBS arrow_compute_static arrow_compute_testing) + else() + list(APPEND ARROW_TEST_LINK_LIBS arrow_compute_shared arrow_compute_testing) + endif() +endif() + +add_arrow_test(bridge_test + PREFIX + "arrow-c" + STATIC_LINK_LIBS + ${ARROW_TEST_LINK_LIBS}) add_arrow_test(dlpack_test) add_arrow_benchmark(bridge_benchmark) diff --git a/cpp/src/arrow/compute/CMakeLists.txt b/cpp/src/arrow/compute/CMakeLists.txt index 6deb2cbad8c..4255f2971c1 100644 --- a/cpp/src/arrow/compute/CMakeLists.txt +++ b/cpp/src/arrow/compute/CMakeLists.txt @@ -27,13 +27,30 @@ endif() # # Unit tests # +if(ARROW_TEST_LINKAGE STREQUAL "static") + set(ARROW_COMPUTE_TEST_LINK_LIBS arrow_compute_static ${ARROW_TEST_STATIC_LINK_LIBS}) +else() + set(ARROW_COMPUTE_TEST_LINK_LIBS arrow_compute_shared ${ARROW_TEST_SHARED_LINK_LIBS}) +endif() -# Define arrow_compute_testing object library for common test files +# Define 
arrow_compute_core_testing object library for common test files requiring +# only core compute. No extra kernels are required. if(ARROW_TESTING) - add_library(arrow_compute_testing OBJECT test_util_internal.cc) + add_library(arrow_compute_core_testing OBJECT test_util_internal.cc) # Even though this is still just an object library we still need to "link" our # dependencies so that include paths are configured correctly - target_link_libraries(arrow_compute_testing PUBLIC ${ARROW_GTEST_GMOCK}) + target_link_libraries(arrow_compute_core_testing PUBLIC ${ARROW_GTEST_GMOCK}) +endif() + +# Define arrow_compute_testing object library for test files requiring extra kernels. +if(ARROW_TESTING AND ARROW_COMPUTE) + set(ARROW_COMPUTE_TESTING_SRCS test_env.cc) + add_library(arrow_compute_testing OBJECT ${ARROW_COMPUTE_TESTING_SRCS}) + # Even though this is still just an object library we still need to "link" + # arrow_compute_core_testing so that it is also included correctly + target_link_libraries(arrow_compute_testing + PUBLIC $ + PUBLIC ${ARROW_GTEST_GTEST_MAIN}) endif() set(ARROW_COMPUTE_TEST_PREFIX "arrow-compute") @@ -86,6 +103,8 @@ function(ADD_ARROW_COMPUTE_TEST REL_TEST_NAME) ${PREFIX} LABELS ${LABELS} + STATIC_LINK_LIBS + ${ARROW_COMPUTE_TEST_LINK_LIBS} ${ARG_UNPARSED_ARGUMENTS}) endfunction() @@ -97,7 +116,7 @@ add_arrow_test(internals_test kernel_test.cc registry_test.cc EXTRA_LINK_LIBS - arrow_compute_testing) + arrow_compute_core_testing) add_arrow_compute_test(expression_test SOURCES diff --git a/cpp/src/arrow/compute/api.h b/cpp/src/arrow/compute/api.h index b701d992869..343e30643cf 100644 --- a/cpp/src/arrow/compute/api.h +++ b/cpp/src/arrow/compute/api.h @@ -34,6 +34,7 @@ #include "arrow/compute/cast.h" // IWYU pragma: export #include "arrow/compute/function.h" // IWYU pragma: export #include "arrow/compute/function_options.h" // IWYU pragma: export +#include "arrow/compute/initialize.h" // IWYU pragma: export #include "arrow/compute/kernel.h" // IWYU pragma: 
export #include "arrow/compute/registry.h" // IWYU pragma: export #include "arrow/datum.h" // IWYU pragma: export diff --git a/cpp/src/arrow/compute/initialize.cc b/cpp/src/arrow/compute/initialize.cc new file mode 100644 index 00000000000..d126ac951ff --- /dev/null +++ b/cpp/src/arrow/compute/initialize.cc @@ -0,0 +1,80 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+#include "arrow/compute/initialize.h" + +#include "arrow/compute/registry_internal.h" +#include "arrow/compute/type_fwd.h" +#include "arrow/status.h" + +namespace arrow::compute { +namespace { + +Status RegisterComputeKernels() { + auto registry = GetFunctionRegistry(); + + // Register additional kernels on libarrow_compute + // Scalar functions + internal::RegisterScalarArithmetic(registry); + internal::RegisterScalarBoolean(registry); + internal::RegisterScalarComparison(registry); + internal::RegisterScalarIfElse(registry); + internal::RegisterScalarNested(registry); + internal::RegisterScalarRandom(registry); // Nullary + internal::RegisterScalarRoundArithmetic(registry); + internal::RegisterScalarSetLookup(registry); + internal::RegisterScalarStringAscii(registry); + internal::RegisterScalarStringUtf8(registry); + internal::RegisterScalarTemporalBinary(registry); + internal::RegisterScalarTemporalUnary(registry); + internal::RegisterScalarValidity(registry); + + // Vector functions + internal::RegisterVectorArraySort(registry); + internal::RegisterVectorCumulativeSum(registry); + internal::RegisterVectorNested(registry); + internal::RegisterVectorRank(registry); + internal::RegisterVectorReplace(registry); + internal::RegisterVectorSelectK(registry); + internal::RegisterVectorSort(registry); + internal::RegisterVectorRunEndEncode(registry); + internal::RegisterVectorRunEndDecode(registry); + internal::RegisterVectorPairwise(registry); + internal::RegisterVectorStatistics(registry); + internal::RegisterVectorSwizzle(registry); + + // Aggregate functions + internal::RegisterHashAggregateBasic(registry); + internal::RegisterHashAggregateNumeric(registry); + internal::RegisterHashAggregatePivot(registry); + internal::RegisterScalarAggregateBasic(registry); + internal::RegisterScalarAggregateMode(registry); + internal::RegisterScalarAggregatePivot(registry); + internal::RegisterScalarAggregateQuantile(registry); + 
internal::RegisterScalarAggregateTDigest(registry); + internal::RegisterScalarAggregateVariance(registry); + + return Status::OK(); +} + +} // namespace + +Status Initialize() { + static auto st = RegisterComputeKernels(); + return st; +} + +} // namespace arrow::compute diff --git a/cpp/src/arrow/compute/initialize.h b/cpp/src/arrow/compute/initialize.h new file mode 100644 index 00000000000..db5e231325b --- /dev/null +++ b/cpp/src/arrow/compute/initialize.h @@ -0,0 +1,32 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "arrow/compute/visibility.h" +#include "arrow/status.h" + +namespace arrow::compute { + +/// \brief Initialize the compute module. +/// +/// Register the compute kernel functions to be available on the +/// global FunctionRegistry. +/// This function will only be available if ARROW_COMPUTE is enabled. 
+ARROW_COMPUTE_EXPORT Status Initialize(); + +} // namespace arrow::compute diff --git a/cpp/src/arrow/compute/kernels/CMakeLists.txt b/cpp/src/arrow/compute/kernels/CMakeLists.txt index 81b7adeb4aa..929cca8f5a4 100644 --- a/cpp/src/arrow/compute/kernels/CMakeLists.txt +++ b/cpp/src/arrow/compute/kernels/CMakeLists.txt @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +arrow_install_all_headers("arrow/compute/kernels") + # ---------------------------------------------------------------------- # Tests that don't require the full kernel library @@ -32,7 +34,7 @@ add_arrow_test(scalar_cast_test scalar_cast_test.cc EXTRA_LINK_LIBS arrow_compute_kernels_testing - arrow_compute_testing) + arrow_compute_core_testing) # ---------------------------------------------------------------------- # Scalar kernels diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic.cc b/cpp/src/arrow/compute/kernels/aggregate_basic.cc index 68b1ac7c03c..ee2c615bbfb 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_basic.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_basic.cc @@ -18,6 +18,7 @@ #include "arrow/compute/api_aggregate.h" #include "arrow/compute/kernels/aggregate_basic_internal.h" #include "arrow/compute/kernels/aggregate_internal.h" +#include "arrow/compute/kernels/codegen_internal.h" #include "arrow/compute/kernels/common_internal.h" #include "arrow/compute/kernels/util_internal.h" #include "arrow/util/cpu_info.h" diff --git a/cpp/src/arrow/compute/kernels/chunked_internal.h b/cpp/src/arrow/compute/kernels/chunked_internal.h index 5bc8233016f..330bd185f25 100644 --- a/cpp/src/arrow/compute/kernels/chunked_internal.h +++ b/cpp/src/arrow/compute/kernels/chunked_internal.h @@ -27,6 +27,7 @@ #include "arrow/chunk_resolver.h" #include "arrow/compute/kernels/codegen_internal.h" #include "arrow/util/span.h" +#include "arrow/util/visibility.h" namespace arrow::compute::internal { @@ -120,11 +121,11 @@ class ChunkedArrayResolver { } }; 
-std::vector GetArrayPointers(const ArrayVector& arrays); +ARROW_EXPORT std::vector GetArrayPointers(const ArrayVector& arrays); // A class that turns logical (linear) indices into physical (chunked) indices, // and vice-versa. -class ChunkedIndexMapper { +class ARROW_EXPORT ChunkedIndexMapper { public: ChunkedIndexMapper(const std::vector& chunks, uint64_t* indices_begin, uint64_t* indices_end) diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h index 94677de9440..289ba25f0b7 100644 --- a/cpp/src/arrow/compute/kernels/codegen_internal.h +++ b/cpp/src/arrow/compute/kernels/codegen_internal.h @@ -474,10 +474,12 @@ static void VisitTwoArrayValuesInline(const ArraySpan& arr0, const ArraySpan& ar // ---------------------------------------------------------------------- // Reusable type resolvers -Result FirstType(KernelContext*, const std::vector& types); -Result LastType(KernelContext*, const std::vector& types); -Result ListValuesType(KernelContext* ctx, - const std::vector& types); +ARROW_EXPORT Result FirstType(KernelContext*, + const std::vector& types); +ARROW_EXPORT Result LastType(KernelContext*, + const std::vector& types); +ARROW_EXPORT Result ListValuesType(KernelContext* ctx, + const std::vector& types); // ---------------------------------------------------------------------- // Helpers for iterating over common DataType instances for adding kernels to diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate.cc b/cpp/src/arrow/compute/kernels/hash_aggregate.cc index 18a5590b2e3..0e3e359bde1 100644 --- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc +++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc @@ -26,7 +26,9 @@ #include "arrow/array/concatenate.h" #include "arrow/compute/api_aggregate.h" #include "arrow/compute/api_vector.h" +#include "arrow/compute/kernel.h" #include "arrow/compute/kernels/aggregate_internal.h" +#include "arrow/compute/kernels/codegen_internal.h" #include 
"arrow/compute/kernels/common_internal.h" #include "arrow/compute/kernels/hash_aggregate_internal.h" #include "arrow/compute/kernels/util_internal.h" diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc index ed4f0c3c8ea..2864234f8a5 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc @@ -21,6 +21,7 @@ #include "arrow/builder.h" #include "arrow/compute/api_scalar.h" +#include "arrow/compute/kernels/codegen_internal.h" #include "arrow/compute/kernels/common_internal.h" #include "arrow/compute/kernels/temporal_internal.h" #include "arrow/util/checked_cast.h" diff --git a/cpp/src/arrow/compute/kernels/vector_hash.cc b/cpp/src/arrow/compute/kernels/vector_hash.cc index bd8cbdb0430..c969f330b70 100644 --- a/cpp/src/arrow/compute/kernels/vector_hash.cc +++ b/cpp/src/arrow/compute/kernels/vector_hash.cc @@ -29,6 +29,7 @@ #include "arrow/buffer.h" #include "arrow/compute/api_vector.h" #include "arrow/compute/cast.h" +#include "arrow/compute/kernels/codegen_internal.h" #include "arrow/compute/kernels/common_internal.h" #include "arrow/result.h" #include "arrow/util/hashing.h" diff --git a/cpp/src/arrow/compute/kernels/vector_replace.cc b/cpp/src/arrow/compute/kernels/vector_replace.cc index 7f6713f74c1..945f48a64ca 100644 --- a/cpp/src/arrow/compute/kernels/vector_replace.cc +++ b/cpp/src/arrow/compute/kernels/vector_replace.cc @@ -16,6 +16,7 @@ // under the License. 
#include "arrow/compute/api_scalar.h" +#include "arrow/compute/kernels/codegen_internal.h" #include "arrow/compute/kernels/common_internal.h" #include "arrow/compute/kernels/copy_data_internal.h" #include "arrow/compute/kernels/util_internal.h" diff --git a/cpp/src/arrow/compute/key_hash_internal.h b/cpp/src/arrow/compute/key_hash_internal.h index 769f3b2145e..d141603ce0f 100644 --- a/cpp/src/arrow/compute/key_hash_internal.h +++ b/cpp/src/arrow/compute/key_hash_internal.h @@ -21,6 +21,7 @@ #include "arrow/compute/light_array_internal.h" #include "arrow/compute/util.h" +#include "arrow/compute/visibility.h" #include "arrow/util/simd.h" namespace arrow { @@ -34,7 +35,7 @@ enum class BloomFilterBuildStrategy; // Implementations are based on xxh3 32-bit algorithm description from: // https://github.com/Cyan4973/xxHash/blob/dev/doc/xxhash_spec.md // -class ARROW_EXPORT Hashing32 { +class ARROW_COMPUTE_EXPORT Hashing32 { friend class TestVectorHash; template friend void TestBloomLargeHashHelper(int64_t, int64_t, const std::vector&, @@ -157,7 +158,7 @@ class ARROW_EXPORT Hashing32 { #endif }; -class ARROW_EXPORT Hashing64 { +class ARROW_COMPUTE_EXPORT Hashing64 { friend class TestVectorHash; template friend void TestBloomLargeHashHelper(int64_t, int64_t, const std::vector&, diff --git a/cpp/src/arrow/compute/key_map_internal.h b/cpp/src/arrow/compute/key_map_internal.h index c558ef5c2a6..27583e82ade 100644 --- a/cpp/src/arrow/compute/key_map_internal.h +++ b/cpp/src/arrow/compute/key_map_internal.h @@ -22,6 +22,7 @@ #include "arrow/compute/util.h" #include "arrow/compute/util_internal.h" +#include "arrow/compute/visibility.h" #include "arrow/result.h" #include "arrow/status.h" #include "arrow/type_fwd.h" @@ -37,7 +38,7 @@ namespace compute { // slots, stamps) and operations provided by this class is given in the document: // arrow/acero/doc/key_map.md. 
// -class ARROW_EXPORT SwissTable { +class ARROW_COMPUTE_EXPORT SwissTable { friend class SwissTableMerge; public: diff --git a/cpp/src/arrow/compute/light_array_internal.h b/cpp/src/arrow/compute/light_array_internal.h index cf7b95cbe74..ecd7e758ecd 100644 --- a/cpp/src/arrow/compute/light_array_internal.h +++ b/cpp/src/arrow/compute/light_array_internal.h @@ -23,6 +23,7 @@ #include "arrow/compute/exec.h" #include "arrow/compute/util.h" #include "arrow/compute/util_internal.h" +#include "arrow/compute/visibility.h" #include "arrow/type.h" #include "arrow/util/cpu_info.h" #include "arrow/util/logging.h" @@ -53,7 +54,7 @@ struct LightContext { /// and no children. /// /// This metadata object is a zero-allocation analogue of arrow::DataType -struct ARROW_EXPORT KeyColumnMetadata { +struct ARROW_COMPUTE_EXPORT KeyColumnMetadata { KeyColumnMetadata() = default; KeyColumnMetadata(bool is_fixed_length_in, uint32_t fixed_length_in, bool is_null_type_in = false) @@ -81,7 +82,7 @@ struct ARROW_EXPORT KeyColumnMetadata { /// A "key" column is a non-nested, non-union column \see KeyColumnMetadata /// /// This metadata object is a zero-allocation analogue of arrow::ArrayData -class ARROW_EXPORT KeyColumnArray { +class ARROW_COMPUTE_EXPORT KeyColumnArray { public: /// \brief Create an uninitialized KeyColumnArray KeyColumnArray() = default; @@ -218,7 +219,7 @@ class ARROW_EXPORT KeyColumnArray { /// /// This should only be called on "key" columns. Calling this with /// a non-key column will return Status::TypeError. -ARROW_EXPORT Result ColumnMetadataFromDataType( +ARROW_COMPUTE_EXPORT Result ColumnMetadataFromDataType( const std::shared_ptr& type); /// \brief Create KeyColumnArray from ArrayData @@ -228,7 +229,7 @@ ARROW_EXPORT Result ColumnMetadataFromDataType( /// /// The caller should ensure this is only called on "key" columns. 
/// \see ColumnMetadataFromDataType for details -ARROW_EXPORT Result ColumnArrayFromArrayData( +ARROW_COMPUTE_EXPORT Result ColumnArrayFromArrayData( const std::shared_ptr& array_data, int64_t start_row, int64_t num_rows); /// \brief Create KeyColumnArray from ArrayData and KeyColumnMetadata @@ -238,7 +239,7 @@ ARROW_EXPORT Result ColumnArrayFromArrayData( /// /// The caller should ensure this is only called on "key" columns. /// \see ColumnMetadataFromDataType for details -ARROW_EXPORT KeyColumnArray ColumnArrayFromArrayDataAndMetadata( +ARROW_COMPUTE_EXPORT KeyColumnArray ColumnArrayFromArrayDataAndMetadata( const std::shared_ptr& array_data, const KeyColumnMetadata& metadata, int64_t start_row, int64_t num_rows); @@ -248,7 +249,7 @@ ARROW_EXPORT KeyColumnArray ColumnArrayFromArrayDataAndMetadata( /// /// All columns in `batch` must be eligible "key" columns and have an array shape /// \see ColumnMetadataFromDataType for more details -ARROW_EXPORT Status ColumnMetadatasFromExecBatch( +ARROW_COMPUTE_EXPORT Status ColumnMetadatasFromExecBatch( const ExecBatch& batch, std::vector* column_metadatas); /// \brief Create KeyColumnArray instances from a slice of an ExecBatch @@ -257,9 +258,9 @@ ARROW_EXPORT Status ColumnMetadatasFromExecBatch( /// /// All columns in `batch` must be eligible "key" columns and have an array shape /// \see ColumnArrayFromArrayData for more details -ARROW_EXPORT Status ColumnArraysFromExecBatch(const ExecBatch& batch, int64_t start_row, - int64_t num_rows, - std::vector* column_arrays); +ARROW_COMPUTE_EXPORT Status +ColumnArraysFromExecBatch(const ExecBatch& batch, int64_t start_row, int64_t num_rows, + std::vector* column_arrays); /// \brief Create KeyColumnArray instances from an ExecBatch /// @@ -267,8 +268,8 @@ ARROW_EXPORT Status ColumnArraysFromExecBatch(const ExecBatch& batch, int64_t st /// /// All columns in `batch` must be eligible "key" columns and have an array shape /// \see ColumnArrayFromArrayData for more details 
-ARROW_EXPORT Status ColumnArraysFromExecBatch(const ExecBatch& batch, - std::vector* column_arrays); +ARROW_COMPUTE_EXPORT Status ColumnArraysFromExecBatch( + const ExecBatch& batch, std::vector* column_arrays); /// A lightweight resizable array for "key" columns /// @@ -276,7 +277,7 @@ ARROW_EXPORT Status ColumnArraysFromExecBatch(const ExecBatch& batch, /// /// Resizing is handled by arrow::ResizableBuffer and a doubling approach is /// used so that resizes will always grow up to the next power of 2 -class ARROW_EXPORT ResizableArrayData { +class ARROW_COMPUTE_EXPORT ResizableArrayData { public: /// \brief Create an uninitialized instance /// @@ -372,7 +373,7 @@ class ARROW_EXPORT ResizableArrayData { /// \brief A builder to concatenate batches of data into a larger batch /// /// Will only store num_rows_max() rows -class ARROW_EXPORT ExecBatchBuilder { +class ARROW_COMPUTE_EXPORT ExecBatchBuilder { public: /// \brief Add rows from `source` into `target` column /// diff --git a/cpp/src/arrow/compute/registry.cc b/cpp/src/arrow/compute/registry.cc index b4f1c0f2f97..37e9d6c930a 100644 --- a/cpp/src/arrow/compute/registry.cc +++ b/cpp/src/arrow/compute/registry.cc @@ -292,50 +292,6 @@ static std::unique_ptr CreateBuiltInRegistry() { RegisterVectorOptions(registry.get()); RegisterAggregateOptions(registry.get()); -#ifdef ARROW_COMPUTE - // Register additional kernels - - // Scalar functions - RegisterScalarArithmetic(registry.get()); - RegisterScalarBoolean(registry.get()); - RegisterScalarComparison(registry.get()); - RegisterScalarIfElse(registry.get()); - RegisterScalarNested(registry.get()); - RegisterScalarRandom(registry.get()); // Nullary - RegisterScalarRoundArithmetic(registry.get()); - RegisterScalarSetLookup(registry.get()); - RegisterScalarStringAscii(registry.get()); - RegisterScalarStringUtf8(registry.get()); - RegisterScalarTemporalBinary(registry.get()); - RegisterScalarTemporalUnary(registry.get()); - RegisterScalarValidity(registry.get()); - - // 
Vector functions - RegisterVectorArraySort(registry.get()); - RegisterVectorCumulativeSum(registry.get()); - RegisterVectorNested(registry.get()); - RegisterVectorRank(registry.get()); - RegisterVectorReplace(registry.get()); - RegisterVectorSelectK(registry.get()); - RegisterVectorSort(registry.get()); - RegisterVectorRunEndEncode(registry.get()); - RegisterVectorRunEndDecode(registry.get()); - RegisterVectorPairwise(registry.get()); - RegisterVectorStatistics(registry.get()); - RegisterVectorSwizzle(registry.get()); - - // Aggregate functions - RegisterHashAggregateBasic(registry.get()); - RegisterHashAggregateNumeric(registry.get()); - RegisterHashAggregatePivot(registry.get()); - RegisterScalarAggregateBasic(registry.get()); - RegisterScalarAggregateMode(registry.get()); - RegisterScalarAggregatePivot(registry.get()); - RegisterScalarAggregateQuantile(registry.get()); - RegisterScalarAggregateTDigest(registry.get()); - RegisterScalarAggregateVariance(registry.get()); -#endif - return registry; } diff --git a/cpp/src/arrow/compute/row/CMakeLists.txt b/cpp/src/arrow/compute/row/CMakeLists.txt index 747fd0a92d9..542dc314806 100644 --- a/cpp/src/arrow/compute/row/CMakeLists.txt +++ b/cpp/src/arrow/compute/row/CMakeLists.txt @@ -20,6 +20,11 @@ arrow_install_all_headers("arrow/compute/row") -if(ARROW_COMPUTE) +if(ARROW_BUILD_BENCHMARKS AND ARROW_COMPUTE) add_arrow_benchmark(grouper_benchmark PREFIX "arrow-compute") + if(ARROW_BUILD_STATIC) + target_link_libraries(arrow-compute-grouper-benchmark PUBLIC arrow_compute_static) + else() + target_link_libraries(arrow-compute-grouper-benchmark PUBLIC arrow_compute_shared) + endif() endif() diff --git a/cpp/src/arrow/compute/row/compare_internal.h b/cpp/src/arrow/compute/row/compare_internal.h index 29d7f859e59..264ef69b39f 100644 --- a/cpp/src/arrow/compute/row/compare_internal.h +++ b/cpp/src/arrow/compute/row/compare_internal.h @@ -23,6 +23,7 @@ #include "arrow/compute/row/encode_internal.h" #include 
"arrow/compute/row/row_internal.h" #include "arrow/compute/util.h" +#include "arrow/compute/visibility.h" #include "arrow/memory_pool.h" #include "arrow/result.h" #include "arrow/status.h" @@ -30,7 +31,7 @@ namespace arrow { namespace compute { -class ARROW_EXPORT KeyCompare { +class ARROW_COMPUTE_EXPORT KeyCompare { public: // Clarify the max temp stack usage for CompareColumnsToRows, which might be necessary // for the caller to be aware of (possibly at compile time) to reserve enough stack size diff --git a/cpp/src/arrow/compute/row/encode_internal.h b/cpp/src/arrow/compute/row/encode_internal.h index 5ad82e0c8e7..6bfb87e6f84 100644 --- a/cpp/src/arrow/compute/row/encode_internal.h +++ b/cpp/src/arrow/compute/row/encode_internal.h @@ -26,6 +26,7 @@ #include "arrow/compute/light_array_internal.h" #include "arrow/compute/row/row_internal.h" #include "arrow/compute/util.h" +#include "arrow/compute/visibility.h" #include "arrow/memory_pool.h" #include "arrow/result.h" #include "arrow/status.h" @@ -44,7 +45,7 @@ namespace compute { /// be accessed together, as in the case of hash table key. /// /// Does not support nested types -class ARROW_EXPORT RowTableEncoder { +class ARROW_COMPUTE_EXPORT RowTableEncoder { public: void Init(const std::vector& cols, int row_alignment, int string_alignment); diff --git a/cpp/src/arrow/compute/row/grouper.h b/cpp/src/arrow/compute/row/grouper.h index 7554e5ef159..9424559385b 100644 --- a/cpp/src/arrow/compute/row/grouper.h +++ b/cpp/src/arrow/compute/row/grouper.h @@ -21,6 +21,7 @@ #include #include "arrow/compute/kernel.h" +#include "arrow/compute/visibility.h" #include "arrow/datum.h" #include "arrow/result.h" #include "arrow/util/visibility.h" @@ -36,7 +37,7 @@ namespace compute { /// same segment key within a given batch. When a segment group span cross batches, it /// will have multiple segments. A segment never spans cross batches. The segment data /// structure only makes sense when used along with a exec batch. 
-struct ARROW_EXPORT Segment { +struct ARROW_COMPUTE_EXPORT Segment { /// \brief the offset into the batch where the segment starts int64_t offset; /// \brief the length of the segment @@ -74,7 +75,7 @@ inline bool operator!=(const Segment& segment1, const Segment& segment2) { /// /// If the next call to the segmenter starts with `A A` then that segment would set the /// "extends" flag, which indicates whether the segment continues the last open batch. -class ARROW_EXPORT RowSegmenter { +class ARROW_COMPUTE_EXPORT RowSegmenter { public: virtual ~RowSegmenter() = default; @@ -101,7 +102,7 @@ class ARROW_EXPORT RowSegmenter { }; /// Consumes batches of keys and yields batches of the group ids. -class ARROW_EXPORT Grouper { +class ARROW_COMPUTE_EXPORT Grouper { public: virtual ~Grouper() = default; diff --git a/cpp/src/arrow/compute/row/grouper_internal.h b/cpp/src/arrow/compute/row/grouper_internal.h index eb3dfe8ba16..bce9ea1d3d5 100644 --- a/cpp/src/arrow/compute/row/grouper_internal.h +++ b/cpp/src/arrow/compute/row/grouper_internal.h @@ -20,7 +20,7 @@ namespace arrow { namespace compute { -ARROW_EXPORT Result> MakeAnyKeysSegmenter( +ARROW_COMPUTE_EXPORT Result> MakeAnyKeysSegmenter( const std::vector& key_types, ExecContext* ctx); } // namespace compute diff --git a/cpp/src/arrow/compute/row/row_encoder_internal.h b/cpp/src/arrow/compute/row/row_encoder_internal.h index 2cb47d4a600..9337e78bf8a 100644 --- a/cpp/src/arrow/compute/row/row_encoder_internal.h +++ b/cpp/src/arrow/compute/row/row_encoder_internal.h @@ -20,6 +20,7 @@ #include #include "arrow/compute/kernels/codegen_internal.h" +#include "arrow/compute/visibility.h" #include "arrow/visit_data_inline.h" namespace arrow { @@ -29,7 +30,7 @@ using internal::checked_cast; namespace compute { namespace internal { -struct ARROW_EXPORT KeyEncoder { +struct ARROW_COMPUTE_EXPORT KeyEncoder { // the first byte of an encoded key is used to indicate nullity static constexpr bool kExtraByteForNull = true; @@ -85,7 
+86,7 @@ struct ARROW_EXPORT KeyEncoder { } }; -struct ARROW_EXPORT BooleanKeyEncoder : KeyEncoder { +struct ARROW_COMPUTE_EXPORT BooleanKeyEncoder : KeyEncoder { static constexpr int kByteWidth = 1; void AddLength(const ExecValue& data, int64_t batch_length, int32_t* lengths) override; @@ -101,7 +102,7 @@ struct ARROW_EXPORT BooleanKeyEncoder : KeyEncoder { MemoryPool* pool) override; }; -struct ARROW_EXPORT FixedWidthKeyEncoder : KeyEncoder { +struct ARROW_COMPUTE_EXPORT FixedWidthKeyEncoder : KeyEncoder { explicit FixedWidthKeyEncoder(std::shared_ptr type) : type_(std::move(type)), byte_width_(checked_cast(*type_).bit_width() / 8) {} @@ -122,7 +123,7 @@ struct ARROW_EXPORT FixedWidthKeyEncoder : KeyEncoder { const int byte_width_; }; -struct ARROW_EXPORT DictionaryKeyEncoder : FixedWidthKeyEncoder { +struct ARROW_COMPUTE_EXPORT DictionaryKeyEncoder : FixedWidthKeyEncoder { DictionaryKeyEncoder(std::shared_ptr type, MemoryPool* pool) : FixedWidthKeyEncoder(std::move(type)), pool_(pool) {} @@ -251,7 +252,7 @@ struct VarLengthKeyEncoder : KeyEncoder { std::shared_ptr type_; }; -struct ARROW_EXPORT NullKeyEncoder : KeyEncoder { +struct ARROW_COMPUTE_EXPORT NullKeyEncoder : KeyEncoder { void AddLength(const ExecValue&, int64_t batch_length, int32_t* lengths) override {} void AddLengthNull(int32_t* length) override {} @@ -331,7 +332,7 @@ struct ARROW_EXPORT NullKeyEncoder : KeyEncoder { /// # Row Encoding /// /// The row format is the concatenation of the encodings of each column. 
-class ARROW_EXPORT RowEncoder { +class ARROW_COMPUTE_EXPORT RowEncoder { public: static constexpr int kRowIdForNulls() { return -1; } diff --git a/cpp/src/arrow/compute/row/row_internal.h b/cpp/src/arrow/compute/row/row_internal.h index 0919773a228..219fcbc51f4 100644 --- a/cpp/src/arrow/compute/row/row_internal.h +++ b/cpp/src/arrow/compute/row/row_internal.h @@ -21,6 +21,7 @@ #include "arrow/buffer.h" #include "arrow/compute/light_array_internal.h" +#include "arrow/compute/visibility.h" #include "arrow/memory_pool.h" #include "arrow/status.h" #include "arrow/util/logging.h" @@ -29,7 +30,7 @@ namespace arrow { namespace compute { /// Description of the data stored in a RowTable -struct ARROW_EXPORT RowTableMetadata { +struct ARROW_COMPUTE_EXPORT RowTableMetadata { using offset_type = int64_t; /// \brief True if there are no variable length columns in the table @@ -170,7 +171,7 @@ struct ARROW_EXPORT RowTableMetadata { /// Can store both fixed-size data types and variable-length data types /// /// The row table is not safe -class ARROW_EXPORT RowTableImpl { +class ARROW_COMPUTE_EXPORT RowTableImpl { public: using offset_type = RowTableMetadata::offset_type; diff --git a/cpp/src/arrow/compute/test_env.cc b/cpp/src/arrow/compute/test_env.cc new file mode 100644 index 00000000000..530ef5fa24d --- /dev/null +++ b/cpp/src/arrow/compute/test_env.cc @@ -0,0 +1,42 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include "arrow/compute/initialize.h" +#include "arrow/testing/gtest_util.h" + +namespace arrow::compute { + +namespace { + +class ComputeKernelEnvironment : public ::testing::Environment { + public: + // This must be done before using the compute kernels in order to + // register them to the FunctionRegistry. + ComputeKernelEnvironment() : ::testing::Environment() {} + + void SetUp() override { ASSERT_OK(arrow::compute::Initialize()); } +}; + +} // namespace + +// Initialize the compute module +::testing::Environment* compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); + +} // namespace arrow::compute diff --git a/cpp/src/arrow/compute/util.h b/cpp/src/arrow/compute/util.h index 1aaff43e10e..ad541e182a4 100644 --- a/cpp/src/arrow/compute/util.h +++ b/cpp/src/arrow/compute/util.h @@ -26,6 +26,7 @@ #include "arrow/compute/expression.h" #include "arrow/compute/type_fwd.h" +#include "arrow/compute/visibility.h" #include "arrow/result.h" #include "arrow/util/cpu_info.h" #include "arrow/util/simd.h" @@ -66,49 +67,54 @@ class MiniBatch { namespace bit_util { -ARROW_EXPORT void bits_to_indexes(int bit_to_search, int64_t hardware_flags, - const int num_bits, const uint8_t* bits, - int* num_indexes, uint16_t* indexes, - int bit_offset = 0); +ARROW_COMPUTE_EXPORT void bits_to_indexes(int bit_to_search, int64_t hardware_flags, + const int num_bits, const uint8_t* bits, + int* num_indexes, uint16_t* indexes, + int bit_offset = 0); -ARROW_EXPORT void bits_filter_indexes(int bit_to_search, int64_t 
hardware_flags, - const int num_bits, const uint8_t* bits, - const uint16_t* input_indexes, int* num_indexes, - uint16_t* indexes, int bit_offset = 0); +ARROW_COMPUTE_EXPORT void bits_filter_indexes(int bit_to_search, int64_t hardware_flags, + const int num_bits, const uint8_t* bits, + const uint16_t* input_indexes, + int* num_indexes, uint16_t* indexes, + int bit_offset = 0); // Input and output indexes may be pointing to the same data (in-place filtering). -ARROW_EXPORT void bits_split_indexes(int64_t hardware_flags, const int num_bits, - const uint8_t* bits, int* num_indexes_bit0, - uint16_t* indexes_bit0, uint16_t* indexes_bit1, - int bit_offset = 0); +ARROW_COMPUTE_EXPORT void bits_split_indexes(int64_t hardware_flags, const int num_bits, + const uint8_t* bits, int* num_indexes_bit0, + uint16_t* indexes_bit0, + uint16_t* indexes_bit1, int bit_offset = 0); // Bit 1 is replaced with byte 0xFF. -ARROW_EXPORT void bits_to_bytes(int64_t hardware_flags, const int num_bits, - const uint8_t* bits, uint8_t* bytes, int bit_offset = 0); +ARROW_COMPUTE_EXPORT void bits_to_bytes(int64_t hardware_flags, const int num_bits, + const uint8_t* bits, uint8_t* bytes, + int bit_offset = 0); // Return highest bit of each byte. -ARROW_EXPORT void bytes_to_bits(int64_t hardware_flags, const int num_bits, - const uint8_t* bytes, uint8_t* bits, int bit_offset = 0); +ARROW_COMPUTE_EXPORT void bytes_to_bits(int64_t hardware_flags, const int num_bits, + const uint8_t* bytes, uint8_t* bits, + int bit_offset = 0); -ARROW_EXPORT bool are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes, - uint32_t num_bytes); +ARROW_COMPUTE_EXPORT bool are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes, + uint32_t num_bytes); #if defined(ARROW_HAVE_RUNTIME_AVX2) && defined(ARROW_HAVE_RUNTIME_BMI2) // The functions below use BMI2 instructions, be careful before calling! 
namespace avx2 { -ARROW_EXPORT void bits_filter_indexes_avx2(int bit_to_search, const int num_bits, - const uint8_t* bits, - const uint16_t* input_indexes, - int* num_indexes, uint16_t* indexes); -ARROW_EXPORT void bits_to_indexes_avx2(int bit_to_search, const int num_bits, - const uint8_t* bits, int* num_indexes, - uint16_t* indexes, uint16_t base_index = 0); -ARROW_EXPORT void bits_to_bytes_avx2(const int num_bits, const uint8_t* bits, - uint8_t* bytes); -ARROW_EXPORT void bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes, - uint8_t* bits); -ARROW_EXPORT bool are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t num_bytes); +ARROW_COMPUTE_EXPORT void bits_filter_indexes_avx2(int bit_to_search, const int num_bits, + const uint8_t* bits, + const uint16_t* input_indexes, + int* num_indexes, uint16_t* indexes); +ARROW_COMPUTE_EXPORT void bits_to_indexes_avx2(int bit_to_search, const int num_bits, + const uint8_t* bits, int* num_indexes, + uint16_t* indexes, + uint16_t base_index = 0); +ARROW_COMPUTE_EXPORT void bits_to_bytes_avx2(const int num_bits, const uint8_t* bits, + uint8_t* bytes); +ARROW_COMPUTE_EXPORT void bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes, + uint8_t* bits); +ARROW_COMPUTE_EXPORT bool are_all_bytes_zero_avx2(const uint8_t* bytes, + uint32_t num_bytes); } // namespace avx2 #endif diff --git a/cpp/src/arrow/compute/util_internal.h b/cpp/src/arrow/compute/util_internal.h index 5e5b15a5ff6..301fd4939b4 100644 --- a/cpp/src/arrow/compute/util_internal.h +++ b/cpp/src/arrow/compute/util_internal.h @@ -17,6 +17,7 @@ #pragma once +#include "arrow/compute/visibility.h" #include "arrow/status.h" #include "arrow/type_fwd.h" #include "arrow/util/logging.h" @@ -34,7 +35,7 @@ void CheckAlignment(const void* ptr) { /// Temporary vectors should resemble allocating temporary variables on the stack /// but in the context of vectorized processing where we need to store a vector of /// temporaries instead of a single value. 
-class ARROW_EXPORT TempVectorStack { +class ARROW_COMPUTE_EXPORT TempVectorStack { template friend class TempVectorHolder; diff --git a/cpp/src/arrow/compute/visibility.h b/cpp/src/arrow/compute/visibility.h new file mode 100644 index 00000000000..ae994bd2333 --- /dev/null +++ b/cpp/src/arrow/compute/visibility.h @@ -0,0 +1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#if defined(_WIN32) || defined(__CYGWIN__) +# if defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable : 4251) +# else +# pragma GCC diagnostic ignored "-Wattributes" +# endif + +# ifdef ARROW_COMPUTE_STATIC +# define ARROW_COMPUTE_EXPORT +# elif defined(ARROW_COMPUTE_EXPORTING) +# define ARROW_COMPUTE_EXPORT __declspec(dllexport) +# else +# define ARROW_COMPUTE_EXPORT __declspec(dllimport) +# endif + +# define ARROW_COMPUTE_NO_EXPORT + +# if defined(_MSC_VER) +# pragma warning(pop) +# endif + +#else // Not Windows +# ifndef ARROW_COMPUTE_EXPORT +# define ARROW_COMPUTE_EXPORT __attribute__((visibility("default"))) +# endif +# ifndef ARROW_COMPUTE_NO_EXPORT +# define ARROW_COMPUTE_NO_EXPORT __attribute__((visibility("hidden"))) +# endif +#endif diff --git a/cpp/src/arrow/dataset/CMakeLists.txt b/cpp/src/arrow/dataset/CMakeLists.txt index 34e26a4cb90..809bdfaae6c 100644 --- a/cpp/src/arrow/dataset/CMakeLists.txt +++ b/cpp/src/arrow/dataset/CMakeLists.txt @@ -40,8 +40,8 @@ set(ARROW_DATASET_SRCS scanner.cc scan_node.cc) -set(ARROW_DATASET_PKG_CONFIG_REQUIRES "arrow-acero") -set(ARROW_DATASET_REQUIRED_DEPENDENCIES Arrow ArrowAcero) +set(ARROW_DATASET_PKG_CONFIG_REQUIRES "arrow-acero arrow-compute") +set(ARROW_DATASET_REQUIRED_DEPENDENCIES Arrow ArrowCompute ArrowAcero) if(ARROW_PARQUET) string(APPEND ARROW_DATASET_PKG_CONFIG_REQUIRES " parquet") list(APPEND ARROW_DATASET_REQUIRED_DEPENDENCIES Parquet) diff --git a/cpp/src/arrow/engine/CMakeLists.txt b/cpp/src/arrow/engine/CMakeLists.txt index 94bee50089a..adf98087ad1 100644 --- a/cpp/src/arrow/engine/CMakeLists.txt +++ b/cpp/src/arrow/engine/CMakeLists.txt @@ -87,6 +87,7 @@ add_arrow_test(substrait_test substrait/test_util.cc EXTRA_LINK_LIBS ${ARROW_SUBSTRAIT_TEST_LINK_LIBS} + arrow_compute_testing PREFIX "arrow-substrait" LABELS diff --git a/cpp/src/arrow/flight/sql/CMakeLists.txt b/cpp/src/arrow/flight/sql/CMakeLists.txt index 6f34e6e3798..958fea40acf 100644 --- 
a/cpp/src/arrow/flight/sql/CMakeLists.txt +++ b/cpp/src/arrow/flight/sql/CMakeLists.txt @@ -119,6 +119,7 @@ if(ARROW_BUILD_TESTS OR ARROW_BUILD_EXAMPLES) set(ARROW_FLIGHT_SQL_TEST_LIBS ${SQLite3_LIBRARIES}) set(ARROW_FLIGHT_SQL_ACERO_SRCS example/acero_server.cc) + set(ARROW_FLIGHT_SQL_EXTRA_LINK_LIBS "") if(ARROW_COMPUTE AND ARROW_PARQUET @@ -129,6 +130,7 @@ if(ARROW_BUILD_TESTS OR ARROW_BUILD_EXAMPLES) else() list(APPEND ARROW_FLIGHT_SQL_TEST_LIBS arrow_substrait_shared) endif() + list(APPEND ARROW_FLIGHT_SQL_EXTRA_LINK_LIBS arrow_compute_testing) if(ARROW_BUILD_EXAMPLES) add_executable(acero-flight-sql-server ${ARROW_FLIGHT_SQL_ACERO_SRCS} @@ -146,6 +148,8 @@ if(ARROW_BUILD_TESTS OR ARROW_BUILD_EXAMPLES) STATIC_LINK_LIBS ${ARROW_FLIGHT_SQL_TEST_LINK_LIBS} ${ARROW_FLIGHT_SQL_TEST_LIBS} + EXTRA_LINK_LIBS + ${ARROW_FLIGHT_SQL_EXTRA_LINK_LIBS} EXTRA_INCLUDES "${CMAKE_CURRENT_BINARY_DIR}/../" LABELS diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index de86d2845b9..f68d2dcb619 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -292,6 +292,7 @@ std::ostream& operator<<(std::ostream& os, const TypeHolder& type); /// - if a `PhysicalType` alias exists in the concrete type class, return /// an instance of `PhysicalType`. /// - otherwise, return the input type itself. +ARROW_EXPORT std::shared_ptr GetPhysicalType(const std::shared_ptr& type); /// \brief Base class for all fixed-width data types diff --git a/dev/tasks/linux-packages/apache-arrow/debian/control.in b/dev/tasks/linux-packages/apache-arrow/debian/control.in index 21ffcf8d5ed..3dc67b066bb 100644 --- a/dev/tasks/linux-packages/apache-arrow/debian/control.in +++ b/dev/tasks/linux-packages/apache-arrow/debian/control.in @@ -67,6 +67,19 @@ Description: Apache Arrow is a data processing library for analysis . This package provides tools. 
+Package: libarrow-compute2100 +Section: libs +Architecture: any +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends}, + libarrow2100 (= ${binary:Version}) +Description: Apache Arrow is a data processing library for analysis + . + This package provides C++ library files for Compute support. + Package: libarrow-cuda2100 Section: libs Architecture: @CUDA_ARCHITECTURE@ @@ -88,7 +101,7 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow2100 (= ${binary:Version}) + libarrow-compute2100 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files for Acero module. @@ -161,6 +174,17 @@ Description: Apache Arrow is a data processing library for analysis . This package provides C++ header files. +Package: libarrow-compute-dev +Section: libdevel +Architecture: any +Multi-Arch: same +Depends: + ${misc:Depends}, + libarrow-compute2100 (= ${binary:Version}) +Description: Apache Arrow is a data processing library for analysis + . + This package provides C++ header files for compute module. 
+ Package: libarrow-cuda-dev Section: libdevel Architecture: @CUDA_ARCHITECTURE@ @@ -320,6 +344,7 @@ Multi-Arch: same Depends: ${misc:Depends}, libglib2.0-dev, + libarrow-compute-dev (= ${binary:Version}), libarrow-acero-dev (= ${binary:Version}), libarrow-glib2100 (= ${binary:Version}), gir1.2-arrow-1.0 (= ${binary:Version}) diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-compute-dev.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-compute-dev.install new file mode 100644 index 00000000000..44b63512be2 --- /dev/null +++ b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-compute-dev.install @@ -0,0 +1,4 @@ +usr/lib/*/cmake/ArrowCompute/ +usr/lib/*/libarrow_compute.a +usr/lib/*/libarrow_compute.so +usr/lib/*/pkgconfig/arrow-compute.pc diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-compute2100.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-compute2100.install new file mode 100644 index 00000000000..f014d075f75 --- /dev/null +++ b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-compute2100.install @@ -0,0 +1 @@ +usr/lib/*/libarrow_compute.so.* diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install index 9df014c54ca..802095804ab 100644 --- a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install +++ b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install @@ -3,7 +3,6 @@ usr/lib/*/cmake/Arrow/ usr/lib/*/libarrow.a usr/lib/*/libarrow.so usr/lib/*/libarrow_bundled_dependencies.a -usr/lib/*/pkgconfig/arrow-compute.pc usr/lib/*/pkgconfig/arrow-csv.pc usr/lib/*/pkgconfig/arrow-filesystem.pc usr/lib/*/pkgconfig/arrow-json.pc diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in index 8557071ee6c..47e8230a071 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in +++ 
b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in @@ -320,6 +320,8 @@ Libraries and header files for Apache Arrow C++. %{_datadir}/gdb/auto-load/ %{_includedir}/arrow/ %exclude %{_includedir}/arrow/acero/ +%exclude %{_includedir}/arrow/compute/kernels +%exclude %{_includedir}/arrow/compute/row %exclude %{_includedir}/arrow/dataset/ %if %{use_flight} %exclude %{_includedir}/arrow/flight/ @@ -328,17 +330,50 @@ Libraries and header files for Apache Arrow C++. %{_libdir}/libarrow.a %{_libdir}/libarrow.so %{_libdir}/libarrow_bundled_dependencies.a -%{_libdir}/pkgconfig/arrow-compute.pc %{_libdir}/pkgconfig/arrow-csv.pc %{_libdir}/pkgconfig/arrow-filesystem.pc %{_libdir}/pkgconfig/arrow-json.pc %{_libdir}/pkgconfig/arrow-orc.pc %{_libdir}/pkgconfig/arrow.pc +%package -n %{name}%{so_version}-compute-libs +Summary: C++ library for extra compute functions +License: Apache-2.0 +Requires: %{name}%{so_version}-libs = %{version}-%{release} + +%description -n %{name}%{so_version}-compute-libs +This package contains the libraries for Apache Arrow Compute. 
+ +%files -n %{name}%{so_version}-compute-libs +%defattr(-,root,root,-) +%doc README.md +%license LICENSE.txt NOTICE.txt +%{_libdir}/libarrow_compute.so.* + +%package compute-devel +Summary: Libraries and header files for Apache Arrow Compute +License: Apache-2.0 +Requires: %{name}%{so_version}-compute-libs = %{version}-%{release} +Requires: %{name}-devel = %{version}-%{release} + +%description compute-devel +Libraries and header files for Apache Arrow Compute + +%files compute-devel +%defattr(-,root,root,-) +%doc README.md +%license LICENSE.txt NOTICE.txt +%{_includedir}/arrow/compute/kernels +%{_includedir}/arrow/compute/row +%{_libdir}/cmake/ArrowCompute/ +%{_libdir}/libarrow_compute.a +%{_libdir}/libarrow_compute.so +%{_libdir}/pkgconfig/arrow-compute.pc + %package -n %{name}%{so_version}-acero-libs Summary: C++ library to execute a query in streaming License: Apache-2.0 -Requires: %{name}%{so_version}-libs = %{version}-%{release} +Requires: %{name}%{so_version}-compute-libs = %{version}-%{release} %description -n %{name}%{so_version}-acero-libs This package contains the libraries for Apache Arrow Acero. 
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 204e0a62881..dc06f315d67 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -338,6 +338,9 @@ tasks: - libarrow-acero-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-acero{so_version}-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - libarrow-acero{so_version}_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-compute-dev_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-compute{so_version}-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-compute{so_version}_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-dataset-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-dataset-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-dataset-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb @@ -425,6 +428,11 @@ tasks: - arrow-acero-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm {% if not is_rhel7_based %} - arrow[0-9]+-acero-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% endif %} + - arrow[0-9]+-compute-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + - arrow-compute-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% if not is_rhel7_based %} + - arrow[0-9]+-compute-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm {% endif %} - arrow[0-9]+-dataset-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow-dataset-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index 585845d8306..b25ece967c1 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -28,7 +28,8 @@ Compute Functions The generic Compute API ======================= -.. TODO: describe API and how to invoke compute functions +.. seealso:: + :doc:`Compute Functions API reference ` Functions and function registry ------------------------------- @@ -42,6 +43,17 @@ whether the inputs are integral or floating-point). Functions are stored in a global :class:`FunctionRegistry` where they can be looked up by name. 
+Compute Initialization +---------------------- + +The compute library requires a call to :func:`arrow::compute::Initialize` +in order to register the individual functions into the global :class:`FunctionRegistry`, +otherwise only the functions required for Arrow core functionality will be available. + +.. note:: + The set of functions required for Arrow core functionality is an implementation detail + of the library, and should not be considered stable. + Input shapes ------------ diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 048f01ab9f0..4138d2b282f 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -426,6 +426,19 @@ if(PYARROW_BUILD_ACERO) endif() endif() +# Currently PyArrow cannot be built without ARROW_COMPUTE +if(NOT ARROW_COMPUTE) + message(FATAL_ERROR "You must build Arrow C++ with ARROW_COMPUTE=ON") +else() + message(STATUS "Building PyArrow with Compute") + find_package(ArrowCompute REQUIRED) + if(ARROW_BUILD_SHARED) + list(APPEND PYARROW_CPP_LINK_LIBS ArrowCompute::arrow_compute_shared) + else() + list(APPEND PYARROW_CPP_LINK_LIBS ArrowCompute::arrow_compute_static) + endif() +endif() + if(PYARROW_BUILD_PARQUET) message(STATUS "Building PyArrow with Parquet") if(NOT ARROW_PARQUET) @@ -643,12 +656,13 @@ get_filename_component(ARROW_INCLUDE_ARROW_DIR_REAL ${ARROW_INCLUDE_DIR}/arrow R install(DIRECTORY ${ARROW_INCLUDE_ARROW_DIR_REAL} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) if(PYARROW_BUNDLE_ARROW_CPP) - # Arrow + # Arrow and Compute bundle_arrow_lib(${ARROW_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) + bundle_arrow_lib(${ARROW_COMPUTE_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) if(MSVC) - # TODO(kszucs): locate msvcp140.dll in a portable fashion and bundle it bundle_arrow_import_lib(${ARROW_IMPORT_LIB}) + bundle_arrow_import_lib(${ARROW_COMPUTE_IMPORT_LIB}) endif() endif() diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index bf01f94a3e1..2ff126b8b39 100644 --- a/python/pyarrow/_compute.pyx +++ 
b/python/pyarrow/_compute.pyx @@ -40,6 +40,10 @@ except ImportError: import warnings +# Call to initialize the compute module (register kernels) on import +check_status(InitializeCompute()) + + __pas = None _substrait_msg = ( "The pyarrow installation is not built with support for Substrait." diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 2ccadbc6e46..7f01c59950d 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -2228,6 +2228,8 @@ cdef extern from "arrow/util/thread_pool.h" namespace "arrow::internal" nogil: cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil: + CStatus InitializeCompute " arrow::compute::Initialize"() + cdef cppclass CExecBatch "arrow::compute::ExecBatch": vector[CDatum] values int64_t length diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index 901898e5b29..4ed612fc734 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -536,6 +536,10 @@ compute__GetFunctionNames <- function() { .Call(`_arrow_compute__GetFunctionNames`) } +compute__Initialize <- function() { + invisible(.Call(`_arrow_compute__Initialize`)) +} + RegisterScalarUDF <- function(name, func_sexp) { invisible(.Call(`_arrow_RegisterScalarUDF`, name, func_sexp)) } diff --git a/r/configure b/r/configure index e1f0bad3787..5ecd4a761b4 100755 --- a/r/configure +++ b/r/configure @@ -333,6 +333,13 @@ add_feature_flags () { # NOTE: parquet is assumed to have the same -L flag as arrow # so there is no need to add its location to PKG_DIRS fi + if arrow_built_with ARROW_COMPUTE; then + PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES -DARROW_R_WITH_COMPUTE" + PKG_CONFIG_NAMES_FEATURES="$PKG_CONFIG_NAMES_FEATURES arrow-compute" + PKG_LIBS_FEATURES_WITHOUT_PC="-larrow_compute $PKG_LIBS_FEATURES_WITHOUT_PC" + # NOTE: arrow_compute is assumed to have the same -L flag as arrow + # so there is no need to add its location to PKG_DIRS + fi if arrow_built_with ARROW_DATASET; then 
PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES -DARROW_R_WITH_DATASET" PKG_CONFIG_NAMES_FEATURES="$PKG_CONFIG_NAMES_FEATURES arrow-dataset" diff --git a/r/configure.win b/r/configure.win index e0682917e9b..ae175f5622b 100755 --- a/r/configure.win +++ b/r/configure.win @@ -81,12 +81,13 @@ function configure_binaries() { # NOTE: If you make changes to the libraries below, you should also change # ci/scripts/r_windows_build.sh and ci/scripts/PKGBUILD - PKG_CFLAGS="-I${RWINLIB}/include -DARROW_STATIC -DPARQUET_STATIC -DARROW_DS_STATIC \ - -DARROW_ACERO_STATIC -DARROW_R_WITH_PARQUET -DARROW_R_WITH_ACERO \ + PKG_CFLAGS="-I${RWINLIB}/include -DARROW_STATIC -DARROW_COMPUTE_STATIC -DPARQUET_STATIC \ + -DARROW_DS_STATIC -DARROW_ACERO_STATIC -DARROW_R_WITH_PARQUET \ + -DARROW_R_WITH_COMPUTE -DARROW_R_WITH_ACERO \ -DARROW_R_WITH_DATASET -DARROW_R_WITH_JSON" PKG_LIBS="-L${RWINLIB}/lib"'$(subst gcc,,$(COMPILED_BY))$(R_ARCH) ' PKG_LIBS="$PKG_LIBS -L${RWINLIB}/lib"'$(R_ARCH)$(CRT) ' - PKG_LIBS="$PKG_LIBS -larrow_dataset -larrow_acero -lparquet -larrow -larrow_bundled_dependencies \ + PKG_LIBS="$PKG_LIBS -larrow_dataset -larrow_acero -lparquet -larrow_compute -larrow -larrow_bundled_dependencies \ -lutf8proc -lsnappy -lz -lzstd -llz4 -lbz2 ${BROTLI_LIBS} -lole32 \ ${MIMALLOC_LIBS} ${OPENSSL_LIBS}" @@ -160,6 +161,13 @@ add_feature_flags () { # NOTE: parquet is assumed to have the same -L flag as arrow # so there is no need to add its location to PKG_DIRS fi + if arrow_built_with ARROW_COMPUTE; then + PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES -DARROW_R_WITH_COMPUTE" + PKG_CONFIG_NAMES_FEATURES="$PKG_CONFIG_NAMES_FEATURES arrow-compute" + PKG_LIBS_FEATURES_WITHOUT_PC="-larrow_compute $PKG_LIBS_FEATURES_WITHOUT_PC" + # NOTE: arrow_compute is assumed to have the same -L flag as arrow + # so there is no need to add its location to PKG_DIRS + fi if arrow_built_with ARROW_DATASET; then PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES -DARROW_R_WITH_DATASET" 
PKG_CONFIG_NAMES_FEATURES="$PKG_CONFIG_NAMES_FEATURES arrow-dataset" @@ -269,6 +277,11 @@ function configure_dev() { PKG_CONFIG_PACKAGES="$PKG_CONFIG_PACKAGES parquet" fi + if [ $(cmake_option ARROW_COMPUTE) -eq 1 ]; then + PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_COMPUTE" + PKG_CONFIG_PACKAGES="$PKG_CONFIG_PACKAGES arrow-compute" + fi + if [ $(cmake_option ARROW_ACERO) -eq 1 ]; then PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_ACERO" PKG_CONFIG_PACKAGES="$PKG_CONFIG_PACKAGES arrow-acero" diff --git a/r/data-raw/codegen.R b/r/data-raw/codegen.R index 4f027a3d9dd..d211dc412c8 100644 --- a/r/data-raw/codegen.R +++ b/r/data-raw/codegen.R @@ -191,6 +191,7 @@ static const R_CallMethodDef CallEntries[] = { arrow::r::altrep::Init_Altrep_classes(dll); #endif + _arrow_compute__Initialize(); } \n' ) diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index e75d38a303f..c71d1c77305 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -1350,6 +1350,14 @@ BEGIN_CPP11 END_CPP11 } // compute.cpp +void compute__Initialize(); +extern "C" SEXP _arrow_compute__Initialize(){ +BEGIN_CPP11 + compute__Initialize(); + return R_NilValue; +END_CPP11 +} +// compute.cpp void RegisterScalarUDF(std::string name, cpp11::list func_sexp); extern "C" SEXP _arrow_RegisterScalarUDF(SEXP name_sexp, SEXP func_sexp_sexp){ BEGIN_CPP11 @@ -5805,6 +5813,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_Table__cast", (DL_FUNC) &_arrow_Table__cast, 3}, { "_arrow_compute__CallFunction", (DL_FUNC) &_arrow_compute__CallFunction, 3}, { "_arrow_compute__GetFunctionNames", (DL_FUNC) &_arrow_compute__GetFunctionNames, 0}, + { "_arrow_compute__Initialize", (DL_FUNC) &_arrow_compute__Initialize, 0}, { "_arrow_RegisterScalarUDF", (DL_FUNC) &_arrow_RegisterScalarUDF, 2}, { "_arrow_build_info", (DL_FUNC) &_arrow_build_info, 0}, { "_arrow_runtime_info", (DL_FUNC) &_arrow_runtime_info, 0}, @@ -6223,6 +6232,7 @@ extern "C" void R_init_arrow(DllInfo* dll){ 
arrow::r::altrep::Init_Altrep_classes(dll); #endif + _arrow_compute__Initialize(); } diff --git a/r/src/compute.cpp b/r/src/compute.cpp index bd97e30005c..0777ca8bc72 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -621,6 +621,12 @@ std::vector compute__GetFunctionNames() { return arrow::compute::GetFunctionRegistry()->GetFunctionNames(); } +// [[arrow::export]] +void compute__Initialize() { + auto status = arrow::compute::Initialize(); + StopIfNotOk(status); +} + class RScalarUDFKernelState : public arrow::compute::KernelState { public: RScalarUDFKernelState(cpp11::sexp exec_func, cpp11::sexp resolver) diff --git a/ruby/red-arrow/lib/arrow/loader.rb b/ruby/red-arrow/lib/arrow/loader.rb index b56350ddac2..89a219bac32 100644 --- a/ruby/red-arrow/lib/arrow/loader.rb +++ b/ruby/red-arrow/lib/arrow/loader.rb @@ -31,6 +31,7 @@ def post_load(repository, namespace) require_extension_library gc_guard self.class.start_callback_dispatch_thread + @base_module.compute_initialize end def require_libraries