diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 065fb48ae74..1eb29d035a0 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -29,6 +29,8 @@ #include #include #include +#include +#include template typename ArrowType::c_type @@ -1514,6 +1516,174 @@ garrow_array_sort_to_indices(GArrowArray *array, } } +/** + * garrow_table_filter: + * @table: A #GArrowTable. + * @filter: The selection mask: true keeps a value, false drops it. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The #GArrowTable filtered + * with a boolean selection filter. Nulls in the filter will + * result in nulls in the output. + * + * Since: 1.0.0 + */ +GArrowTable * +garrow_table_filter(GArrowTable *table, + GArrowBooleanArray *filter, + GError **error) +{ + auto arrow_table = garrow_table_get_raw(table); + auto arrow_filter = garrow_array_get_raw(GARROW_ARRAY(filter)); + auto memory_pool = arrow::default_memory_pool(); + arrow::compute::FunctionContext context(memory_pool); + std::shared_ptr arrow_filtered_table; + auto status = arrow::compute::Filter(&context, + *arrow_table, + *arrow_filter, + &arrow_filtered_table); + if (garrow_error_check(error, status, "[table][filter]")) { + return garrow_table_new_raw(&arrow_filtered_table); + } else { + return NULL; + } +} + +/** + * garrow_table_filter_chunked_array: + * @table: A #GArrowTable. + * @filter: The selection mask: true keeps a value, false drops it. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The #GArrowTable filtered + * with a chunked array filter. Nulls in the filter will + * result in nulls in the output. 
+ * + * Since: 1.0.0 + */ +GArrowTable * +garrow_table_filter_chunked_array(GArrowTable *table, + GArrowChunkedArray *filter, + GError **error) +{ + auto arrow_table = garrow_table_get_raw(table); + auto arrow_filter = garrow_chunked_array_get_raw(filter); + auto memory_pool = arrow::default_memory_pool(); + arrow::compute::FunctionContext context(memory_pool); + std::shared_ptr arrow_filtered_table; + auto status = arrow::compute::Filter(&context, + *arrow_table, + *arrow_filter, + &arrow_filtered_table); + if (garrow_error_check(error, status, "[table][filter-chunked-array]")) { + return garrow_table_new_raw(&arrow_filtered_table); + } else { + return NULL; + } +} + +/** + * garrow_chunked_array_filter: + * @chunked_array: A #GArrowChunkedArray. + * @filter: The selection mask: true keeps a value, false drops it. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The #GArrowChunkedArray filtered + * with a boolean selection filter. Nulls in the filter will + * result in nulls in the output. + * + * Since: 1.0.0 + */ +GArrowChunkedArray * +garrow_chunked_array_filter(GArrowChunkedArray *chunked_array, + GArrowBooleanArray *filter, + GError **error) +{ + auto arrow_chunked_array = + garrow_chunked_array_get_raw(chunked_array); + auto arrow_filter = garrow_array_get_raw(GARROW_ARRAY(filter)); + auto memory_pool = arrow::default_memory_pool(); + arrow::compute::FunctionContext context(memory_pool); + std::shared_ptr arrow_filtered_chunked_array; + auto status = arrow::compute::Filter(&context, + *arrow_chunked_array, + *arrow_filter, + &arrow_filtered_chunked_array); + if (garrow_error_check(error, status, "[chunked-array][filter]")) { + return garrow_chunked_array_new_raw(&arrow_filtered_chunked_array); + } else { + return NULL; + } +} + +/** + * garrow_chunked_array_filter_chunked_array: + * @chunked_array: A #GArrowChunkedArray. + * @filter: The selection mask: true keeps a value, false drops it. 
+ * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The #GArrowChunkedArray filtered + * with a chunked array filter. Nulls in the filter will + * result in nulls in the output. + * + * Since: 1.0.0 + */ +GArrowChunkedArray * +garrow_chunked_array_filter_chunked_array(GArrowChunkedArray *chunked_array, + GArrowChunkedArray *filter, + GError **error) +{ + auto arrow_chunked_array = + garrow_chunked_array_get_raw(chunked_array); + auto arrow_filter = garrow_chunked_array_get_raw(filter); + auto memory_pool = arrow::default_memory_pool(); + arrow::compute::FunctionContext context(memory_pool); + std::shared_ptr arrow_filtered_chunked_array; + auto status = arrow::compute::Filter(&context, + *arrow_chunked_array, + *arrow_filter, + &arrow_filtered_chunked_array); + if (garrow_error_check(error, status, "[chunked-array][filter-chunked-array]")) { + return garrow_chunked_array_new_raw(&arrow_filtered_chunked_array); + } else { + return NULL; + } +} + +/** + * garrow_record_batch_filter: + * @record_batch: A #GArrowRecordBatch. + * @filter: The selection mask: true keeps a value, false drops it. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): The #GArrowRecordBatch filtered + * with a boolean selection filter. Nulls in the filter will + * result in nulls in the output. 
+ * + * Since: 1.0.0 + */ +GArrowRecordBatch * +garrow_record_batch_filter(GArrowRecordBatch *record_batch, + GArrowBooleanArray *filter, + GError **error) +{ + auto arrow_record_batch = + garrow_record_batch_get_raw(record_batch); + auto arrow_filter = garrow_array_get_raw(GARROW_ARRAY(filter)); + auto memory_pool = arrow::default_memory_pool(); + arrow::compute::FunctionContext context(memory_pool); + std::shared_ptr arrow_filtered_record_batch; + auto status = arrow::compute::Filter(&context, + *arrow_record_batch, + *arrow_filter, + &arrow_filtered_record_batch); + if (garrow_error_check(error, status, "[record-batch][filter]")) { + return garrow_record_batch_new_raw(&arrow_filtered_record_batch); + } else { + return NULL; + } +} + G_END_DECLS GArrowCastOptions * diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index 762904511f5..4af1157a427 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -21,6 +21,8 @@ #include #include +#include +#include G_BEGIN_DECLS @@ -268,5 +270,30 @@ GARROW_AVAILABLE_IN_0_15 GArrowUInt64Array * garrow_array_sort_to_indices(GArrowArray *array, GError **error); +GARROW_AVAILABLE_IN_1_0 +GArrowTable * +garrow_table_filter(GArrowTable *table, + GArrowBooleanArray *filter, + GError **error); +GARROW_AVAILABLE_IN_1_0 +GArrowTable * +garrow_table_filter_chunked_array(GArrowTable *table, + GArrowChunkedArray *filter, + GError **error); +GARROW_AVAILABLE_IN_1_0 +GArrowChunkedArray * +garrow_chunked_array_filter(GArrowChunkedArray *chunked_array, + GArrowBooleanArray *filter, + GError **error); +GARROW_AVAILABLE_IN_1_0 +GArrowChunkedArray * +garrow_chunked_array_filter_chunked_array(GArrowChunkedArray *chunked_array, + GArrowChunkedArray *filter, + GError **error); +GARROW_AVAILABLE_IN_1_0 +GArrowRecordBatch * +garrow_record_batch_filter(GArrowRecordBatch *record_batch, + GArrowBooleanArray *filter, + GError **error); G_END_DECLS diff --git a/c_glib/test/test-filter.rb 
b/c_glib/test/test-filter.rb index e5c07cf87aa..b099847e1fd 100644 --- a/c_glib/test/test-filter.rb +++ b/c_glib/test/test-filter.rb @@ -18,16 +18,142 @@ class TestFilter < Test::Unit::TestCase include Helper::Buildable - def test_filter - filter = build_boolean_array([false, true, true, nil]) - assert_equal(build_int16_array([1, 0, nil]), - build_int16_array([0, 1, 0, 2]).filter(filter)) + sub_test_case("Array") do + def test_filter + filter = build_boolean_array([false, true, true, nil]) + assert_equal(build_int16_array([1, 0, nil]), + build_int16_array([0, 1, 0, 2]).filter(filter)) + end + + def test_invalid_array_length + filter = build_boolean_array([false, true, true, false]) + assert_raise(Arrow::Error::Invalid) do + build_int16_array([0, 1, 0]).filter(filter) + end + end end - def test_invalid_array_length - filter = build_boolean_array([false, true, true, false]) - assert_raise(Arrow::Error::Invalid) do - build_int16_array([0, 1, 0]).filter(filter) + sub_test_case("Table") do + def setup + fields = [ + Arrow::Field.new("visible", Arrow::BooleanDataType.new), + Arrow::Field.new("valid", Arrow::BooleanDataType.new), + ] + @schema = Arrow::Schema.new(fields) + arrays = [ + build_boolean_array([true, false, true]), + build_boolean_array([false, true, true]), + ] + @table = Arrow::Table.new(@schema, arrays) + end + + def test_filter + filter = build_boolean_array([false, true, nil]) + arrays = [ + build_boolean_array([false, nil]), + build_boolean_array([true, nil]), + ] + filtered_table = Arrow::Table.new(@schema, arrays) + assert_equal(filtered_table, + @table.filter(filter)) + end + + def test_filter_chunked_array + chunks = [ + build_boolean_array([false]), + build_boolean_array([true, nil]), + ] + filter = Arrow::ChunkedArray.new(chunks) + arrays = [ + build_boolean_array([false, nil]), + build_boolean_array([true, nil]), + ] + filtered_table = Arrow::Table.new(@schema, arrays) + assert_equal(filtered_table, + @table.filter_chunked_array(filter)) + end + + 
def test_invalid_array_length + filter = build_boolean_array([false, true, true, false]) + assert_raise(Arrow::Error::Invalid) do + @table.filter(filter) + end + end + end + + sub_test_case("ChunkedArray") do + def setup + chunks = [ + build_boolean_array([true, false]), + build_boolean_array([true]), + ] + @chunked_array = Arrow::ChunkedArray.new(chunks) + end + + def test_filter + filter = build_boolean_array([false, true, nil]) + chunks = [ + build_boolean_array([false]), + build_boolean_array([nil]), + ] + filtered_chunked_array = Arrow::ChunkedArray.new(chunks) + assert_equal(filtered_chunked_array, + @chunked_array.filter(filter)) + end + + def test_filter_chunked_array + chunks = [ + build_boolean_array([false]), + build_boolean_array([true, nil]), + ] + filter = Arrow::ChunkedArray.new(chunks) + filtered_chunks = [ + build_boolean_array([false]), + build_boolean_array([nil]), + ] + filtered_chunked_array = Arrow::ChunkedArray.new(filtered_chunks) + assert_equal(filtered_chunked_array, + @chunked_array.filter_chunked_array(filter)) + end + + def test_invalid_array_length + filter = build_boolean_array([false, true, true, false]) + assert_raise(Arrow::Error::Invalid) do + @chunked_array.filter(filter) + end + end + end + + sub_test_case("RecordBatch") do + def setup + fields = [ + Arrow::Field.new("visible", Arrow::BooleanDataType.new), + Arrow::Field.new("valid", Arrow::BooleanDataType.new), + ] + @schema = Arrow::Schema.new(fields) + columns = [ + build_boolean_array([true, false, true]), + build_boolean_array([false, true, false]), + ] + @record_batch = Arrow::RecordBatch.new(@schema, 3, columns) + end + + def test_filter + filter = build_boolean_array([false, true, nil]) + columns = [ + build_boolean_array([false, nil]), + build_boolean_array([true, nil]), + ] + filtered_record_batch = Arrow::RecordBatch.new(@schema, 2, columns) + assert_equal(filtered_record_batch, + @record_batch.filter(filter)) + end + + def test_invalid_array_length + filter = 
build_boolean_array([false, true, true, false]) + assert_raise(Arrow::Error::Invalid) do + @record_batch.filter(filter) + end end end end