Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
170 changes: 170 additions & 0 deletions c_glib/arrow-glib/compute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
#include <arrow-glib/data-type.hpp>
#include <arrow-glib/enums.h>
#include <arrow-glib/error.hpp>
#include <arrow-glib/record-batch.hpp>
#include <arrow-glib/table.hpp>

template <typename ArrowType, typename GArrowArrayType>
typename ArrowType::c_type
Expand Down Expand Up @@ -1514,6 +1516,174 @@ garrow_array_sort_to_indices(GArrowArray *array,
}
}

/**
 * garrow_table_filter:
 * @table: A #GArrowTable.
 * @filter: A #GArrowBooleanArray used as the selection filter.
 * @error: (nullable): Return location for a #GError or %NULL.
 *
 * Applies a boolean selection filter to @table.
 *
 * Returns: (nullable) (transfer full): The #GArrowTable filtered
 *   with a boolean selection filter, or %NULL on error. Nulls in
 *   the filter will result in nulls in the output.
 *
 * Since: 1.0.0
 */
GArrowTable *
garrow_table_filter(GArrowTable *table,
                    GArrowBooleanArray *filter,
                    GError **error)
{
  auto arrow_input = garrow_table_get_raw(table);
  auto arrow_mask = garrow_array_get_raw(GARROW_ARRAY(filter));
  arrow::compute::FunctionContext context(arrow::default_memory_pool());
  std::shared_ptr<arrow::Table> arrow_result;
  auto status = arrow::compute::Filter(&context,
                                       *arrow_input,
                                       *arrow_mask,
                                       &arrow_result);
  /* Report the failure through @error and bail out before wrapping. */
  if (!garrow_error_check(error, status, "[table][filter]")) {
    return NULL;
  }
  return garrow_table_new_raw(&arrow_result);
}

/**
 * garrow_table_filter_chunked_array:
 * @table: A #GArrowTable.
 * @filter: A #GArrowChunkedArray used as the selection filter.
 * @error: (nullable): Return location for a #GError or %NULL.
 *
 * Applies a chunked array filter to @table.
 *
 * Returns: (nullable) (transfer full): The #GArrowTable filtered
 *   with a chunked array filter, or %NULL on error. Nulls in the
 *   filter will result in nulls in the output.
 *
 * Since: 1.0.0
 */
GArrowTable *
garrow_table_filter_chunked_array(GArrowTable *table,
                                  GArrowChunkedArray *filter,
                                  GError **error)
{
  auto arrow_input = garrow_table_get_raw(table);
  auto arrow_mask = garrow_chunked_array_get_raw(filter);
  arrow::compute::FunctionContext context(arrow::default_memory_pool());
  std::shared_ptr<arrow::Table> arrow_result;
  auto status = arrow::compute::Filter(&context,
                                       *arrow_input,
                                       *arrow_mask,
                                       &arrow_result);
  /* Report the failure through @error and bail out before wrapping. */
  if (!garrow_error_check(error, status, "[table][filter-chunked-array]")) {
    return NULL;
  }
  return garrow_table_new_raw(&arrow_result);
}

/**
 * garrow_chunked_array_filter:
 * @chunked_array: A #GArrowChunkedArray.
 * @filter: A #GArrowBooleanArray used as the selection filter.
 * @error: (nullable): Return location for a #GError or %NULL.
 *
 * Applies a boolean selection filter to @chunked_array.
 *
 * Returns: (nullable) (transfer full): The #GArrowChunkedArray filtered
 *   with a boolean selection filter, or %NULL on error. Nulls in the
 *   filter will result in nulls in the output.
 *
 * Since: 1.0.0
 */
GArrowChunkedArray *
garrow_chunked_array_filter(GArrowChunkedArray *chunked_array,
                            GArrowBooleanArray *filter,
                            GError **error)
{
  auto arrow_input = garrow_chunked_array_get_raw(chunked_array);
  auto arrow_mask = garrow_array_get_raw(GARROW_ARRAY(filter));
  arrow::compute::FunctionContext context(arrow::default_memory_pool());
  std::shared_ptr<arrow::ChunkedArray> arrow_result;
  auto status = arrow::compute::Filter(&context,
                                       *arrow_input,
                                       *arrow_mask,
                                       &arrow_result);
  /* Report the failure through @error and bail out before wrapping. */
  if (!garrow_error_check(error, status, "[chunked-array][filter]")) {
    return NULL;
  }
  return garrow_chunked_array_new_raw(&arrow_result);
}

/**
 * garrow_chunked_array_filter_chunked_array:
 * @chunked_array: A #GArrowChunkedArray.
 * @filter: A #GArrowChunkedArray used as the selection filter.
 * @error: (nullable): Return location for a #GError or %NULL.
 *
 * Applies a chunked array filter to @chunked_array.
 *
 * Returns: (nullable) (transfer full): The #GArrowChunkedArray filtered
 *   with a chunked array filter, or %NULL on error. Nulls in the
 *   filter will result in nulls in the output.
 *
 * Since: 1.0.0
 */
GArrowChunkedArray *
garrow_chunked_array_filter_chunked_array(GArrowChunkedArray *chunked_array,
                                          GArrowChunkedArray *filter,
                                          GError **error)
{
  auto arrow_input = garrow_chunked_array_get_raw(chunked_array);
  auto arrow_mask = garrow_chunked_array_get_raw(filter);
  arrow::compute::FunctionContext context(arrow::default_memory_pool());
  std::shared_ptr<arrow::ChunkedArray> arrow_result;
  auto status = arrow::compute::Filter(&context,
                                       *arrow_input,
                                       *arrow_mask,
                                       &arrow_result);
  /* Report the failure through @error and bail out before wrapping. */
  if (!garrow_error_check(error,
                          status,
                          "[chunked-array][filter-chunked-array]")) {
    return NULL;
  }
  return garrow_chunked_array_new_raw(&arrow_result);
}

/**
 * garrow_record_batch_filter:
 * @record_batch: A #GArrowRecordBatch.
 * @filter: A #GArrowBooleanArray used as the selection filter.
 * @error: (nullable): Return location for a #GError or %NULL.
 *
 * Applies a boolean selection filter to @record_batch.
 *
 * Returns: (nullable) (transfer full): The #GArrowRecordBatch filtered
 *   with a boolean selection filter, or %NULL on error. Nulls in the
 *   filter will result in nulls in the output.
 *
 * Since: 1.0.0
 */
GArrowRecordBatch *
garrow_record_batch_filter(GArrowRecordBatch *record_batch,
                           GArrowBooleanArray *filter,
                           GError **error)
{
  auto arrow_input = garrow_record_batch_get_raw(record_batch);
  auto arrow_mask = garrow_array_get_raw(GARROW_ARRAY(filter));
  arrow::compute::FunctionContext context(arrow::default_memory_pool());
  std::shared_ptr<arrow::RecordBatch> arrow_result;
  auto status = arrow::compute::Filter(&context,
                                       *arrow_input,
                                       *arrow_mask,
                                       &arrow_result);
  /* Report the failure through @error and bail out before wrapping. */
  if (!garrow_error_check(error, status, "[record-batch][filter]")) {
    return NULL;
  }
  return garrow_record_batch_new_raw(&arrow_result);
}

G_END_DECLS

GArrowCastOptions *
Expand Down
27 changes: 27 additions & 0 deletions c_glib/arrow-glib/compute.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@

#include <arrow-glib/array.h>
#include <arrow-glib/chunked-array.h>
#include <arrow-glib/record-batch.h>
#include <arrow-glib/table.h>

G_BEGIN_DECLS

Expand Down Expand Up @@ -268,5 +270,30 @@ GARROW_AVAILABLE_IN_0_15
GArrowUInt64Array *
garrow_array_sort_to_indices(GArrowArray *array,
GError **error);
/* Filters a table with a boolean array used as the selection filter. */
GARROW_AVAILABLE_IN_1_0
GArrowTable *
garrow_table_filter(GArrowTable *table,
GArrowBooleanArray *filter,
GError **error);
/* Filters a table with a chunked array used as the selection filter. */
GARROW_AVAILABLE_IN_1_0
GArrowTable *
garrow_table_filter_chunked_array(GArrowTable *table,
GArrowChunkedArray *filter,
GError **error);
/* Filters a chunked array with a boolean array used as the selection filter. */
GARROW_AVAILABLE_IN_1_0
GArrowChunkedArray *
garrow_chunked_array_filter(GArrowChunkedArray *chunked_array,
GArrowBooleanArray *filter,
GError **error);
/* Filters a chunked array with a chunked array used as the selection filter. */
GARROW_AVAILABLE_IN_1_0
GArrowChunkedArray *
garrow_chunked_array_filter_chunked_array(GArrowChunkedArray *chunked_array,
GArrowChunkedArray *filter,
GError **error);
/* Filters a record batch with a boolean array used as the selection filter. */
GARROW_AVAILABLE_IN_1_0
GArrowRecordBatch *
garrow_record_batch_filter(GArrowRecordBatch *record_batch,
GArrowBooleanArray *filter,
GError **error);

G_END_DECLS
142 changes: 134 additions & 8 deletions c_glib/test/test-filter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,142 @@
class TestFilter < Test::Unit::TestCase
include Helper::Buildable

def test_filter
filter = build_boolean_array([false, true, true, nil])
assert_equal(build_int16_array([1, 0, nil]),
build_int16_array([0, 1, 0, 2]).filter(filter))
sub_test_case("Array") do
  # The filter [false, true, true, nil] applied to [0, 1, 0, 2] is
  # expected to produce [1, 0, nil].
  def test_filter
    mask = build_boolean_array([false, true, true, nil])
    expected = build_int16_array([1, 0, nil])
    values = build_int16_array([0, 1, 0, 2])
    assert_equal(expected, values.filter(mask))
  end

  # A four-slot filter applied to a three-value array must raise
  # Arrow::Error::Invalid.
  def test_invalid_array_length
    mask = build_boolean_array([false, true, true, false])
    assert_raise(Arrow::Error::Invalid) do
      build_int16_array([0, 1, 0]).filter(mask)
    end
  end
end

def test_invalid_array_length
filter = build_boolean_array([false, true, true, false])
assert_raise(Arrow::Error::Invalid) do
build_int16_array([0, 1, 0]).filter(filter)
sub_test_case("Table") do
  # Builds a three-row table with two boolean columns, "visible" and "valid".
  def setup
    fields = ["visible", "valid"].map do |name|
      Arrow::Field.new(name, Arrow::BooleanDataType.new)
    end
    @schema = Arrow::Schema.new(fields)
    columns = [
      [true, false, true],
      [false, true, true],
    ].map do |values|
      build_boolean_array(values)
    end
    @table = Arrow::Table.new(@schema, columns)
  end

  # Filtering with a boolean array: the second row is expected to be
  # kept and the null slot to produce a null row.
  def test_filter
    mask = build_boolean_array([false, true, nil])
    expected_columns = [
      build_boolean_array([false, nil]),
      build_boolean_array([true, nil]),
    ]
    expected = Arrow::Table.new(@schema, expected_columns)
    assert_equal(expected, @table.filter(mask))
  end

  # Same expectation as test_filter, with the filter split across two chunks.
  def test_filter_chunked_array
    mask_chunks = [
      build_boolean_array([false]),
      build_boolean_array([true, nil]),
    ]
    mask = Arrow::ChunkedArray.new(mask_chunks)
    expected_columns = [
      build_boolean_array([false, nil]),
      build_boolean_array([true, nil]),
    ]
    expected = Arrow::Table.new(@schema, expected_columns)
    assert_equal(expected, @table.filter_chunked_array(mask))
  end

  # A four-slot filter applied to the three-row table must raise
  # Arrow::Error::Invalid.
  def test_invalid_array_length
    mask = build_boolean_array([false, true, true, false])
    assert_raise(Arrow::Error::Invalid) do
      @table.filter(mask)
    end
  end
end

sub_test_case("ChunkedArray") do
  # Builds a boolean chunked array with chunks [true, false] and [true].
  def setup
    source_chunks = [
      build_boolean_array([true, false]),
      build_boolean_array([true]),
    ]
    @chunked_array = Arrow::ChunkedArray.new(source_chunks)
  end

  # Filtering with a boolean array: the second value is expected to be
  # kept and the null slot to produce a null.
  def test_filter
    mask = build_boolean_array([false, true, nil])
    expected_chunks = [
      build_boolean_array([false]),
      build_boolean_array([nil]),
    ]
    expected = Arrow::ChunkedArray.new(expected_chunks)
    assert_equal(expected, @chunked_array.filter(mask))
  end

  # Same expectation as test_filter, with the filter split across two chunks.
  def test_filter_chunked_array
    mask_chunks = [
      build_boolean_array([false]),
      build_boolean_array([true, nil]),
    ]
    mask = Arrow::ChunkedArray.new(mask_chunks)
    expected_chunks = [
      build_boolean_array([false]),
      build_boolean_array([nil]),
    ]
    expected = Arrow::ChunkedArray.new(expected_chunks)
    assert_equal(expected, @chunked_array.filter_chunked_array(mask))
  end

  # A four-slot filter applied to the three-value chunked array must
  # raise Arrow::Error::Invalid.
  def test_invalid_array_length
    mask = build_boolean_array([false, true, true, false])
    assert_raise(Arrow::Error::Invalid) do
      @chunked_array.filter(mask)
    end
  end
end

sub_test_case("RecordBatch") do
  # Builds a three-row record batch with two boolean columns,
  # "visible" and "valid".
  def setup
    fields = ["visible", "valid"].map do |name|
      Arrow::Field.new(name, Arrow::BooleanDataType.new)
    end
    @schema = Arrow::Schema.new(fields)
    columns = [
      [true, false, true],
      [false, true, false],
    ].map do |values|
      build_boolean_array(values)
    end
    @record_batch = Arrow::RecordBatch.new(@schema, 3, columns)
  end

  # Filtering with a boolean array: the second row is expected to be
  # kept and the null slot to produce a null row.
  def test_filter
    mask = build_boolean_array([false, true, nil])
    expected_columns = [
      build_boolean_array([false, nil]),
      build_boolean_array([true, nil]),
    ]
    expected = Arrow::RecordBatch.new(@schema, 2, expected_columns)
    assert_equal(expected, @record_batch.filter(mask))
  end

  # A four-slot filter applied to the three-row record batch must raise
  # Arrow::Error::Invalid.
  def test_invalid_array_length
    mask = build_boolean_array([false, true, true, false])
    assert_raise(Arrow::Error::Invalid) do
      @record_batch.filter(mask)
    end
  end
end
end