Skip to content

Commit b26ce23

Browse files
juwentus1234meta-codesync[bot]
authored and committed
feat: Add bool AND/OR and bit AND/OR/XOR aggregation feature to TableEvolutionFuzzer (facebookincubator#15528)
Summary: Pull Request resolved: facebookincubator#15528 Add support for bool_and, bool_or aggregation and bitwise_and_agg, bitwise_or_agg, bitwise_xor_agg functions in the TableEvolutionFuzzer's aggregation pushdown testing. ## Changes Extended the aggregation pushdown configuration generator to support boolean aggregation functions and bitwise aggregation functions in addition to the existing numeric aggregation functions: - Split `supportedAggs` into three separate lists: - `supportedNumericAggs`: {min, max} for numeric columns (INTEGER, BIGINT, REAL, DOUBLE, etc.) - `supportedBooleanAggs`: {bool_and, bool_or} for boolean columns - `supportedIntegerAggs`: {bitwise_and_agg, bitwise_or_agg, bitwise_xor_agg} for integer columns - Updated column collection logic to separately track numeric, integer, and boolean columns that are eligible for aggregation; each integer column is randomly assigned to either the min/max list or the bitwise list - Modified aggregation selection to independently try numeric, boolean, and bitwise aggregations, with each candidate aggregate being generated with 50% probability, allowing for more diverse test coverage This change enables more comprehensive testing of aggregation pushdown functionality by including boolean and bitwise aggregate functions, ensuring better coverage of different data types and aggregation scenarios. Reviewed By: Yuhta Differential Revision: D87204100 fbshipit-source-id: 9ae782fb70496884362edfc380da5595e9065284
1 parent 90b3c69 commit b26ce23

File tree

1 file changed

+72
-29
lines changed

1 file changed

+72
-29
lines changed

velox/exec/tests/TableEvolutionFuzzer.cpp

Lines changed: 72 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,43 @@ TableEvolutionFuzzer::parseFileFormats(std::string input) {
187187

188188
namespace {
189189

190+
// Helper function to randomly select aggregates from available columns
191+
// without replacement. Returns a list of aggregate expressions.
192+
void generateAggregatesForColumns(
193+
const std::vector<int>& availableColumns,
194+
const std::vector<std::string>& supportedAggFuncs,
195+
const RowTypePtr& schema,
196+
FuzzerGenerator& rng,
197+
std::vector<std::string>& aggregates) {
198+
if (availableColumns.empty()) {
199+
return;
200+
}
201+
202+
int numAggregates = std::min(
203+
static_cast<int>(availableColumns.size()),
204+
std::min(
205+
static_cast<int>(5),
206+
static_cast<int>(
207+
folly::Random::rand32(1, availableColumns.size() + 1, rng))));
208+
209+
std::unordered_set<int> selectedIndices;
210+
for (int i = 0; i < numAggregates; ++i) {
211+
if (folly::Random::oneIn(2, rng)) {
212+
int randomIdx;
213+
do {
214+
randomIdx = folly::Random::rand32(availableColumns.size(), rng);
215+
} while (selectedIndices.count(randomIdx) > 0);
216+
selectedIndices.insert(randomIdx);
217+
218+
int colIdx = availableColumns[randomIdx];
219+
std::string aggFunc = supportedAggFuncs[folly::Random::rand32(
220+
supportedAggFuncs.size(), rng)];
221+
aggregates.push_back(
222+
fmt::format("{}({})", aggFunc, schema->nameOf(colIdx)));
223+
}
224+
}
225+
}
226+
190227
std::vector<std::vector<RowVectorPtr>> runTaskCursors(
191228
const std::vector<std::shared_ptr<TaskCursor>>& cursors,
192229
folly::Executor& executor) {
@@ -410,7 +447,7 @@ fuzzer::ExpressionFuzzer::FuzzedExpressionData generateRemainingFilters(
410447

411448
// Generate random aggregation configuration for pushdown testing.
412449
// Only generates aggregations that are eligible for pushdown:
413-
// - Supported aggregate functions: min, max, sum
450+
// - Supported aggregate functions: min, max, bool_and, bool_or, bitwise_and_agg, bitwise_or_agg, bitwise_xor_agg
414451
// - Each column can only be used by at most one aggregate
415452
// - Grouping keys are optional (can be empty for global aggregation)
416453
// - Columns with filters (subfield or remaining) are excluded to enable
@@ -421,7 +458,11 @@ std::optional<AggregationConfig> generateAggregationConfig(
421458
const std::unordered_set<std::string>& filteredColumns) {
422459
// List of aggregate functions that support pushdown
423460
// Note: Excluding 'sum' to avoid integer overflow in fuzzer with random data
424-
static const std::vector<std::string> supportedAggs = {"min", "max"};
461+
static const std::vector<std::string> supportedNumericAggs = {"min", "max"};
462+
static const std::vector<std::string> supportedBooleanAggs = {
463+
"bool_and", "bool_or"};
464+
static const std::vector<std::string> supportedIntegerAggs = {
465+
"bitwise_and_agg", "bitwise_or_agg", "bitwise_xor_agg"};
425466

426467
// Randomly decide number of grouping keys (0 to 2)
427468
int numGroupingKeys = folly::Random::rand32(3, rng);
@@ -441,7 +482,9 @@ std::optional<AggregationConfig> generateAggregationConfig(
441482
// For aggregation pushdown to work, each column should only be used once
442483
// and columns with filters should be excluded
443484
std::vector<std::string> aggregates;
444-
std::vector<int> availableColumns;
485+
std::vector<int> availableNumericColumns;
486+
std::vector<int> availableIntegerColumns;
487+
std::vector<int> availableBooleanColumns;
445488
for (int i = 0; i < schema->size(); ++i) {
446489
if (usedColumnIndices.count(i) == 0) {
447490
auto columnName = schema->nameOf(i);
@@ -451,41 +494,41 @@ std::optional<AggregationConfig> generateAggregationConfig(
451494
}
452495

453496
auto type = schema->childAt(i);
454-
// Only numeric types support min/max/sum
455-
// Check if it's a primitive numeric type (not decimal)
497+
// Integer types: randomly choose between min/max or bitwise aggregations
498+
// Note: Exclude DATE type as it doesn't support bitwise aggregations
456499
if ((type->isInteger() || type->isBigint() || type->isSmallint() ||
457-
type->isTinyint() || type->isReal() || type->isDouble()) &&
458-
!type->isDecimal()) {
459-
availableColumns.push_back(i);
500+
type->isTinyint()) &&
501+
!type->isDate()) {
502+
if (folly::Random::oneIn(2, rng)) {
503+
availableIntegerColumns.push_back(i);
504+
} else {
505+
availableNumericColumns.push_back(i);
506+
}
507+
}
508+
// Float types support min/max only
509+
else if ((type->isReal() || type->isDouble()) && !type->isDecimal()) {
510+
availableNumericColumns.push_back(i);
511+
}
512+
// Boolean types support bool_and/bool_or
513+
else if (type->isBoolean()) {
514+
availableBooleanColumns.push_back(i);
460515
}
461516
}
462517
}
463518

464-
if (availableColumns.empty()) {
519+
// Need at least one column to aggregate
520+
if (availableNumericColumns.empty() && availableBooleanColumns.empty() &&
521+
availableIntegerColumns.empty()) {
465522
return std::nullopt;
466523
}
467524

468-
// Randomly select 1-5 aggregates
469-
int numAggregates = std::min(
470-
static_cast<int>(5),
471-
static_cast<int>(
472-
folly::Random::rand32(1, availableColumns.size() + 1, rng)));
473-
474525
// Randomly pick columns for aggregates without replacement
475-
std::unordered_set<int> selectedIndices;
476-
for (int i = 0; i < numAggregates; ++i) {
477-
int randomIdx;
478-
do {
479-
randomIdx = folly::Random::rand32(availableColumns.size(), rng);
480-
} while (selectedIndices.count(randomIdx) > 0);
481-
selectedIndices.insert(randomIdx);
482-
483-
int colIdx = availableColumns[randomIdx];
484-
std::string aggFunc =
485-
supportedAggs[folly::Random::rand32(supportedAggs.size(), rng)];
486-
aggregates.push_back(
487-
fmt::format("{}({})", aggFunc, schema->nameOf(colIdx)));
488-
}
526+
generateAggregatesForColumns(
527+
availableNumericColumns, supportedNumericAggs, schema, rng, aggregates);
528+
generateAggregatesForColumns(
529+
availableBooleanColumns, supportedBooleanAggs, schema, rng, aggregates);
530+
generateAggregatesForColumns(
531+
availableIntegerColumns, supportedIntegerAggs, schema, rng, aggregates);
489532

490533
if (aggregates.empty()) {
491534
return std::nullopt;

0 commit comments

Comments
 (0)