Skip to content

Commit e951481

Browse files
yingsu00 authored and PingLiuPing committed
refactor: Move toValues from InPredicate.cpp to Filter.h
The function toValues removes duplicated values from the vector and returns them in a std::vector. It was used to build an InPredicate. It will also be needed for building NOT IN filters for Iceberg equality-delete reads, so this commit moves it from velox/functions/prestosql/InPredicate.cpp to velox/type/Filter.h. This commit also renames it to deDuplicateValues to make it easier to understand.
1 parent a417b2b commit e951481

File tree

2 files changed

+42
-39
lines changed

2 files changed

+42
-39
lines changed

velox/functions/prestosql/InPredicate.cpp

Lines changed: 7 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -139,40 +139,6 @@ class VectorSetInPredicate : public exec::VectorFunction {
139139
const VectorPtr originalValues_;
140140
};
141141

142-
// Read 'size' values from 'valuesVector' starting at 'offset', de-duplicate
143-
// remove nulls and sort. Return a list of unique non-null values sorted in
144-
// ascending order and a boolean indicating whether there were any null values.
145-
template <typename T, typename U = T>
146-
std::pair<std::vector<T>, bool> toValues(
147-
const VectorPtr& valuesVector,
148-
vector_size_t offset,
149-
vector_size_t size) {
150-
auto simpleValues = valuesVector->as<SimpleVector<U>>();
151-
152-
bool nullAllowed = false;
153-
std::vector<T> values;
154-
values.reserve(size);
155-
156-
for (auto i = offset; i < offset + size; i++) {
157-
if (simpleValues->isNullAt(i)) {
158-
nullAllowed = true;
159-
} else {
160-
if constexpr (std::is_same_v<U, Timestamp>) {
161-
values.emplace_back(simpleValues->valueAt(i).toMillis());
162-
} else {
163-
values.emplace_back(simpleValues->valueAt(i));
164-
}
165-
}
166-
}
167-
168-
// In-place sort, remove duplicates, and later std::move to save memory
169-
std::sort(values.begin(), values.end());
170-
auto last = std::unique(values.begin(), values.end());
171-
values.resize(std::distance(values.begin(), last));
172-
173-
return {std::move(values), nullAllowed};
174-
}
175-
176142
// Creates a filter for constant values. A null filter means either
177143
// no values or only null values. The boolean is true if the list is
178144
// non-empty and consists of nulls only.
@@ -181,7 +147,8 @@ std::pair<std::unique_ptr<common::Filter>, bool> createBigintValuesFilter(
181147
const VectorPtr& valuesVector,
182148
vector_size_t offset,
183149
vector_size_t size) {
184-
auto valuesPair = toValues<int64_t, T>(valuesVector, offset, size);
150+
auto valuesPair =
151+
common::deDuplicateValues<int64_t, T>(valuesVector, offset, size);
185152

186153
const auto& values = valuesPair.first;
187154
bool nullAllowed = valuesPair.second;
@@ -210,7 +177,7 @@ createFloatingPointValuesFilter(
210177
const VectorPtr& valuesVector,
211178
vector_size_t offset,
212179
vector_size_t size) {
213-
auto valuesPair = toValues<T, T>(valuesVector, offset, size);
180+
auto valuesPair = common::deDuplicateValues<T, T>(valuesVector, offset, size);
214181

215182
auto& values = valuesPair.first;
216183
bool nullAllowed = valuesPair.second;
@@ -252,7 +219,8 @@ std::pair<std::unique_ptr<common::Filter>, bool> createHugeintValuesFilter(
252219
const VectorPtr& valuesVector,
253220
vector_size_t offset,
254221
vector_size_t size) {
255-
auto valuesPair = toValues<int128_t, T>(valuesVector, offset, size);
222+
auto valuesPair =
223+
common::deDuplicateValues<int128_t, T>(valuesVector, offset, size);
256224

257225
const auto& values = valuesPair.first;
258226
bool nullAllowed = valuesPair.second;
@@ -278,8 +246,8 @@ std::pair<std::unique_ptr<common::Filter>, bool> createBytesValuesFilter(
278246
const VectorPtr& valuesVector,
279247
vector_size_t offset,
280248
vector_size_t size) {
281-
auto valuesPair =
282-
toValues<std::string, StringView>(valuesVector, offset, size);
249+
auto valuesPair = common::deDuplicateValues<std::string, StringView>(
250+
valuesVector, offset, size);
283251

284252
const auto& values = valuesPair.first;
285253
bool nullAllowed = valuesPair.second;

velox/type/Filter.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "velox/type/StringView.h"
3232
#include "velox/type/Subfield.h"
3333
#include "velox/type/Type.h"
34+
#include "velox/vector/BaseVector.h"
3435

3536
namespace facebook::velox::common {
3637

@@ -2259,6 +2260,40 @@ static inline bool applyFilter(TFilter& filter, StringView value) {
22592260
return filter.testBytes(value.data(), value.size());
22602261
}
22612262

2263+
// Read 'size' values from 'valuesVector' starting at 'offset', de-duplicate
2264+
// remove nulls and sort. Return a list of unique non-null values sorted in
2265+
// ascending order and a boolean indicating whether there were any null values.
2266+
template <typename T, typename U>
2267+
std::pair<std::vector<T>, bool> deDuplicateValues(
2268+
const VectorPtr& valuesVector,
2269+
vector_size_t offset,
2270+
vector_size_t size) {
2271+
auto simpleValues = valuesVector->as<SimpleVector<U>>();
2272+
2273+
bool hasNull = false;
2274+
std::vector<T> values;
2275+
values.reserve(size);
2276+
2277+
for (auto i = offset; i < offset + size; i++) {
2278+
if (simpleValues->isNullAt(i)) {
2279+
hasNull = true;
2280+
} else {
2281+
if constexpr (std::is_same_v<U, Timestamp>) {
2282+
values.emplace_back(simpleValues->valueAt(i).toMillis());
2283+
} else {
2284+
values.emplace_back(simpleValues->valueAt(i));
2285+
}
2286+
}
2287+
}
2288+
2289+
// In-place sort, remove duplicates, and later std::move to save memory
2290+
std::sort(values.begin(), values.end());
2291+
auto last = std::unique(values.begin(), values.end());
2292+
values.resize(std::distance(values.begin(), last));
2293+
2294+
return {std::move(values), hasNull};
2295+
}
2296+
22622297
// Creates a hash or bitmap based IN filter depending on value distribution.
22632298
std::unique_ptr<Filter> createBigintValues(
22642299
const std::vector<int64_t>& values,

0 commit comments

Comments
 (0)