Skip to content

Commit c2aa59f

Browse files
authored
Merge branch 'main' into date_format
2 parents 1cca2ab + a236cf5 commit c2aa59f

File tree

28 files changed

+859
-300
lines changed

28 files changed

+859
-300
lines changed

.github/workflows/linux-build-base.yml

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,36 @@ jobs:
225225
shell: bash
226226
working-directory: velox
227227
steps:
228+
- uses: actions/checkout@v5
229+
with:
230+
fetch-depth: 2
231+
path: velox
232+
persist-credentials: false
233+
234+
- name: Fix git permissions
235+
# Usually actions/checkout does this but as we run in a container
236+
# it doesn't work
237+
run: git config --global --add safe.directory ${GITHUB_WORKSPACE}
238+
239+
- name: Install Dependencies
240+
env:
241+
VELOX_BUILD_SHARED: "ON"
242+
VELOX_ARROW_CMAKE_PATCH: ${{ github.workspace }}/velox/CMake/resolve_dependency_modules/arrow/cmake-compatibility.patch
243+
run: |
244+
if git diff --name-only HEAD^1 HEAD | grep -q "scripts/setup-"; then
245+
# Overwrite old setup scripts with changed versions
246+
cp scripts/setup-* /
247+
248+
mkdir /tmp/build
249+
cd /tmp/build
250+
251+
# Install basic deps with GCC.
252+
USE_CLANG=false bash /setup-fedora.sh
253+
254+
cd /
255+
rm -rf /tmp/build # cleanup to avoid issues with disk space
256+
fi
257+
228258
- name: Get Ccache Stash
229259
uses: apache/infrastructure-actions/stash/restore@3354c1565d4b0e335b78a76aedd82153a9e144d4
230260
with:
@@ -236,11 +266,6 @@ jobs:
236266
run: |
237267
mkdir -p "$CCACHE_DIR"
238268
239-
- uses: actions/checkout@v5
240-
with:
241-
path: velox
242-
persist-credentials: false
243-
244269
- name: Clear CCache Statistics
245270
run: |
246271
ccache -sz

velox/common/Casts.h

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,73 @@ void ensureCastSucceeded(To* casted, From* original) {
4040

4141
} // namespace detail
4242

43-
// `checked_pointer_cast` is a dynamic casting tool to throw a Velox exception
43+
// `checkedPointerCast` is a dynamic casting tool to throw a Velox exception
4444
// when the casting failed. Use this instead of `std::dynamic_pointer_cast`
4545
// when:
4646
// 1) Casting must happen
4747
// 2) We want a stack trace if it failed.
4848
template <typename To, typename From>
49+
std::shared_ptr<To> checkedPointerCast(const std::shared_ptr<From>& input) {
50+
VELOX_CHECK_NOT_NULL(input.get());
51+
auto casted = std::dynamic_pointer_cast<To>(input);
52+
detail::ensureCastSucceeded(casted.get(), input.get());
53+
return casted;
54+
}
55+
56+
template <typename To, typename From>
57+
std::unique_ptr<To> checkedPointerCast(std::unique_ptr<From> input) {
58+
VELOX_CHECK_NOT_NULL(input.get());
59+
auto* released = input.release();
60+
To* casted{nullptr};
61+
try {
62+
casted = dynamic_cast<To*>(released);
63+
detail::ensureCastSucceeded(casted, released);
64+
} catch (...) {
65+
input.reset(released);
66+
throw;
67+
}
68+
return std::unique_ptr<To>(casted);
69+
}
70+
71+
template <typename To, typename From>
72+
To* checkedPointerCast(From* input) {
73+
VELOX_CHECK_NOT_NULL(input);
74+
auto* casted = dynamic_cast<To*>(input);
75+
detail::ensureCastSucceeded(casted, input);
76+
return casted;
77+
}
78+
79+
// Transfers ownership from unique_ptr<From> to unique_ptr<To> using
// static_cast. No runtime type check is performed; only a null `input` is
// rejected (via VELOX_CHECK_NOT_NULL).
template <typename To, typename From>
std::unique_ptr<To> staticUniquePointerCast(std::unique_ptr<From> input) {
  VELOX_CHECK_NOT_NULL(input.get());
  return std::unique_ptr<To>(static_cast<To*>(input.release()));
}
86+
87+
// Returns true when the object held by `input` can be dynamic_cast to
// `const To*`. A null `input` is rejected via VELOX_CHECK_NOT_NULL.
template <typename To, typename From>
bool isInstanceOf(const std::shared_ptr<From>& input) {
  VELOX_CHECK_NOT_NULL(input.get());
  return dynamic_cast<const To*>(input.get()) != nullptr;
}
93+
94+
// Returns true when the object owned by `input` can be dynamic_cast to
// `const To*`. Ownership stays with `input`; a null `input` is rejected via
// VELOX_CHECK_NOT_NULL.
template <typename To, typename From>
bool isInstanceOf(const std::unique_ptr<From>& input) {
  VELOX_CHECK_NOT_NULL(input.get());
  return dynamic_cast<const To*>(input.get()) != nullptr;
}
100+
101+
// Returns true when `input` can be dynamic_cast to `const To*`. A null
// `input` is rejected via VELOX_CHECK_NOT_NULL.
template <typename To, typename From>
bool isInstanceOf(const From* input) {
  VELOX_CHECK_NOT_NULL(input);
  return dynamic_cast<const To*>(input) != nullptr;
}
107+
108+
#ifdef VELOX_ENABLE_BACKWARD_COMPATIBILITY
109+
template <typename To, typename From>
49110
std::shared_ptr<To> checked_pointer_cast(const std::shared_ptr<From>& input) {
50111
VELOX_CHECK_NOT_NULL(input.get());
51112
auto casted = std::dynamic_pointer_cast<To>(input);
@@ -104,5 +165,6 @@ bool is_instance_of(const From* input) {
104165
auto* casted = dynamic_cast<const To*>(input);
105166
return casted != nullptr;
106167
}
168+
#endif
107169

108170
} // namespace facebook::velox
Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
/*
2+
* Copyright (c) Facebook, Inc. and its affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
#pragma once
17+
18+
#define XXH_INLINE_ALL
19+
20+
#include <xxhash.h>
21+
#include <cmath>
#include <cstring>
22+
23+
#include "velox/common/base/Exceptions.h"
24+
#include "velox/common/hyperloglog/DenseHll.h"
25+
#include "velox/common/hyperloglog/Murmur3Hash128.h"
26+
#include "velox/common/hyperloglog/SparseHll.h"
27+
#include "velox/common/memory/HashStringAllocator.h"
28+
29+
namespace facebook::velox::common::hll {
30+
31+
namespace detail {
32+
template <typename T, bool HllAsFinalResult>
33+
inline uint64_t hashOne(const T& value) {
34+
if constexpr (HllAsFinalResult) {
35+
if constexpr (std::is_same_v<T, int64_t>) {
36+
return common::hll::Murmur3Hash128::hash64ForLong(value, 0);
37+
} else if constexpr (std::is_same_v<T, double>) {
38+
return common::hll::Murmur3Hash128::hash64ForLong(
39+
*reinterpret_cast<const int64_t*>(&value), 0);
40+
}
41+
return common::hll::Murmur3Hash128::hash64(&value, sizeof(T), 0);
42+
} else {
43+
return XXH64(&value, sizeof(T), 0);
44+
}
45+
}
46+
47+
// Use timestamp.toMillis() to compute hash value.
48+
template <>
49+
inline uint64_t hashOne<Timestamp, false>(const Timestamp& value) {
50+
return hashOne<int64_t, false>(value.toMillis());
51+
}
52+
53+
template <>
54+
inline uint64_t hashOne<Timestamp, true>(const Timestamp& /*value*/) {
55+
VELOX_UNREACHABLE("approx_set(timestamp) is not supported.");
56+
}
57+
58+
template <>
59+
inline uint64_t hashOne<StringView, false>(const StringView& value) {
60+
return XXH64(value.data(), value.size(), 0);
61+
}
62+
63+
template <>
64+
inline uint64_t hashOne<StringView, true>(const StringView& value) {
65+
return common::hll::Murmur3Hash128::hash64(value.data(), value.size(), 0);
66+
}
67+
68+
} // namespace detail
69+
70+
template <typename T, bool HllAsFinalResult>
71+
struct HllAccumulator {
72+
explicit HllAccumulator(HashStringAllocator* allocator)
73+
: sparseHll_{allocator}, denseHll_{allocator} {}
74+
75+
void setIndexBitLength(int8_t indexBitLength) {
76+
indexBitLength_ = indexBitLength;
77+
sparseHll_.setSoftMemoryLimit(
78+
common::hll::DenseHlls::estimateInMemorySize(indexBitLength_));
79+
}
80+
81+
void append(T value) {
82+
const auto hash = detail::hashOne<T, HllAsFinalResult>(value);
83+
84+
if (isSparse_) {
85+
if (sparseHll_.insertHash(hash)) {
86+
toDense();
87+
}
88+
} else {
89+
denseHll_.insertHash(hash);
90+
}
91+
}
92+
93+
int64_t cardinality() const {
94+
return isSparse_ ? sparseHll_.cardinality() : denseHll_.cardinality();
95+
}
96+
97+
void mergeWith(StringView serialized, HashStringAllocator* allocator) {
98+
auto input = serialized.data();
99+
if (common::hll::SparseHlls::canDeserialize(input)) {
100+
if (isSparse_) {
101+
sparseHll_.mergeWith(input);
102+
if (indexBitLength_ < 0) {
103+
setIndexBitLength(
104+
common::hll::DenseHlls::deserializeIndexBitLength(input));
105+
}
106+
if (sparseHll_.overLimit()) {
107+
toDense();
108+
}
109+
} else {
110+
common::hll::SparseHll<> other{input, allocator};
111+
other.toDense(denseHll_);
112+
}
113+
} else if (common::hll::DenseHlls::canDeserialize(input)) {
114+
if (isSparse_) {
115+
if (indexBitLength_ < 0) {
116+
setIndexBitLength(
117+
common::hll::DenseHlls::deserializeIndexBitLength(input));
118+
}
119+
toDense();
120+
}
121+
denseHll_.mergeWith(input);
122+
} else {
123+
VELOX_USER_FAIL("Unexpected type of HLL");
124+
}
125+
}
126+
127+
int32_t serializedSize() {
128+
return isSparse_ ? sparseHll_.serializedSize() : denseHll_.serializedSize();
129+
}
130+
131+
void serialize(char* outputBuffer) {
132+
return isSparse_ ? sparseHll_.serialize(indexBitLength_, outputBuffer)
133+
: denseHll_.serialize(outputBuffer);
134+
}
135+
136+
private:
137+
void toDense() {
138+
isSparse_ = false;
139+
denseHll_.initialize(indexBitLength_);
140+
sparseHll_.toDense(denseHll_);
141+
sparseHll_.reset();
142+
}
143+
144+
bool isSparse_{true};
145+
int8_t indexBitLength_{-1};
146+
common::hll::SparseHll<> sparseHll_;
147+
common::hll::DenseHll<> denseHll_;
148+
};
149+
150+
template <>
151+
struct HllAccumulator<bool, false> {
152+
explicit HllAccumulator(HashStringAllocator* /*allocator*/) {}
153+
154+
void append(bool value) {
155+
approxDistinctState_ |= (1 << value);
156+
}
157+
158+
int64_t cardinality() const {
159+
return (approxDistinctState_ & 1) + ((approxDistinctState_ & 2) >> 1);
160+
}
161+
162+
void mergeWith(
163+
StringView /*serialized*/,
164+
HashStringAllocator* /*allocator*/) {
165+
VELOX_UNREACHABLE(
166+
"APPROX_DISTINCT<BOOLEAN> unsupported mergeWith(StringView, HashStringAllocator*)");
167+
}
168+
169+
void mergeWith(int8_t data) {
170+
approxDistinctState_ |= data;
171+
}
172+
173+
int32_t serializedSize() const {
174+
return sizeof(int8_t);
175+
}
176+
177+
void serialize(char* /*outputBuffer*/) {
178+
VELOX_UNREACHABLE("APPROX_DISTINCT<BOOLEAN> unsupported serialize(char*)");
179+
}
180+
181+
void setIndexBitLength(int8_t /*indexBitLength*/) {}
182+
183+
int8_t getState() const {
184+
return approxDistinctState_;
185+
}
186+
187+
private:
188+
int8_t approxDistinctState_{0};
189+
};
190+
191+
} // namespace facebook::velox::common::hll

velox/common/memory/MemoryPool.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -599,7 +599,7 @@ bool MemoryPoolImpl::transferTo(MemoryPool* dest, void* buffer, uint64_t size) {
599599
return false;
600600
}
601601
VELOX_CHECK_NOT_NULL(dest);
602-
auto* destImpl = checked_pointer_cast<MemoryPoolImpl, MemoryPool>(dest);
602+
auto* destImpl = checkedPointerCast<MemoryPoolImpl, MemoryPool>(dest);
603603
if (allocator_ != destImpl->allocator_) {
604604
return false;
605605
}

0 commit comments

Comments
 (0)