Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions CMake/resolve_dependency_modules/boost.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,6 @@ include_guard(GLOBAL)

add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/boost)

if(ICU_SOURCE)
if(${ICU_SOURCE} STREQUAL "BUNDLED")
# ensure ICU is built before Boost
add_dependencies(boost_regex ICU ICU::i18n)
endif()
endif()

# This prevents system boost from leaking in
set(Boost_NO_SYSTEM_PATHS ON)
# We have to keep the FindBoost.cmake in a subfolder to prevent it from
Expand Down
1 change: 0 additions & 1 deletion velox/dwio/common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ velox_link_libraries(
velox_expression
velox_memory
velox_type_tz
Boost::regex
Folly::folly
glog::glog
protobuf::libprotobuf
Expand Down
1 change: 0 additions & 1 deletion velox/dwio/common/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ target_link_libraries(
velox_dwio_common_test_utils
velox_temp_path
velox_vector_test_lib
Boost::regex
velox_link_libs
Folly::folly
${TEST_LINK_LIBS}
Expand Down
2 changes: 0 additions & 2 deletions velox/dwio/parquet/tests/writer/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ target_link_libraries(
velox_dwio_common_test_utils
velox_vector_fuzzer
velox_caching
Boost::regex
velox_link_libs
Folly::folly
${TEST_LINK_LIBS}
Expand All @@ -49,7 +48,6 @@ target_link_libraries(
velox_dwio_common_test_utils
velox_caching
velox_link_libs
Boost::regex
Folly::folly
${TEST_LINK_LIBS}
GTest::gtest
Expand Down
1 change: 0 additions & 1 deletion velox/dwio/text/tests/writer/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ target_link_libraries(
velox_dwio_text_reader_register
velox_dwio_text_writer_register
velox_link_libs
Boost::regex
Folly::folly
${TEST_LINK_LIBS}
GTest::gtest
Expand Down
1 change: 0 additions & 1 deletion velox/exec/fuzzer/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ target_link_libraries(
velox_expression_functions
velox_presto_types
cpr::cpr
Boost::regex
velox_presto_type_parser
Folly::folly
velox_hive_connector
Expand Down
2 changes: 0 additions & 2 deletions velox/exec/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,6 @@ target_link_libraries(
Boost::date_time
Boost::filesystem
Boost::program_options
Boost::regex
Boost::thread
Boost::system
GTest::gtest
Expand Down Expand Up @@ -210,7 +209,6 @@ target_link_libraries(
Boost::date_time
Boost::filesystem
Boost::program_options
Boost::regex
Boost::thread
Boost::system
GTest::gtest
Expand Down
1 change: 0 additions & 1 deletion velox/experimental/wave/vector/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ target_link_libraries(
Boost::date_time
Boost::filesystem
Boost::program_options
Boost::regex
Boost::thread
Boost::system
GTest::gtest
Expand Down
49 changes: 18 additions & 31 deletions velox/functions/prestosql/DateTimeFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#pragma once

#define XXH_INLINE_ALL
#include <fast_float/fast_float.h>
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you need to use velox_include_directories to include the fast-float header? If the environment does not have fast-float, the Velox build will fail. I'm not sure why Velox does not build it from source. @MBkkt CC @czentgr
https://github.com/IBM/velox/actions/runs/18915929559/job/53999183287

FAILED: velox/buffer/CMakeFiles/velox.dir/__/functions/prestosql/registration/DateTimeFunctionsRegistration.cpp.o 
/usr/bin/ccache /opt/rh/devtoolset-11/root/usr/bin/c++ -DAWS_MQTT_WITH_WEBSOCKETS -DAWS_SDK_VERSION_MAJOR=1 -DAWS_SDK_VERSION_MINOR=11 -DAWS_SDK_VERSION_PATCH=285 -DAWS_USE_EPOLL -DAZ_RTTI -DCURL_STATICLIB -DGEOS_INLINE -DGFLAGS_IS_A_DLL=0 -DNDEBUG -DSIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -DSIMDJSON_THREADS_ENABLED=1 -DUSE_UNSTABLE_GEOS_CPP_API -DVELOX_DISABLE_GOOGLETEST -DVELOX_ENABLE_ABFS -DVELOX_ENABLE_COMPRESSION_LZ4 -DVELOX_ENABLE_GCS -DVELOX_ENABLE_GEO -DVELOX_ENABLE_HDFS -DVELOX_ENABLE_PARQUET -DVELOX_ENABLE_S3 -I/work/. -I/work/velox/external/xxhash -I/work/_build/release -I/work/velox/tpch/gen/dbgen/include -I/work/velox/tpcds/gen/dsdgen/include -I/work/_build/release/_deps/simdjson-src/include -isystem /work/gluten/dev/vcpkg/vcpkg_installed/x64-linux-avx/include -isystem /work/velox -isystem /work/velox/external -isystem /work/gluten/dev/vcpkg/vcpkg_installed/x64-linux-avx/lib/pkgconfig/../../include -isystem /work/gluten/dev/vcpkg/vcpkg_installed/x64-linux-avx/include/geos -Wno-error=stringop-overflow -Wno-error=cpp -Wno-missing-field-initializers -Wno-unknown-warning-option -mavx2 -mfma -mavx -mf16c -mlzcnt -mbmi2 -D USE_VELOX_COMMON_BASE -D HAS_UNCAUGHT_EXCEPTIONS -DFOLLY_CFG_NO_COROUTINES -Wall -Wextra -Wno-unused        -Wno-unused-parameter        -Wno-sign-compare        -Wno-ignored-qualifiers        -Wno-implicit-fallthrough          -Wno-class-memaccess          -Wno-comment          -Wno-int-in-bool-context          -Wno-redundant-move          -Wno-array-bounds          -Wno-maybe-uninitialized          -Wno-unused-result          -Wno-format-overflow          -Wno-strict-aliasing -Werror -O3 -DNDEBUG -std=gnu++20 -fPIC -fdiagnostics-color=always -ffp-contract=off -DS2N_KYBER512R3_AVX2_BMI2 -DS2N_STACKTRACE -DS2N_CPUID_AVAILABLE -DS2N_FEATURES_AVAILABLE -fPIC -DS2N_FALL_THROUGH_SUPPORTED -DS2N___RESTRICT__SUPPORTED -DS2N_MADVISE_SUPPORTED -DS2N_CLONE_SUPPORTED -MD -MT 
velox/buffer/CMakeFiles/velox.dir/__/functions/prestosql/registration/DateTimeFunctionsRegistration.cpp.o -MF velox/buffer/CMakeFiles/velox.dir/__/functions/prestosql/registration/DateTimeFunctionsRegistration.cpp.o.d -o velox/buffer/CMakeFiles/velox.dir/__/functions/prestosql/registration/DateTimeFunctionsRegistration.cpp.o -c /work/velox/functions/prestosql/registration/DateTimeFunctionsRegistration.cpp
In file included from /work/velox/functions/prestosql/registration/DateTimeFunctionsRegistration.cpp:18:
/work/./velox/functions/prestosql/DateTimeFunctions.h:19:10: fatal error: fast_float/fast_float.h: No such file or directory
   19 | #include <fast_float/fast_float.h>
      |          ^~~~~~~~~~~~~~~~~~~~~~~~~
compilation terminated.

Copy link
Copy Markdown
Collaborator Author

@MBkkt MBkkt Oct 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, I shouldn't, because the library target is already linked in CMake.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you build folly in some specific way, because I believe the fast_float link is currently provided through Folly::folly.
Let's make this explicit.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jinchengchenghh #15322 does it help?

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IBM installs folly via vcpkg, so the environment already has folly, but vcpkg does not support install fast-float version 8.0.2 (apache/gluten#10977), so fast-float does not exist in the environment. It does not call setup-script.sh.

Copy link
Copy Markdown
Collaborator Author

@MBkkt MBkkt Oct 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so fast-float does not exist in the environment. It does not call setup-script.sh

So you don't have fast_float in the vcpkg folly, or how does it work?

but vcpkg does not support install fast-float version 8.0.2

Hmm, strange: on their site they claim to support 8.0.2.
image

Maybe I'm missing something because I'm not working with C++ package managers and system libraries.

As an alternative solution, we could probably update the fast_float version? I don't think that's bad, but it also affects folly.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe because Gluten uses an old vcpkg version? I'm not sure about that.

But I think the current #15322 can help, so we don't need to update the fast_float version or worry about Gluten's vcpkg fast-float version.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, let's try the current approach. If it doesn't help, please ping me and we will try something else.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for your timely fix!

#include <re2/re2.h>
#include <xxhash.h>
#include <string_view>
Expand Down Expand Up @@ -1923,53 +1924,39 @@ template <typename T>
struct ParseDurationFunction {
VELOX_DEFINE_FUNCTION_TYPES(T);

std::unique_ptr<RE2> durationRegex_;

FOLLY_ALWAYS_INLINE void initialize(
const std::vector<TypePtr>& /*inputTypes*/,
const core::QueryConfig& /*config*/,
const arg_type<Varchar>* /*amountUnit*/) {
durationRegex_ =
std::make_unique<RE2>(R"(^\s*(\d+(?:\.\d+)?)\s*([a-zA-Z]+)\s*$)");
}

FOLLY_ALWAYS_INLINE void call(
out_type<IntervalDayTime>& result,
const arg_type<Varchar>& amountUnit) {
static const LazyRE2 kDurationRegex{
.pattern_ = R"(^\s*(\d+(?:\.\d+)?)\s*([a-zA-Z]+)\s*$)",
.options_ = {},
};
// TODO: Remove re2::StringPiece != std::string_view hacks.
// It's needed because for some systems in CI,
// re2 and abseil libraries are old.
re2::StringPiece valueStr;
re2::StringPiece unit;
if (!RE2::FullMatch(
re2::StringPiece(amountUnit.data(), amountUnit.size()),
*durationRegex_,
&valueStr,
&unit)) {
re2::StringPiece unitStr;
re2::StringPiece amountUnitStr{amountUnit.data(), amountUnit.size()};
if (!RE2::FullMatch(amountUnitStr, *kDurationRegex, &valueStr, &unitStr)) {
VELOX_USER_FAIL(
"Input duration is not a valid data duration string: {}", amountUnit);
"Input duration is not a valid data duration string: {}",
std::string_view(amountUnitStr.data(), amountUnitStr.size()));
}

double value{};
try {
size_t pos = 0;
// Create temporary string from re2::StringPiece for stod
std::string valueString(valueStr.data(), valueStr.size());
value = std::stod(valueString, &pos);
if (pos != valueString.size()) {
VELOX_USER_FAIL(
"Input duration value is not a valid number: {}",
std::string_view(valueStr.data(), valueStr.size()));
}
} catch (const std::out_of_range&) {
auto [_, error] = fast_float::from_chars(
valueStr.data(), valueStr.data() + valueStr.size(), value);
if (error == std::errc::result_out_of_range) {
VELOX_USER_FAIL(
"Input duration value is out of range for double: {}",
std::string_view(valueStr.data(), valueStr.size()));
} catch (const std::exception&) {
} else if (error != std::errc{}) {
VELOX_USER_FAIL(
"Input duration value is not a valid number: {}",
std::string_view(valueStr.data(), valueStr.size()));
}

result = valueOfTimeUnitToMillis(
value, std::string_view(unit.data(), unit.size()));
result = valueOfTimeUnitToMillis(value, {unitStr.data(), unitStr.size()});
}
};

Expand Down
17 changes: 5 additions & 12 deletions velox/functions/sparksql/GetJsonObject.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,7 @@ struct GetJsonObjectFunction {
private:
FOLLY_ALWAYS_INLINE bool checkJsonPath(StringView jsonPath) {
// Spark requires the first char in jsonPath is '$'.
if (jsonPath.empty() || jsonPath.data()[0] != '$') {
return false;
}
return true;
return std::string_view{jsonPath}.starts_with('$');
}

// Spark's json path requires field name surrounded by single quotes if it is
Expand All @@ -106,7 +103,7 @@ struct GetJsonObjectFunction {
if (pairBegin == std::string::npos) {
break;
}
pairEnd = result.find("]", pairBegin);
pairEnd = result.find(']', pairBegin);
// If expected pattern, like ['a'], is not found.
if (pairEnd == std::string::npos || result[pairEnd - 1] != '\'') {
return "-1";
Expand Down Expand Up @@ -249,13 +246,9 @@ struct GetJsonObjectFunction {
}
return false;
}
case simdjson::ondemand::json_type::object: {
// For nested case, e.g., for "{"my": {"hello": 10}}", "$.my" will
// return an object type.
ss << rawResult;
result.append(ss.str());
return true;
}
// For nested case, e.g., for "{"my": {"hello": 10}}",
// "$.my" will return an object type.
case simdjson::ondemand::json_type::object:
Copy link
Copy Markdown
Collaborator

@rui-mo rui-mo Oct 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cc: @philo-he Would you like to take a look at the change to get_json_object? Thanks.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to be clear, the change here is because both cases had the same code.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The changes make sense. Thanks.

case simdjson::ondemand::json_type::array: {
ss << rawResult;
result.append(ss.str());
Expand Down
9 changes: 1 addition & 8 deletions velox/type/tz/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,4 @@ velox_add_library(
TimeZoneNames.cpp
)

velox_link_libraries(
velox_type_tz
velox_exception
velox_external_tzdb
Boost::regex
fmt::fmt
Folly::folly
)
velox_link_libraries(velox_type_tz velox_exception velox_external_tzdb fmt::fmt Folly::folly)
1 change: 0 additions & 1 deletion velox/vector/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ target_link_libraries(
Boost::date_time
Boost::filesystem
Boost::program_options
Boost::regex
Boost::thread
Boost::system
GTest::gtest
Expand Down
Loading