diff --git a/CMake/resolve_dependency_modules/boost.cmake b/CMake/resolve_dependency_modules/boost.cmake index 842cba6f133..dfbc6169847 100644 --- a/CMake/resolve_dependency_modules/boost.cmake +++ b/CMake/resolve_dependency_modules/boost.cmake @@ -15,13 +15,6 @@ include_guard(GLOBAL) add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/boost) -if(ICU_SOURCE) - if(${ICU_SOURCE} STREQUAL "BUNDLED") - # ensure ICU is built before Boost - add_dependencies(boost_regex ICU ICU::i18n) - endif() -endif() - # This prevents system boost from leaking in set(Boost_NO_SYSTEM_PATHS ON) # We have to keep the FindBoost.cmake in an subfolder to prevent it from diff --git a/velox/dwio/common/CMakeLists.txt b/velox/dwio/common/CMakeLists.txt index a2539e8458e..2023728686b 100644 --- a/velox/dwio/common/CMakeLists.txt +++ b/velox/dwio/common/CMakeLists.txt @@ -82,7 +82,6 @@ velox_link_libraries( velox_expression velox_memory velox_type_tz - Boost::regex Folly::folly glog::glog protobuf::libprotobuf diff --git a/velox/dwio/common/tests/CMakeLists.txt b/velox/dwio/common/tests/CMakeLists.txt index cb6f356a31a..bebfa827e31 100644 --- a/velox/dwio/common/tests/CMakeLists.txt +++ b/velox/dwio/common/tests/CMakeLists.txt @@ -63,7 +63,6 @@ target_link_libraries( velox_dwio_common_test_utils velox_temp_path velox_vector_test_lib - Boost::regex velox_link_libs Folly::folly ${TEST_LINK_LIBS} diff --git a/velox/dwio/parquet/tests/writer/CMakeLists.txt b/velox/dwio/parquet/tests/writer/CMakeLists.txt index a6b9daec340..2a84d03fa03 100644 --- a/velox/dwio/parquet/tests/writer/CMakeLists.txt +++ b/velox/dwio/parquet/tests/writer/CMakeLists.txt @@ -26,7 +26,6 @@ target_link_libraries( velox_dwio_common_test_utils velox_vector_fuzzer velox_caching - Boost::regex velox_link_libs Folly::folly ${TEST_LINK_LIBS} @@ -49,7 +48,6 @@ target_link_libraries( velox_dwio_common_test_utils velox_caching velox_link_libs - Boost::regex Folly::folly ${TEST_LINK_LIBS} GTest::gtest diff --git a/velox/dwio/text/tests/writer/CMakeLists.txt b/velox/dwio/text/tests/writer/CMakeLists.txt index f53ecec5bc3..2d42a84a7f9 100644 --- a/velox/dwio/text/tests/writer/CMakeLists.txt +++ b/velox/dwio/text/tests/writer/CMakeLists.txt @@ -33,7 +33,6 @@ target_link_libraries( velox_dwio_text_reader_register velox_dwio_text_writer_register velox_link_libs - Boost::regex Folly::folly ${TEST_LINK_LIBS} GTest::gtest diff --git a/velox/exec/fuzzer/CMakeLists.txt b/velox/exec/fuzzer/CMakeLists.txt index d030d0f8fc1..b7fdf12f1a3 100644 --- a/velox/exec/fuzzer/CMakeLists.txt +++ b/velox/exec/fuzzer/CMakeLists.txt @@ -36,7 +36,6 @@ target_link_libraries( velox_expression_functions velox_presto_types cpr::cpr - Boost::regex velox_presto_type_parser Folly::folly velox_hive_connector diff --git a/velox/exec/tests/CMakeLists.txt b/velox/exec/tests/CMakeLists.txt index 25f6e9f87f6..e966d0ef281 100644 --- a/velox/exec/tests/CMakeLists.txt +++ b/velox/exec/tests/CMakeLists.txt @@ -172,7 +172,6 @@ target_link_libraries( Boost::date_time Boost::filesystem Boost::program_options - Boost::regex Boost::thread Boost::system GTest::gtest @@ -210,7 +209,6 @@ target_link_libraries( Boost::date_time Boost::filesystem Boost::program_options - Boost::regex Boost::thread Boost::system GTest::gtest diff --git a/velox/experimental/wave/vector/tests/CMakeLists.txt b/velox/experimental/wave/vector/tests/CMakeLists.txt index dade3ca1dcd..1dc7377ee4d 100644 --- a/velox/experimental/wave/vector/tests/CMakeLists.txt +++ b/velox/experimental/wave/vector/tests/CMakeLists.txt @@ -26,7 +26,6 @@ target_link_libraries( Boost::date_time Boost::filesystem Boost::program_options - Boost::regex Boost::thread Boost::system GTest::gtest diff --git a/velox/functions/prestosql/DateTimeFunctions.h b/velox/functions/prestosql/DateTimeFunctions.h index ea66ed48739..c2271eb2ae2 100644 --- a/velox/functions/prestosql/DateTimeFunctions.h +++ b/velox/functions/prestosql/DateTimeFunctions.h @@ -16,6 +16,7 @@ #pragma once #define XXH_INLINE_ALL +#include #include #include #include @@ -1923,53 +1924,39 @@ template struct ParseDurationFunction { VELOX_DEFINE_FUNCTION_TYPES(T); - std::unique_ptr durationRegex_; - - FOLLY_ALWAYS_INLINE void initialize( - const std::vector& /*inputTypes*/, - const core::QueryConfig& /*config*/, - const arg_type* /*amountUnit*/) { - durationRegex_ = - std::make_unique(R"(^\s*(\d+(?:\.\d+)?)\s*([a-zA-Z]+)\s*$)"); - } - FOLLY_ALWAYS_INLINE void call( out_type& result, const arg_type& amountUnit) { + static const LazyRE2 kDurationRegex{ + .pattern_ = R"(^\s*(\d+(?:\.\d+)?)\s*([a-zA-Z]+)\s*$)", + .options_ = {}, + }; + // TODO: Remove re2::StringPiece != std::string_view hacks. + // It's needed because for some systems in CI, + // re2 and abseil libraries are old. re2::StringPiece valueStr; - re2::StringPiece unit; - if (!RE2::FullMatch( - re2::StringPiece(amountUnit.data(), amountUnit.size()), - *durationRegex_, - &valueStr, - &unit)) { + re2::StringPiece unitStr; + re2::StringPiece amountUnitStr{amountUnit.data(), amountUnit.size()}; + if (!RE2::FullMatch(amountUnitStr, *kDurationRegex, &valueStr, &unitStr)) { VELOX_USER_FAIL( - "Input duration is not a valid data duration string: {}", amountUnit); + "Input duration is not a valid data duration string: {}", + std::string_view(amountUnitStr.data(), amountUnitStr.size())); } double value{}; - try { - size_t pos = 0; - // Create temporary string from re2::StringPiece for stod - std::string valueString(valueStr.data(), valueStr.size()); - value = std::stod(valueString, &pos); - if (pos != valueString.size()) { - VELOX_USER_FAIL( - "Input duration value is not a valid number: {}", - std::string_view(valueStr.data(), valueStr.size())); - } - } catch (const std::out_of_range&) { + auto [_, error] = fast_float::from_chars( + valueStr.data(), valueStr.data() + valueStr.size(), value); + if (error == std::errc::result_out_of_range) { VELOX_USER_FAIL( "Input duration value is out of range for double: {}", std::string_view(valueStr.data(), valueStr.size())); - } catch (const std::exception&) { + } else if (error != std::errc{}) { VELOX_USER_FAIL( "Input duration value is not a valid number: {}", std::string_view(valueStr.data(), valueStr.size())); } - result = valueOfTimeUnitToMillis( - value, std::string_view(unit.data(), unit.size())); + result = valueOfTimeUnitToMillis(value, {unitStr.data(), unitStr.size()}); } }; diff --git a/velox/functions/sparksql/GetJsonObject.h b/velox/functions/sparksql/GetJsonObject.h index 915e8753af5..ae48040a753 100644 --- a/velox/functions/sparksql/GetJsonObject.h +++ b/velox/functions/sparksql/GetJsonObject.h @@ -88,10 +88,7 @@ struct GetJsonObjectFunction { private: FOLLY_ALWAYS_INLINE bool checkJsonPath(StringView jsonPath) { // Spark requires the first char in jsonPath is '$'. - if (jsonPath.empty() || jsonPath.data()[0] != '$') { - return false; - } - return true; + return std::string_view{jsonPath}.starts_with('$'); } // Spark's json path requires field name surrounded by single quotes if it is @@ -106,7 +103,7 @@ struct GetJsonObjectFunction { if (pairBegin == std::string::npos) { break; } - pairEnd = result.find("]", pairBegin); + pairEnd = result.find(']', pairBegin); // If expected pattern, like ['a'], is not found. if (pairEnd == std::string::npos || result[pairEnd - 1] != '\'') { return "-1"; @@ -249,13 +246,9 @@ struct GetJsonObjectFunction { } return false; } - case simdjson::ondemand::json_type::object: { - // For nested case, e.g., for "{"my": {"hello": 10}}", "$.my" will - // return an object type. - ss << rawResult; - result.append(ss.str()); - return true; - } + // For nested case, e.g., for "{"my": {"hello": 10}}", + // "$.my" will return an object type. + case simdjson::ondemand::json_type::object: case simdjson::ondemand::json_type::array: { ss << rawResult; result.append(ss.str()); diff --git a/velox/type/tz/CMakeLists.txt b/velox/type/tz/CMakeLists.txt index 10307187c03..b1d16071aab 100644 --- a/velox/type/tz/CMakeLists.txt +++ b/velox/type/tz/CMakeLists.txt @@ -24,11 +24,4 @@ velox_add_library( TimeZoneNames.cpp ) -velox_link_libraries( - velox_type_tz - velox_exception - velox_external_tzdb - Boost::regex - fmt::fmt - Folly::folly -) +velox_link_libraries(velox_type_tz velox_exception velox_external_tzdb fmt::fmt Folly::folly) diff --git a/velox/vector/tests/CMakeLists.txt b/velox/vector/tests/CMakeLists.txt index 248e8be2ea0..81f67031d4a 100644 --- a/velox/vector/tests/CMakeLists.txt +++ b/velox/vector/tests/CMakeLists.txt @@ -59,7 +59,6 @@ target_link_libraries( Boost::date_time Boost::filesystem Boost::program_options - Boost::regex Boost::thread Boost::system GTest::gtest