diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index f94409c57e..9c66e51875 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -163,7 +163,7 @@ set(SOURCES
 
 if(KvikIO_REMOTE_SUPPORT)
   list(APPEND SOURCES "src/hdfs.cpp" "src/remote_handle.cpp" "src/detail/remote_handle.cpp"
-       "src/detail/url.cpp" "src/shim/libcurl.cpp"
+       "src/detail/tls.cpp" "src/detail/url.cpp" "src/shim/libcurl.cpp"
   )
 endif()
 
diff --git a/cpp/include/kvikio/detail/tls.hpp b/cpp/include/kvikio/detail/tls.hpp
new file mode 100644
index 0000000000..be40eaf5e5
--- /dev/null
+++ b/cpp/include/kvikio/detail/tls.hpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <optional>
+#include <string>
+#include <utility>
+
+#include <kvikio/shim/libcurl.hpp>
+
+namespace kvikio::detail {
+/**
+ * @brief Search for the CA bundle file and directory paths
+ *
+ * This function searches for the Certificate Authority (CA) paths required for TLS/SSL verification
+ * in libcurl. The search is performed in the following priority order, returning as soon as either
+ * a bundle file or a directory is found:
+ * - CA bundle file: Check env vars CURL_CA_BUNDLE, SSL_CERT_FILE
+ * - CA directory: Check env var SSL_CERT_DIR
+ * - CA bundle file: Search distribution-specific locations for accessible bundle
+ * - CA directory: Search distribution-specific locations for accessible directory
+ * - CA bundle file: Check if curl's compile-time default bundle path is accessible
+ * - CA directory: Check if curl's compile-time default directory path is accessible
+ *
+ * @return Pair of (CA bundle file, CA directory); exactly one of them holds a value
+ *
+ * @exception std::runtime_error if neither CA bundle nor directory is found
+ *
+ * @note Environment Variables:
+ * - CURL_CA_BUNDLE: Override CA bundle file location (curl-specific)
+ * - SSL_CERT_FILE: Override CA bundle file location (OpenSSL-compatible)
+ * - SSL_CERT_DIR: Override CA directory location (OpenSSL-compatible)
+ */
+std::pair<std::optional<std::string>, std::optional<std::string>> get_ca_paths();
+
+/**
+ * @brief Configure curl handle with discovered CA certificate paths
+ *
+ * As a performance optimization, the discovered CA certificate paths are cached to avoid repeated
+ * searching.
+ *
+ * @param curl Curl handle to configure with CA certificate paths
+ *
+ * @exception std::runtime_error if get_ca_paths() cannot find a CA bundle file or directory
+ */
+void set_up_ca_paths(CurlHandle& curl);
+}  // namespace kvikio::detail
diff --git a/cpp/src/detail/tls.cpp b/cpp/src/detail/tls.cpp
new file mode 100644
index 0000000000..0dccb6b612
--- /dev/null
+++ b/cpp/src/detail/tls.cpp
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <unistd.h>
+#include <cstdlib>
+
+#include <kvikio/detail/tls.hpp>
+#include <kvikio/error.hpp>
+#include <kvikio/shim/libcurl.hpp>
+#include <optional>
+#include <string>
+#include <vector>
+
+namespace kvikio::detail {
+
+namespace {
+/**
+ * @brief Search for a CA certificate path using environment variables
+ *
+ * @param env_vars Environment variable names to check in order
+ * @return Value of the first variable that is set and non-empty, std::nullopt otherwise
+ */
+std::optional<std::string> find_ca_path_from_env_var(std::vector<std::string> const& env_vars)
+{
+  for (auto const& env_var : env_vars) {
+    auto const* path = std::getenv(env_var.data());
+    if (path != nullptr && path[0] != '\0') { return path; }  // empty counts as unset
+  }
+
+  return std::nullopt;
+}
+
+/**
+ * @brief Search for a CA certificate path in standard system locations
+ *
+ * @param system_paths file system paths to check in order
+ * @return First accessible path if found, std::nullopt otherwise
+ */
+std::optional<std::string> find_ca_path_in_system_locations(
+  std::vector<std::string> const& system_paths)
+{
+  for (auto const& path : system_paths) {
+    // Check whether the file/directory exists, and whether it grants read permission to the calling
+    // process's real UID and GID. If the path is a symbolic link, it is dereferenced.
+    if (access(path.data(), R_OK) != -1) { return path; }
+  }
+
+  return std::nullopt;
+}
+
+/**
+ * @brief Get CA certificate path from curl's compile-time defaults
+ *
+ * @param default_path Path provided by curl_version_info (may be nullptr)
+ * @return Path string if accessible, std::nullopt otherwise
+ */
+std::optional<std::string> get_ca_path_from_curl_defaults(char const* default_path)
+{
+  if (default_path != nullptr && access(default_path, R_OK) != -1) { return default_path; }
+
+  return std::nullopt;
+}
+}  // namespace
+
+std::pair<std::optional<std::string>, std::optional<std::string>> get_ca_paths()
+{
+  auto* version_info = curl_version_info(::CURLVERSION_NOW);
+  KVIKIO_EXPECT(version_info != nullptr, "Failed to get curl version info", std::runtime_error);
+
+  std::optional<std::string> ca_bundle_file;
+  std::optional<std::string> ca_directory;
+
+  // Priority 1: CA bundle file from environment variables
+  ca_bundle_file = find_ca_path_from_env_var({
+    "CURL_CA_BUNDLE",  // curl program
+    "SSL_CERT_FILE"    // OpenSSL
+  });
+  if (ca_bundle_file.has_value()) { return {ca_bundle_file, ca_directory}; }
+
+  // Priority 2: CA directory from environment variables
+  ca_directory = find_ca_path_from_env_var({
+    "SSL_CERT_DIR"  // OpenSSL
+  });
+  if (ca_directory.has_value()) { return {ca_bundle_file, ca_directory}; }
+
+  // Priority 3: CA bundle file from system locations
+  ca_bundle_file = find_ca_path_in_system_locations(
+    {"/etc/ssl/certs/ca-certificates.crt",                 // Debian/Ubuntu, Arch, Alpine, Gentoo
+     "/etc/pki/tls/certs/ca-bundle.crt",                   // RHEL/CentOS/Rocky/AlmaLinux, Fedora
+     "/etc/ssl/ca-bundle.pem",                             // OpenSUSE/SLES
+     "/etc/pki/tls/cert.pem",                              // RHEL-based (symlink to ca-bundle.crt)
+     "/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem",  // Fedora 28+, RHEL 8+
+
+     // Additional locations mentioned by libcurl:
+     // https://github.com/curl/curl/blob/master/CMakeLists.txt
+     "/usr/share/ssl/certs/ca-bundle.crt",
+     "/usr/local/share/certs/ca-root-nss.crt",
+     "/etc/ssl/cert.pem"});
+  if (ca_bundle_file.has_value()) { return {ca_bundle_file, ca_directory}; }
+
+  // Priority 4: CA directory from system locations
+  ca_directory = find_ca_path_in_system_locations({
+    "/etc/ssl/certs",     // Debian/Ubuntu, Arch, Alpine, OpenSUSE, Gentoo
+    "/etc/pki/tls/certs"  // RHEL/CentOS/Rocky/AlmaLinux, Fedora
+  });
+  if (ca_directory.has_value()) { return {ca_bundle_file, ca_directory}; }
+
+  // Priority 5: CA bundle file from curl compile-time defaults
+  ca_bundle_file = get_ca_path_from_curl_defaults(version_info->cainfo);
+  if (ca_bundle_file.has_value()) { return {ca_bundle_file, ca_directory}; }
+
+  // Priority 6: CA directory from curl compile-time defaults
+  ca_directory = get_ca_path_from_curl_defaults(version_info->capath);
+  if (ca_directory.has_value()) { return {ca_bundle_file, ca_directory}; }
+
+  // Reaching this point means every source above came up empty, so this check reports the error
+  KVIKIO_EXPECT(ca_bundle_file.has_value() || ca_directory.has_value(),
+                "Failed to find accessible CA certificates.",
+                std::runtime_error);
+  return {ca_bundle_file, ca_directory};
+}
+
+void set_up_ca_paths(CurlHandle& curl)
+{
+  // A structured binding can only be declared static from C++20 on, so cache the pair itself.
+  static auto const ca_paths = get_ca_paths();
+  auto const& [ca_bundle_file, ca_directory] = ca_paths;
+
+  if (ca_bundle_file.has_value()) {
+    curl.setopt(CURLOPT_CAINFO, ca_bundle_file->c_str());
+    curl.setopt(CURLOPT_CAPATH, nullptr);
+  } else if (ca_directory.has_value()) {
+    curl.setopt(CURLOPT_CAINFO, nullptr);
+    curl.setopt(CURLOPT_CAPATH, ca_directory->c_str());
+  }
+}
+}  // namespace kvikio::detail
diff --git a/cpp/src/shim/libcurl.cpp b/cpp/src/shim/libcurl.cpp
index feddf33254..a78fb33d30 100644
--- a/cpp/src/shim/libcurl.cpp
+++ b/cpp/src/shim/libcurl.cpp
@@ -29,6 +29,7 @@
 #include
 #include
+#include <kvikio/detail/tls.hpp>
 #include
 #include
 #include
@@ -116,6 +117,8 @@ CurlHandle::CurlHandle(LibCurl::UniqueHandlePtr handle,
   // Optionally enable verbose output if it's configured.
auto const verbose = getenv_or("KVIKIO_REMOTE_VERBOSE", false); if (verbose) { setopt(CURLOPT_VERBOSE, 1L); } + + detail::set_up_ca_paths(*this); } CurlHandle::~CurlHandle() noexcept { LibCurl::instance().retain_handle(std::move(_handle)); } diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index a6fd2c67e4..afa7e8d97b 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -79,6 +79,7 @@ kvikio_add_test(NAME MMAP_TEST SOURCES test_mmap.cpp) if(KvikIO_REMOTE_SUPPORT) kvikio_add_test(NAME REMOTE_HANDLE_TEST SOURCES test_remote_handle.cpp utils/env.cpp) kvikio_add_test(NAME HDFS_TEST SOURCES test_hdfs.cpp utils/hdfs_helper.cpp) + kvikio_add_test(NAME TLS_TEST SOURCES test_tls.cpp utils/env.cpp) kvikio_add_test(NAME URL_TEST SOURCES test_url.cpp) endif() diff --git a/cpp/tests/test_tls.cpp b/cpp/tests/test_tls.cpp new file mode 100644 index 0000000000..4b04f10296 --- /dev/null +++ b/cpp/tests/test_tls.cpp @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include + +#include "utils/env.hpp" + +TEST(TlsTest, get_ca_paths) +{ + std::string const expected_ca_bundle_path{"ca_bundle_path"}; + std::string const expected_ca_directory{"ca_directory"}; + { + // Env var CURL_CA_BUNDLE has the highest priority. 
Both SSL_CERT_FILE and SSL_CERT_DIR shall be + // skipped + kvikio::test::EnvVarContext env_var_ctx{{"CURL_CA_BUNDLE", expected_ca_bundle_path}, + {"SSL_CERT_FILE", "another_ca_bundle_path"}, + {"SSL_CERT_DIR", expected_ca_directory}}; + auto const& [ca_bundle_file, ca_directory] = kvikio::detail::get_ca_paths(); + + EXPECT_EQ(ca_bundle_file, expected_ca_bundle_path); + EXPECT_EQ(ca_directory, std::nullopt); + } + + { + // Env var CURL_CA_BUNDLE and SSL_CERT_FILE are not specified, SSL_CERT_DIR shall be used + kvikio::test::EnvVarContext env_var_ctx{{"SSL_CERT_DIR", expected_ca_directory}}; + auto const& [ca_bundle_file, ca_directory] = kvikio::detail::get_ca_paths(); + + EXPECT_EQ(ca_bundle_file, std::nullopt); + EXPECT_EQ(ca_directory, expected_ca_directory); + } +} diff --git a/docs/source/runtime_settings.rst b/docs/source/runtime_settings.rst index 0e304c9ed5..e707031720 100644 --- a/docs/source/runtime_settings.rst +++ b/docs/source/runtime_settings.rst @@ -64,3 +64,13 @@ Set the environment variable ``KVIKIO_REMOTE_VERBOSE`` to ``true``, ``on``, ``ye .. warning:: This may show sensitive contents from headers and data. + +CA bundle file and CA directory ``CURL_CA_BUNDLE``, ``SSL_CERT_FILE``, ``SSL_CERT_DIR`` +--------------------------------------------------------------------------------------- + +The Certificate Authority (CA) paths required for TLS/SSL verification in ``libcurl`` can be explicitly specified using the following environment variables in order of overriding priority: + + * ``CURL_CA_BUNDLE`` (also used in the ``curl`` program) or ``SSL_CERT_FILE`` (also used in OpenSSL): Specifies the CA certificate bundle file location. + * ``SSL_CERT_DIR`` (also used in OpenSSL): Specifies the CA certificate directory. + +When neither is specified, KvikIO searches several standard system locations for the CA file and directory, and if the search fails falls back to the libcurl compile-time defaults.