Skip to content
2 changes: 1 addition & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ set(SOURCES

if(KvikIO_REMOTE_SUPPORT)
list(APPEND SOURCES "src/hdfs.cpp" "src/remote_handle.cpp" "src/detail/remote_handle.cpp"
"src/detail/url.cpp" "src/shim/libcurl.cpp"
"src/detail/tls.cpp" "src/detail/url.cpp" "src/shim/libcurl.cpp"
)
endif()

Expand Down
58 changes: 58 additions & 0 deletions cpp/include/kvikio/detail/tls.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Copyright (c) 2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <optional>
#include <string>
#include <utility>

#include <kvikio/shim/libcurl.hpp>

namespace kvikio::detail {
/**
 * @brief Search for the CA bundle file and directory paths
 *
 * This function searches for the Certificate Authority (CA) paths required for TLS/SSL verification
 * in libcurl. The search is performed in the following priority order, returning as soon as either
 * a bundle file or a directory is found:
 * - CA bundle file: Check env vars CURL_CA_BUNDLE, SSL_CERT_FILE
 * - CA directory: Check env var SSL_CERT_DIR
 * - CA bundle file: Search distribution-specific locations for accessible bundle
 * - CA directory: Search distribution-specific locations for accessible directory
 * - CA bundle file: Check if curl's compile-time default bundle path is accessible
 * - CA directory: Check if curl's compile-time default directory path is accessible
 *
 * Paths taken from environment variables are returned as given, without checking that they exist
 * or are readable. Paths from the distribution-specific locations and from curl's compile-time
 * defaults are only returned if the calling process has read access to them.
 *
 * @return Pair of optional paths: `first` is the CA bundle file, `second` is the CA certificate
 * directory. Because the search stops at the first hit, exactly one of the two holds a value on
 * return.
 *
 * @exception std::runtime_error if the curl version information cannot be obtained, or if neither
 * CA bundle nor directory is found
 *
 * @note Environment Variables:
 * - CURL_CA_BUNDLE: Override CA bundle file location (curl-specific)
 * - SSL_CERT_FILE: Override CA bundle file location (OpenSSL-compatible)
 * - SSL_CERT_DIR: Override CA directory location (OpenSSL-compatible)
 */
std::pair<std::optional<std::string>, std::optional<std::string>> get_ca_paths();

/**
 * @brief Configure curl handle with discovered CA certificate paths
 *
 * As a performance optimization, the discovered CA certificate paths are cached to avoid repeated
 * searching: the search (see get_ca_paths()) runs on the first call only and its result is reused
 * by every later call.
 *
 * If a CA bundle file was discovered, CURLOPT_CAINFO is set to it and CURLOPT_CAPATH is cleared.
 * Otherwise, if a CA directory was discovered, CURLOPT_CAPATH is set to it and CURLOPT_CAINFO is
 * cleared.
 *
 * @param curl Curl handle to configure with CA certificate paths
 *
 * @exception std::runtime_error propagated from get_ca_paths() when the paths are first searched
 */
void set_up_ca_paths(CurlHandle& curl);
} // namespace kvikio::detail
150 changes: 150 additions & 0 deletions cpp/src/detail/tls.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
/*
* Copyright (c) 2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <unistd.h>

#include <cstdlib>
#include <optional>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include <curl/curl.h>

#include <kvikio/detail/tls.hpp>
#include <kvikio/error.hpp>
#include <kvikio/shim/libcurl.hpp>

namespace kvikio::detail {

namespace {
/**
* @brief Search for a CA certificate path using environment variables
*
* @param env_vars Environment variable names to check in order
* @return Path string if found in any environment variable, std::nullopt otherwise
*/
/**
 * @brief Look up a CA certificate path in a prioritized list of environment variables
 *
 * The variables are inspected in the order given and the value of the first one that is set to a
 * non-empty string is returned. The value is returned as-is; it is not checked for existence or
 * readability.
 *
 * A variable that is set but empty (e.g. `CURL_CA_BUNDLE=`) is treated the same as an unset one.
 * Otherwise the empty string would be reported as a "found" path, which would shadow every
 * lower-priority source and hand libcurl an unusable CA location.
 *
 * @param env_vars Environment variable names to check in order
 * @return Value of the first variable that is set and non-empty, std::nullopt otherwise
 */
std::optional<std::string> find_ca_path_from_env_var(std::vector<std::string> const& env_vars)
{
  for (auto const& env_var : env_vars) {
    auto const* path = std::getenv(env_var.c_str());
    // Skip variables that are unset (nullptr) or set to the empty string.
    if (path != nullptr && path[0] != '\0') { return path; }
  }

  return std::nullopt;
}

/**
* @brief Search for a CA certificate path in standard system locations
*
* @param system_paths file system paths to check in order
* @return First accessible path if found, std::nullopt otherwise
*/
/**
 * @brief Pick the first readable entry from a list of candidate file system paths
 *
 * @param system_paths Candidate file or directory paths, highest priority first
 * @return The first candidate the calling process can read, std::nullopt if none qualifies
 */
std::optional<std::string> find_ca_path_in_system_locations(
  std::vector<std::string> const& system_paths)
{
  for (auto const& candidate : system_paths) {
    // access() reports whether the path exists and grants read permission to the calling process's
    // real UID and GID; symbolic links are dereferenced. It yields 0 on success and -1 on failure.
    bool const is_readable = (access(candidate.c_str(), R_OK) == 0);
    if (!is_readable) { continue; }

    return candidate;
  }

  return std::nullopt;
}

/**
* @brief Get CA certificate path from curl's compile-time defaults
*
* @param default_path Path provided by curl_version_info (may be nullptr)
* @return Path string if accessible, std::nullopt otherwise
*/
/**
 * @brief Validate a CA certificate path taken from curl's compile-time defaults
 *
 * @param default_path Path provided by curl_version_info (may be nullptr)
 * @return The path as a string if it is non-null and readable by the calling process,
 * std::nullopt otherwise
 */
std::optional<std::string> get_ca_path_from_curl_defaults(char const* default_path)
{
  // No default was supplied at all.
  if (default_path == nullptr) { return std::nullopt; }

  // A default exists but cannot be read by this process.
  if (access(default_path, R_OK) == -1) { return std::nullopt; }

  return default_path;
}
} // namespace

std::pair<std::optional<std::string>, std::optional<std::string>> get_ca_paths()
{
auto* version_info = curl_version_info(::CURLVERSION_NOW);
KVIKIO_EXPECT(version_info != nullptr, "Failed to get curl version info", std::runtime_error);

std::optional<std::string> ca_bundle_file;
std::optional<std::string> ca_directory;

// Priority 1: CA bundle file from environment variables
ca_bundle_file = find_ca_path_from_env_var({
"CURL_CA_BUNDLE", // curl program
"SSL_CERT_FILE" // OpenSSL
});
if (ca_bundle_file.has_value()) { return {ca_bundle_file, ca_directory}; }

// Priority 2: CA directory from environment variables
ca_directory = find_ca_path_from_env_var({
"SSL_CERT_DIR" // OpenSSL
});
if (ca_directory.has_value()) { return {ca_bundle_file, ca_directory}; }

// Priority 3: CA bundle file from system locations
ca_bundle_file = find_ca_path_in_system_locations(
{"/etc/ssl/certs/ca-certificates.crt", // Debian/Ubuntu, Arch, Alpine, Gentoo
"/etc/pki/tls/certs/ca-bundle.crt", // RHEL/CentOS/Rocky/AlmaLinux, Fedora
"/etc/ssl/ca-bundle.pem", // OpenSUSE/SLES
"/etc/pki/tls/cert.pem", // RHEL-based (symlink to ca-bundle.crt)
"/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem", // Fedora 28+, RHEL 8+

// Additional locations mentioned by libcurl:
// https://github.com/curl/curl/blob/master/CMakeLists.txt
"/usr/share/ssl/certs/ca-bundle.crt",
"/usr/local/share/certs/ca-root-nss.crt",
"/etc/ssl/cert.pem"});
if (ca_bundle_file.has_value()) { return {ca_bundle_file, ca_directory}; }

// Priority 4: CA directory from system locations
ca_directory = find_ca_path_in_system_locations({
"/etc/ssl/certs", // Debian/Ubuntu, Arch, Alpine, OpenSUSE, Gentoo
"/etc/pki/tls/certs" // RHEL/CentOS/Rocky/AlmaLinux, Fedora
});
if (ca_directory.has_value()) { return {ca_bundle_file, ca_directory}; }

// Priority 5: CA bundle file from curl compile-time defaults
ca_bundle_file = get_ca_path_from_curl_defaults(version_info->cainfo);
if (ca_bundle_file.has_value()) { return {ca_bundle_file, ca_directory}; }

// Priority 6: CA directory from curl compile-time defaults
ca_directory = get_ca_path_from_curl_defaults(version_info->capath);
if (ca_directory.has_value()) { return {ca_bundle_file, ca_directory}; }

// At least one path must exist
KVIKIO_EXPECT(ca_bundle_file.has_value() || ca_directory.has_value(),
"Failed to find accessible CA certificates.",
std::runtime_error);
return {ca_bundle_file, ca_directory};
}

/**
 * @brief Apply the discovered CA certificate paths to a curl handle
 *
 * The paths are searched for once, on the first call, and the result is kept in a function-local
 * static so that later calls reuse it. If the search throws, the exception propagates to the
 * caller.
 *
 * When a CA bundle file is available, CURLOPT_CAINFO is set to it and CURLOPT_CAPATH is cleared;
 * otherwise, when a CA directory is available, CURLOPT_CAPATH is set to it and CURLOPT_CAINFO is
 * cleared.
 *
 * @param curl Curl handle to configure with CA certificate paths
 */
void set_up_ca_paths(CurlHandle& curl)
{
  // A structured binding may only be declared `static` from C++20 on. Caching the pair in an
  // ordinary static and binding references to it keeps this valid C++17.
  static auto const ca_paths                  = get_ca_paths();
  auto const& [ca_bundle_file, ca_directory] = ca_paths;

  if (ca_bundle_file.has_value()) {
    curl.setopt(CURLOPT_CAINFO, ca_bundle_file->c_str());
    curl.setopt(CURLOPT_CAPATH, nullptr);
  } else if (ca_directory.has_value()) {
    curl.setopt(CURLOPT_CAINFO, nullptr);
    curl.setopt(CURLOPT_CAPATH, ca_directory->c_str());
  }
}
} // namespace kvikio::detail
3 changes: 3 additions & 0 deletions cpp/src/shim/libcurl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include <curl/curl.h>

#include <kvikio/defaults.hpp>
#include <kvikio/detail/tls.hpp>
#include <kvikio/error.hpp>
#include <kvikio/parallel_operation.hpp>
#include <kvikio/posix_io.hpp>
Expand Down Expand Up @@ -116,6 +117,8 @@ CurlHandle::CurlHandle(LibCurl::UniqueHandlePtr handle,
// Optionally enable verbose output if it's configured.
auto const verbose = getenv_or("KVIKIO_REMOTE_VERBOSE", false);
if (verbose) { setopt(CURLOPT_VERBOSE, 1L); }

detail::set_up_ca_paths(*this);
}

CurlHandle::~CurlHandle() noexcept { LibCurl::instance().retain_handle(std::move(_handle)); }
Expand Down
1 change: 1 addition & 0 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ kvikio_add_test(NAME MMAP_TEST SOURCES test_mmap.cpp)
# These tests are only registered when KvikIO_REMOTE_SUPPORT is enabled.
if(KvikIO_REMOTE_SUPPORT)
kvikio_add_test(NAME REMOTE_HANDLE_TEST SOURCES test_remote_handle.cpp utils/env.cpp)
kvikio_add_test(NAME HDFS_TEST SOURCES test_hdfs.cpp utils/hdfs_helper.cpp)
kvikio_add_test(NAME TLS_TEST SOURCES test_tls.cpp utils/env.cpp)
kvikio_add_test(NAME URL_TEST SOURCES test_url.cpp)
endif()

Expand Down
48 changes: 48 additions & 0 deletions cpp/tests/test_tls.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
* Copyright (c) 2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <gmock/gmock.h>
#include <gtest/gtest.h>

#include <kvikio/detail/tls.hpp>

#include "utils/env.hpp"

// Verifies the priority order get_ca_paths() applies to the CA-related environment variables.
TEST(TlsTest, get_ca_paths)
{
  std::string const bundle_from_env{"ca_bundle_path"};
  std::string const directory_from_env{"ca_directory"};

  // Case 1: CURL_CA_BUNDLE has the highest priority, so both SSL_CERT_FILE and SSL_CERT_DIR are
  // expected to be ignored.
  {
    kvikio::test::EnvVarContext env_var_ctx{{"CURL_CA_BUNDLE", bundle_from_env},
                                            {"SSL_CERT_FILE", "another_ca_bundle_path"},
                                            {"SSL_CERT_DIR", directory_from_env}};
    auto const ca_paths = kvikio::detail::get_ca_paths();

    EXPECT_EQ(ca_paths.first, bundle_from_env);
    EXPECT_FALSE(ca_paths.second.has_value());
  }

  // Case 2: only SSL_CERT_DIR is specified here, so the directory is expected to be picked.
  // NOTE(review): this presumably relies on CURL_CA_BUNDLE and SSL_CERT_FILE not being visible
  // from the ambient environment while the context is active - confirm against EnvVarContext.
  {
    kvikio::test::EnvVarContext env_var_ctx{{"SSL_CERT_DIR", directory_from_env}};
    auto const ca_paths = kvikio::detail::get_ca_paths();

    EXPECT_FALSE(ca_paths.first.has_value());
    EXPECT_EQ(ca_paths.second, directory_from_env);
  }
}
10 changes: 10 additions & 0 deletions docs/source/runtime_settings.rst
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,13 @@ Set the environment variable ``KVIKIO_REMOTE_VERBOSE`` to ``true``, ``on``, ``ye
.. warning::

This may show sensitive contents from headers and data.

CA bundle file and CA directory ``CURL_CA_BUNDLE``, ``SSL_CERT_FILE``, ``SSL_CERT_DIR``
---------------------------------------------------------------------------------------

The Certificate Authority (CA) paths required for TLS/SSL verification in ``libcurl`` can be explicitly specified using the following environment variables in order of overriding priority:

* ``CURL_CA_BUNDLE`` (also used in the ``curl`` program) or ``SSL_CERT_FILE`` (also used in OpenSSL): Specifies the CA certificate bundle file location. If both are set, ``CURL_CA_BUNDLE`` takes precedence.
* ``SSL_CERT_DIR`` (also used in OpenSSL): Specifies the CA certificate directory.

When none of these variables is specified, KvikIO searches several standard system locations for an accessible CA bundle file or CA directory and, if that search fails, falls back to the libcurl compile-time defaults.
Loading