Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions doc/manual/rl-next/libcurl-content-encoding.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
synopsis: Content-Encoding decompression is now handled by libcurl
prs: [15336]
issues: [14324]
---

Transparent decompression of HTTP downloads that specify a `Content-Encoding` header is now handled by libcurl. This adds support for the previously advertised but unsupported `deflate` encoding, as well as the deprecated `x-gzip` alias.
The non-standard `xz` and `bzip2` encodings that were previously advertised are no longer supported: they do not commonly appear in the wild and should not be sent by compliant servers.

`br`, `zstd` and `gzip` continue to be supported. Distro packaging should ensure that the `libcurl` dependency is linked against the libraries required to support these encodings. The build system now requires libcurl >= 8.17.0 by default, a version that is not known to have issues around [pausing and decompression](https://github.com/curl/curl/issues/16280).
16 changes: 13 additions & 3 deletions packaging/dependencies.nix
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,19 @@ scope: {
NIX_CFLAGS_COMPILE = "-DINITIAL_MARK_STACK_SIZE=1048576";
});

curl = pkgs.curl.override {
http3Support = !pkgs.stdenv.hostPlatform.isWindows;
};
curl =
(pkgs.curl.override {
http3Support = !pkgs.stdenv.hostPlatform.isWindows;
# Make sure we enable all the dependencies for Content-Encoding/Transfer-Encoding decompression.
zstdSupport = true;
brotliSupport = true;
zlibSupport = true;
}).overrideAttrs
{
# TODO: Fix in nixpkgs. The static build with brotli is marked as broken, but that is not the case.
# Remove once https://github.com/NixOS/nixpkgs/pull/494111 lands in the 25.11 channel.
meta.broken = false;
};

libblake3 = pkgs.libblake3.override {
useTBB = !(stdenv.hostPlatform.isWindows || stdenv.hostPlatform.isStatic);
Expand Down
100 changes: 55 additions & 45 deletions src/libstore/filetransfer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
#include "nix/store/globals.hh"
#include "nix/util/config-global.hh"
#include "nix/store/store-api.hh"
#include "nix/util/compression.hh"
#include "nix/util/finally.hh"
#include "nix/util/callback.hh"
#include "nix/util/signals.hh"
Expand Down Expand Up @@ -88,14 +87,10 @@ struct curlFileTransfer : public FileTransfer
FileTransferRequest request;
FileTransferResult result;
std::unique_ptr<Activity> _act;
bool done = false; // whether either the success or failure function has been called
Callback<FileTransferResult> callback;
CURL * req = 0;
// buffer to accompany the `req` above
char errbuf[CURL_ERROR_SIZE];
bool active = false; // whether the handle has been added to the multi object
bool paused = false; // whether the request has been paused previously
bool enqueued = false; // whether the request has been added the incoming queue
std::string statusMsg;

unsigned int attempt = 0;
Expand All @@ -106,9 +101,35 @@ struct curlFileTransfer : public FileTransfer

curlSList requestHeaders;

std::string encoding;
/**
* Whether either the success or failure function has been called.
*/
bool done:1 = false;

/**
* Whether the handle has been added to the multi object.
*/
bool active:1 = false;

/**
* Whether the request has been paused previously.
*/
bool paused:1 = false;

/**
* Whether the request has been added to the incoming queue.
*/
bool enqueued:1 = false;

/**
* Whether we can use range downloads for retries.
*/
bool acceptRanges:1 = false;

bool acceptRanges = false;
/**
* Whether the response has a non-trivial (not "identity") Content-Encoding.
*/
bool hasContentEncoding:1 = false;

curl_off_t writtenToSink = 0;

Expand Down Expand Up @@ -169,15 +190,6 @@ struct curlFileTransfer : public FileTransfer
{
result.urls.push_back(request.uri.to_string());

/* Don't set Accept-Encoding for S3 requests that use AWS SigV4 signing.
curl's SigV4 implementation signs all headers including Accept-Encoding,
but some S3-compatible services (like GCS) modify this header in transit,
causing signature verification to fail.
See https://github.com/NixOS/nix/issues/15019 */
#if NIX_WITH_AWS_AUTH
if (!request.awsSigV4Provider)
#endif
appendHeaders("Accept-Encoding: zstd, br, gzip, deflate, bzip2, xz");
if (!request.expectedETag.empty())
appendHeaders("If-None-Match: " + request.expectedETag);
if (!request.mimeType.empty())
Expand Down Expand Up @@ -225,7 +237,6 @@ struct curlFileTransfer : public FileTransfer
}

LambdaSink finalSink;
std::shared_ptr<FinishSink> decompressionSink;
std::optional<StringSink> errorSink;

std::exception_ptr callbackException;
Expand All @@ -235,18 +246,15 @@ struct curlFileTransfer : public FileTransfer
size_t realSize = size * nmemb;
result.bodySize += realSize;

if (!decompressionSink) {
decompressionSink = makeDecompressionSink(encoding, finalSink);
if (!successfulStatuses.count(getHTTPStatus())) {
// In this case we want to construct a TeeSink, to keep
// the response around (which we figure won't be big
// like an actual download should be) to improve error
// messages.
errorSink = StringSink{};
}
if (!errorSink && !successfulStatuses.count(getHTTPStatus())) {
// In this case we want to construct a TeeSink, to keep
// the response around (which we figure won't be big
// like an actual download should be) to improve error
// messages.
errorSink = StringSink{};
}

(*decompressionSink)({(char *) contents, realSize});
finalSink({static_cast<const char *>(contents), realSize});
if (paused) {
/* The callback has signaled that the transfer needs to be
paused. Already consumed data won't be returned twice unlike
Expand Down Expand Up @@ -288,7 +296,7 @@ struct curlFileTransfer : public FileTransfer
result.bodySize = 0;
statusMsg = trim(match.str(1));
acceptRanges = false;
encoding = "";
hasContentEncoding = false;
appendCurrentUrl();
} else {

Expand All @@ -311,8 +319,10 @@ struct curlFileTransfer : public FileTransfer
}
}

else if (name == "content-encoding")
encoding = trim(line.substr(i + 1));
else if (name == "content-encoding") {
auto value = toLower(trim(line.substr(i + 1)));
hasContentEncoding = !value.empty() && value != "identity";
}

else if (name == "accept-ranges" && toLower(trim(line.substr(i + 1))) == "bytes")
acceptRanges = true;
Expand All @@ -330,14 +340,10 @@ struct curlFileTransfer : public FileTransfer
}
return realSize;
} catch (...) {
#if LIBCURL_VERSION_NUM >= 0x075700
/* https://curl.se/libcurl/c/CURLOPT_HEADERFUNCTION.html:
You can also abort the transfer by returning CURL_WRITEFUNC_ERROR. */
callbackException = std::current_exception();
return CURL_WRITEFUNC_ERROR;
#else
return realSize;
#endif
}

static size_t headerCallbackWrapper(void * contents, size_t size, size_t nmemb, void * userp)
Expand Down Expand Up @@ -475,6 +481,16 @@ struct curlFileTransfer : public FileTransfer
}

curl_easy_setopt(req, CURLOPT_URL, request.uri.to_string().c_str());

/* Enable transparent decompression for downloads.
Skip for uploads (Accept-Encoding is meaningless when sending data)
and when resuming from an offset (byte ranges don't work with
compressed content). */
if (writtenToSink == 0 && !request.data)
/* Empty string means to enable all supported (that libcurl has
been linked to support) encodings. */
curl_easy_setopt(req, CURLOPT_ACCEPT_ENCODING, "");

curl_easy_setopt(req, CURLOPT_FOLLOWLOCATION, 1L);
curl_easy_setopt(req, CURLOPT_MAXREDIRS, 10);
curl_easy_setopt(req, CURLOPT_NOSIGNAL, 1);
Expand Down Expand Up @@ -621,14 +637,6 @@ struct curlFileTransfer : public FileTransfer

appendCurrentUrl();

if (decompressionSink) {
try {
decompressionSink->finish();
} catch (...) {
callbackException = std::current_exception();
}
}

if (code == CURLE_WRITE_ERROR && result.etag == request.expectedETag) {
code = CURLE_OK;
httpStatus = 304;
Expand All @@ -646,7 +654,9 @@ struct curlFileTransfer : public FileTransfer
if (httpStatus == 304 && result.etag == "")
result.etag = request.expectedETag;

act().progress(result.bodySize, result.bodySize);
curl_off_t dlSize = 0;
curl_easy_getinfo(req, CURLINFO_SIZE_DOWNLOAD_T, &dlSize);
act().progress(dlSize, dlSize);
done = true;
callback(std::move(result));
}
Expand Down Expand Up @@ -695,6 +705,7 @@ struct curlFileTransfer : public FileTransfer
case CURLE_TOO_MANY_REDIRECTS:
case CURLE_WRITE_ERROR:
case CURLE_UNSUPPORTED_PROTOCOL:
case CURLE_BAD_CONTENT_ENCODING:
err = Misc;
break;
default: // Shut up warnings
Expand Down Expand Up @@ -738,15 +749,14 @@ struct curlFileTransfer : public FileTransfer
sink, we can only retry if the server supports
ranged requests. */
if (err == Transient && attempt < fileTransfer.settings.tries
&& (!this->request.dataCallback || writtenToSink == 0 || (acceptRanges && encoding.empty()))) {
&& (!this->request.dataCallback || writtenToSink == 0 || (acceptRanges && !hasContentEncoding))) {
int ms = retryTimeMs
* std::pow(
2.0f, attempt - 1 + std::uniform_real_distribution<>(0.0, 0.5)(fileTransfer.mt19937));
if (writtenToSink)
warn("%s; retrying from offset %d in %d ms", exc.what(), writtenToSink, ms);
else
warn("%s; retrying in %d ms", exc.what(), ms);
decompressionSink.reset();
errorSink.reset();
embargo = std::chrono::steady_clock::now() + std::chrono::milliseconds(ms);
try {
Expand Down
15 changes: 4 additions & 11 deletions src/libstore/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -115,17 +115,10 @@ boost = dependency(
# put in `deps_other`.
deps_other += boost

curl = dependency('libcurl', 'curl', version : '>= 7.75.0')
if curl.version().version_compare('>=8.16.0') and curl.version().version_compare(
'<8.17.0',
)
# Out of precaution, avoid building with libcurl version that suffer from https://github.com/curl/curl/issues/19334.
error(
'curl @0@ has issues with write pausing, please use libcurl < 8.16 or >= 8.17, see https://github.com/curl/curl/issues/19334'.format(
curl.version(),
),
)
endif
# This is quite new, but curl has a bunch of known issues with write pausing and decompression.
# Please use libcurl >= 8.17. See https://github.com/curl/curl/issues/19334, https://github.com/curl/curl/issues/16280, https://github.com/curl/curl/issues/16955 if in doubt.
# Patch out this check at your own risk.
curl = dependency('libcurl', 'curl', version : '>= 8.17.0')

deps_private += curl

Expand Down
4 changes: 2 additions & 2 deletions tests/nixos/content-encoding.nix
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ in

# Serve .narinfo files with gzip encoding
location ~ \.narinfo$ {
add_header Content-Encoding gzip;
add_header Content-Encoding x-gzip;
default_type "text/x-nix-narinfo";
}

Expand Down Expand Up @@ -172,7 +172,7 @@ in

# Check Content-Encoding headers on the download endpoint
narinfo_headers = machine.succeed(f"curl -I http://localhost/cache/{narinfoHash}.narinfo 2>&1")
assert "content-encoding: gzip" in narinfo_headers.lower(), f"Expected 'content-encoding: gzip' for .narinfo file, but headers were: {narinfo_headers}"
assert "content-encoding: x-gzip" in narinfo_headers.lower(), f"Expected 'content-encoding: x-gzip' for .narinfo file, but headers were: {narinfo_headers}"

ls_headers = machine.succeed(f"curl -I http://localhost/cache/{narinfoHash}.ls 2>&1")
assert "content-encoding: gzip" in ls_headers.lower(), f"Expected 'content-encoding: gzip' for .ls file, but headers were: {ls_headers}"
Expand Down
16 changes: 8 additions & 8 deletions tests/nixos/s3-binary-cache-store.nix
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ in
def verify_no_compression(machine, bucket, object_path):
"""Verify S3 object has no compression headers"""
stat = machine.succeed(f"mc stat minio/{bucket}/{object_path}")
if "Content-Encoding" in stat and ("gzip" in stat or "xz" in stat):
if "Content-Encoding" in stat and ("gzip" in stat or "br" in stat):
print(f"mc stat output for {object_path}:")
print(stat)
raise Exception(f"Object {object_path} should not have compression Content-Encoding")
Expand Down Expand Up @@ -537,20 +537,20 @@ in

@setup_s3()
def test_compression_mixed(bucket):
"""Test mixed compression (narinfo=xz, ls=gzip)"""
print("\n=== Testing Compression: mixed (narinfo=xz, ls=gzip) ===")
"""Test mixed compression (narinfo=br, ls=gzip)"""
print("\n=== Testing Compression: mixed (narinfo=br, ls=gzip) ===")

store_url = make_s3_url(
bucket,
**{'narinfo-compression': 'xz', 'write-nar-listing': 'true', 'ls-compression': 'gzip'}
**{'narinfo-compression': 'br', 'write-nar-listing': 'true', 'ls-compression': 'gzip'}
)

server.succeed(f"{ENV_WITH_CREDS} nix copy --to '{store_url}' {PKGS['C']}")

pkg_hash = get_package_hash(PKGS['C'])

# Verify .narinfo has xz compression
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think using xz here was clearly a mistake, because no other http client can actually decompress such stuff...

verify_content_encoding(server, bucket, f"{pkg_hash}.narinfo", "xz")
# Verify .narinfo has br compression
verify_content_encoding(server, bucket, f"{pkg_hash}.narinfo", "br")

# Verify .ls has gzip compression
verify_content_encoding(server, bucket, f"{pkg_hash}.ls", "gzip")
Expand Down Expand Up @@ -673,7 +673,7 @@ in
).strip()

chunk_size = 5 * 1024 * 1024
expected_parts = 3 # 10 MB raw becomes ~10.5 MB compressed (NAR + xz overhead)
expected_parts = 3 # 10 MB raw becomes ~10.5 MB compressed (NAR + br overhead)

store_url = make_s3_url(
bucket,
Expand Down Expand Up @@ -755,7 +755,7 @@ in
"multipart-upload": "true",
"multipart-threshold": str(5 * 1024 * 1024),
"multipart-chunk-size": str(5 * 1024 * 1024),
"log-compression": "xz",
"log-compression": "br",
}
)

Expand Down
Loading