-
-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Uses std::optional<CompressionAlgo> enum in NarInfo #15238
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,5 @@ | ||
| #include "nix/store/nar-info-disk-cache.hh" | ||
| #include "nix/util/compression-algo.hh" | ||
| #include "nix/util/users.hh" | ||
| #include "nix/util/sync.hh" | ||
| #include "nix/store/sqlite.hh" | ||
|
|
@@ -269,7 +270,7 @@ struct NarInfoDiskCacheImpl : NarInfoDiskCache | |
| auto narInfo = make_ref<NarInfo>( | ||
| cache.storeDir, StorePath(hashPart + "-" + namePart), Hash::parseAnyPrefixed(queryNAR.getStr(6))); | ||
| narInfo->url = queryNAR.getStr(2); | ||
| narInfo->compression = queryNAR.getStr(3); | ||
| narInfo->compression = parseCompressionAlgo(queryNAR.getStr(3)); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Out of caution, shouldn't we continue treating empty strings as
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe drop a FIXME so that we could remove that later? |
||
| if (!queryNAR.isNull(4)) | ||
| narInfo->fileHash = Hash::parseAnyPrefixed(queryNAR.getStr(4)); | ||
| narInfo->fileSize = queryNAR.getInt(5); | ||
|
|
@@ -334,7 +335,8 @@ struct NarInfoDiskCacheImpl : NarInfoDiskCache | |
|
|
||
| state->insertNAR | ||
| .use()(cache.id)(hashPart) (std::string(info->path.name()))( | ||
| narInfo ? narInfo->url : "", narInfo != 0)(narInfo ? narInfo->compression : "", narInfo != 0)( | ||
| narInfo ? narInfo->url : "", | ||
| narInfo != 0)(narInfo ? showCompressionAlgo(narInfo->compression.value()) : "", narInfo != 0)( | ||
| narInfo && narInfo->fileHash ? narInfo->fileHash->to_string(HashFormat::Nix32, true) : "", | ||
| narInfo && narInfo->fileHash)( | ||
| narInfo ? narInfo->fileSize : 0, narInfo != 0 && narInfo->fileSize)(info->narHash.to_string( | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,7 +16,8 @@ TEST(compress, noneMethodDoesNothingToTheInput) | |
|
|
||
| TEST(decompress, decompressNoneCompressed) | ||
| { | ||
| auto method = "none"; | ||
|
|
||
| auto method = CompressionAlgo::none; | ||
| auto str = "slfja;sljfklsa;jfklsjfkl;sdjfkl;sadjfkl;sdjf;lsdfjsadlf"; | ||
| auto o = decompress(method, str); | ||
|
|
||
|
|
@@ -27,7 +28,7 @@ TEST(decompress, decompressEmptyCompressed) | |
| { | ||
| // Empty-method decompression used e.g. by S3 store | ||
| // (Content-Encoding == ""). | ||
| auto method = ""; | ||
| auto method = CompressionAlgo::none; // Do we handle this in S3 store??? | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What about cases where we have Content-Encoding as Empty string? Do we want to map it to CompressionAlgo::none? |
||
| auto str = "slfja;sljfklsa;jfklsjfkl;sdjfkl;sadjfkl;sdjf;lsdfjsadlf"; | ||
| auto o = decompress(method, str); | ||
|
|
||
|
|
@@ -36,7 +37,7 @@ TEST(decompress, decompressEmptyCompressed) | |
|
|
||
| TEST(decompress, decompressXzCompressed) | ||
| { | ||
| auto method = "xz"; | ||
| auto method = CompressionAlgo::xz; | ||
| auto str = "slfja;sljfklsa;jfklsjfkl;sdjfkl;sadjfkl;sdjf;lsdfjsadlf"; | ||
| auto o = decompress(method, compress(CompressionAlgo::xz, str)); | ||
|
|
||
|
|
@@ -45,7 +46,7 @@ TEST(decompress, decompressXzCompressed) | |
|
|
||
| TEST(decompress, decompressBzip2Compressed) | ||
| { | ||
| auto method = "bzip2"; | ||
| auto method = CompressionAlgo::bzip2; | ||
| auto str = "slfja;sljfklsa;jfklsjfkl;sdjfkl;sadjfkl;sdjf;lsdfjsadlf"; | ||
| auto o = decompress(method, compress(CompressionAlgo::bzip2, str)); | ||
|
|
||
|
|
@@ -54,7 +55,7 @@ TEST(decompress, decompressBzip2Compressed) | |
|
|
||
| TEST(decompress, decompressBrCompressed) | ||
| { | ||
| auto method = "br"; | ||
| auto method = CompressionAlgo::brotli; | ||
| auto str = "slfja;sljfklsa;jfklsjfkl;sdjfkl;sadjfkl;sdjf;lsdfjsadlf"; | ||
| auto o = decompress(method, compress(CompressionAlgo::brotli, str)); | ||
|
|
||
|
|
@@ -63,7 +64,7 @@ TEST(decompress, decompressBrCompressed) | |
|
|
||
| TEST(decompress, decompressInvalidInputThrowsCompressionError) | ||
| { | ||
| auto method = "bzip2"; | ||
| auto method = CompressionAlgo::bzip2; | ||
| auto str = "this is a string that does not qualify as valid bzip2 data"; | ||
|
|
||
| ASSERT_THROW(decompress(method, str), CompressionError); | ||
|
|
@@ -88,7 +89,7 @@ TEST(makeCompressionSink, compressAndDecompress) | |
| { | ||
| StringSink strSink; | ||
| auto inputString = "slfja;sljfklsa;jfklsjfkl;sdjfkl;sadjfkl;sdjf;lsdfjsadlf"; | ||
| auto decompressionSink = makeDecompressionSink("bzip2", strSink); | ||
| auto decompressionSink = makeDecompressionSink(CompressionAlgo::bzip2, strSink); | ||
| auto sink = makeCompressionSink(CompressionAlgo::bzip2, *decompressionSink); | ||
|
|
||
| (*sink)(inputString); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,6 +7,7 @@ | |
|
|
||
| namespace nix { | ||
|
|
||
| // Do we want to add Identity to the list??? | ||
| #define NIX_FOR_EACH_COMPRESSION_ALGO(MACRO) \ | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we want to add Content-Encoding might contain identity as part of http header.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thing is that Content-Encoding should be parsed by a very separate function. It's specified by HTTP spec (or rather refers to another RFC), is case-insensitive and has a much more limited range of supported values + some legacy ones like x-gzip. I was going to rewrite this code to better handle Content-Encodinf in a more compliant way. Better to not use the parsing logic here for now. I suppose I can put up a PR to address that soon and we can rebase your one on top of that? Or could you hold off on replacing strings with enums in the filetransfer code for now?
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would this be a major change or is it something I can look into and implement? If you guide me on any resources, I can take a shot at it.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it would be pretty small - it would just take some of your changes from this PR for the decompression sink. |
||
| MACRO("none", none) \ | ||
| MACRO("br", brotli) \ | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,8 @@ | ||
| #include <archive.h> | ||
| #include <archive_entry.h> | ||
| #include <optional> | ||
|
|
||
| #include "nix/util/compression-algo.hh" | ||
| #include "nix/util/finally.hh" | ||
| #include "nix/util/serialise.hh" | ||
| #include "nix/util/tarfile.hh" | ||
|
|
@@ -57,11 +59,12 @@ void TarArchive::check(int err, const std::string & reason) | |
| /// Instead it's necessary to use this kludge to convert method -> code and | ||
| /// then use archive_read_support_filter_by_code. Arguably this is better than | ||
| /// hand-rolling the equivalent function that is better implemented in libarchive. | ||
| int getArchiveFilterCodeByName(const std::string & method) | ||
| int getArchiveFilterCodeByName(const std::optional<CompressionAlgo> & method) | ||
| { | ||
| auto * ar = archive_write_new(); | ||
| auto cleanup = Finally{[&ar]() { checkLibArchive(ar, archive_write_close(ar), "failed to close archive: %s"); }}; | ||
| auto err = archive_write_add_filter_by_name(ar, method.c_str()); | ||
| auto err = archive_write_add_filter_by_name( | ||
| ar, showCompressionAlgo(method.value()).c_str()); // method.value_or(CompressionAlgo::none) | ||
| checkLibArchive(ar, err, "failed to get libarchive filter by name: %s"); | ||
| auto code = archive_filter_code(ar, 0); | ||
| return code; | ||
|
Comment on lines
64
to
70
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hm, not this whole function should be possible to delete. It was more of a hack anyway.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. By this I mean that we can replace this with direct calls to |
||
|
|
@@ -78,7 +81,7 @@ static void enableSupportedFormats(struct archive * archive) | |
| archive_read_support_format_empty(archive); | ||
| } | ||
|
|
||
| TarArchive::TarArchive(Source & source, bool raw, std::optional<std::string> compression_method) | ||
| TarArchive::TarArchive(Source & source, bool raw, std::optional<CompressionAlgo> compression_method) | ||
| : archive{archive_read_new()} | ||
| , source{&source} | ||
| , buffer(defaultBufferSize) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@xokdvium, I'm planning to create a function here that parses the content-encoding and returns the CompressionAlgo enum.
Is there an exhaustive list of all the content-encoding? I only see 3 tokens being used in the RFC spec
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's all quite messy, and we are certainly not spec compliant here – we'd want to tighten up our handling here. Good news is: there are not so many non-standard encodings that we'd want to continue supporting.
Basically (all case insensitive parsing):
gzip,x-gzip,compress,x-compress(those are in the spec),deflate(doesn't seem like we can support this via libarchive),br,zstd. As for non-standard ones, we can probably stick with supportingbzip2, since it seemingly did appear in practice.xzand everything should probably error out (but we do for whatever reason send it in Accept-Encoding - that was clearly a mistake). If we want to be extra cautious, we can probably keep acceptingxzand issue a warning.Everything else also mostly doesnt work, because we currently error out if libarchive tries to decompress/compress something that hasnt been linked with appropriate dependencies (it tries to fall back to an external program, issues a warning and we treat it as an error).
Stacked encodings when multiple filters are applied (like
Content-Encoding: gzip, deflate) should also error out, but can think about supporting those in the future. That will require having multiple chained decompression sinks.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also
identity.noneand empty strings should be rejected - those are the compression algo encodings used by the NAR compression algorithms.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Makes sense. I’ll open another PR to parse Content-Encoding (case-insensitive) and support:
gzip, x-gzip, compress, x-compress, br, zstd, bzip2, and identity.
I’ll error out on unknown encodings and reject stacked encodings for now.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That you so much! This has been on my radar for a while! Thanks for tackling it ❤️