diff --git a/doc/manual/package.nix b/doc/manual/package.nix index eb20f8714bb..140fa98494a 100644 --- a/doc/manual/package.nix +++ b/doc/manual/package.nix @@ -35,6 +35,7 @@ mkMesonDerivation (finalAttrs: { ../../.version # For example JSON ../../src/libutil-tests/data/hash + ../../src/libstore-tests/data/content-address ../../src/libstore-tests/data/derived-path # Too many different types of files to filter for now ../../doc/manual diff --git a/doc/manual/source/SUMMARY.md.in b/doc/manual/source/SUMMARY.md.in index b4796f652b2..abd9422cd40 100644 --- a/doc/manual/source/SUMMARY.md.in +++ b/doc/manual/source/SUMMARY.md.in @@ -118,6 +118,7 @@ - [Formats and Protocols](protocols/index.md) - [JSON Formats](protocols/json/index.md) - [Hash](protocols/json/hash.md) + - [Content Address](protocols/json/content-address.md) - [Store Object Info](protocols/json/store-object-info.md) - [Derivation](protocols/json/derivation.md) - [Deriving Path](protocols/json/deriving-path.md) diff --git a/doc/manual/source/protocols/json/content-address.md b/doc/manual/source/protocols/json/content-address.md new file mode 100644 index 00000000000..2284e30aa6d --- /dev/null +++ b/doc/manual/source/protocols/json/content-address.md @@ -0,0 +1,21 @@ +{{#include content-address-v1-fixed.md}} + +## Examples + +### [Text](@docroot@/store/store-object/content-address.html#method-text) method + +```json +{{#include schema/content-address-v1/text.json}} +``` + +### [Nix Archive](@docroot@/store/store-object/content-address.html#method-nix-archive) method + +```json +{{#include schema/content-address-v1/nar.json}} +``` + + diff --git a/doc/manual/source/protocols/json/fixup-json-schema-generated-doc.sed b/doc/manual/source/protocols/json/fixup-json-schema-generated-doc.sed index 126e666e9c0..27895d42a03 100644 --- a/doc/manual/source/protocols/json/fixup-json-schema-generated-doc.sed +++ b/doc/manual/source/protocols/json/fixup-json-schema-generated-doc.sed @@ -12,3 +12,6 @@ s/\\`/`/g # As we have more such relative links, more replacements of this nature # should appear below. s^\(./hash-v1.yaml\)\?#/$defs/algorithm^[JSON format for `Hash`](./hash.html#algorithm)^g +s^\(./hash-v1.yaml\)^[JSON format for `Hash`](./hash.html)^g +s^\(./content-address-v1.yaml\)\?#/$defs/method^[JSON format for `ContentAddress`](./content-address.html#method)^g +s^\(./content-address-v1.yaml\)^[JSON format for `ContentAddress`](./content-address.html)^g diff --git a/doc/manual/source/protocols/json/meson.build b/doc/manual/source/protocols/json/meson.build index 191ec6dbede..f7966796116 100644 --- a/doc/manual/source/protocols/json/meson.build +++ b/doc/manual/source/protocols/json/meson.build @@ -10,6 +10,7 @@ json_schema_config = files('json-schema-for-humans-config.yaml') schemas = [ 'hash-v1', + 'content-address-v1', 'derivation-v3', 'deriving-path-v1', ] diff --git a/doc/manual/source/protocols/json/schema/content-address-v1 b/doc/manual/source/protocols/json/schema/content-address-v1 new file mode 120000 index 00000000000..35a0dd865d4 --- /dev/null +++ b/doc/manual/source/protocols/json/schema/content-address-v1 @@ -0,0 +1 @@ +../../../../../../src/libstore-tests/data/content-address \ No newline at end of file diff --git a/doc/manual/source/protocols/json/schema/content-address-v1.yaml b/doc/manual/source/protocols/json/schema/content-address-v1.yaml new file mode 100644 index 00000000000..d0f75920184 --- /dev/null +++ b/doc/manual/source/protocols/json/schema/content-address-v1.yaml @@ -0,0 +1,55 @@ +"$schema": "http://json-schema.org/draft-04/schema" +"$id": "https://nix.dev/manual/nix/latest/protocols/json/schema/content-address-v1.json" +title: Content Address +description: | + This schema describes the JSON representation of Nix's `ContentAddress` type, which conveys information about [content-addressing store objects](@docroot@/store/store-object/content-address.md). + + > **Note** + > + > For current methods of content addressing, this data type is a bit suspicious, because it is neither simply a content address of a file system object (the `method` is richer), nor simply a content address of a store object (the `hash` doesn't account for the references). + > It should thus only be used in contexts where the references are also known / otherwise made tamper-resistant. + + + +type: object +properties: + method: + "$ref": "#/$defs/method" + hash: + title: Content Address + description: | + This would be the content-address itself. + + For all current methods, this is just a content address of the file system object of the store object, [as described in the store chapter](@docroot@/store/file-system-object/content-address.md), and not of the store object as a whole. + In particular, the references of the store object are *not* taken into account with this hash (and currently-supported methods). + "$ref": "./hash-v1.yaml" +required: +- method +- hash +additionalProperties: false +"$defs": + method: + type: string + enum: [flat, nar, text, git] + title: Content-Addressing Method + description: | + A string representing the [method](@docroot@/store/store-object/content-address.md) of content addressing that is chosen. + + Valid method strings are: + + - [`flat`](@docroot@/store/store-object/content-address.md#method-flat) (provided the contents are a single file) + - [`nar`](@docroot@/store/store-object/content-address.md#method-nix-archive) + - [`text`](@docroot@/store/store-object/content-address.md#method-text) + - [`git`](@docroot@/store/store-object/content-address.md#method-git) diff --git a/doc/manual/source/protocols/json/schema/derivation-v3.yaml b/doc/manual/source/protocols/json/schema/derivation-v3.yaml index 7c92d475dde..c950b839fe9 100644 --- a/doc/manual/source/protocols/json/schema/derivation-v3.yaml +++ b/doc/manual/source/protocols/json/schema/derivation-v3.yaml @@ -1,5 +1,5 @@ -"$schema": http://json-schema.org/draft-04/schema# -"$id": https://nix.dev/manual/nix/latest/protocols/json/schema/derivation-v3.json +"$schema": "http://json-schema.org/draft-04/schema" +"$id": "https://nix.dev/manual/nix/latest/protocols/json/schema/derivation-v3.json" title: Derivation description: | Experimental JSON representation of a Nix derivation (version 3). @@ -154,19 +154,10 @@ properties: The output path, if known in advance. method: - type: string - title: Content addressing method - enum: [flat, nar, text, git] + "$ref": "./content-address-v1.yaml#/$defs/method" description: | For an output which will be [content addressed](@docroot@/store/derivation/outputs/content-address.md), a string representing the [method](@docroot@/store/store-object/content-address.md) of content addressing that is chosen. - - Valid method strings are: - - - [`flat`](@docroot@/store/store-object/content-address.md#method-flat) - - [`nar`](@docroot@/store/store-object/content-address.md#method-nix-archive) - - [`text`](@docroot@/store/store-object/content-address.md#method-text) - - [`git`](@docroot@/store/store-object/content-address.md#method-git) - + See the linked original definition for further details. hashAlgo: title: Hash algorithm "$ref": "./hash-v1.yaml#/$defs/algorithm" diff --git a/doc/manual/source/protocols/json/schema/deriving-path-v1.yaml b/doc/manual/source/protocols/json/schema/deriving-path-v1.yaml index 9c0350d3d34..7fd74941e19 100644 --- a/doc/manual/source/protocols/json/schema/deriving-path-v1.yaml +++ b/doc/manual/source/protocols/json/schema/deriving-path-v1.yaml @@ -1,5 +1,5 @@ -"$schema": http://json-schema.org/draft-04/schema# -"$id": https://nix.dev/manual/nix/latest/protocols/json/schema/deriving-path-v1.json +"$schema": "http://json-schema.org/draft-04/schema" +"$id": "https://nix.dev/manual/nix/latest/protocols/json/schema/deriving-path-v1.json" title: Deriving Path description: | This schema describes the JSON representation of Nix's [Deriving Path](@docroot@/store/derivation/index.md#deriving-path). diff --git a/doc/manual/source/protocols/json/schema/hash-v1.yaml b/doc/manual/source/protocols/json/schema/hash-v1.yaml index 844959bcd15..316fb6d73a1 100644 --- a/doc/manual/source/protocols/json/schema/hash-v1.yaml +++ b/doc/manual/source/protocols/json/schema/hash-v1.yaml @@ -1,5 +1,5 @@ -"$schema": http://json-schema.org/draft-04/schema# -"$id": https://nix.dev/manual/nix/latest/protocols/json/schema/hash-v1.json +"$schema": "http://json-schema.org/draft-04/schema" +"$id": "https://nix.dev/manual/nix/latest/protocols/json/schema/hash-v1.json" title: Hash description: | A cryptographic hash value used throughout Nix for content addressing and integrity verification. diff --git a/src/json-schema-checks/content-address b/src/json-schema-checks/content-address new file mode 120000 index 00000000000..194a265a1f7 --- /dev/null +++ b/src/json-schema-checks/content-address @@ -0,0 +1 @@ +../../src/libstore-tests/data/content-address \ No newline at end of file diff --git a/src/json-schema-checks/meson.build b/src/json-schema-checks/meson.build index 09da8770bf8..745fb5ffa19 100644 --- a/src/json-schema-checks/meson.build +++ b/src/json-schema-checks/meson.build @@ -30,6 +30,14 @@ schemas = [ 'blake3-base64.json', ], }, + { + 'stem' : 'content-address', + 'schema' : schema_dir / 'content-address-v1.yaml', + 'files' : [ + 'text.json', + 'nar.json', + ], + }, { 'stem' : 'derivation', 'schema' : schema_dir / 'derivation-v3.yaml', @@ -73,8 +81,6 @@ foreach schema : schemas stem + '-schema-valid', jv, args : [ - '--map', - './hash-v1.yaml=' + schema_dir / 'hash-v1.yaml', 'http://json-schema.org/draft-04/schema', schema_file, ], diff --git a/src/json-schema-checks/package.nix b/src/json-schema-checks/package.nix index cf4e4cb19f5..6a76c8b2897 100644 --- a/src/json-schema-checks/package.nix +++ b/src/json-schema-checks/package.nix @@ -21,6 +21,7 @@ mkMesonDerivation (finalAttrs: { ../../.version ../../doc/manual/source/protocols/json/schema ../../src/libutil-tests/data/hash + ../../src/libstore-tests/data/content-address ../../src/libstore-tests/data/derivation ../../src/libstore-tests/data/derived-path ./. diff --git a/src/libexpr/include/nix/expr/nixexpr.hh b/src/libexpr/include/nix/expr/nixexpr.hh index 86ad0150419..26d5addd565 100644 --- a/src/libexpr/include/nix/expr/nixexpr.hh +++ b/src/libexpr/include/nix/expr/nixexpr.hh @@ -442,8 +442,14 @@ struct ExprAttrs : Expr struct ExprList : Expr { - std::vector elems; - ExprList() {}; + std::span elems; + + ExprList(std::pmr::polymorphic_allocator & alloc, std::vector exprs) + : elems({alloc.allocate_object(exprs.size()), exprs.size()}) + { + std::ranges::copy(exprs, elems.begin()); + }; + COMMON_METHODS Value * maybeThunk(EvalState & state, Env & env) override; diff --git a/src/libexpr/parser.y b/src/libexpr/parser.y index cf563e86923..51c82efe55a 100644 --- a/src/libexpr/parser.y +++ b/src/libexpr/parser.y @@ -129,7 +129,7 @@ static Expr * makeCall(PosIdx pos, Expr * fn, Expr * arg) { %type start expr expr_function expr_if expr_op %type expr_select expr_simple expr_app %type expr_pipe_from expr_pipe_into -%type expr_list +%type > list %type binds binds1 %type formals formal_set %type formal @@ -334,7 +334,7 @@ expr_simple { $2->pos = CUR_POS; $$ = $2; } | '{' '}' { $$ = new ExprAttrs(CUR_POS); } - | '[' expr_list ']' { $$ = $2; } + | '[' list ']' { $$ = new ExprList(state->alloc, std::move($2)); } ; string_parts @@ -484,9 +484,9 @@ string_attr | DOLLAR_CURLY expr '}' { $$ = $2; } ; -expr_list - : expr_list expr_select { $$ = $1; $1->elems.push_back($2); /* !!! dangerous */; $2->warnIfCursedOr(state->symbols, state->positions); } - | { $$ = new ExprList; } +list + : list expr_select { $$ = std::move($1); $$.push_back($2); /* !!! dangerous */; $2->warnIfCursedOr(state->symbols, state->positions); } + | { } ; formal_set diff --git a/src/libstore-tests/content-address.cc b/src/libstore-tests/content-address.cc index 51d591c3853..0474fb2e0c7 100644 --- a/src/libstore-tests/content-address.cc +++ b/src/libstore-tests/content-address.cc @@ -1,6 +1,7 @@ #include #include "nix/store/content-address.hh" +#include "nix/util/tests/json-characterization.hh" namespace nix { @@ -8,33 +9,93 @@ namespace nix { * ContentAddressMethod::parse, ContentAddressMethod::render * --------------------------------------------------------------------------*/ -TEST(ContentAddressMethod, testRoundTripPrintParse_1) +static auto methods = ::testing::Values( + std::pair{ContentAddressMethod::Raw::Text, "text"}, + std::pair{ContentAddressMethod::Raw::Flat, "flat"}, + std::pair{ContentAddressMethod::Raw::NixArchive, "nar"}, + std::pair{ContentAddressMethod::Raw::Git, "git"}); + +struct ContentAddressMethodTest : ::testing::Test, + ::testing::WithParamInterface> +{}; + +TEST_P(ContentAddressMethodTest, testRoundTripPrintParse_1) { - for (ContentAddressMethod cam : { - ContentAddressMethod::Raw::Text, - ContentAddressMethod::Raw::Flat, - ContentAddressMethod::Raw::NixArchive, - ContentAddressMethod::Raw::Git, - }) { - EXPECT_EQ(ContentAddressMethod::parse(cam.render()), cam); - } + auto & [cam, _] = GetParam(); + EXPECT_EQ(ContentAddressMethod::parse(cam.render()), cam); } -TEST(ContentAddressMethod, testRoundTripPrintParse_2) +TEST_P(ContentAddressMethodTest, testRoundTripPrintParse_2) { - for (const std::string_view camS : { - "text", - "flat", - "nar", - "git", - }) { - EXPECT_EQ(ContentAddressMethod::parse(camS).render(), camS); - } + auto & [cam, camS] = GetParam(); + EXPECT_EQ(ContentAddressMethod::parse(camS).render(), camS); } +INSTANTIATE_TEST_SUITE_P(ContentAddressMethod, ContentAddressMethodTest, methods); + TEST(ContentAddressMethod, testParseContentAddressMethodOptException) { EXPECT_THROW(ContentAddressMethod::parse("narwhal"), UsageError); } +/* ---------------------------------------------------------------------------- + * JSON + * --------------------------------------------------------------------------*/ + +class ContentAddressTest : public virtual CharacterizationTest +{ + std::filesystem::path unitTestData = getUnitTestData() / "content-address"; + +public: + + /** + * We set these in tests rather than the regular globals so we don't have + * to worry about race conditions if the tests run concurrently. + */ + ExperimentalFeatureSettings mockXpSettings; + + std::filesystem::path goldenMaster(std::string_view testStem) const override + { + return unitTestData / testStem; + } +}; + +using nlohmann::json; + +struct ContentAddressJsonTest : ContentAddressTest, + JsonCharacterizationTest, + ::testing::WithParamInterface> +{}; + +TEST_P(ContentAddressJsonTest, from_json) +{ + auto & [name, expected] = GetParam(); + readJsonTest(name, expected); +} + +TEST_P(ContentAddressJsonTest, to_json) +{ + auto & [name, value] = GetParam(); + writeJsonTest(name, value); +} + +INSTANTIATE_TEST_SUITE_P( + ContentAddressJSON, + ContentAddressJsonTest, + ::testing::Values( + std::pair{ + "text", + ContentAddress{ + .method = ContentAddressMethod::Raw::Text, + .hash = hashString(HashAlgorithm::SHA256, "asdf"), + }, + }, + std::pair{ + "nar", + ContentAddress{ + .method = ContentAddressMethod::Raw::NixArchive, + .hash = hashString(HashAlgorithm::SHA256, "qwer"), + }, + })); + } // namespace nix diff --git a/src/libstore-tests/data/content-address/nar.json b/src/libstore-tests/data/content-address/nar.json new file mode 100644 index 00000000000..21e065cd31e --- /dev/null +++ b/src/libstore-tests/data/content-address/nar.json @@ -0,0 +1,8 @@ +{ + "hash": { + "algorithm": "sha256", + "format": "base64", + "hash": "9vLqj0XYoFfJVmoz+ZR02i5camYE1zYSFlDicwxvsKM=" + }, + "method": "nar" +} diff --git a/src/libstore-tests/data/content-address/text.json b/src/libstore-tests/data/content-address/text.json new file mode 100644 index 00000000000..04bc8ac205c --- /dev/null +++ b/src/libstore-tests/data/content-address/text.json @@ -0,0 +1,8 @@ +{ + "hash": { + "algorithm": "sha256", + "format": "base64", + "hash": "8OTC92xYkW7CWPJGhRvqCR0U1CR6L8PhhpRGGxgW4Ts=" + }, + "method": "text" +} diff --git a/src/libstore-tests/references.cc b/src/libstore-tests/references.cc index 27ecad08fbf..9cecd573e55 100644 --- a/src/libstore-tests/references.cc +++ b/src/libstore-tests/references.cc @@ -1,4 +1,6 @@ #include "nix/store/references.hh" +#include "nix/store/path-references.hh" +#include "nix/util/memory-source-accessor.hh" #include @@ -79,4 +81,145 @@ TEST(references, scan) } } +TEST(references, scanForReferencesDeep) +{ + using File = MemorySourceAccessor::File; + + // Create store paths to search for + StorePath path1{"dc04vv14dak1c1r48qa0m23vr9jy8sm0-foo"}; + StorePath path2{"zc842j0rz61mjsp3h3wp5ly71ak6qgdn-bar"}; + StorePath path3{"a5cn2i4b83gnsm60d38l3kgb8qfplm11-baz"}; + + StorePathSet refs{path1, path2, path3}; + + std::string_view hash1 = path1.hashPart(); + std::string_view hash2 = path2.hashPart(); + std::string_view hash3 = path3.hashPart(); + + // Create an in-memory file system with various reference patterns + auto accessor = make_ref(); + accessor->root = File::Directory{ + .contents{ + { + // file1.txt: contains hash1 + "file1.txt", + File::Regular{ + .contents = "This file references " + hash1 + " in its content", + }, + }, + { + // file2.txt: contains hash2 and hash3 + "file2.txt", + File::Regular{ + .contents = "Multiple refs: " + hash2 + " and also " + hash3, + }, + }, + { + // file3.txt: contains no references + "file3.txt", + File::Regular{ + .contents = "This file has no store path references at all", + }, + }, + { + // subdir: a subdirectory + "subdir", + File::Directory{ + .contents{ + { + // subdir/file4.txt: contains hash1 again + "file4.txt", + File::Regular{ + .contents = "Subdirectory file with " + hash1, + }, + }, + }, + }, + }, + { + // link1: a symlink that contains a reference in its target + "link1", + File::Symlink{ + .target = hash2 + "-target", + }, + }, + }, + }; + + // Test the callback-based API + { + std::map foundRefs; + + scanForReferencesDeep(*accessor, CanonPath::root, refs, [&](FileRefScanResult result) { + foundRefs[std::move(result.filePath)] = std::move(result.foundRefs); + }); + + // Verify we found the expected references + EXPECT_EQ(foundRefs.size(), 4); // file1, file2, file4, link1 + + // Check file1.txt found path1 + { + CanonPath f1Path("/file1.txt"); + auto it = foundRefs.find(f1Path); + ASSERT_TRUE(it != foundRefs.end()); + EXPECT_EQ(it->second.size(), 1); + EXPECT_TRUE(it->second.count(path1)); + } + + // Check file2.txt found path2 and path3 + { + CanonPath f2Path("/file2.txt"); + auto it = foundRefs.find(f2Path); + ASSERT_TRUE(it != foundRefs.end()); + EXPECT_EQ(it->second.size(), 2); + EXPECT_TRUE(it->second.count(path2)); + EXPECT_TRUE(it->second.count(path3)); + } + + // Check file3.txt is not in results (no refs) + { + CanonPath f3Path("/file3.txt"); + EXPECT_FALSE(foundRefs.count(f3Path)); + } + + // Check subdir/file4.txt found path1 + { + CanonPath f4Path("/subdir/file4.txt"); + auto it = foundRefs.find(f4Path); + ASSERT_TRUE(it != foundRefs.end()); + EXPECT_EQ(it->second.size(), 1); + EXPECT_TRUE(it->second.count(path1)); + } + + // Check symlink found path2 + { + CanonPath linkPath("/link1"); + auto it = foundRefs.find(linkPath); + ASSERT_TRUE(it != foundRefs.end()); + EXPECT_EQ(it->second.size(), 1); + EXPECT_TRUE(it->second.count(path2)); + } + } + + // Test the map-based convenience API + { + auto results = scanForReferencesDeep(*accessor, CanonPath::root, refs); + + EXPECT_EQ(results.size(), 4); // file1, file2, file4, link1 + + // Verify all expected files are in the results + EXPECT_TRUE(results.count(CanonPath("/file1.txt"))); + EXPECT_TRUE(results.count(CanonPath("/file2.txt"))); + EXPECT_TRUE(results.count(CanonPath("/subdir/file4.txt"))); + EXPECT_TRUE(results.count(CanonPath("/link1"))); + EXPECT_FALSE(results.count(CanonPath("/file3.txt"))); + + // Verify the references found in each file are correct + EXPECT_EQ(results.at(CanonPath("/file1.txt")), StorePathSet{path1}); + EXPECT_EQ(results.at(CanonPath("/file2.txt")), StorePathSet({path2, path3})); + EXPECT_EQ(results.at(CanonPath("/subdir/file4.txt")), StorePathSet{path1}); + EXPECT_EQ(results.at(CanonPath("/link1")), StorePathSet{path2}); + } +} + } // namespace nix diff --git a/src/libstore/content-address.cc b/src/libstore/content-address.cc index 9a57e3aa618..497c2c5b47c 100644 --- a/src/libstore/content-address.cc +++ b/src/libstore/content-address.cc @@ -1,6 +1,7 @@ #include "nix/util/args.hh" #include "nix/store/content-address.hh" #include "nix/util/split.hh" +#include "nix/util/json-utils.hh" namespace nix { @@ -300,3 +301,36 @@ Hash ContentAddressWithReferences::getHash() const } } // namespace nix + +namespace nlohmann { + +using namespace nix; + +ContentAddressMethod adl_serializer::from_json(const json & json) +{ + return ContentAddressMethod::parse(getString(json)); +} + +void adl_serializer::to_json(json & json, const ContentAddressMethod & m) +{ + json = m.render(); +} + +ContentAddress adl_serializer::from_json(const json & json) +{ + auto obj = getObject(json); + return { + .method = adl_serializer::from_json(valueAt(obj, "method")), + .hash = valueAt(obj, "hash"), + }; +} + +void adl_serializer::to_json(json & json, const ContentAddress & ca) +{ + json = { + {"method", ca.method}, + {"hash", ca.hash}, + }; +} + +} // namespace nlohmann diff --git a/src/libstore/include/nix/store/content-address.hh b/src/libstore/include/nix/store/content-address.hh index 0a3dc79bd9c..41ccc69aeb3 100644 --- a/src/libstore/include/nix/store/content-address.hh +++ b/src/libstore/include/nix/store/content-address.hh @@ -6,6 +6,7 @@ #include "nix/store/path.hh" #include "nix/util/file-content-address.hh" #include "nix/util/variant-wrapper.hh" +#include "nix/util/json-impls.hh" namespace nix { @@ -308,4 +309,15 @@ struct ContentAddressWithReferences Hash getHash() const; }; +template<> +struct json_avoids_null : std::true_type +{}; + +template<> +struct json_avoids_null : std::true_type +{}; + } // namespace nix + +JSON_IMPL(nix::ContentAddressMethod) +JSON_IMPL(nix::ContentAddress) diff --git a/src/libstore/include/nix/store/path-references.hh b/src/libstore/include/nix/store/path-references.hh index 66d0da2683f..6aa506da4a3 100644 --- a/src/libstore/include/nix/store/path-references.hh +++ b/src/libstore/include/nix/store/path-references.hh @@ -3,6 +3,10 @@ #include "nix/store/references.hh" #include "nix/store/path.hh" +#include "nix/util/source-accessor.hh" + +#include +#include namespace nix { @@ -21,4 +25,57 @@ public: StorePathSet getResultPaths(); }; +/** + * Result of scanning a single file for references. + */ +struct FileRefScanResult +{ + CanonPath filePath; ///< The file that was scanned + StorePathSet foundRefs; ///< Which store paths were found in this file +}; + +/** + * Scan a store path tree and report which references appear in which files. + * + * This is like scanForReferences() but provides per-file granularity. + * Useful for cycle detection and detailed dependency analysis like `nix why-depends --precise`. + * + * The function walks the tree using the provided accessor and streams each file's + * contents through a RefScanSink to detect hash references. For each file that + * contains at least one reference, a callback is invoked with the file path and + * the set of references found. + * + * Note: This function only searches for the hash part of store paths (e.g., + * "dc04vv14dak1c1r48qa0m23vr9jy8sm0"), not the name part. A store path like + * "/nix/store/dc04vv14dak1c1r48qa0m23vr9jy8sm0-foo" will be detected if the + * hash appears anywhere in the scanned content, regardless of the "-foo" suffix. + * + * @param accessor Source accessor to read the tree + * @param rootPath Root path to scan + * @param refs Set of store paths to search for + * @param callback Called for each file that contains at least one reference + */ +void scanForReferencesDeep( + SourceAccessor & accessor, + const CanonPath & rootPath, + const StorePathSet & refs, + std::function callback); + +/** + * Scan a store path tree and return which references appear in which files. + * + * This is a convenience wrapper around the callback-based scanForReferencesDeep() + * that collects all results into a map for efficient lookups. + * + * Note: This function only searches for the hash part of store paths, not the name part. + * See the callback-based overload for details. + * + * @param accessor Source accessor to read the tree + * @param rootPath Root path to scan + * @param refs Set of store paths to search for + * @return Map from file paths to the set of references found in each file + */ +std::map +scanForReferencesDeep(SourceAccessor & accessor, const CanonPath & rootPath, const StorePathSet & refs); + } // namespace nix diff --git a/src/libstore/path-references.cc b/src/libstore/path-references.cc index 8b167e9026c..3d783bbe4be 100644 --- a/src/libstore/path-references.cc +++ b/src/libstore/path-references.cc @@ -1,11 +1,15 @@ #include "nix/store/path-references.hh" #include "nix/util/hash.hh" #include "nix/util/archive.hh" +#include "nix/util/source-accessor.hh" +#include "nix/util/canon-path.hh" +#include "nix/util/logging.hh" #include #include #include #include +#include namespace nix { @@ -54,4 +58,90 @@ StorePathSet scanForReferences(Sink & toTee, const Path & path, const StorePathS return refsSink.getResultPaths(); } +void scanForReferencesDeep( + SourceAccessor & accessor, + const CanonPath & rootPath, + const StorePathSet & refs, + std::function callback) +{ + // Recursive tree walker + auto walk = [&](this auto & self, const CanonPath & path) -> void { + auto stat = accessor.lstat(path); + + switch (stat.type) { + case SourceAccessor::tRegular: { + // Create a fresh sink for each file to independently detect references. + // RefScanSink accumulates found hashes globally - once a hash is found, + // it remains in the result set. If we reused the same sink across files, + // we couldn't distinguish which files contain which references, as a hash + // found in an earlier file wouldn't be reported when found in later files. + PathRefScanSink sink = PathRefScanSink::fromPaths(refs); + + // Scan this file by streaming its contents through the sink + accessor.readFile(path, sink); + + // Get the references found in this file + auto foundRefs = sink.getResultPaths(); + + // Report if we found anything in this file + if (!foundRefs.empty()) { + debug("scanForReferencesDeep: found %d references in %s", foundRefs.size(), path.abs()); + callback(FileRefScanResult{.filePath = path, .foundRefs = std::move(foundRefs)}); + } + break; + } + + case SourceAccessor::tDirectory: { + // Recursively scan directory contents + auto entries = accessor.readDirectory(path); + for (const auto & [name, entryType] : entries) { + self(path / name); + } + break; + } + + case SourceAccessor::tSymlink: { + // Create a fresh sink for the symlink target (same reason as regular files) + PathRefScanSink sink = PathRefScanSink::fromPaths(refs); + + // Scan symlink target for references + auto target = accessor.readLink(path); + sink(std::string_view(target)); + + // Get the references found in this symlink target + auto foundRefs = sink.getResultPaths(); + + if (!foundRefs.empty()) { + debug("scanForReferencesDeep: found %d references in symlink %s", foundRefs.size(), path.abs()); + callback(FileRefScanResult{.filePath = path, .foundRefs = std::move(foundRefs)}); + } + break; + } + + case SourceAccessor::tChar: + case SourceAccessor::tBlock: + case SourceAccessor::tSocket: + case SourceAccessor::tFifo: + case SourceAccessor::tUnknown: + default: + throw Error("file '%s' has an unsupported type", path.abs()); + } + }; + + // Start the recursive walk from the root + walk(rootPath); +} + +std::map +scanForReferencesDeep(SourceAccessor & accessor, const CanonPath & rootPath, const StorePathSet & refs) +{ + std::map results; + + scanForReferencesDeep(accessor, rootPath, refs, [&](FileRefScanResult result) { + results[std::move(result.filePath)] = std::move(result.foundRefs); + }); + + return results; +} + } // namespace nix diff --git a/src/libstore/s3-binary-cache-store.cc b/src/libstore/s3-binary-cache-store.cc index 5d97fb0fdbd..58cb727768f 100644 --- a/src/libstore/s3-binary-cache-store.cc +++ b/src/libstore/s3-binary-cache-store.cc @@ -4,6 +4,7 @@ #include #include +#include namespace nix { @@ -26,6 +27,23 @@ class S3BinaryCacheStore : public virtual HttpBinaryCacheStore private: ref s3Config; + + /** + * Creates a multipart upload for large objects to S3. + * + * @see + * https://docs.aws.amazon.com/AmazonS3/latest/API/API_CreateMultipartUpload.html#API_CreateMultipartUpload_RequestSyntax + */ + std::string createMultipartUpload( + std::string_view key, std::string_view mimeType, std::optional contentEncoding); + + /** + * Abort a multipart upload + * + * @see + * https://docs.aws.amazon.com/AmazonS3/latest/API/API_AbortMultipartUpload.html#API_AbortMultipartUpload_RequestSyntax + */ + void abortMultipartUpload(std::string_view key, std::string_view uploadId); }; void S3BinaryCacheStore::upsertFile( @@ -37,6 +55,52 @@ void S3BinaryCacheStore::upsertFile( HttpBinaryCacheStore::upsertFile(path, istream, mimeType, sizeHint); } +std::string S3BinaryCacheStore::createMultipartUpload( + std::string_view key, std::string_view mimeType, std::optional contentEncoding) +{ + auto req = makeRequest(key); + + // setupForS3() converts s3:// to https:// but strips query parameters + // So we call it first, then add our multipart parameters + req.setupForS3(); + + auto url = req.uri.parsed(); + url.query["uploads"] = ""; + req.uri = VerbatimURL(url); + + req.method = HttpMethod::POST; + req.data = ""; + req.mimeType = mimeType; + + if (contentEncoding) { + req.headers.emplace_back("Content-Encoding", *contentEncoding); + } + + auto result = getFileTransfer()->enqueueFileTransfer(req).get(); + + std::regex uploadIdRegex("([^<]+)"); + std::smatch match; + + if (std::regex_search(result.data, match, uploadIdRegex)) { + return match[1]; + } + + throw Error("S3 CreateMultipartUpload response missing "); +} + +void S3BinaryCacheStore::abortMultipartUpload(std::string_view key, std::string_view uploadId) +{ + auto req = makeRequest(key); + req.setupForS3(); + + auto url = req.uri.parsed(); + url.query["uploadId"] = uploadId; + req.uri = VerbatimURL(url); + req.method = HttpMethod::DELETE; + + getFileTransfer()->enqueueFileTransfer(req).get(); +} + StringSet S3BinaryCacheStoreConfig::uriSchemes() { return {"s3"}; diff --git a/src/nix/why-depends.cc b/src/nix/why-depends.cc index dc30fabd7ae..29da9e953e8 100644 --- a/src/nix/why-depends.cc +++ b/src/nix/why-depends.cc @@ -1,5 +1,6 @@ #include "nix/cmd/command.hh" #include "nix/store/store-api.hh" +#include "nix/store/path-references.hh" #include "nix/util/source-accessor.hh" #include "nix/main/shared.hh" @@ -191,7 +192,7 @@ struct CmdWhyDepends : SourceExprCommand, MixOperateOnOptions /* Sort the references by distance to `dependency` to ensure that the shortest path is printed first. */ std::multimap refs; - StringSet hashes; + StorePathSet refPaths; for (auto & ref : node.refs) { if (ref == node.path && packagePath != dependencyPath) @@ -200,7 +201,7 @@ struct CmdWhyDepends : SourceExprCommand, MixOperateOnOptions if (node2.dist == inf) continue; refs.emplace(node2.dist, &node2); - hashes.insert(std::string(node2.path.hashPart())); + refPaths.insert(node2.path); } /* For each reference, find the files and symlinks that @@ -209,58 +210,50 @@ struct CmdWhyDepends : SourceExprCommand, MixOperateOnOptions auto accessor = store->requireStoreObjectAccessor(node.path); - auto visitPath = [&](this auto && recur, const CanonPath & p) -> void { - auto st = accessor->maybeLstat(p); - assert(st); - - auto p2 = p.isRoot() ? p.abs() : p.rel(); - - auto getColour = [&](const std::string & hash) { - return hash == dependencyPathHash ? ANSI_GREEN : ANSI_BLUE; - }; - - if (st->type == SourceAccessor::Type::tDirectory) { - auto names = accessor->readDirectory(p); - for (auto & [name, type] : names) - recur(p / name); - } - - else if (st->type == SourceAccessor::Type::tRegular) { - auto contents = accessor->readFile(p); + auto getColour = [&](const std::string & hash) { + return hash == dependencyPathHash ? ANSI_GREEN : ANSI_BLUE; + }; - for (auto & hash : hashes) { - auto pos = contents.find(hash); - if (pos != std::string::npos) { - size_t margin = 32; - auto pos2 = pos >= margin ? pos - margin : 0; - hits[hash].emplace_back( - fmt("%s: …%s…", + if (precise) { + // Use scanForReferencesDeep to find files containing references + scanForReferencesDeep(*accessor, CanonPath::root, refPaths, [&](FileRefScanResult result) { + auto p2 = result.filePath.isRoot() ? result.filePath.abs() : result.filePath.rel(); + auto st = accessor->lstat(result.filePath); + + if (st.type == SourceAccessor::Type::tRegular) { + auto contents = accessor->readFile(result.filePath); + + // For each reference found in this file, extract context + for (auto & foundRef : result.foundRefs) { + std::string hash(foundRef.hashPart()); + auto pos = contents.find(hash); + if (pos != std::string::npos) { + size_t margin = 32; + auto pos2 = pos >= margin ? pos - margin : 0; + hits[hash].emplace_back(fmt( + "%s: …%s…", p2, hilite( filterPrintable(std::string(contents, pos2, pos - pos2 + hash.size() + margin)), pos - pos2, StorePath::HashLen, getColour(hash)))); + } + } + } else if (st.type == SourceAccessor::Type::tSymlink) { + auto target = accessor->readLink(result.filePath); + + // For each reference found in this symlink, show it + for (auto & foundRef : result.foundRefs) { + std::string hash(foundRef.hashPart()); + auto pos = target.find(hash); + if (pos != std::string::npos) + hits[hash].emplace_back( + fmt("%s -> %s", p2, hilite(target, pos, StorePath::HashLen, getColour(hash)))); } } - } - - else if (st->type == SourceAccessor::Type::tSymlink) { - auto target = accessor->readLink(p); - - for (auto & hash : hashes) { - auto pos = target.find(hash); - if (pos != std::string::npos) - hits[hash].emplace_back( - fmt("%s -> %s", p2, hilite(target, pos, StorePath::HashLen, getColour(hash)))); - } - } - }; - - // FIXME: should use scanForReferences(). - - if (precise) - visitPath(CanonPath::root); + }); + } for (auto & ref : refs) { std::string hash(ref.second->path.hashPart());