diff --git a/src/libexpr/flake/flake.cc b/src/libexpr/flake/flake.cc index 33d253eee79..c30119482e0 100644 --- a/src/libexpr/flake/flake.cc +++ b/src/libexpr/flake/flake.cc @@ -727,9 +727,10 @@ Fingerprint LockedFlake::getFingerprint() const // and we haven't changed it, then it's sufficient to use // flake.sourceInfo.storePath for the fingerprint. return hashString(htSHA256, - fmt("%s;%s;%d;%d;%s", + fmt("%s;%s;%s;%d;%d;%s", flake.sourceInfo->storePath.to_string(), flake.lockedRef.subdir, + flake.lockedRef.input.getModules().value_or(""), flake.lockedRef.input.getRevCount().value_or(0), flake.lockedRef.input.getLastModified().value_or(0), lockFile)); diff --git a/src/libexpr/primops/fetchTree.cc b/src/libexpr/primops/fetchTree.cc index 079513873ec..6d7676a37d7 100644 --- a/src/libexpr/primops/fetchTree.cc +++ b/src/libexpr/primops/fetchTree.cc @@ -9,6 +9,8 @@ #include #include +#include + namespace nix { void emitTreeAttrs( @@ -20,8 +22,7 @@ void emitTreeAttrs( bool forceDirty) { assert(input.isImmutable()); - - state.mkAttrs(v, 8); + state.mkAttrs(v, 9); auto storePath = state.store->printStorePath(tree.storePath); @@ -34,9 +35,22 @@ void emitTreeAttrs( mkString(*state.allocAttr(v, state.symbols.create("narHash")), narHash->to_string(SRI, true)); - if (input.getType() == "git") + if (input.getType() == "git") { + Value *modules = state.allocAttr(v, state.symbols.create("modules")); + + auto modulesJson = fetchers::getStrAttr(input.attrs, "modules"); + auto modulesInfo = fetchers::jsonToAttrs(nlohmann::json::parse(modulesJson)); + + state.mkAttrs(*modules, modulesInfo.size()); + for (auto & [path, url] : modulesInfo) { + Value *vUrl = state.allocValue(); + mkString(*vUrl, std::get(url).c_str()); + modules->attrs->push_back(Attr(state.symbols.create(path), vUrl)); + } + modules->attrs->sort(); mkBool(*state.allocAttr(v, state.symbols.create("submodules")), fetchers::maybeGetBoolAttr(input.attrs, "submodules").value_or(false)); + } if (!forceDirty) { diff --git a/src/libfetchers/fetchers.cc b/src/libfetchers/fetchers.cc index e158d914bb3..8fd7366ae9e 100644 --- a/src/libfetchers/fetchers.cc +++ b/src/libfetchers/fetchers.cc @@ -257,6 +257,13 @@ std::optional Input::getLastModified() const return {}; } +std::optional Input::getModules() const +{ + if (auto s = maybeGetStrAttr(attrs, "modules")) + return *s; + return {}; +} + ParsedURL InputScheme::toURL(const Input & input) { throw Error("don't know how to convert input '%s' to a URL", attrsToJSON(input.attrs)); diff --git a/src/libfetchers/fetchers.hh b/src/libfetchers/fetchers.hh index c43b047a772..92588d8a9b6 100644 --- a/src/libfetchers/fetchers.hh +++ b/src/libfetchers/fetchers.hh @@ -95,6 +95,7 @@ public: std::optional getRev() const; std::optional getRevCount() const; std::optional getLastModified() const; + std::optional getModules() const; }; diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index 544d2ffbf62..57a5f58fc1e 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -5,10 +5,13 @@ #include "store-api.hh" #include "url-parts.hh" #include "pathlocks.hh" +#include "archive.hh" #include #include +#include + using namespace std::string_literals; namespace nix::fetchers { @@ -24,6 +27,212 @@ static std::string readHead(const Path & path) return chomp(runProgram("git", true, { "-C", path, "rev-parse", "--abbrev-ref", "HEAD" })); } +static string getRootTree(const Path & path, const string & gitrev) { + auto commitOutput = runProgram("git", true, { "-C", path, "cat-file", "-p", gitrev }); + auto lines = tokenizeString>(commitOutput, "\n"); + auto words = tokenizeString>(lines[0]); + auto rootTree = words[1]; + return rootTree; +} + +enum gitType { + blob, + tree, + commit +}; + +string getBlob(const Path & path, const string & gitrev, const string & blobhash) { + return runProgram("git", true, { "-C", path, "cat-file", "-p", blobhash }); +} + +std::pair getActualUrl(const Input &input) { + // file:// URIs are normally not cloned (but otherwise treated the + // same as remote URIs, i.e. we don't use the working tree or + // HEAD). Exception: If _NIX_FORCE_HTTP is set, or the repo is a bare git + // repo, treat as a remote URI to force a clone. + static bool forceHttp = getEnv("_NIX_FORCE_HTTP") == "1"; // for testing + auto url = parseURL(getStrAttr(input.attrs, "url")); + bool isBareRepository = + url.scheme == "file" && !pathExists(url.path + "/.git"); + bool isLocal = url.scheme == "file" && !forceHttp && !isBareRepository; + return {isLocal, isLocal ? url.path : url.base}; +} + +std::string getGitDir(const std::string & actualUrl) { + auto gitDir = actualUrl + "/.git"; + auto commonGitDir = chomp(runProgram( + "git", true, {"-C", actualUrl, "rev-parse", "--path-format=absolute", "--git-common-dir"})); + if (commonGitDir != ".git") + gitDir = commonGitDir; + + return gitDir; +} + +/* Check whether this repo has any commits. There are + probably better ways to do this. */ +bool hasCommits(const std::string & actualUrl) { + auto gitDir = getGitDir(actualUrl); + + printTalkative("in hasCommits\nactualUrl %s\ngitDir %s", actualUrl, gitDir); + return !readDirectory(gitDir + "/refs/heads").empty(); +} + +Tree copyTrackedFiles(ref store, const std::string & inputName, const std::string & actualUrl, bool submodules) { + auto gitDir = getGitDir(actualUrl); + auto gitOpts = Strings({"-C", actualUrl, "ls-files", "-z"}); + + if (submodules) + gitOpts.emplace_back("--recurse-submodules"); + + auto files = tokenizeString>( + runProgram("git", true, gitOpts), "\0"s); + + PathFilter filter = [&](const Path &p) -> bool { + assert(hasPrefix(p, actualUrl)); + std::string file(p, actualUrl.size() + 1); + + auto st = lstat(p); + + if (S_ISDIR(st.st_mode)) { + auto prefix = file + "/"; + auto i = files.lower_bound(prefix); + return i != files.end() && hasPrefix(*i, prefix); + } + + return files.count(file); + }; + + auto storePath = + store->addToStore(inputName, actualUrl, + FileIngestionMethod::Recursive, htSHA256, filter); + + return Tree(store->toRealPath(storePath), std::move(storePath)); +} + +static std::map> gitTreeList(const Path & path, const string & treehash) { + auto treeOutput = runProgram("git", true, { "-C", path, "cat-file", "-p", treehash }); + auto lines = tokenizeString>(treeOutput, "\n"); + std::map> results; + for (auto line : lines) { + auto words = tokenizeString>(line); + auto hash = words[2]; + auto name = words[3]; + gitType type; + if (words[1] == "blob") type = blob; + else if (words[1] == "tree") type = tree; + else if (words[1] == "commit") type = commit; + else assert(0); // FIXME + results.insert(std::pair{name, std::pair{type, hash}}); + } + + return results; +} + +std::map parseSubmodules(const string & contents) { + auto lines = tokenizeString>(contents, "\n"); + std::map results; + std::optional path, url, branch; + for (auto line : lines) { + auto words = tokenizeString>(line); + if (words[1] != "=") { + if (line != lines[0]) { + printTalkative("Saving %s\t%s\t%s\n", *path, *url, branch.value_or("master")); + + results.insert(std::pair{*path, *url + "?ref=" + branch.value_or("master")}); + path = url = branch = {}; + } + continue; + } + + if (words[0] == "path") path = words[2]; + if (words[0] == "branch") branch = words[2]; + if (words[0] == "url") url = words[2]; + + } + printTalkative("Saving %s\t%s\t%s\n", *path, *url, branch.value_or("master")); + results.insert(std::pair{*path, *url + "?ref=" + branch.value_or("master")}); + return results; +} + +bool isClean(const string & actualUrl) { + bool haveCommits = hasCommits(actualUrl); + + try { + if (haveCommits) { + runProgram("git", true, + {"-C", actualUrl, "diff-index", "--quiet", "HEAD", "--"}); + return true; + } + } catch (ExecError &e) { + if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) + throw; + } + return false; +} + +string findCommitHash(const Path & path, const std::map> & _entries, const string & pathToFind) { + auto entries = _entries; + + auto tokens = tokenizeString>(pathToFind, "/"); + std::pair x; + for (auto token : tokens) { + auto i = entries.find(token); + if (i == entries.end()) return "fail"; + + x = i->second; + if (x.first == tree) entries = gitTreeList(path, x.second); + } + if (x.first != commit) return "fail"; + + return x.second; +} + +static Attrs readSubmodules(const Path & path, const string & gitrev) +{ + auto rootTree = getRootTree(path, gitrev); + printTalkative("root tree is %s", rootTree); + auto entries = gitTreeList(path, rootTree); + Attrs attrs; + + auto i = entries.find(".gitmodules"); + + if (i == entries.end()) return attrs; + + auto submodules = getBlob(path, gitrev, i->second.second); + + printTalkative("submodule file\n %s", submodules); + + auto parsedModules = parseSubmodules(submodules); + for (auto & [subPath, url] : parsedModules) { + auto fullPath = path + "/" + subPath; + auto initialized = pathExists(fullPath) && !readDirectory(fullPath).empty(); + + if (!initialized || isClean(fullPath)) { + printTalkative("submodule %s fetched from %s", subPath, url); + auto commitHash = findCommitHash(path, entries, subPath); + printTalkative("found %s", commitHash); + + static const std::regex barePathRegex("^/.*$"); + std::string prefix = "git+"; + if (std::regex_match(url, barePathRegex)) + prefix += "file://"; + + // std::string suffix = "?allRefs=1&rev=" + commitHash; + std::string suffix = "&rev=" + commitHash; + attrs.emplace(subPath, prefix + url + suffix); + } else { + StringSink sink; + dumpPath(fullPath, sink); + + auto narHash = hashString(htSHA256, *sink.s); + attrs.emplace(subPath, "path://" + fullPath + "?narHash=" + narHash.to_string(SRI, false)); + printTalkative("DIRTY\nsubPath: %s\nurl: %s\ndirtyUrl: %s\n", subPath, url, "path://" + fullPath + "?narHash=" + narHash.to_string(SRI, false)); + } + } + + return attrs; +} + static bool isNotDotGitDirectory(const Path & path) { static const std::regex gitDirRegex("^(?:.*/)?\\.git$"); @@ -67,7 +276,7 @@ struct GitInputScheme : InputScheme if (maybeGetStrAttr(attrs, "type") != "git") return {}; for (auto & [name, value] : attrs) - if (name != "type" && name != "url" && name != "ref" && name != "rev" && name != "shallow" && name != "submodules" && name != "lastModified" && name != "revCount" && name != "narHash" && name != "allRefs" && name != "name") + if (name != "type" && name != "url" && name != "ref" && name != "rev" && name != "shallow" && name != "submodules" && name != "lastModified" && name != "revCount" && name != "narHash" && name != "allRefs" && name != "name" && name != "modules") throw Error("unsupported Git input attribute '%s'", name); parseURL(getStrAttr(attrs, "url")); @@ -159,19 +368,6 @@ struct GitInputScheme : InputScheme { "-C", *sourcePath, "commit", std::string(file), "-m", *commitMsg }); } - std::pair getActualUrl(const Input & input) const - { - // file:// URIs are normally not cloned (but otherwise treated the - // same as remote URIs, i.e. we don't use the working tree or - // HEAD). Exception: If _NIX_FORCE_HTTP is set, or the repo is a bare git - // repo, treat as a remote URI to force a clone. - static bool forceHttp = getEnv("_NIX_FORCE_HTTP") == "1"; // for testing - auto url = parseURL(getStrAttr(input.attrs, "url")); - bool isBareRepository = url.scheme == "file" && !pathExists(url.path + "/.git"); - bool isLocal = url.scheme == "file" && !forceHttp && !isBareRepository; - return {isLocal, isLocal ? url.path : url.base}; - } - std::pair fetch(ref store, const Input & _input) override { Input input(_input); @@ -187,6 +383,8 @@ struct GitInputScheme : InputScheme if (submodules) cacheType += "-submodules"; if (allRefs) cacheType += "-all-refs"; + printTalkative("fetch is getting ran"); + auto getImmutableAttrs = [&]() { return Attrs({ @@ -204,6 +402,7 @@ struct GitInputScheme : InputScheme if (!shallow) input.attrs.insert_or_assign("revCount", getIntAttr(infoAttrs, "revCount")); input.attrs.insert_or_assign("lastModified", getIntAttr(infoAttrs, "lastModified")); + input.attrs.insert_or_assign("modules", getStrAttr(infoAttrs, "modules")); return { Tree(store->toRealPath(storePath), std::move(storePath)), input @@ -211,8 +410,9 @@ struct GitInputScheme : InputScheme }; if (input.getRev()) { - if (auto res = getCache()->lookup(store, getImmutableAttrs())) + if (auto res = getCache()->lookup(store, getImmutableAttrs())) { return makeResult(res->first, std::move(res->second)); + } } auto [isLocal, actualUrl_] = getActualUrl(input); @@ -221,31 +421,7 @@ struct GitInputScheme : InputScheme // If this is a local directory and no ref or revision is // given, then allow the use of an unclean working tree. if (!input.getRef() && !input.getRev() && isLocal) { - bool clean = false; - - /* Check whether this repo has any commits. There are - probably better ways to do this. */ - auto gitDir = actualUrl + "/.git"; - auto commonGitDir = chomp(runProgram( - "git", - true, - { "-C", actualUrl, "rev-parse", "--git-common-dir" } - )); - if (commonGitDir != ".git") - gitDir = commonGitDir; - - bool haveCommits = !readDirectory(gitDir + "/refs/heads").empty(); - - try { - if (haveCommits) { - runProgram("git", true, { "-C", actualUrl, "diff-index", "--quiet", "HEAD", "--" }); - clean = true; - } - } catch (ExecError & e) { - if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) throw; - } - - if (!clean) { + if (!isClean(actualUrl)) { /* This is an unclean working tree. So copy all tracked files. */ @@ -255,40 +431,21 @@ struct GitInputScheme : InputScheme if (settings.warnDirty) warn("Git tree '%s' is dirty", actualUrl); - auto gitOpts = Strings({ "-C", actualUrl, "ls-files", "-z" }); - if (submodules) - gitOpts.emplace_back("--recurse-submodules"); - - auto files = tokenizeString>( - runProgram("git", true, gitOpts), "\0"s); - - PathFilter filter = [&](const Path & p) -> bool { - assert(hasPrefix(p, actualUrl)); - std::string file(p, actualUrl.size() + 1); - - auto st = lstat(p); - - if (S_ISDIR(st.st_mode)) { - auto prefix = file + "/"; - auto i = files.lower_bound(prefix); - return i != files.end() && hasPrefix(*i, prefix); - } - - return files.count(file); - }; - - auto storePath = store->addToStore(input.getName(), actualUrl, FileIngestionMethod::Recursive, htSHA256, filter); + auto tree = copyTrackedFiles(store, input.getName(), actualUrl, submodules); // FIXME: maybe we should use the timestamp of the last // modified dirty file? + bool haveCommits = hasCommits(actualUrl); input.attrs.insert_or_assign( "lastModified", haveCommits ? std::stoull(runProgram("git", true, { "-C", actualUrl, "log", "-1", "--format=%ct", "--no-show-signature", "HEAD" })) : 0); - return { - Tree(store->toRealPath(storePath), std::move(storePath)), - input - }; + input.attrs.insert_or_assign( + "modules", + haveCommits ? + attrsToJSON(readSubmodules(actualUrl, "HEAD")).dump() : "{}"); + + return { tree, input }; } } @@ -410,8 +567,9 @@ struct GitInputScheme : InputScheme /* Now that we know the ref, check again whether we have it in the store. */ - if (auto res = getCache()->lookup(store, getImmutableAttrs())) + if (auto res = getCache()->lookup(store, getImmutableAttrs())) { return makeResult(res->first, std::move(res->second)); + } Path tmpDir = createTempDir(); AutoDelete delTmpDir(tmpDir, true); @@ -469,15 +627,18 @@ struct GitInputScheme : InputScheme auto storePath = store->addToStore(name, tmpDir, FileIngestionMethod::Recursive, htSHA256, filter); auto lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "log", "-1", "--format=%ct", "--no-show-signature", input.getRev()->gitRev() })); + auto rev = input.getRev()->gitRev(); + auto modulesInfo = readSubmodules(repoDir, rev); Attrs infoAttrs({ - {"rev", input.getRev()->gitRev()}, + {"rev", rev}, {"lastModified", lastModified}, + {"modules", attrsToJSON(modulesInfo).dump()}, }); if (!shallow) infoAttrs.insert_or_assign("revCount", - std::stoull(runProgram("git", true, { "-C", repoDir, "rev-list", "--count", input.getRev()->gitRev() }))); + std::stoull(runProgram("git", true, { "-C", repoDir, "rev-list", "--count", rev }))); if (!_input.getRev()) getCache()->add( diff --git a/tests/fetchGit.sh b/tests/fetchGit.sh index 89294d8d2bc..84e5026e5c9 100644 --- a/tests/fetchGit.sh +++ b/tests/fetchGit.sh @@ -37,6 +37,8 @@ path0_=$(nix eval --impure --raw --expr "(builtins.fetchTree { type = \"git\"; u export _NIX_FORCE_HTTP=1 [[ $(tail -n 1 $path0/hello) = "hello" ]] +ls -l $repo +nix eval --debug --impure --raw --expr "(builtins.fetchGit file://$repo).outPath" # Fetch the default branch. path=$(nix eval --impure --raw --expr "(builtins.fetchGit file://$repo).outPath") [[ $(cat $path/hello) = world ]] diff --git a/tests/fetchGitSubmodules.sh b/tests/fetchGitSubmodules.sh index 5f104355f69..0a037d815e1 100644 --- a/tests/fetchGitSubmodules.sh +++ b/tests/fetchGitSubmodules.sh @@ -30,6 +30,7 @@ initGitRepo $subRepo addGitContent $subRepo initGitRepo $rootRepo +addGitContent $rootRepo git -C $rootRepo submodule init git -C $rootRepo submodule add $subRepo sub @@ -95,3 +96,15 @@ noSubmoduleRepoBaseline=$(nix eval --raw --expr "(builtins.fetchGit { url = file noSubmoduleRepo=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$subRepo; rev = \"$subRev\"; submodules = true; }).outPath") [[ $noSubmoduleRepoBaseline == $noSubmoduleRepo ]] + +subUrl=$(nix eval --raw --expr "(builtins.fetchGit { url = $rootRepo; rev = \"$rev\"; }).modules.sub") + +[[ $subUrl =~ rev=$subRev$ ]] + +# Beschmutzigen... +echo etwa dreck > $rootRepo/content + +subUrl=$(nix eval --impure --raw --expr "(builtins.fetchGit $rootRepo).modules.sub") + +# Submodule is still clean so should be the same as above +[[ $subUrl =~ rev=$subRev$ ]] diff --git a/tests/flakes-with-submodules.sh b/tests/flakes-with-submodules.sh new file mode 100644 index 00000000000..904c433f707 --- /dev/null +++ b/tests/flakes-with-submodules.sh @@ -0,0 +1,149 @@ +source common.sh + +if [[ -z $(type -p git) ]]; then + echo "Git not installed; skipping flake tests" + exit 99 +fi + +clearStore +rm -rf $TEST_HOME/.{cache,config} + +registry=$TEST_ROOT/registry.json + +nonFlakeDir=$TEST_ROOT/nonFlake +flakeDir=$TEST_ROOT/flake +flakeWithSubmodules=$TEST_ROOT/flakeWithSubmodules + +for repo in $flakeDir $nonFlakeDir $flakeWithSubmodules; do + rm -rf $repo $repo.tmp + mkdir -p $repo + git -C $repo init + git -C $repo config user.email "foobar@example.com" + git -C $repo config user.name "Foobar" + echo FNORD > $repo/README.md + git -C $repo add README.md + git -C $repo commit -m 'Initial' +done + +cp config.nix $flakeDir + +cat > $flakeDir/flake.nix < \$bin + echo '\${script}' >> \$bin + chmod +x \$bin + ''; + }; + in { + packages.$system = { + cat-own-readme = writeShellScriptBin "cat-own-readme" '' + cat \${self}/README.md + ''; + }; + + defaultPackage.$system = self.packages.$system.cat-own-readme; + }; +} +EOF + +git -C $flakeDir add . +git -C $flakeDir commit -m'add flake.nix' + +[[ $(nix run $flakeDir#cat-own-readme) == "FNORD" ]] + +git -C $flakeWithSubmodules submodule add $nonFlakeDir +git -C $flakeWithSubmodules submodule add $flakeDir +git -C $flakeWithSubmodules add . +git -C $flakeWithSubmodules commit -m'add submodules' + +cp config.nix $flakeWithSubmodules + +cat > $flakeWithSubmodules/flake.nix < \$bin + echo '\${script}' >> \$bin + chmod +x \$bin + ''; + }; + + combineModules = self: let + srcs = builtins.mapAttrs (_: url: fetchTree (traceVal url)) self.modules; + in mkDerivation { + name = "source-with-submodules"; + buildCommand = '' + cp -r \${self} \$out + chmod -R +w \$_ + + \${ + pipe srcs [ + (mapAttrsToList (path: src: "cp -rT \${src} \$out/\${path}")) + (builtins.concatStringsSep "\n") + ] + } + ''; + }; + in { + packages.$system = { + cat-submodule-readme = writeShellScriptBin "cat-submodule-readme" '' + cat \${nonFlake}/README.md + ''; + use-submodule-as-flake = flake.packages.$system.cat-own-readme; + + cat-own-readme = writeShellScriptBin "cat-own-readme" '' + cat \${self}/README.md + ''; + + source-with-submodules = combineModules self; + }; + + defaultPackage.$system = self.packages.$system.cat-submodule-readme; + }; +} +EOF + +git -C $flakeWithSubmodules add . +git -C $flakeWithSubmodules commit -m'add flake.nix' + +[[ $(nix run $flakeWithSubmodules#cat-own-readme) == "FNORD" ]] +[[ $(nix run $flakeWithSubmodules#cat-submodule-readme) == "FNORD" ]] +[[ $(nix run $flakeWithSubmodules#use-submodule-as-flake) == "FNORD" ]] +nix build -o $TEST_ROOT/result $flakeWithSubmodules#source-with-submodules +[[ $(cat $TEST_ROOT/result/nonFlake/README.md) == "FNORD" ]] + +echo FOST > $flakeWithSubmodules/README.md + +# apply dirt +echo FSUD > $flakeWithSubmodules/nonFlake/README.md +[[ $(nix run $flakeWithSubmodules#cat-submodule-readme) == "FSUD" ]] +nix build -o $TEST_ROOT/result $flakeWithSubmodules#source-with-submodules +[[ $(cat $TEST_ROOT/result/nonFlake/README.md) == "FSUD" ]] + +# should work for flake as well +echo FSUD > $flakeWithSubmodules/flake/README.md +[[ $(nix run $flakeWithSubmodules#use-submodule-as-flake) == "FSUD" ]] diff --git a/tests/local.mk b/tests/local.mk index 936b72c2a09..a7d43e1a6e5 100644 --- a/tests/local.mk +++ b/tests/local.mk @@ -47,6 +47,7 @@ nix_tests = \ describe-stores.sh \ flakes.sh \ flake-local-settings.sh \ + flakes-with-submodules.sh \ build.sh \ repl.sh ca/repl.sh \ ca/build.sh \