From 32c182b3fbc635ae74dae29d17c2c217494d1029 Mon Sep 17 00:00:00 2001 From: Gabriella Gonzalez Date: Wed, 23 Nov 2022 15:56:22 -0800 Subject: [PATCH] Add optional `date` argument to `builtins.fetchGit` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows users to specify an absolute or relative date (basically, anything that git accepts as a date specification) when fetching a repository. The motivation for this change is to enable support for incremental builds for Haskell packages for this Nixpkgs branch: https://github.com/MercuryTechnologies/nixpkgs/tree/gabriella/incremental … inspired by this blog post: https://harry.garrood.me/blog/easy-incremental-haskell-ci-builds-with-ghc-9.4/ Keep in mind that this new feature can power incremental builds for other package managers, too. There is not much that is Haskell-specific about this feature. The basic idea is that instead of Nix doing a full build for a package, we split every build into two builds: - A full build at an older point in time e.g. a daily or weekly time boundary - An incremental build relative to the last full build This incremental build reuses the build products left over from the most recent full build. In order to do this, though, we need a way to "snap" a package's `git` source input to an earlier point in time (e.g. a daily boundary or weekly boundary). This would allow multiple incremental builds to share the same full rebuild if they snap to the same time boundary. The two main approaches I considered were: - Approach 1 (this PR) Patch Nix to add a `date` argument to `builtins.fetchGit` - Approach 2 - Patch `nix-prefetch-git` to support a new `--date` option - Disable the sandbox - Run `nix-prefetch-git` at evaluation time using import-from-derivation to fetch and rehash the repository at an older point in time Approach 1 seemed the more desirable of the two. 
--- src/libexpr/primops/fetchTree.cc | 5 ++++ src/libfetchers/fetchers.cc | 7 ++++++ src/libfetchers/fetchers.hh | 1 + src/libfetchers/git.cc | 42 +++++++++++++++++++++++++------- tests/fetchGit.sh | 17 +++++++++++++ 5 files changed, 63 insertions(+), 9 deletions(-) diff --git a/src/libexpr/primops/fetchTree.cc b/src/libexpr/primops/fetchTree.cc index 84e7f5c02ee..8d3029319b7 100644 --- a/src/libexpr/primops/fetchTree.cc +++ b/src/libexpr/primops/fetchTree.cc @@ -373,6 +373,11 @@ static RegisterPrimOp primop_fetchGit({ true, it's possible to load a `rev` from *any* `ref` (by default only `rev`s from the specified `ref` are supported). + - date\ + A `git` date specification, either absolute (e.g. `2000-01-01`) or + relative to the current time (e.g. `1 week ago`). The newest commit on + `ref` made before that date is fetched (or the root commit if none exists). + Here are some examples of how to use `fetchGit`. - To fetch a private repository over SSH: diff --git a/src/libfetchers/fetchers.cc b/src/libfetchers/fetchers.cc index 6957d2da408..022df739dc7 100644 --- a/src/libfetchers/fetchers.cc +++ b/src/libfetchers/fetchers.cc @@ -236,6 +236,13 @@ std::optional<std::string> Input::getRef() const return {}; } +std::optional<std::string> Input::getDate() const +{ + if (auto s = maybeGetStrAttr(attrs, "date")) + return *s; + return {}; +} + std::optional<Hash> Input::getRev() const { std::optional<Hash> hash = {}; diff --git a/src/libfetchers/fetchers.hh b/src/libfetchers/fetchers.hh index bc9a76b0bb4..a9e18205fde 100644 --- a/src/libfetchers/fetchers.hh +++ b/src/libfetchers/fetchers.hh @@ -93,6 +93,7 @@ public: std::string getType() const; std::optional<Hash> getNarHash() const; std::optional<std::string> getRef() const; + std::optional<std::string> getDate() const; std::optional<Hash> getRev() const; std::optional<uint64_t> getRevCount() const; std::optional<time_t> getLastModified() const; diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index 7b7a1be35ac..2e10877e5dd 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -284,7 +284,7 @@ struct GitInputScheme : InputScheme if 
(maybeGetStrAttr(attrs, "type") != "git") return {}; for (auto & [name, value] : attrs) - if (name != "type" && name != "url" && name != "ref" && name != "rev" && name != "shallow" && name != "submodules" && name != "lastModified" && name != "revCount" && name != "narHash" && name != "allRefs" && name != "name") + if (name != "type" && name != "url" && name != "ref" && name != "rev" && name != "shallow" && name != "submodules" && name != "lastModified" && name != "revCount" && name != "narHash" && name != "allRefs" && name != "name" && name != "date") throw Error("unsupported Git input attribute '%s'", name); parseURL(getStrAttr(attrs, "url")); @@ -442,9 +442,9 @@ struct GitInputScheme : InputScheme auto [isLocal, actualUrl_] = getActualUrl(input); auto actualUrl = actualUrl_; // work around clang bug - /* If this is a local directory and no ref or revision is given, + /* If this is a local directory and no ref, revision, or date is given, allow fetching directly from a dirty workdir. */ - if (!input.getRef() && !input.getRev() && isLocal) { + if (!input.getRef() && !input.getRev() && !input.getDate() && isLocal) { auto workdirInfo = getWorkdirInfo(input, actualUrl); if (!workdirInfo.clean) { return fetchFromWorkdir(store, input, actualUrl, workdirInfo); @@ -470,11 +470,20 @@ struct GitInputScheme : InputScheme unlockedAttrs.insert_or_assign("ref", *head); } - if (!input.getRev()) - input.attrs.insert_or_assign("rev", - Hash::parseAny(chomp(runProgram("git", true, { "-C", actualUrl, "--git-dir", gitDir, "rev-parse", *input.getRef() })), htSHA1).gitRev()); - repoDir = actualUrl; + + if (!input.getRev()) { + std::string rev = ""; + if (input.getDate()) { + rev = chomp(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "rev-list", "--before", *input.getDate(), "-1", *input.getRef() })); + if (rev == "") { + rev = chomp(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "rev-list", "--max-parents=0", "-1", *input.getRef() })); + } + } else { + rev 
= chomp(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "rev-parse", *input.getRef() })); + } + input.attrs.insert_or_assign("rev", Hash::parseAny(rev, htSHA1).gitRev()); + } } else { const bool useHeadRef = !input.getRef(); if (useHeadRef) { @@ -491,6 +500,10 @@ struct GitInputScheme : InputScheme } } + if (input.getDate()) { + unlockedAttrs.insert_or_assign("date", input.getDate().value()); + } + if (auto res = getCache()->lookup(store, unlockedAttrs)) { auto rev2 = Hash::parseAny(getStrAttr(res->first, "rev"), htSHA1); if (!input.getRev() || input.getRev() == rev2) { @@ -569,8 +582,19 @@ struct GitInputScheme : InputScheme warn("could not update cached head '%s' for '%s'", *input.getRef(), actualUrl); } - if (!input.getRev()) - input.attrs.insert_or_assign("rev", Hash::parseAny(chomp(readFile(localRefFile)), htSHA1).gitRev()); + std::string rev = ""; + if (!input.getRev()) { + auto startingRevision = chomp(readFile(localRefFile)); + if (input.getDate()) { + rev = chomp(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "rev-list", "--before", *input.getDate(), "-1", startingRevision })); + if (rev == "") { + rev = chomp(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "rev-list", "--max-parents=0", "-1", startingRevision })); + } + } else { + rev = startingRevision; + } + input.attrs.insert_or_assign("rev", Hash::parseAny(rev, htSHA1).gitRev()); + } // cache dir lock is removed at scope end; we will only use read-only operations on specific revisions in the remainder } diff --git a/tests/fetchGit.sh b/tests/fetchGit.sh index 4ceba029329..7a36450c3ff 100644 --- a/tests/fetchGit.sh +++ b/tests/fetchGit.sh @@ -229,6 +229,23 @@ rev_tag2=$(git -C $repo rev-parse refs/tags/tag2) [[ $rev_tag2_nix = $rev_tag2 ]] unset _NIX_FORCE_HTTP +# The date argument works for both local repos and "remote" repos, returning the +# latest commit made before the specified date +timestamp="$(date '+%s')" +sleep 1 +git -C $repo commit -m 'Bla6' 
--allow-empty + +rev4_date=$(nix eval --impure --raw --expr "(builtins.fetchGit { url = $repo; date = \"$timestamp\"; }).rev") +[[ $rev4 = $rev4_date ]] +export _NIX_FORCE_HTTP=1 +rev4_date=$(nix eval --impure --raw --expr "(builtins.fetchGit { url = $repo; date = \"$timestamp\"; }).rev") +[[ $rev4 = $rev4_date ]] +unset _NIX_FORCE_HTTP + +# A date prior to the first commit returns the first commit instead of failing +rev1_date=$(nix eval --impure --raw --expr "(builtins.fetchGit { url = $repo; date = \"1 year ago\"; }).rev") +[[ $rev1 = $rev1_date ]] + # should fail if there is no repo rm -rf $repo/.git (! nix eval --impure --raw --expr "(builtins.fetchGit \"file://$repo\").outPath")