diff --git a/src/libexpr/primops/fetchTree.cc b/src/libexpr/primops/fetchTree.cc index f040a35109a..4d23ff29a63 100644 --- a/src/libexpr/primops/fetchTree.cc +++ b/src/libexpr/primops/fetchTree.cc @@ -368,9 +368,9 @@ static RegisterPrimOp primop_fetchGit({ The URL of the repo. - - `name` (default: *basename of the URL*) + - `name` (default: `source`) - The name of the directory the repo should be exported to in the store. + The name used for the generated store path. - `rev` (default: *the tip of `ref`*) @@ -381,27 +381,52 @@ static RegisterPrimOp primop_fetchGit({ - `ref` (default: `HEAD`) - The [Git reference] under which to look for the requested revision. - This is often a branch or tag name. + The [Git reference] to fetch. Has no effect if `rev` is specified. [Git reference]: https://git-scm.com/book/en/v2/Git-Internals-Git-References - By default, the `ref` value is prefixed with `refs/heads/`. - As of 2.3.0, Nix will not prefix `refs/heads/` if `ref` starts with `refs/`. + The `ref` value can be an abbreviated ref (e.g. `master` or `v0.1.2`), a full + ref (e.g. `refs/heads/master` or `refs/tags/v0.1.2`), or the special reference + `HEAD`. + + Abbreviated refs are resolved by using `git ls-remote url ABBREVIATED_REF`. + If the abbreviated ref is ambiguous, the first result is used. + + When the cache contains an unexpired ref that matches the abbreviated ref, + the remote is not queried. This may lead to unexpected results if there are + tags named like branches. To avoid this, use a full ref for tags. - `submodules` (default: `false`) - A Boolean parameter that specifies whether submodules should be checked out. + A Boolean parameter that specifies whether [submodules] should be checked out. + + [submodules]: https://git-scm.com/docs/gitsubmodules - `shallow` (default: `false`) - A Boolean parameter that specifies whether fetching a shallow clone is allowed. + A Boolean parameter that specifies whether to fetch as a [shallow commit]. 
+ This can significantly speed up the checkout of large repositories. If set + the output will not contain the `revCount` attribute. - - `allRefs` + [shallow commit]: https://git-scm.com/docs/shallow - Whether to fetch all references of the repository. - With this argument being true, it's possible to load a `rev` from *any* `ref` - (by default only `rev`s from the specified `ref` are supported). + Required if the source is a shallow repository. + + > **Note** + > + > Setting this is almost always a good idea, as it significantly reduces + > the amount of data to fetch. The only two cases when you should not set + > this are when you fetch a lot of revisions in reverse order or when you + > need the `revCount` attribute on the output. + + If the URL points to a local directory and no `ref` or `rev` is + given `fetchGit` will use the current state of the directory, which + will include modified tracked files and staged changes. + + Fetched references, objects and revisions are cached. The cache is located at + `~/.cache/nix/gitv3/`. Commits and git objects stay cached indefinitely, + refs expire according to the [`tarball-ttl`](@docroot@/command-ref/conf-file.md#conf-tarball-ttl) + setting. There is a separate cache for every combination of `url` and `shallow`. Here are some examples of how to use `fetchGit`. @@ -410,7 +435,6 @@ static RegisterPrimOp primop_fetchGit({ ```nix builtins.fetchGit { url = "git@github.com:my-secret/repository.git"; - ref = "master"; rev = "adab8b916a45068c044658c4158d81878f9ed1c3"; } ``` @@ -424,33 +448,7 @@ static RegisterPrimOp primop_fetchGit({ } ``` - - If the revision you're looking for is in the default branch of - the git repository you don't strictly need to specify the branch - name in the `ref` attribute. - - However, if the revision you're looking for is in a future - branch for the non-default branch you will need to specify the - the `ref` attribute as well. 
- - ```nix - builtins.fetchGit { - url = "https://github.com/nixos/nix.git"; - rev = "841fcbd04755c7a2865c51c1e2d3b045976b7452"; - ref = "1.11-maintenance"; - } - ``` - - > **Note** - > - > It is nice to always specify the branch which a revision - > belongs to. Without the branch being specified, the fetcher - > might fail if the default branch changes. Additionally, it can - > be confusing to try a commit from a non-default branch and see - > the fetch fail. If the branch is specified the fault is much - > more obvious. - - - If the revision you're looking for is in the default branch of - the git repository you may omit the `ref` attribute. + - To fetch a revision/commit: ```nix builtins.fetchGit { @@ -486,11 +484,6 @@ static RegisterPrimOp primop_fetchGit({ ```nix builtins.fetchGit ./work-dir ``` - - If the URL points to a local directory, and no `ref` or `rev` is - given, `fetchGit` will use the current content of the checked-out - files, even if they are not committed or added to Git's index. It will - only consider files added to the Git repository, as listed by `git ls-files`. )", .fun = prim_fetchGit, }); diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index f8d89ab2fcd..856abdfc53c 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -10,6 +10,7 @@ #include "fetch-settings.hh" +#include #include #include #include @@ -21,692 +22,761 @@ namespace nix::fetchers { namespace { -// Explicit initial branch of our bare repo to suppress warnings from new version of git. -// The value itself does not matter, since we always fetch a specific revision or branch. -// It is set with `-c init.defaultBranch=` instead of `--initial-branch=` to stay compatible with -// old version of git, which will ignore unrecognized `-c` options. +/// Explicit initial branch of our bare repo to suppress warnings from new version of git. +/// The value itself does not matter, since we always fetch a specific revision. 
+/// It is set with `-c init.defaultBranch=` instead of `--initial-branch=` to stay compatible with +/// old version of git, which will ignore unrecognized `-c` options. const std::string gitInitialBranch = "__nix_dummy_branch"; - -bool isCacheFileWithinTtl(time_t now, const struct stat & st) -{ - return st.st_mtime + settings.tarballTtl > now; +/// For caching reasons a reference is created for every revision we fetch. +/// These references are stored like `refs/__nix_refs_for_revs/COMMIT_HASH`. +std::string const referencesForRevisionsPrefix = "refs/__nix_refs_for_revs/"; +/// For various reasons we can not use HEAD in the cache. +/// The following reference is used instead. +const std::string cachedHeadReference = "refs/heads/__nix_cache_HEAD"; + +/// Get the git hash of an empty tree +/// +/// The hash of the empty tree is a bit special in that git knows some things about it even though it is not an object in the repo. +/// For example we can run git diff or git archive with it. +/// +/// @param gitDir The directory of the git repository. Determines the hashing algorithm. +/// @return The hash of the empty tree. +std::string getEmptyTreeHash(const Path &gitDir) { return chomp(runProgram("git", true, {"-C", gitDir, "hash-object", "-t", "tree", "/dev/null"})); } + +bool isCacheFileWithinTtl(const struct stat &st) { + time_t now = time(0); + return st.st_mtime + settings.tarballTtl > now; } -bool touchCacheFile(const Path & path, time_t touch_time) -{ - struct timeval times[2]; - times[0].tv_sec = touch_time; - times[0].tv_usec = 0; - times[1].tv_sec = touch_time; - times[1].tv_usec = 0; - - return lutimes(path.c_str(), times) == 0; +/// Get a path to a bare git repo in the nix git cache +/// +/// The shallow and the normal versions can not share a cache directory. +/// +/// @param url The git url used as cache key. +/// @param shallow Whether to use the separate cache directory for shallow fetches (suffixed with `_shallow`). +/// @return The cache path. You should lock it before writing to it. 
+Path getGitCachePath(std::string_view url, bool shallow = false) { + auto cacheDir = getCacheDir() + "/nix/gitv3/" + hashString(htSHA256, url).to_string(Base32, false) + (shallow ? "_shallow" : ""); + // Create the repo if it does not exist + if (!pathExists(cacheDir)) { + createDirs(dirOf(cacheDir)); + PathLocks cacheDirLock({cacheDir + ".lock"}); + runProgram("git", true, {"-c", "init.defaultBranch=" + gitInitialBranch, "init", "--bare", cacheDir}); + } + return cacheDir; } -Path getCachePath(std::string_view key) -{ - return getCacheDir() + "/nix/gitv3/" + - hashString(htSHA256, key).to_string(Base32, false); +struct RevisionInCacheResult { + std::string revision; + std::string fullReference; + bool fresh; +}; + +/// Resolves the revision and full reference for a given reference in a git repo +/// +/// If an abbreviated reference is passed (e.g. 'master') it is also expanded to a full reference (e.g. 'refs/heads/master') +/// +/// Cached references expire according to the `isCacheFileWithinTtl` function. +/// +/// If a fresh reference exists, the first fresh reference is returned. Otherwise the first expired reference or nullopt is returned. +/// +/// @param repoDir The path of a local cached git directory obtained with `getGitCachePath`. +/// @param reference A git reference. +/// @param uncachedRepository Set to true if repoDir is not a cached repo. If set the first expired cache entry is returned. If set the function also works with +/// packed references. +/// @return The resolved revision, full reference and information about its freshness. 
+std::optional resolveReference(const Path &repoDir, const std::string &reference, bool uncachedRepository = false) { + // Run ls-remote to find a revision for the reference + auto [status, output] = runProgram(RunOptions{ + .program = "git", + .args = {"ls-remote", repoDir, reference}, + .isInteractive = true, + }); + if (status != 0) + return std::nullopt; + + std::optional firstExpiredCacheResult = std::nullopt; + + auto lines = tokenizeString>(output, "\n"); + for (auto line : lines) { + if (const auto parseResult = git::parseLsRemoteLine(line)) { + if (parseResult->kind == git::LsRemoteRefLine::Kind::Symbolic) { + throw Error("git should never resolve a symbolic revision without '--symref'"); + } + + // parseResult->reference is always defined if parseResult->kind is not git::LsRemoteRefLine::Kind::Symbolic + // which we asserted above + auto const fullReference = parseResult->reference.value(); + auto const revision = parseResult->target; + + if (!firstExpiredCacheResult.has_value()) { + firstExpiredCacheResult = RevisionInCacheResult{ + .revision = revision, + .fullReference = fullReference, + .fresh = false, + }; + if (uncachedRepository) + return firstExpiredCacheResult; + } + + auto referenceFile = repoDir + "/" + fullReference; + struct stat st; + stat(referenceFile.c_str(), &st); + auto cacheIsFresh = isCacheFileWithinTtl(st); + + if (cacheIsFresh) { + debug("resolved reference '%s' in repo '%s' to revision '%s' and full reference '%s'", reference, repoDir, revision, fullReference); + return RevisionInCacheResult{ + .revision = revision, + .fullReference = fullReference, + .fresh = true, + }; + } + } + } + + return firstExpiredCacheResult; } -// Returns the name of the HEAD branch. -// -// Returns the head branch name as reported by git ls-remote --symref, e.g., if -// ls-remote returns the output below, "main" is returned based on the ref line. -// -// ref: refs/heads/main HEAD -// ... 
-std::optional readHead(const Path & path) -{ - auto [status, output] = runProgram(RunOptions { - .program = "git", - // FIXME: use 'HEAD' to avoid returning all refs - .args = {"ls-remote", "--symref", path}, - .isInteractive = true, - }); - if (status != 0) return std::nullopt; - - std::string_view line = output; - line = line.substr(0, line.find("\n")); - if (const auto parseResult = git::parseLsRemoteLine(line)) { - switch (parseResult->kind) { - case git::LsRemoteRefLine::Kind::Symbolic: - debug("resolved HEAD ref '%s' for repo '%s'", parseResult->target, path); - break; - case git::LsRemoteRefLine::Kind::Object: - debug("resolved HEAD rev '%s' for repo '%s'", parseResult->target, path); - break; - } - return parseResult->target; - } - return std::nullopt; +/// Check if a revision is present in a git repository +/// +/// @param gitDir A path to a bare git repository or .git directory +/// @param revision A git revision. Usually a commit hash +/// @return true if the revision is present in the repository +bool revisionIsInRepo(const Path gitDir, const std::string revision) { + try { + runProgram("git", true, {"-C", gitDir, "cat-file", "-e", revision}); + return true; + } catch (ExecError &e) { + if (!WIFEXITED(e.status)) { + throw; + } + return false; + } } -// Persist the HEAD ref from the remote repo in the local cached repo. -bool storeCachedHead(const std::string & actualUrl, const std::string & headRef) -{ - Path cacheDir = getCachePath(actualUrl); - try { - runProgram("git", true, { "-C", cacheDir, "--git-dir", ".", "symbolic-ref", "--", "HEAD", headRef }); - } catch (ExecError &e) { - if (!WIFEXITED(e.status)) throw; - return false; - } - /* No need to touch refs/HEAD, because `git symbolic-ref` updates the mtime. */ - return true; +/// Get a path to a bare git repo containing the specified revision +/// +/// If the cached repo already contains the revision it just returns the path to the repo. 
+/// Otherwise it fetches the revision into the repo and returns the path to the repo. +/// +/// The returned path is a cache dir path without a lock. That is fine as long as there are only read-only operations on the fetched revision. +/// +/// @param gitUrl A [git url](https://git-scm.com/docs/git-fetch#_git_urls) to the repository. +/// @param revision A git revision. Usually a commit hash. +/// @param shallow Fetch only the single commit. Less data transfer but does not benefit from already fetched commits. +/// @return A path to a bare git repo containing the specified revision. The path is to be treated as read-only. +Path fetchRevisionIntoCache(const std::string &gitUrl, const std::string &revision, bool shallow) { + Path repoDir = getGitCachePath(gitUrl, shallow); + + if (revisionIsInRepo(repoDir, revision)) { + return repoDir; + } + + Activity act(*logger, lvlTalkative, actUnknown, fmt("fetching Git repository '%s'", gitUrl)); + PathLocks cacheDirLock({repoDir + ".lock"}); + try { + // Fetch the revision into the local repo + // + // When using `git fetch` git tries to detect which revisions we already have and only fetch the ones we don't have. However git only considers revisions + // that are the ancestor of a reference. We want that git considers every revision we already have. By creating a reference for every revision when we + // fetch it we can be sure that every revision we have locally is an ancestor of a reference. This is not optimal as we do not remove a reference if we + // later get a reference to their children. This could lead to a lot of unnecessary references but that is probably not a real problem. 
+ auto options = Strings{"-C", + repoDir, + "-c", + "fetch.negotiationAlgorithm=consecutive", + "fetch", + "--no-tags", + "--recurse-submodules=no", + "--quiet", + "--force", + "--no-write-fetch-head", + "--refmap="}; + if (shallow) { + options.push_back("--depth=1"); + } + options.push_back("--"); + options.push_back(gitUrl); + options.push_back(revision + ":" + referencesForRevisionsPrefix + revision); + runProgram("git", true, options, {}, true); + } catch (Error &e) { + // Failing the fetch is always fatal. We do not want to continue with a partial repo. + throw Error("failed to fetch revision '%s' from '%s'", revision, gitUrl); + } + + return repoDir; } -std::optional readHeadCached(const std::string & actualUrl) -{ - // Create a cache path to store the branch of the HEAD ref. Append something - // in front of the URL to prevent collision with the repository itself. - Path cacheDir = getCachePath(actualUrl); - Path headRefFile = cacheDir + "/HEAD"; - - time_t now = time(0); - struct stat st; - std::optional cachedRef; - if (stat(headRefFile.c_str(), &st) == 0) { - cachedRef = readHead(cacheDir); - if (cachedRef != std::nullopt && - *cachedRef != gitInitialBranch && - isCacheFileWithinTtl(now, st)) - { - debug("using cached HEAD ref '%s' for repo '%s'", *cachedRef, actualUrl); - return cachedRef; - } - } - - auto ref = readHead(actualUrl); - if (ref) return ref; - - if (cachedRef) { - // If the cached git ref is expired in fetch() below, and the 'git fetch' - // fails, it falls back to continuing with the most recent version. - // This function must behave the same way, so we return the expired - // cached ref here. - warn("could not get HEAD ref for repository '%s'; using expired cached ref '%s'", actualUrl, *cachedRef); - return *cachedRef; - } - - return std::nullopt; +/// Fetch a specific reference into the git cache for a given git url. +/// +/// This function uses `git fetch` to fetch and resolve the reference in one go. 
+/// +/// If the reference is already in the cache and the cache entry is not expired (See `isCacheFileWithinTtl`) it is not fetched again. +/// If the cached reference is expired and the reference can not be fetched the expired revision is used and a warning is printed. +/// +/// @param gitUrl A git url that will be used with `git fetch` to resolve the revision. +/// @param reference A abbreviated git reference or the special reference HEAD. +/// @param shallow Fetch only the single commit. +/// @return The revision of the reference +std::string fetchAndResolveReferenceIntoCache(const std::string &gitUrl, const std::string &reference, bool shallow) { + Path repoDir = getGitCachePath(gitUrl, shallow); + + auto cachedRef = reference == "HEAD" ? cachedHeadReference : reference; + auto resolvedCachedRevision = resolveReference(repoDir, cachedRef); + auto freshCacheEntry = resolvedCachedRevision.has_value() && resolvedCachedRevision.value().fresh ? resolvedCachedRevision : std::nullopt; + auto expiredCacheEntry = resolvedCachedRevision.has_value() && !resolvedCachedRevision.value().fresh ? resolvedCachedRevision : std::nullopt; + + if (freshCacheEntry.has_value()) { + return freshCacheEntry.value().revision; + } + + Activity act(*logger, lvlTalkative, actUnknown, fmt("fetching Git repository '%s'", gitUrl)); + PathLocks cacheDirLock({repoDir + ".lock"}); + // Fetch the revision into the local repo + auto options = Strings{"-C", + repoDir, + "-c", + "fetch.negotiationAlgorithm=consecutive", + "fetch", + "--no-tags", + "--recurse-submodules=no", + "--quiet", + "--force", + "--write-fetch-head", + "--refmap=", + "--verbose"}; + if (shallow) { + options.push_back("--depth=1"); + } + options.push_back("--"); + options.push_back(gitUrl); + auto refspec = reference == "HEAD" ? ("HEAD:" + cachedHeadReference) + : ((reference.substr(0, 5) == "refs/") ? 
(reference + ":" + reference) : ("*/" + reference + ":" + "*/" + reference)); + options.push_back(refspec); + + // Will be set to the correct revision + std::string newRevision; + // Will be set to the full reference + std::string fullReference; + try { + runProgram("git", true, options, {}, true); + auto fetchHead = readFile(repoDir + "/FETCH_HEAD"); + auto lines = tokenizeString>(fetchHead, "\n"); + + for (auto line : lines) { + if (line.empty()) + continue; + + auto revision = line.substr(0, line.find('\t')); + auto remainder = line.substr(line.find_last_of('\t') + 1); + auto firstQuote = remainder.find_first_of('\''); + auto secondQuote = remainder.find_first_of('\'', firstQuote + 1); + auto fullRef = remainder.substr(firstQuote + 1, secondQuote - firstQuote - 1); + if (remainder.substr(0, 7) == "branch ") { + fullRef = "refs/heads/" + fullRef; + } + if (remainder.substr(0, 4) == "tag ") { + fullRef = "refs/tags/" + fullRef; + } + + // Write the reference manually to ensure it is not a packed reference and that its timestamp is updated + Path cachedReferenceFile = repoDir + "/" + fullRef; + createDirs(dirOf(cachedReferenceFile)); + writeFile(cachedReferenceFile, revision); + + // Write a reference file for the fetched revision + Path referenceFileForRevision = repoDir + "/" + referencesForRevisionsPrefix + revision; + createDirs(dirOf(referenceFileForRevision)); + writeFile(referenceFileForRevision, revision); + + if (newRevision.empty()) { + newRevision = revision; + fullReference = fullRef; + } + } + } catch (Error &e) { + // If the fetch failed, newRevision will still be empty. We handle that below. 
+ } + + if (!newRevision.size()) { + // No revision was fetched, so we use the expired cache entry if we have one + if (expiredCacheEntry.has_value()) { + auto expiredRevision = expiredCacheEntry.value().revision; + auto expiredFullReference = expiredCacheEntry.value().fullReference; + warn("failed to resolve revision for reference '%s' in repository '%s'; using expired cached revision '%s'", reference, gitUrl, expiredRevision); + return expiredRevision; + } + throw Error("failed to read revision for reference '%s' from '%s'", reference, gitUrl); + } + return newRevision; } -bool isNotDotGitDirectory(const Path & path) -{ - return baseNameOf(path) != ".git"; +/// Resolves the revision for a given reference in a git repo. +/// +/// Tries a lookup in the local git cache first. If the revision is not in the cache it is resolved using the repository at gitUrl. +/// +/// Abbreviated reference (e.g. 'something') are looked up using `git ls-remote something` or `git fetch gitUrl '*/something'`. +/// The only supported special reference is 'HEAD'. +/// +/// @param gitUrl The url of the git repo. Can be any [git url](https://git-scm.com/docs/git-fetch#_git_urls). +/// @param reference A git reference. +/// @param isLocal Skip the cache and look up directly in the local repository. +/// @return The resolved revision and full reference. nullopt if the reference could not be resolved. +std::string resolveReferenceAndPrepareCache(const std::string &gitUrl, const std::optional &reference, bool isLocal, bool shallow) { + Path cacheDir = getGitCachePath(gitUrl); + + // TODO: Currently every input reference is treated as a /ref/heads reference if it is no full ref. + // This means that tag references need to be prefixed with 'refs/tags/' otherwise they would not work. 
+ // We theoretically fully support abbreviated references, but that may break compatibility with older versions + // On the other hand reference resolution is always impure so it probably can be changed without breaking anything. + std::string referenceOrHead = reference.value_or("HEAD"); + // Path fullRef = referenceOrHead.compare(0, 5, "refs/") == 0 ? referenceOrHead : referenceOrHead == "HEAD" ? "HEAD" : "refs/heads/" + referenceOrHead; + + if (isLocal) { + auto const localRevision = resolveReference(gitUrl, referenceOrHead, true); + if (localRevision) { + return localRevision->revision; + } + if (referenceOrHead == "HEAD") { + // If HEAD can not be found the repository has probably no commits. + return getEmptyTreeHash(gitUrl); + } + throw Error("failed resolve revision for reference '%s' in local repository '%s'", referenceOrHead, gitUrl); + } + + auto revision = fetchAndResolveReferenceIntoCache(gitUrl, referenceOrHead, shallow); + + return revision; } -struct WorkdirInfo -{ - bool clean = false; - bool hasHead = false; +/// All information that is required to fetch a submodule +struct SubmoduleInfo { + /// The name of the submodule + std::string name; + /// The url of the submodule + /// + /// If the url in .gitmodules is relative it is relative to the origin of the superrepo. + std::string url; + /// The path of the submodule relative to the root of the superrepo + std::string path; + /// The revision of the submodule + std::string revision; + /// Whether the submodule url points to a local possibly dirty submodule + /// If this is set revision can be ignored + bool dirtyLocal; }; -// Returns whether a git workdir is clean and has commits. 
-WorkdirInfo getWorkdirInfo(const Input & input, const Path & workdir) -{ - const bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false); - std::string gitDir(".git"); - - auto env = getEnv(); - // Set LC_ALL to C: because we rely on the error messages from git rev-parse to determine what went wrong - // that way unknown errors can lead to a failure instead of continuing through the wrong code path - env["LC_ALL"] = "C"; - - /* Check whether HEAD points to something that looks like a commit, - since that is the refrence we want to use later on. */ - auto result = runProgram(RunOptions { - .program = "git", - .args = { "-C", workdir, "--git-dir", gitDir, "rev-parse", "--verify", "--no-revs", "HEAD^{commit}" }, - .environment = env, - .mergeStderrToStdout = true - }); - auto exitCode = WEXITSTATUS(result.first); - auto errorMessage = result.second; - - if (errorMessage.find("fatal: not a git repository") != std::string::npos) { - throw Error("'%s' is not a Git repository", workdir); - } else if (errorMessage.find("fatal: Needed a single revision") != std::string::npos) { - // indicates that the repo does not have any commits - // we want to proceed and will consider it dirty later - } else if (exitCode != 0) { - // any other errors should lead to a failure - throw Error("getting the HEAD of the Git tree '%s' failed with exit code %d:\n%s", workdir, exitCode, errorMessage); - } - - bool clean = false; - bool hasHead = exitCode == 0; - - try { - if (hasHead) { - // Using git diff is preferrable over lower-level operations here, - // because its conceptually simpler and we only need the exit code anyways. - auto gitDiffOpts = Strings({ "-C", workdir, "--git-dir", gitDir, "diff", "HEAD", "--quiet"}); - if (!submodules) { - // Changes in submodules should only make the tree dirty - // when those submodules will be copied as well. 
- gitDiffOpts.emplace_back("--ignore-submodules"); - } - gitDiffOpts.emplace_back("--"); - runProgram("git", true, gitDiffOpts); - - clean = true; - } - } catch (ExecError & e) { - if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) throw; - } - - return WorkdirInfo { .clean = clean, .hasHead = hasHead }; +/// Extracts information about the submodules at a given revision in a git repo +/// +/// `revision` and `gitDir` specify the repo to extract the submodule information from. +/// +/// If the source is a dirty local worktree you can specify the path to that worktree as localWorkdir. +/// If you do and the submodule is available in the local worktree the submodule url will be set to the local path. +/// In that case the return value will also contain a dirtyLocal flag. +/// +/// @param revision A git revision. Usually a commit hash. +/// @param gitDir A path to a bare git repository or .git directory +/// @param localWorkdir A path to a local git worktree where the repo is checked out. +/// @return A vector of submodule information. Empty if there are no submodules. 
+std::vector readGitmodules(const std::string &revision, const Path &gitDir, const std::optional &localWorkdir) { + auto gitmodulesFileFlag = "--blob=" + revision + ":.gitmodules"; + // Run ls-remote to find a revision for the reference + auto [status, output] = runProgram(RunOptions{ + .program = "git", + .args = {"-C", gitDir, "config", gitmodulesFileFlag, "--name-only", "--get-regexp", "path"}, + }); + if (status != 0) { + return {}; + } + + std::string_view sv = output; + auto lines = tokenizeString>(sv, "\n"); + std::vector submodules; + for (std::string_view line : lines) { + if (line.length() == 0) { + continue; + } + const static std::regex line_regex("^submodule[.](.+)[.]path$"); + std::match_results match; + if (!std::regex_match(line.cbegin(), line.cend(), match, line_regex)) { + throw Error(".gitmodules file seems invalid"); + } + + std::string submoduleName = match[1]; + + std::string path; + std::string url; + std::string submoduleRevision; + bool dirtyLocal = false; + + { + auto output = runProgram("git", true, Strings{"-C", gitDir, "config", gitmodulesFileFlag, "--get", "submodule." + submoduleName + ".path"}, + std::nullopt, true); + path = output.substr(0, output.find("\n")); + } + + { + auto output = runProgram("git", true, Strings{"-C", gitDir, "config", gitmodulesFileFlag, "--get", "submodule." + submoduleName + ".url"}, + std::nullopt, true); + url = output.substr(0, output.find("\n")); + + if (url.rfind("./", 0) == 0 || url.rfind("../", 0) == 0) { + // If the submodule is relative its URL is relative to the origin of the superrepo + auto [status, output] = runProgram(RunOptions{ + .program = "git", + .args = {"-C", gitDir, "remote", "get-url", "origin"}, + .isInteractive = true, + }); + auto line = chomp(output); + auto relativeSubmoduleRoot = line.size() ? 
line : gitDir; + url = relativeSubmoduleRoot + "/" + url; + } + } + + { + if (localWorkdir) { + auto localRepodir = localWorkdir.value(); + auto output = runProgram("git", true, Strings{"-C", localRepodir, "submodule", "status", path}); + auto line = output.substr(0, output.find("\n")); + auto prefix = line.substr(0, 1); + auto hash = chomp(line.substr(1, line.find(" ", 1))); + + if (prefix != "-") { + // Submodule is available in the local worktree + url = localRepodir + "/" + path; + dirtyLocal = true; + } + submoduleRevision = hash; + } else { + auto output = runProgram("git", true, Strings{"-C", gitDir, "ls-tree", "--object-only", revision, path}, {}, true); + auto line = output.substr(0, output.find("\n")); + if (line.size() == 0) { + throw Error("failed to resolve submodule '%s' in revision '%s'", submoduleName, revision); + } + submoduleRevision = line; + } + } + + SubmoduleInfo info({submoduleName, url, path, submoduleRevision, dirtyLocal}); + submodules.push_back(info); + } + + return submodules; } -std::pair fetchFromWorkdir(ref store, Input & input, const Path & workdir, const WorkdirInfo & workdirInfo) -{ - const bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false); - auto gitDir = ".git"; - - if (!fetchSettings.allowDirty) - throw Error("Git tree '%s' is dirty", workdir); - - if (fetchSettings.warnDirty) - warn("Git tree '%s' is dirty", workdir); - - auto gitOpts = Strings({ "-C", workdir, "--git-dir", gitDir, "ls-files", "-z" }); - if (submodules) - gitOpts.emplace_back("--recurse-submodules"); - - auto files = tokenizeString>( - runProgram("git", true, gitOpts), "\0"s); - - Path actualPath(absPath(workdir)); - - PathFilter filter = [&](const Path & p) -> bool { - assert(hasPrefix(p, actualPath)); - std::string file(p, actualPath.size() + 1); - - auto st = lstat(p); - - if (S_ISDIR(st.st_mode)) { - auto prefix = file + "/"; - auto i = files.lower_bound(prefix); - return i != files.end() && hasPrefix(*i, prefix); - } - - return 
files.count(file); - }; +/// Get the diff between a revision and a dirty workdir +/// +/// A workdir is dirty if it has staged changes, or unstaged changes to tracked files. +/// +/// @param workDir A path to a git repo or workdir +/// @param headRevision The revision that is currently HEAD of the workdir +/// @param submodules Also check if submodules are changed or dirty +/// @return A diff between the `headRevision` and the dirty workdir if the workdir is dirty. +std::optional getWorkdirDiff(const Path &workDir, const std::string &headRevision, bool submodules) { + auto gitStatusArguments = + Strings({"-C", workDir, "diff-index", "--binary", std::string("--ignore-submodules=") + (submodules ? "none" : "all"), "-p", headRevision}); + auto output = runProgram("git", true, gitStatusArguments); + + return chomp(output).length() > 0 ? std::optional(output) : std::nullopt; +} - auto storePath = store->addToStore(input.getName(), actualPath, FileIngestionMethod::Recursive, htSHA256, filter); +/// Get a path to a bare git repo containing the specified revision +/// +/// If the cached repo already contains the revision it just returns the path to the repo. +/// Otherwise it fetches the revision into the repo and returns the path to the repo. +/// +/// The returned path is a cache dir path without a lock. That is fine as long as there are only read-only operations on the fetched revision. +/// +/// @param gitUrl A [git url](https://git-scm.com/docs/git-fetch#_git_urls) to the repository. +/// @param revision A git revision. Usually a commit hash. +/// @return A path to a bare git repo containing the specified revision. The path is to be treated as read-only. 
+std::pair getLocalRepoContainingRevision(const std::string gitUrl, const std::string revision, bool local, bool allowDirty, bool submodules, + bool shallow) { + if (!local) { + Path repoDir = fetchRevisionIntoCache(gitUrl, revision, shallow); + return {absPath(repoDir), revision}; + } + + if (!allowDirty) { + return {absPath(gitUrl), revision}; + } + + auto dirtyDiff = getWorkdirDiff(gitUrl, revision, submodules); + if (!dirtyDiff.has_value()) { + // Not dirty + return {absPath(gitUrl), revision}; + } + + Path repoDir = fetchRevisionIntoCache(gitUrl, revision, shallow); + PathLocks cacheDirLock({repoDir + ".lock"}); + runProgram("git", true, {"-C", repoDir, "read-tree", revision}); + runProgram("git", true, Strings{"-C", repoDir, "apply", "--cached", "--binary", "-"}, dirtyDiff); + auto treeHash = chomp(runProgram("git", true, {"-C", repoDir, "write-tree"})); + + return {absPath(repoDir), treeHash}; +} - // FIXME: maybe we should use the timestamp of the last - // modified dirty file? - input.attrs.insert_or_assign( - "lastModified", - workdirInfo.hasHead ? 
std::stoull(runProgram("git", true, { "-C", actualPath, "--git-dir", gitDir, "log", "-1", "--format=%ct", "--no-show-signature", "HEAD" })) : 0); +/// Copy all files at a specific revision in a git repo to a target directory +/// +/// @param gitDir A path to a git working tree +/// @param targetDir The tree will be placed here +/// @param revision The revision that will get copied +void copyAllFilesFromRevision(const Path &gitDir, const Path &targetDir, const std::string &revision) { + auto source = sinkToSource([&](Sink &sink) { runProgram2({.program = "git", .args = {"-C", gitDir, "archive", revision}, .standardOut = &sink}); }); + unpackTarfile(*source, targetDir); +} - if (workdirInfo.hasHead) { - input.attrs.insert_or_assign("dirtyRev", chomp( - runProgram("git", true, { "-C", actualPath, "--git-dir", gitDir, "rev-parse", "--verify", "HEAD" })) + "-dirty"); - input.attrs.insert_or_assign("dirtyShortRev", chomp( - runProgram("git", true, { "-C", actualPath, "--git-dir", gitDir, "rev-parse", "--verify", "--short", "HEAD" })) + "-dirty"); - } +/// Place the tree of a git repo at a given revision at a given path +/// +/// @param url A [git url](https://git-scm.com/docs/git-fetch#_git_urls) to the repository. +/// @param targetDir The path to place the tree at. +/// @param inputRevision A git revision. Usually a commit hash. +/// @param submodules Whether to recursively fetch submodules. +/// @param shallow Whether to accept shallow git repositories. +/// @param isLocal Whether the repo is local or not. If set the repository is not put into cache. +/// @param allowDirty Whether to use a dirty worktree if available. +/// @return A pair of the path to the git repo containing the specified revision and a flag telling whether a dirty local worktree was used. The path is to be treated as +/// read-only. 
+std::pair placeRevisionTreeAtPath(const std::string &url, const Path &targetDir, const std::string &inputRevision, const bool submodules, + const bool shallow, const bool isLocal = false, const bool allowDirty = false) { + printTalkative("using revision %s of repo '%s'", inputRevision, url); + + auto [gitDir, revision] = getLocalRepoContainingRevision(url, inputRevision, isLocal, allowDirty, submodules, shallow); + auto isLocalAndDirty = isLocal && allowDirty && revision != inputRevision; + + bool isShallow = chomp(runProgram("git", true, {"-C", gitDir, "rev-parse", "--is-shallow-repository"})) == "true"; + if (isShallow && !shallow) + throw Error("'%s' is a shallow Git repository, but shallow repositories are only allowed when `shallow = true;` is specified.", gitDir); + + if (isLocalAndDirty) { + if (!fetchSettings.allowDirty) + throw Error("Git tree '%s' is dirty", url); + + if (fetchSettings.warnDirty) + warn("Git tree '%s' is dirty", url); + } + + copyAllFilesFromRevision(gitDir, targetDir, revision); + + // Also fetch and place all gitmodules + if (submodules) { + auto gitmodules = readGitmodules(revision, gitDir, isLocalAndDirty ? std::optional(url) : std::nullopt); + for (auto gitmodule : gitmodules) { + auto submoduleDir = targetDir + "/" + gitmodule.path; + createDirs(submoduleDir); + placeRevisionTreeAtPath(gitmodule.url, submoduleDir, gitmodule.revision, submodules, shallow, gitmodule.dirtyLocal, gitmodule.dirtyLocal); + } + } + + return {gitDir, isLocalAndDirty}; +} - return {std::move(storePath), input}; +/// Create a result for the fetch function +std::pair makeResult(const Input &input, StorePath &&storePath, const Attrs &infoAttrs, const std::string &revision, bool shallow, + bool dirty) { + Input _input = Input(input); + if (dirty) { + _input.attrs.insert_or_assign("dirtyRev", revision + "-dirty"); + // TODO: Think about removing dirtyShortRev. 
It is not really used and is inconsistent with the non dirty path, as just shortRev does not exist + _input.attrs.insert_or_assign("dirtyShortRev", revision.substr(0, 7) + "-dirty"); + } else { + _input.attrs.insert_or_assign("rev", revision); + } + + if (!shallow) + _input.attrs.insert_or_assign("revCount", getIntAttr(infoAttrs, "revCount")); + _input.attrs.insert_or_assign("lastModified", getIntAttr(infoAttrs, "lastModified")); + return {std::move(storePath), std::move(_input)}; } -} // end namespace - -struct GitInputScheme : InputScheme -{ - std::optional inputFromURL(const ParsedURL & url, bool requireTree) const override - { - if (url.scheme != "git" && - url.scheme != "git+http" && - url.scheme != "git+https" && - url.scheme != "git+ssh" && - url.scheme != "git+file") return {}; - - auto url2(url); - if (hasPrefix(url2.scheme, "git+")) url2.scheme = std::string(url2.scheme, 4); - url2.query.clear(); - - Attrs attrs; - attrs.emplace("type", "git"); - - for (auto & [name, value] : url.query) { - if (name == "rev" || name == "ref") - attrs.emplace(name, value); - else if (name == "shallow" || name == "submodules" || name == "allRefs") - attrs.emplace(name, Explicit { value == "1" }); - else - url2.query.emplace(name, value); - } - - attrs.emplace("url", url2.to_string()); - - return inputFromAttrs(attrs); - } - - std::optional inputFromAttrs(const Attrs & attrs) const override - { - if (maybeGetStrAttr(attrs, "type") != "git") return {}; - - for (auto & [name, value] : attrs) - if (name != "type" && name != "url" && name != "ref" && name != "rev" && name != "shallow" && name != "submodules" && name != "lastModified" && name != "revCount" && name != "narHash" && name != "allRefs" && name != "name" && name != "dirtyRev" && name != "dirtyShortRev") - throw Error("unsupported Git input attribute '%s'", name); - - parseURL(getStrAttr(attrs, "url")); - maybeGetBoolAttr(attrs, "shallow"); - maybeGetBoolAttr(attrs, "submodules"); - maybeGetBoolAttr(attrs, "allRefs"); - 
- if (auto ref = maybeGetStrAttr(attrs, "ref")) { - if (std::regex_search(*ref, badGitRefRegex)) - throw BadURL("invalid Git branch/tag name '%s'", *ref); - } - - Input input; - input.attrs = attrs; - return input; - } - - ParsedURL toURL(const Input & input) const override - { - auto url = parseURL(getStrAttr(input.attrs, "url")); - if (url.scheme != "git") url.scheme = "git+" + url.scheme; - if (auto rev = input.getRev()) url.query.insert_or_assign("rev", rev->gitRev()); - if (auto ref = input.getRef()) url.query.insert_or_assign("ref", *ref); - if (maybeGetBoolAttr(input.attrs, "shallow").value_or(false)) - url.query.insert_or_assign("shallow", "1"); - return url; - } - - bool hasAllInfo(const Input & input) const override - { - bool maybeDirty = !input.getRef(); - bool shallow = maybeGetBoolAttr(input.attrs, "shallow").value_or(false); - return - maybeGetIntAttr(input.attrs, "lastModified") - && (shallow || maybeDirty || maybeGetIntAttr(input.attrs, "revCount")); - } - - Input applyOverrides( - const Input & input, - std::optional ref, - std::optional rev) const override - { - auto res(input); - if (rev) res.attrs.insert_or_assign("rev", rev->gitRev()); - if (ref) res.attrs.insert_or_assign("ref", *ref); - if (!res.getRef() && res.getRev()) - throw Error("Git input '%s' has a commit hash but no branch/tag name", res.to_string()); - return res; - } - - void clone(const Input & input, const Path & destDir) const override - { - auto [isLocal, actualUrl] = getActualUrl(input); - - Strings args = {"clone"}; - - args.push_back(actualUrl); - - if (auto ref = input.getRef()) { - args.push_back("--branch"); - args.push_back(*ref); - } - - if (input.getRev()) throw UnimplementedError("cloning a specific revision is not implemented"); - - args.push_back(destDir); - - runProgram("git", true, args, {}, true); - } - - std::optional getSourcePath(const Input & input) override - { - auto url = parseURL(getStrAttr(input.attrs, "url")); - if (url.scheme == "file" && 
!input.getRef() && !input.getRev()) - return url.path; - return {}; - } - - void markChangedFile(const Input & input, std::string_view file, std::optional commitMsg) override - { - auto sourcePath = getSourcePath(input); - assert(sourcePath); - auto gitDir = ".git"; - - runProgram("git", true, - { "-C", *sourcePath, "--git-dir", gitDir, "add", "--intent-to-add", "--", std::string(file) }); - - if (commitMsg) - runProgram("git", true, - { "-C", *sourcePath, "--git-dir", gitDir, "commit", std::string(file), "-m", *commitMsg }); - } - - std::pair getActualUrl(const Input & input) const - { - // file:// URIs are normally not cloned (but otherwise treated the - // same as remote URIs, i.e. we don't use the working tree or - // HEAD). Exception: If _NIX_FORCE_HTTP is set, or the repo is a bare git - // repo, treat as a remote URI to force a clone. - static bool forceHttp = getEnv("_NIX_FORCE_HTTP") == "1"; // for testing - auto url = parseURL(getStrAttr(input.attrs, "url")); - bool isBareRepository = url.scheme == "file" && !pathExists(url.path + "/.git"); - bool isLocal = url.scheme == "file" && !forceHttp && !isBareRepository; - return {isLocal, isLocal ? url.path : url.base}; - } - - std::pair fetch(ref store, const Input & _input) override - { - Input input(_input); - auto gitDir = ".git"; - - std::string name = input.getName(); - - bool shallow = maybeGetBoolAttr(input.attrs, "shallow").value_or(false); - bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false); - bool allRefs = maybeGetBoolAttr(input.attrs, "allRefs").value_or(false); - - std::string cacheType = "git"; - if (shallow) cacheType += "-shallow"; - if (submodules) cacheType += "-submodules"; - if (allRefs) cacheType += "-all-refs"; - - auto checkHashType = [&](const std::optional & hash) - { - if (hash.has_value() && !(hash->type == htSHA1 || hash->type == htSHA256)) - throw Error("Hash '%s' is not supported by Git. 
Supported types are sha1 and sha256.", hash->to_string(Base16, true)); - }; - - auto getLockedAttrs = [&]() - { - checkHashType(input.getRev()); - - return Attrs({ - {"type", cacheType}, - {"name", name}, - {"rev", input.getRev()->gitRev()}, - }); - }; - - auto makeResult = [&](const Attrs & infoAttrs, StorePath && storePath) - -> std::pair - { - assert(input.getRev()); - assert(!_input.getRev() || _input.getRev() == input.getRev()); - if (!shallow) - input.attrs.insert_or_assign("revCount", getIntAttr(infoAttrs, "revCount")); - input.attrs.insert_or_assign("lastModified", getIntAttr(infoAttrs, "lastModified")); - return {std::move(storePath), input}; - }; - - if (input.getRev()) { - if (auto res = getCache()->lookup(store, getLockedAttrs())) - return makeResult(res->first, std::move(res->second)); - } - - auto [isLocal, actualUrl_] = getActualUrl(input); - auto actualUrl = actualUrl_; // work around clang bug - - /* If this is a local directory and no ref or revision is given, - allow fetching directly from a dirty workdir. 
*/ - if (!input.getRef() && !input.getRev() && isLocal) { - auto workdirInfo = getWorkdirInfo(input, actualUrl); - if (!workdirInfo.clean) { - return fetchFromWorkdir(store, input, actualUrl, workdirInfo); - } - } - - Attrs unlockedAttrs({ - {"type", cacheType}, - {"name", name}, - {"url", actualUrl}, - }); - - Path repoDir; - - if (isLocal) { - if (!input.getRef()) { - auto head = readHead(actualUrl); - if (!head) { - warn("could not read HEAD ref from repo at '%s', using 'master'", actualUrl); - head = "master"; - } - input.attrs.insert_or_assign("ref", *head); - unlockedAttrs.insert_or_assign("ref", *head); - } - - if (!input.getRev()) - input.attrs.insert_or_assign("rev", - Hash::parseAny(chomp(runProgram("git", true, { "-C", actualUrl, "--git-dir", gitDir, "rev-parse", *input.getRef() })), htSHA1).gitRev()); - - repoDir = actualUrl; - } else { - const bool useHeadRef = !input.getRef(); - if (useHeadRef) { - auto head = readHeadCached(actualUrl); - if (!head) { - warn("could not read HEAD ref from repo at '%s', using 'master'", actualUrl); - head = "master"; - } - input.attrs.insert_or_assign("ref", *head); - unlockedAttrs.insert_or_assign("ref", *head); - } else { - if (!input.getRev()) { - unlockedAttrs.insert_or_assign("ref", input.getRef().value()); - } - } - - if (auto res = getCache()->lookup(store, unlockedAttrs)) { - auto rev2 = Hash::parseAny(getStrAttr(res->first, "rev"), htSHA1); - if (!input.getRev() || input.getRev() == rev2) { - input.attrs.insert_or_assign("rev", rev2.gitRev()); - return makeResult(res->first, std::move(res->second)); - } - } - - Path cacheDir = getCachePath(actualUrl); - repoDir = cacheDir; - gitDir = "."; - - createDirs(dirOf(cacheDir)); - PathLocks cacheDirLock({cacheDir + ".lock"}); - - if (!pathExists(cacheDir)) { - runProgram("git", true, { "-c", "init.defaultBranch=" + gitInitialBranch, "init", "--bare", repoDir }); - } - - Path localRefFile = - input.getRef()->compare(0, 5, "refs/") == 0 - ? 
cacheDir + "/" + *input.getRef() - : cacheDir + "/refs/heads/" + *input.getRef(); - - bool doFetch; - time_t now = time(0); - - /* If a rev was specified, we need to fetch if it's not in the - repo. */ - if (input.getRev()) { - try { - runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "cat-file", "-e", input.getRev()->gitRev() }); - doFetch = false; - } catch (ExecError & e) { - if (WIFEXITED(e.status)) { - doFetch = true; - } else { - throw; - } - } - } else { - if (allRefs) { - doFetch = true; - } else { - /* If the local ref is older than ‘tarball-ttl’ seconds, do a - git fetch to update the local ref to the remote ref. */ - struct stat st; - doFetch = stat(localRefFile.c_str(), &st) != 0 || - !isCacheFileWithinTtl(now, st); - } - } - - if (doFetch) { - Activity act(*logger, lvlTalkative, actUnknown, fmt("fetching Git repository '%s'", actualUrl)); - - // FIXME: git stderr messes up our progress indicator, so - // we're using --quiet for now. Should process its stderr. - try { - auto ref = input.getRef(); - auto fetchRef = allRefs - ? "refs/*" - : ref->compare(0, 5, "refs/") == 0 - ? *ref - : ref == "HEAD" - ? 
*ref - : "refs/heads/" + *ref; - runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "fetch", "--quiet", "--force", "--", actualUrl, fmt("%s:%s", fetchRef, fetchRef) }, {}, true); - } catch (Error & e) { - if (!pathExists(localRefFile)) throw; - warn("could not update local clone of Git repository '%s'; continuing with the most recent version", actualUrl); - } - - if (!touchCacheFile(localRefFile, now)) - warn("could not update mtime for file '%s': %s", localRefFile, strerror(errno)); - if (useHeadRef && !storeCachedHead(actualUrl, *input.getRef())) - warn("could not update cached head '%s' for '%s'", *input.getRef(), actualUrl); - } - - if (!input.getRev()) - input.attrs.insert_or_assign("rev", Hash::parseAny(chomp(readFile(localRefFile)), htSHA1).gitRev()); - - // cache dir lock is removed at scope end; we will only use read-only operations on specific revisions in the remainder - } - - bool isShallow = chomp(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "rev-parse", "--is-shallow-repository" })) == "true"; - - if (isShallow && !shallow) - throw Error("'%s' is a shallow Git repository, but shallow repositories are only allowed when `shallow = true;` is specified.", actualUrl); - - // FIXME: check whether rev is an ancestor of ref. - - printTalkative("using revision %s of repo '%s'", input.getRev()->gitRev(), actualUrl); - - /* Now that we know the ref, check again whether we have it in - the store. 
*/ - if (auto res = getCache()->lookup(store, getLockedAttrs())) - return makeResult(res->first, std::move(res->second)); - - Path tmpDir = createTempDir(); - AutoDelete delTmpDir(tmpDir, true); - PathFilter filter = defaultPathFilter; - - auto result = runProgram(RunOptions { - .program = "git", - .args = { "-C", repoDir, "--git-dir", gitDir, "cat-file", "commit", input.getRev()->gitRev() }, - .mergeStderrToStdout = true - }); - if (WEXITSTATUS(result.first) == 128 - && result.second.find("bad file") != std::string::npos) - { - throw Error( - "Cannot find Git revision '%s' in ref '%s' of repository '%s'! " - "Please make sure that the " ANSI_BOLD "rev" ANSI_NORMAL " exists on the " - ANSI_BOLD "ref" ANSI_NORMAL " you've specified or add " ANSI_BOLD - "allRefs = true;" ANSI_NORMAL " to " ANSI_BOLD "fetchGit" ANSI_NORMAL ".", - input.getRev()->gitRev(), - *input.getRef(), - actualUrl - ); - } - - if (submodules) { - Path tmpGitDir = createTempDir(); - AutoDelete delTmpGitDir(tmpGitDir, true); - - runProgram("git", true, { "-c", "init.defaultBranch=" + gitInitialBranch, "init", tmpDir, "--separate-git-dir", tmpGitDir }); - - { - // TODO: repoDir might lack the ref (it only checks if rev - // exists, see FIXME above) so use a big hammer and fetch - // everything to ensure we get the rev. - Activity act(*logger, lvlTalkative, actUnknown, fmt("making temporary clone of '%s'", repoDir)); - runProgram("git", true, { "-C", tmpDir, "fetch", "--quiet", "--force", - "--update-head-ok", "--", repoDir, "refs/*:refs/*" }, {}, true); - } - - runProgram("git", true, { "-C", tmpDir, "checkout", "--quiet", input.getRev()->gitRev() }); - - /* Ensure that we use the correct origin for fetching - submodules. This matters for submodules with relative - URLs. */ - if (isLocal) { - writeFile(tmpGitDir + "/config", readFile(repoDir + "/" + gitDir + "/config")); - - /* Restore the config.bare setting we may have just - copied erroneously from the user's repo. 
*/ - runProgram("git", true, { "-C", tmpDir, "config", "core.bare", "false" }); - } else - runProgram("git", true, { "-C", tmpDir, "config", "remote.origin.url", actualUrl }); - - /* As an optimisation, copy the modules directory of the - source repo if it exists. */ - auto modulesPath = repoDir + "/" + gitDir + "/modules"; - if (pathExists(modulesPath)) { - Activity act(*logger, lvlTalkative, actUnknown, fmt("copying submodules of '%s'", actualUrl)); - runProgram("cp", true, { "-R", "--", modulesPath, tmpGitDir + "/modules" }); - } - - { - Activity act(*logger, lvlTalkative, actUnknown, fmt("fetching submodules of '%s'", actualUrl)); - runProgram("git", true, { "-C", tmpDir, "submodule", "--quiet", "update", "--init", "--recursive" }, {}, true); - } - - filter = isNotDotGitDirectory; - } else { - // FIXME: should pipe this, or find some better way to extract a - // revision. - auto source = sinkToSource([&](Sink & sink) { - runProgram2({ - .program = "git", - .args = { "-C", repoDir, "--git-dir", gitDir, "archive", input.getRev()->gitRev() }, - .standardOut = &sink - }); - }); - - unpackTarfile(*source, tmpDir); - } - - auto storePath = store->addToStore(name, tmpDir, FileIngestionMethod::Recursive, htSHA256, filter); - - auto lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "log", "-1", "--format=%ct", "--no-show-signature", input.getRev()->gitRev() })); - - Attrs infoAttrs({ - {"rev", input.getRev()->gitRev()}, - {"lastModified", lastModified}, - }); - - if (!shallow) - infoAttrs.insert_or_assign("revCount", - std::stoull(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "rev-list", "--count", input.getRev()->gitRev() }))); - - if (!_input.getRev()) - getCache()->add( - store, - unlockedAttrs, - infoAttrs, - storePath, - false); - - getCache()->add( - store, - getLockedAttrs(), - infoAttrs, - storePath, - true); - - return makeResult(infoAttrs, std::move(storePath)); - } +} // end namespace + +struct 
GitInputScheme : InputScheme { + std::optional inputFromURL(const ParsedURL &url, bool requireTree) const override { + if (url.scheme != "git" && url.scheme != "git+http" && url.scheme != "git+https" && url.scheme != "git+ssh" && url.scheme != "git+file") + return {}; + + auto url2(url); + if (hasPrefix(url2.scheme, "git+")) + url2.scheme = std::string(url2.scheme, 4); + url2.query.clear(); + + Attrs attrs; + attrs.emplace("type", "git"); + + for (auto &[name, value] : url.query) { + if (name == "rev" || name == "ref") + attrs.emplace(name, value); + else if (name == "shallow" || name == "submodules") + attrs.emplace(name, Explicit{value == "1"}); + else + url2.query.emplace(name, value); + } + + attrs.emplace("url", url2.to_string()); + + return inputFromAttrs(attrs); + } + + std::optional inputFromAttrs(const Attrs &attrs) const override { + if (maybeGetStrAttr(attrs, "type") != "git") + return {}; + + for (auto &[name, value] : attrs) + if (name != "type" && name != "url" && name != "ref" && name != "rev" && name != "shallow" && name != "submodules" && name != "lastModified" && + name != "revCount" && name != "narHash" && name != "allRefs" && name != "name" && name != "dirtyRev" && name != "dirtyShortRev") + throw Error("unsupported Git input attribute '%s'", name); + + parseURL(getStrAttr(attrs, "url")); + maybeGetBoolAttr(attrs, "shallow"); + maybeGetBoolAttr(attrs, "submodules"); + + if (auto ref = maybeGetStrAttr(attrs, "ref")) { + if (std::regex_search(*ref, badGitRefRegex)) + throw BadURL("invalid Git branch/tag name '%s'", *ref); + } + + Input input; + input.attrs = attrs; + return input; + } + + ParsedURL toURL(const Input &input) const override { + auto url = parseURL(getStrAttr(input.attrs, "url")); + if (url.scheme != "git") + url.scheme = "git+" + url.scheme; + if (auto rev = input.getRev()) + url.query.insert_or_assign("rev", rev->gitRev()); + if (auto ref = input.getRef()) + url.query.insert_or_assign("ref", *ref); + if 
(maybeGetBoolAttr(input.attrs, "shallow").value_or(false)) + url.query.insert_or_assign("shallow", "1"); + return url; + } + + bool hasAllInfo(const Input &input) const override { + bool maybeDirty = !input.getRef(); + bool shallow = maybeGetBoolAttr(input.attrs, "shallow").value_or(false); + return maybeGetIntAttr(input.attrs, "lastModified") && (shallow || maybeDirty || maybeGetIntAttr(input.attrs, "revCount")); + } + + Input applyOverrides(const Input &input, std::optional ref, std::optional rev) const override { + auto res(input); + if (rev) + res.attrs.insert_or_assign("rev", rev->gitRev()); + if (ref) + res.attrs.insert_or_assign("ref", *ref); + // TODO: I do not understand why this check is required + if (!res.getRef() && res.getRev()) + throw Error("Git input '%s' has a commit hash but no branch/tag name", res.to_string()); + return res; + } + + void clone(const Input &input, const Path &destDir) const override { + auto [isLocal, actualUrl] = getActualUrl(getStrAttr(input.attrs, "url")); + + Strings args = {"clone"}; + + args.push_back(actualUrl); + + if (auto ref = input.getRef()) { + args.push_back("--branch"); + args.push_back(*ref); + } + + if (input.getRev()) + throw UnimplementedError("cloning a specific revision is not implemented"); + + args.push_back(destDir); + + runProgram("git", true, args, {}, true); + } + + std::optional getSourcePath(const Input &input) override { + auto url = parseURL(getStrAttr(input.attrs, "url")); + if (url.scheme == "file" && !input.getRef() && !input.getRev()) + return url.path; + return {}; + } + + void markChangedFile(const Input &input, std::string_view file, std::optional commitMsg) override { + auto sourcePath = getSourcePath(input); + assert(sourcePath); + auto gitDir = ".git"; + + runProgram("git", true, {"-C", *sourcePath, "--git-dir", gitDir, "add", "--intent-to-add", "--", std::string(file)}); + + if (commitMsg) + runProgram("git", true, {"-C", *sourcePath, "--git-dir", gitDir, "commit", std::string(file), 
"-m", *commitMsg}); + } + + std::pair getActualUrl(const std::string &url) const { + // file:// URIs are normally not cloned (but otherwise treated the + // same as remote URIs, i.e. we don't use the working tree or + // HEAD). Exception: If _NIX_FORCE_HTTP is set, or the repo is a bare git + // repo, treat as a remote URI to force a clone. + static bool forceHttp = getEnv("_NIX_FORCE_HTTP") == "1"; // for testing + auto parsedUrl = parseURL(url); + bool isBareRepository = parsedUrl.scheme == "file" && !pathExists(parsedUrl.path + "/.git"); + bool isLocal = parsedUrl.scheme == "file" && !forceHttp && !isBareRepository; + return {isLocal, isLocal ? parsedUrl.path : parsedUrl.base}; + } + + std::pair fetch(ref store, const Input &input) override { + // Verify that the hash type is valid if a revision is specified + if (input.getRev().has_value() && !(input.getRev()->type == htSHA1 || input.getRev()->type == htSHA256)) { + throw Error("Hash '%s' is not supported by Git. Supported types are sha1 and sha256.", input.getRev()->to_string(Base16, true)); + } + + // Move important attributes to local variables + std::string name = input.getName(); + std::optional reference = input.getRef(); + std::optional inputRevision = input.getRev() ? std::optional(input.getRev()->gitRev()) : std::nullopt; + bool shallow = maybeGetBoolAttr(input.attrs, "shallow").value_or(false); + bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false); + + // Resolve the actual url + auto [isLocal, actualUrl] = getActualUrl(getStrAttr(input.attrs, "url")); + + // Decide whether we are open to using a dirty local repo + auto allowDirty = !reference && !inputRevision && isLocal; + + // Resolve reference to revision if necessary + std::string revision = inputRevision.has_value() ? inputRevision.value() : resolveReferenceAndPrepareCache(actualUrl, reference, isLocal, shallow); + + // Lookup revision in cache and return if it is there + auto cacheType = std::string("git") + (shallow ? 
"-shallow" : "") + (submodules ? "-submodules" : ""); + if (!allowDirty) { + if (auto res = getCache()->lookup(store, Attrs({{"name", name}, {"type", cacheType}, {"url", actualUrl}, {"rev", revision}}))) { + return makeResult(input, std::move(res->second), res->first, revision, shallow, false); + } + } + + // Fetch the correct revision (or dirty if we allow it) + Path tmpDir = createTempDir(); + AutoDelete delTmpDir(tmpDir, true); + auto [repoDir, isDirty] = placeRevisionTreeAtPath(actualUrl, tmpDir, revision, submodules, shallow, isLocal, allowDirty); + + // Collect infoAttrs + Attrs infoAttrs({ + {"rev", revision}, + {"lastModified", (isLocal && revision == getEmptyTreeHash(actualUrl)) + ? 0ull + : std::stoull(runProgram("git", true, {"-C", repoDir, "log", "-1", "--format=%ct", "--no-show-signature", revision}))}, + }); + if (!shallow) { + infoAttrs.insert_or_assign("revCount", std::stoull(runProgram("git", true, {"-C", repoDir, "rev-list", "--count", revision}))); + } + + // Add to store and return + auto storePath = store->addToStore(name, tmpDir, FileIngestionMethod::Recursive, htSHA256, defaultPathFilter); + if (!isDirty) { + getCache()->add(store, Attrs({{"name", name}, {"type", cacheType}, {"url", actualUrl}, {"rev", revision}}), infoAttrs, storePath, true); + } + return makeResult(input, std::move(storePath), infoAttrs, revision, shallow, isDirty); + } }; static auto rGitInputScheme = OnStartup([] { registerInputScheme(std::make_unique()); }); -} +} // namespace nix::fetchers diff --git a/src/nix/flake.md b/src/nix/flake.md index 92f477917fd..06113d426fb 100644 --- a/src/nix/flake.md +++ b/src/nix/flake.md @@ -86,10 +86,16 @@ Here are some examples of flake references in their URL-like representation: * `git+https://github.com/NixOS/patchelf`: A Git repository. * `git+https://github.com/NixOS/patchelf?ref=master`: A specific branch of a Git repository. 
-* `git+https://github.com/NixOS/patchelf?ref=master&rev=f34751b88bd07d7f44f5cd3200fb4122bf916c7e`: - A specific branch *and* revision of a Git repository. +* `git+https://github.com/NixOS/patchelf?ref=refs/tags/0.18.0`: + A specific tag of a Git repository. +* `git+https://github.com/NixOS/patchelf?rev=f34751b88bd07d7f44f5cd3200fb4122bf916c7e`: + A specific revision/commit of a Git repository. * `https://github.com/NixOS/patchelf/archive/master.tar.gz`: A tarball flake. +* `git+file:///path/to/local/flake`: + A local Git repository at the current working tree. +* `git+file:///path/to/local/flake?rev=f34751b88bd07d7f44f5cd3200fb4122bf916c7e`: + A local Git repository at a specific revision/commit. ## Path-like syntax @@ -170,28 +176,24 @@ Currently the `type` attribute can be one of the following: They have the URL form ``` - git(+http|+https|+ssh|+git|+file|):(//<server>)?<path>(\?<params>)? + git(+http|+https|+ssh|+file|):(//<server>)?<path>(\?<params>)? ``` The `ref` attribute defaults to resolving the `HEAD` reference. + Has no effect if `rev` is specified. - The `rev` attribute must denote a commit that exists in the branch - or tag specified by the `ref` attribute, since Nix doesn't do a full - clone of the remote repository by default (and the Git protocol - doesn't allow fetching a `rev` without a known `ref`). The default + The `rev` attribute denotes a commit/revision. The default is the commit currently pointed to by `ref`. - When `git+file` is used without specifying `ref` or `rev`, files are - fetched directly from the local `path` as long as they have been added - to the Git repository. If there are uncommitted changes, the reference - is treated as dirty and a warning is printed. + When `git+file` is used without specifying `ref` or `rev` and the repository includes modified tracked files or + staged changes, the dirty state of the repository, including those changes, will be used and a warning will be printed.
For example, the following are valid Git flake references: * `git+https://example.org/my/repo` * `git+https://example.org/my/repo?dir=flake1` * `git+ssh://git@github.com/NixOS/nix?ref=v1.2.3` - * `git://github.com/edolstra/dwarffs?ref=unstable&rev=e486d8d40e626a20e06d792db8cc5ac5aba9a5b4` + * `git://github.com/edolstra/dwarffs?rev=e486d8d40e626a20e06d792db8cc5ac5aba9a5b4` * `git+file:///home/my-user/some-repo/some-repo` * `mercurial`: Mercurial repositories. The URL form is similar to the diff --git a/tests/fetchGit.sh b/tests/fetchGit.sh index 418b4f63fc2..57cb50f02b2 100644 --- a/tests/fetchGit.sh +++ b/tests/fetchGit.sh @@ -49,11 +49,9 @@ git -C $repo add differentbranch git -C $repo commit -m 'Test2' git -C $repo checkout master devrev=$(git -C $repo rev-parse devtest) -out=$(nix eval --impure --raw --expr "builtins.fetchGit { url = file://$repo; rev = \"$devrev\"; }" 2>&1) || status=$? -[[ $status == 1 ]] -[[ $out =~ 'Cannot find Git revision' ]] +out=$(nix eval --impure --raw --expr "builtins.fetchGit { url = file://$repo; rev = \"$devrev\"; }" 2>&1) -[[ $(nix eval --raw --expr "builtins.readFile (builtins.fetchGit { url = file://$repo; rev = \"$devrev\"; allRefs = true; } + \"/differentbranch\")") = 'different file' ]] +[[ $(nix eval --raw --expr "builtins.readFile (builtins.fetchGit { url = file://$repo; rev = \"$devrev\"; } + \"/differentbranch\")") = 'different file' ]] # In pure eval mode, fetchGit without a revision should fail. [[ $(nix eval --impure --raw --expr "builtins.readFile (fetchGit file://$repo + \"/hello\")") = world ]]