diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc index 37f776b11a8..aa9efb88307 100644 --- a/src/libfetchers/git-utils.cc +++ b/src/libfetchers/git-utils.cc @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -31,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -89,7 +91,7 @@ typedef std::unique_ptr> ObjectDb; typedef std::unique_ptr> PackBuilder; typedef std::unique_ptr> Indexer; -Hash toHash(const git_oid & oid) +static Hash toHash(const git_oid & oid) { #ifdef GIT_EXPERIMENTAL_SHA256 assert(oid.type == GIT_OID_SHA1); @@ -108,7 +110,7 @@ static void initLibGit2() }); } -git_oid hashToOID(const Hash & hash) +static git_oid hashToOID(const Hash & hash) { git_oid oid; if (git_oid_fromstr(&oid, hash.gitRev().c_str())) @@ -116,7 +118,7 @@ git_oid hashToOID(const Hash & hash) return oid; } -Object lookupObject(git_repository * repo, const git_oid & oid, git_object_t type = GIT_OBJECT_ANY) +static Object lookupObject(git_repository * repo, const git_oid & oid, git_object_t type = GIT_OBJECT_ANY) { Object obj; if (git_object_lookup(Setter(obj), repo, &oid, type)) { @@ -127,7 +129,7 @@ Object lookupObject(git_repository * repo, const git_oid & oid, git_object_t typ } template -T peelObject(git_object * obj, git_object_t type) +static T peelObject(git_object * obj, git_object_t type) { T obj2; if (git_object_peel((git_object **) (typename T::pointer *) Setter(obj2), obj, type)) { @@ -138,7 +140,7 @@ T peelObject(git_object * obj, git_object_t type) } template -T dupObject(typename T::pointer obj) +static T dupObject(typename T::pointer obj) { T obj2; if (git_object_dup((git_object **) (typename T::pointer *) Setter(obj2), (git_object *) obj)) @@ -245,9 +247,15 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this * In-memory object store for efficient batched writing to packfiles. * Owned by `repo`. 
*/ - git_odb_backend * mempack_backend; + git_odb_backend * mempackBackend = nullptr; - GitRepoImpl(std::filesystem::path _path, bool create, bool bare) + /** + * On-disk packfile object store. + * Owned by `repo`. + */ + git_odb_backend * packBackend = nullptr; + + GitRepoImpl(std::filesystem::path _path, bool create, bool bare, bool packfilesOnly = false) : path(std::move(_path)) , bare(bare) { @@ -258,15 +266,39 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this throw Error("opening Git repository %s: %s", path, git_error_last()->message); ObjectDb odb; - if (git_repository_odb(Setter(odb), repo.get())) - throw Error("getting Git object database: %s", git_error_last()->message); + if (packfilesOnly) { + /* Create a fresh object database because by default the repo also + has loose object backends. We are not using any of those for the + tarball cache, but libgit2 still does a bunch of unnecessary + syscalls that always fail with ENOENT. NOTE: We are only creating + a libgit2 object here and not modifying the repo. Think of this as + enabling the specific backend. + */ + + if (git_odb_new(Setter(odb))) + throw Error("creating Git object database: %s", git_error_last()->message); + + if (git_odb_backend_pack(&packBackend, (path / "objects").string().c_str())) + throw Error("creating pack backend: %s", git_error_last()->message); + + if (git_odb_add_backend(odb.get(), packBackend, 1)) + throw Error("adding pack backend to Git object database: %s", git_error_last()->message); + } else { + if (git_repository_odb(Setter(odb), repo.get())) + throw Error("getting Git object database: %s", git_error_last()->message); + } // mempack_backend will be owned by the repository, so we are not expected to free it ourselves. 
- if (git_mempack_new(&mempack_backend)) + if (git_mempack_new(&mempackBackend)) throw Error("creating mempack backend: %s", git_error_last()->message); - if (git_odb_add_backend(odb.get(), mempack_backend, 999)) + if (git_odb_add_backend(odb.get(), mempackBackend, 999)) throw Error("adding mempack backend to Git object database: %s", git_error_last()->message); + + if (packfilesOnly) { + if (git_repository_set_odb(repo.get(), odb.get())) + throw Error("setting Git object database: %s", git_error_last()->message); + } } operator git_repository *() @@ -287,7 +319,7 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this git_packbuilder_set_threads(packBuilder.get(), 0 /* autodetect */); packBuilderContext.handleException( - "preparing packfile", git_mempack_write_thin_pack(mempack_backend, packBuilder.get())); + "preparing packfile", git_mempack_write_thin_pack(mempackBackend, packBuilder.get())); checkInterrupt(); packBuilderContext.handleException("writing packfile", git_packbuilder_write_buf(&buf, packBuilder.get())); checkInterrupt(); @@ -320,7 +352,7 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this if (git_indexer_commit(indexer.get(), &stats)) throw Error("committing git packfile index: %s", git_error_last()->message); - if (git_mempack_reset(mempack_backend)) + if (git_mempack_reset(mempackBackend)) throw Error("resetting git mempack backend: %s", git_error_last()->message); checkInterrupt(); @@ -553,27 +585,6 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this ref getFileSystemObjectSink() override; - static int sidebandProgressCallback(const char * str, int len, void * payload) - { - auto act = (Activity *) payload; - act->result(resFetchStatus, trim(std::string_view(str, len))); - return getInterrupted() ? 
-1 : 0; - } - - static int transferProgressCallback(const git_indexer_progress * stats, void * payload) - { - auto act = (Activity *) payload; - act->result( - resFetchStatus, - fmt("%d/%d objects received, %d/%d deltas indexed, %s", - stats->received_objects, - stats->total_objects, - stats->indexed_deltas, - stats->total_deltas, - renderSize(stats->received_bytes))); - return getInterrupted() ? -1 : 0; - } - void fetch(const std::string & url, const std::string & refspec, bool shallow) override { Activity act(*logger, lvlTalkative, actFetchTree, fmt("fetching Git repository '%s'", url)); @@ -701,9 +712,9 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this } }; -ref GitRepo::openRepo(const std::filesystem::path & path, bool create, bool bare) +ref GitRepo::openRepo(const std::filesystem::path & path, bool create, bool bare, bool packfilesOnly) { - return make_ref(path, create, bare); + return make_ref(path, create, bare, packfilesOnly); } /** @@ -1052,6 +1063,11 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink std::vector pendingDirs; + /** + * Temporary buffer used by createRegularFile for storing small file contents. + */ + std::string regularFileContentsBuffer; + void pushBuilder(std::string name) { const git_tree_entry * entry; @@ -1133,41 +1149,83 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink if (!prepareDirs(pathComponents, false)) return; - git_writestream * stream = nullptr; - if (git_blob_create_from_stream(&stream, *repo, nullptr)) - throw Error("creating a blob stream object: %s", git_error_last()->message); + using WriteStream = std::unique_ptr<::git_writestream, decltype([](::git_writestream * stream) { + if (stream) + stream->free(stream); + })>; + + /* Maximum file size that gets buffered in memory before flushing to a WriteStream, + that's backed by a temporary objects/streamed_git2_* file. 
We should avoid that + for common cases, since creating (and deleting) a temporary file for each blob + is insanely expensive. */ + static constexpr std::size_t maxBufferSize = 1024 * 1024; /* 1 MiB */ struct CRF : CreateRegularFileSink { const CanonPath & path; GitFileSystemObjectSinkImpl & back; - git_writestream * stream; + WriteStream stream; + std::string & contents; bool executable = false; - CRF(const CanonPath & path, GitFileSystemObjectSinkImpl & back, git_writestream * stream) + CRF(const CanonPath & path, GitFileSystemObjectSinkImpl & back, std::string & regularFileContentsBuffer) : path(path) , back(back) - , stream(stream) + , stream(nullptr) + , contents(regularFileContentsBuffer) { + contents.clear(); } - void operator()(std::string_view data) override + void writeToStream(std::string_view data) { - if (stream->write(stream, data.data(), data.size())) + /* Lazily create the stream. */ + if (!stream) { + ::git_writestream * stream2 = nullptr; + if (git_blob_create_from_stream(&stream2, *back.repo, nullptr)) + throw Error("creating a blob stream object: %s", git_error_last()->message); + stream = WriteStream{stream2}; + assert(stream); + } + + if (stream->write(stream.get(), data.data(), data.size())) throw Error("writing a blob for tarball member '%s': %s", path, git_error_last()->message); } + void operator()(std::string_view data) override + { + /* Already in slow path. Just write to the slow stream. */ + if (stream) { + writeToStream(data); + return; + } + + contents += data; + if (contents.size() > maxBufferSize) { + writeToStream(contents); /* Will initialize stream. 
*/ + contents.clear(); + } + } + void isExecutable() override { executable = true; } - } crf{path, *this, stream}; + } crf{path, *this, regularFileContentsBuffer}; func(crf); git_oid oid; - if (git_blob_create_from_stream_commit(&oid, stream)) - throw Error("creating a blob object for tarball member '%s': %s", path, git_error_last()->message); + if (crf.stream) { + /* Call .release(), since git_blob_create_from_stream_commit + acquires ownership and frees the stream. */ + if (git_blob_create_from_stream_commit(&oid, crf.stream.release())) + throw Error("creating a blob object for '%s': %s", path, git_error_last()->message); + } else { + if (git_blob_create_from_buffer(&oid, *repo, crf.contents.data(), crf.contents.size())) + throw Error( + "creating a blob object for '%s' from in-memory buffer: %s", path, git_error_last()->message); + } addToTree(*pathComponents.rbegin(), oid, crf.executable ? GIT_FILEMODE_BLOB_EXECUTABLE : GIT_FILEMODE_BLOB); } @@ -1335,7 +1393,7 @@ namespace fetchers { ref Settings::getTarballCache() const { static auto repoDir = std::filesystem::path(getCacheDir()) / "tarball-cache"; - return GitRepo::openRepo(repoDir, true, true); + return GitRepo::openRepo(repoDir, /*create=*/true, /*bare=*/true, /*packfilesOnly=*/true); } } // namespace fetchers diff --git a/src/libfetchers/include/nix/fetchers/git-utils.hh b/src/libfetchers/include/nix/fetchers/git-utils.hh index 8ed48a7b58b..5c79f256e86 100644 --- a/src/libfetchers/include/nix/fetchers/git-utils.hh +++ b/src/libfetchers/include/nix/fetchers/git-utils.hh @@ -32,7 +32,8 @@ struct GitRepo { virtual ~GitRepo() {} - static ref openRepo(const std::filesystem::path & path, bool create = false, bool bare = false); + static ref + openRepo(const std::filesystem::path & path, bool create = false, bool bare = false, bool packfilesOnly = false); virtual uint64_t getRevCount(const Hash & rev) = 0;