Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
154 changes: 106 additions & 48 deletions src/libfetchers/git-utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,15 @@
#include <git2/indexer.h>
#include <git2/object.h>
#include <git2/odb.h>
#include <git2/odb_backend.h>
#include <git2/refs.h>
#include <git2/remote.h>
#include <git2/repository.h>
#include <git2/revparse.h>
#include <git2/status.h>
#include <git2/submodule.h>
#include <git2/sys/odb_backend.h>
#include <git2/sys/repository.h>
#include <git2/sys/mempack.h>
#include <git2/tag.h>
#include <git2/tree.h>
Expand Down Expand Up @@ -89,7 +91,7 @@ typedef std::unique_ptr<git_odb, Deleter<git_odb_free>> ObjectDb;
typedef std::unique_ptr<git_packbuilder, Deleter<git_packbuilder_free>> PackBuilder;
typedef std::unique_ptr<git_indexer, Deleter<git_indexer_free>> Indexer;

Hash toHash(const git_oid & oid)
static Hash toHash(const git_oid & oid)
{
#ifdef GIT_EXPERIMENTAL_SHA256
assert(oid.type == GIT_OID_SHA1);
Expand All @@ -108,15 +110,15 @@ static void initLibGit2()
});
}

git_oid hashToOID(const Hash & hash)
/**
 * Convert a `Hash` into a libgit2 object id by parsing its Git revision
 * string (`hash.gitRev()`).
 *
 * @throws Error if `git_oid_fromstr` reports a failure for that string.
 */
static git_oid hashToOID(const Hash & hash)
{
    const auto rev = hash.gitRev();

    git_oid result;
    const int rc = git_oid_fromstr(&result, rev.c_str());
    if (rc != 0)
        throw Error("cannot convert '%s' to a Git OID", rev);

    return result;
}

Object lookupObject(git_repository * repo, const git_oid & oid, git_object_t type = GIT_OBJECT_ANY)
static Object lookupObject(git_repository * repo, const git_oid & oid, git_object_t type = GIT_OBJECT_ANY)
{
Object obj;
if (git_object_lookup(Setter(obj), repo, &oid, type)) {
Expand All @@ -127,7 +129,7 @@ Object lookupObject(git_repository * repo, const git_oid & oid, git_object_t typ
}

template<typename T>
T peelObject(git_object * obj, git_object_t type)
static T peelObject(git_object * obj, git_object_t type)
{
T obj2;
if (git_object_peel((git_object **) (typename T::pointer *) Setter(obj2), obj, type)) {
Expand All @@ -138,7 +140,7 @@ T peelObject(git_object * obj, git_object_t type)
}

template<typename T>
T dupObject(typename T::pointer obj)
static T dupObject(typename T::pointer obj)
{
T obj2;
if (git_object_dup((git_object **) (typename T::pointer *) Setter(obj2), (git_object *) obj))
Expand Down Expand Up @@ -245,9 +247,15 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
* In-memory object store for efficient batched writing to packfiles.
* Owned by `repo`.
*/
git_odb_backend * mempack_backend;
git_odb_backend * mempackBackend = nullptr;

GitRepoImpl(std::filesystem::path _path, bool create, bool bare)
/**
* On-disk packfile object store.
* Owned by `repo`.
*/
git_odb_backend * packBackend = nullptr;

GitRepoImpl(std::filesystem::path _path, bool create, bool bare, bool packfilesOnly = false)
: path(std::move(_path))
, bare(bare)
{
Expand All @@ -258,15 +266,39 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
throw Error("opening Git repository %s: %s", path, git_error_last()->message);

ObjectDb odb;
if (git_repository_odb(Setter(odb), repo.get()))
throw Error("getting Git object database: %s", git_error_last()->message);
if (packfilesOnly) {
/* Create a fresh object database because by default the repo also
has loose object backends. We are not using any of those for the
tarball cache, but libgit2 still does a bunch of unnecessary
syscalls that always fail with ENOENT. NOTE: We are only creating
a libgit2 object here and not modifying the repo. Think of this as
enabling the specific backend.
*/

if (git_odb_new(Setter(odb)))
throw Error("creating Git object database: %s", git_error_last()->message);

if (git_odb_backend_pack(&packBackend, (path / "objects").string().c_str()))
throw Error("creating pack backend: %s", git_error_last()->message);

if (git_odb_add_backend(odb.get(), packBackend, 1))
throw Error("adding pack backend to Git object database: %s", git_error_last()->message);
} else {
if (git_repository_odb(Setter(odb), repo.get()))
throw Error("getting Git object database: %s", git_error_last()->message);
}

// mempackBackend will be owned by the repository, so we are not expected to free it ourselves.
if (git_mempack_new(&mempack_backend))
if (git_mempack_new(&mempackBackend))
throw Error("creating mempack backend: %s", git_error_last()->message);

if (git_odb_add_backend(odb.get(), mempack_backend, 999))
if (git_odb_add_backend(odb.get(), mempackBackend, 999))
throw Error("adding mempack backend to Git object database: %s", git_error_last()->message);

if (packfilesOnly) {
if (git_repository_set_odb(repo.get(), odb.get()))
throw Error("setting Git object database: %s", git_error_last()->message);
}
}

operator git_repository *()
Expand All @@ -287,7 +319,7 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
git_packbuilder_set_threads(packBuilder.get(), 0 /* autodetect */);

packBuilderContext.handleException(
"preparing packfile", git_mempack_write_thin_pack(mempack_backend, packBuilder.get()));
"preparing packfile", git_mempack_write_thin_pack(mempackBackend, packBuilder.get()));
checkInterrupt();
packBuilderContext.handleException("writing packfile", git_packbuilder_write_buf(&buf, packBuilder.get()));
checkInterrupt();
Expand Down Expand Up @@ -320,7 +352,7 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
if (git_indexer_commit(indexer.get(), &stats))
throw Error("committing git packfile index: %s", git_error_last()->message);

if (git_mempack_reset(mempack_backend))
if (git_mempack_reset(mempackBackend))
throw Error("resetting git mempack backend: %s", git_error_last()->message);

checkInterrupt();
Expand Down Expand Up @@ -553,27 +585,6 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>

ref<GitFileSystemObjectSink> getFileSystemObjectSink() override;

static int sidebandProgressCallback(const char * str, int len, void * payload)
{
auto act = (Activity *) payload;
act->result(resFetchStatus, trim(std::string_view(str, len)));
return getInterrupted() ? -1 : 0;
}

static int transferProgressCallback(const git_indexer_progress * stats, void * payload)
{
auto act = (Activity *) payload;
act->result(
resFetchStatus,
fmt("%d/%d objects received, %d/%d deltas indexed, %s",
stats->received_objects,
stats->total_objects,
stats->indexed_deltas,
stats->total_deltas,
renderSize(stats->received_bytes)));
return getInterrupted() ? -1 : 0;
}

void fetch(const std::string & url, const std::string & refspec, bool shallow) override
{
Activity act(*logger, lvlTalkative, actFetchTree, fmt("fetching Git repository '%s'", url));
Expand Down Expand Up @@ -701,9 +712,9 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
}
};

/**
 * Factory for `GitRepo`: constructs a `GitRepoImpl` and returns it as a
 * `ref<GitRepo>`.
 *
 * All arguments are forwarded unchanged to the `GitRepoImpl` constructor;
 * this function adds no logic of its own.
 */
ref<GitRepo> GitRepo::openRepo(const std::filesystem::path & path, bool create, bool bare)
ref<GitRepo> GitRepo::openRepo(const std::filesystem::path & path, bool create, bool bare, bool packfilesOnly)
{
return make_ref<GitRepoImpl>(path, create, bare);
return make_ref<GitRepoImpl>(path, create, bare, packfilesOnly);
}

/**
Expand Down Expand Up @@ -1052,6 +1063,11 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink

std::vector<PendingDir> pendingDirs;

/**
* Temporary buffer used by createRegularFile for storing small file contents.
*/
std::string regularFileContentsBuffer;

void pushBuilder(std::string name)
{
const git_tree_entry * entry;
Expand Down Expand Up @@ -1133,41 +1149,83 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink
if (!prepareDirs(pathComponents, false))
return;

git_writestream * stream = nullptr;
if (git_blob_create_from_stream(&stream, *repo, nullptr))
throw Error("creating a blob stream object: %s", git_error_last()->message);
using WriteStream = std::unique_ptr<::git_writestream, decltype([](::git_writestream * stream) {
if (stream)
stream->free(stream);
})>;

/* Maximum file size that gets buffered in memory before flushing to a WriteStream,
that's backed by a temporary objects/streamed_git2_* file. We should avoid that
for common cases, since creating (and deleting) a temporary file for each blob
is insanely expensive. */
static constexpr std::size_t maxBufferSize = 1024 * 1024; /* 1 MiB */

struct CRF : CreateRegularFileSink
{
const CanonPath & path;
GitFileSystemObjectSinkImpl & back;
git_writestream * stream;
WriteStream stream;
std::string & contents;
bool executable = false;

CRF(const CanonPath & path, GitFileSystemObjectSinkImpl & back, git_writestream * stream)
CRF(const CanonPath & path, GitFileSystemObjectSinkImpl & back, std::string & regularFileContentsBuffer)
: path(path)
, back(back)
, stream(stream)
, stream(nullptr)
, contents(regularFileContentsBuffer)
{
contents.clear();
}

void operator()(std::string_view data) override
void writeToStream(std::string_view data)
{
if (stream->write(stream, data.data(), data.size()))
/* Lazily create the stream. */
if (!stream) {
::git_writestream * stream2 = nullptr;
if (git_blob_create_from_stream(&stream2, *back.repo, nullptr))
throw Error("creating a blob stream object: %s", git_error_last()->message);
stream = WriteStream{stream2};
assert(stream);
}

if (stream->write(stream.get(), data.data(), data.size()))
throw Error("writing a blob for tarball member '%s': %s", path, git_error_last()->message);
}

void operator()(std::string_view data) override
{
/* Already in slow path. Just write to the slow stream. */
if (stream) {
writeToStream(data);
return;
}

contents += data;
if (contents.size() > maxBufferSize) {
writeToStream(contents); /* Will initialize stream. */
contents.clear();
}
}

void isExecutable() override
{
executable = true;
}
} crf{path, *this, stream};
} crf{path, *this, regularFileContentsBuffer};

func(crf);

git_oid oid;
if (git_blob_create_from_stream_commit(&oid, stream))
throw Error("creating a blob object for tarball member '%s': %s", path, git_error_last()->message);
if (crf.stream) {
/* Call .release(), since git_blob_create_from_stream_commit
acquires ownership and frees the stream. */
if (git_blob_create_from_stream_commit(&oid, crf.stream.release()))
throw Error("creating a blob object for '%s': %s", path, git_error_last()->message);
} else {
if (git_blob_create_from_buffer(&oid, *repo, crf.contents.data(), crf.contents.size()))
throw Error(
"creating a blob object for '%s' from in-memory buffer: %s", path, git_error_last()->message);
}

addToTree(*pathComponents.rbegin(), oid, crf.executable ? GIT_FILEMODE_BLOB_EXECUTABLE : GIT_FILEMODE_BLOB);
}
Expand Down Expand Up @@ -1335,7 +1393,7 @@ namespace fetchers {
/**
 * Open the Git repository used as the tarball cache, located at
 * `<getCacheDir()>/tarball-cache`.
 *
 * The repository is opened through `GitRepo::openRepo` with the `create`
 * and `bare` arguments both set to true.
 */
ref<GitRepo> Settings::getTarballCache() const
{
// Function-local static: the path is computed on the first call only and
// reused by every later call.
static auto repoDir = std::filesystem::path(getCacheDir()) / "tarball-cache";
return GitRepo::openRepo(repoDir, true, true);
return GitRepo::openRepo(repoDir, /*create=*/true, /*bare=*/true, /*packfilesOnly=*/true);
}

} // namespace fetchers
Expand Down
3 changes: 2 additions & 1 deletion src/libfetchers/include/nix/fetchers/git-utils.hh
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ struct GitRepo
{
virtual ~GitRepo() {}

static ref<GitRepo> openRepo(const std::filesystem::path & path, bool create = false, bool bare = false);
static ref<GitRepo>
openRepo(const std::filesystem::path & path, bool create = false, bool bare = false, bool packfilesOnly = false);

virtual uint64_t getRevCount(const Hash & rev) = 0;

Expand Down
Loading