From 656e1fc659277e661094b96bea72625070e30f0f Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Sun, 18 Jan 2026 01:34:27 +0300 Subject: [PATCH 1/4] nar-accessor: Fix thread safety of seekableGetNarBytes, use Sink Instead of mutating the file pointer we can instead safely do preads. That makes the local-nar-info cache once again thread safe without the overhead of reopening the file that we used to have prior to b9b6defca693e6413668647d216e3aa3c90a7465 which broke the thread safety by persisting the file descriptor. --- src/libutil/include/nix/util/nar-accessor.hh | 2 +- src/libutil/nar-accessor.cc | 58 +++++++++++++++----- 2 files changed, 46 insertions(+), 14 deletions(-) diff --git a/src/libutil/include/nix/util/nar-accessor.hh b/src/libutil/include/nix/util/nar-accessor.hh index fa20497a851..d01302e901e 100644 --- a/src/libutil/include/nix/util/nar-accessor.hh +++ b/src/libutil/include/nix/util/nar-accessor.hh @@ -35,7 +35,7 @@ ref makeNarAccessor(Source & source); * readFile() method of the accessor to get the contents of files * inside the NAR. */ -using GetNarBytes = std::function; +using GetNarBytes = std::function; /** * The canonical GetNarBytes function for a seekable Source. diff --git a/src/libutil/nar-accessor.cc b/src/libutil/nar-accessor.cc index 10b222f7345..45eeeb6d33a 100644 --- a/src/libutil/nar-accessor.cc +++ b/src/libutil/nar-accessor.cc @@ -1,6 +1,7 @@ #include "nix/util/nar-accessor.hh" #include "nix/util/file-descriptor.hh" #include "nix/util/error.hh" +#include "nix/util/signals.hh" namespace nix { @@ -20,8 +21,16 @@ struct NarAccessorImpl : NarAccessor StringSource source(nar); return parseNarListing(source); }()} - , getNarBytes{ - [nar = std::move(nar)](uint64_t offset, uint64_t length) { return std::string{nar, offset, length}; }} + , getNarBytes{[nar = std::move(nar)](uint64_t offset, uint64_t length, Sink & sink) { + if (offset > nar.size() || length > nar.size() - offset) + throw Error( + "reading invalid NAR bytes range: requested %1% bytes at offset %2%, but NAR has size %3%", + length, + offset, + nar.size()); + StringSource source(std::string_view(nar.data() + offset, length)); + source.drainInto(sink); + }} { } @@ -111,7 +120,7 @@ struct NarAccessorImpl : NarAccessor return res; } - std::string readFile(const CanonPath & path) override + void readFile(const CanonPath & path, Sink & sink, std::function sizeCallback) override { auto & i = get(path); auto * reg = std::get_if(&i.raw); @@ -119,7 +128,8 @@ struct NarAccessorImpl : NarAccessor throw Error("path '%1%' inside NAR file is not a regular file", path); assert(getNarBytes); - return getNarBytes(*reg->contents.narOffset, *reg->contents.fileSize); + sizeCallback(reg->contents.fileSize.value()); + return getNarBytes(reg->contents.narOffset.value(), reg->contents.fileSize.value(), sink); } std::string readLink(const CanonPath & path) override @@ -159,19 +169,41 @@ GetNarBytes seekableGetNarBytes(const std::filesystem::path & path) throw NativeSysError("opening NAR cache file %s", path); return [inner = seekableGetNarBytes(fd.get()), fd = make_ref(std::move(fd))]( - uint64_t offset, uint64_t length) { return inner(offset, length); }; + uint64_t offset, uint64_t length, Sink & sink) { return inner(offset, length, sink); }; } GetNarBytes seekableGetNarBytes(Descriptor fd) { - return [fd](uint64_t offset, uint64_t length) { - if (lseek(fd, offset, SEEK_SET) == -1) - throw SysError("seeking in file"); - - std::string buf(length, 0); - readFull(fd, buf.data(), length); - - return buf; + return [fd](uint64_t offset_, uint64_t left, Sink & sink) { + std::array buf; + + off_t offset = offset_; + + while (left) { + checkInterrupt(); + auto limit = std::min(left, buf.size()); +#ifdef _WIN32 + OVERLAPPED ov = {}; + ov.Offset = static_cast(offset); + ov.OffsetHigh = static_cast(offset >> 32); + DWORD n; + if (!ReadFile(fd, buf.data(), static_cast(limit), &n, &ov)) + throw nix::windows::WinError("reading %1% NAR bytes at offset %2%", left, offset); +#else + ssize_t n = pread(fd, buf.data(), limit, offset); + if (n == -1) { + if (errno == EINTR) + continue; + throw SysError("reading %1% NAR bytes at offset %2%", left, offset); + } +#endif + if (n == 0) + throw EndOfFile("unexpected end-of-file"); + assert(static_cast(n) <= left); + sink(std::string_view(buf.data(), n)); + offset += n; + left -= n; + } }; } From d25ab60d6611b8148946c5c8ba5569e1170d6e78 Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Sun, 18 Jan 2026 02:51:04 +0300 Subject: [PATCH 2/4] nix/cat: Use streaming version of readFile This reduces the memory overhead of nix store cat down to constant memory with a local-nar-cache. --- src/nix/cat.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/nix/cat.cc b/src/nix/cat.cc index 5b2a309a6ef..83dabdb7677 100644 --- a/src/nix/cat.cc +++ b/src/nix/cat.cc @@ -18,7 +18,9 @@ struct MixCat : virtual Args throw Error("path '%1%' is not a regular file", path.abs()); logger->stop(); - writeFull(getStandardOutput(), accessor->readFile(path)); + FdSink output{getStandardOutput()}; + accessor->readFile(path, output); + output.flush(); } }; From 6ba468805ba926f01aba26912252dfda414d42fa Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Sun, 18 Jan 2026 23:14:34 +0300 Subject: [PATCH 3/4] libutil: Factor out copyFdRange This factors out the helper function from seekableGetNarBytes into copyFdRange and adds some more sanity checks for offset/length truncation/wrapping at that API boundary where we work with NAR-style offsets and convert to native off_t. --- .../include/nix/util/file-descriptor.hh | 9 +++++ src/libutil/nar-accessor.cc | 36 ++++--------------- src/libutil/unix/file-descriptor.cc | 23 ++++++++++++ src/libutil/windows/file-descriptor.cc | 24 +++++++++++++ 4 files changed, 62 insertions(+), 30 deletions(-) diff --git a/src/libutil/include/nix/util/file-descriptor.hh b/src/libutil/include/nix/util/file-descriptor.hh index cfb0fb8ee13..a6b2c64fa0a 100644 --- a/src/libutil/include/nix/util/file-descriptor.hh +++ b/src/libutil/include/nix/util/file-descriptor.hh @@ -67,6 +67,15 @@ static inline int fromDescriptorReadOnly(Descriptor fd) */ std::string readFile(Descriptor fd); +/** + * Read \ref nbytes starting at \ref offset from a seekable file into a sink. + * + * @throws SystemError if fd is not seekable or any operation fails + * @throws Interrupted if the operation was interrupted + * @throws EndOfFile if an EOF was reached before reading \ref nbytes + */ +void copyFdRange(Descriptor fd, off_t offset, size_t nbytes, Sink & sink); + /** * Wrappers around read()/write() that read/write exactly the * requested number of bytes. diff --git a/src/libutil/nar-accessor.cc b/src/libutil/nar-accessor.cc index 45eeeb6d33a..abdb1671e00 100644 --- a/src/libutil/nar-accessor.cc +++ b/src/libutil/nar-accessor.cc @@ -174,36 +174,12 @@ GetNarBytes seekableGetNarBytes(const std::filesystem::path & path) GetNarBytes seekableGetNarBytes(Descriptor fd) { - return [fd](uint64_t offset_, uint64_t left, Sink & sink) { - std::array buf; - - off_t offset = offset_; - - while (left) { - checkInterrupt(); - auto limit = std::min(left, buf.size()); -#ifdef _WIN32 - OVERLAPPED ov = {}; - ov.Offset = static_cast(offset); - ov.OffsetHigh = static_cast(offset >> 32); - DWORD n; - if (!ReadFile(fd, buf.data(), static_cast(limit), &n, &ov)) - throw nix::windows::WinError("reading %1% NAR bytes at offset %2%", left, offset); -#else - ssize_t n = pread(fd, buf.data(), limit, offset); - if (n == -1) { - if (errno == EINTR) - continue; - throw SysError("reading %1% NAR bytes at offset %2%", left, offset); - } -#endif - if (n == 0) - throw EndOfFile("unexpected end-of-file"); - assert(static_cast(n) <= left); - sink(std::string_view(buf.data(), n)); - offset += n; - left -= n; - } + return [fd](uint64_t offset, uint64_t length, Sink & sink) { + if (offset >= std::numeric_limits::max()) /* Just in case off_t is not 64 bits. */ + throw Error("can't read %1% NAR bytes from offset %2%: offset too big", length, offset); + if (length >= std::numeric_limits::max()) /* Just in case size_t is 32 bits. */ + throw Error("can't read %1% NAR bytes from offset %2%: length is too big", length, offset); + copyFdRange(fd, static_cast(offset), static_cast(length), sink); }; } diff --git a/src/libutil/unix/file-descriptor.cc b/src/libutil/unix/file-descriptor.cc index ef26880df42..4296da6c088 100644 --- a/src/libutil/unix/file-descriptor.cc +++ b/src/libutil/unix/file-descriptor.cc @@ -166,6 +166,29 @@ void drainFD(int fd, Sink & sink, bool block) } } +void copyFdRange(Descriptor fd, off_t offset, size_t nbytes, Sink & sink) +{ + auto left = nbytes; + std::array buf; + + while (left) { + checkInterrupt(); + auto limit = std::min(left, buf.size()); + ssize_t n = pread(fd, buf.data(), limit, offset); + if (n == -1) { + if (errno == EINTR) + continue; + throw SysError("pread of %1% bytes at offset %2%", left, offset); + } + if (n == 0) + throw EndOfFile("unexpected end-of-file"); + assert(static_cast(n) <= left); + sink(std::string_view(buf.data(), n)); + offset += n; + left -= n; + } +} + ////////////////////////////////////////////////////////////////////// void Pipe::create() diff --git a/src/libutil/windows/file-descriptor.cc b/src/libutil/windows/file-descriptor.cc index cf4bccc1124..eeed38d8216 100644 --- a/src/libutil/windows/file-descriptor.cc +++ b/src/libutil/windows/file-descriptor.cc @@ -99,6 +99,30 @@ void drainFD(HANDLE handle, Sink & sink /*, bool block*/) } } +void copyFdRange(Descriptor fd, off_t offset, size_t nbytes, Sink & sink) +{ + auto left = nbytes; + std::array buf; + + while (left) { + checkInterrupt(); + auto limit = std::min(left, buf.size()); + OVERLAPPED ov = {}; + ov.Offset = static_cast(offset); + if constexpr (sizeof(offset) > 4) /* We don't build with 32 bit off_t, but let's be safe. */ + ov.OffsetHigh = static_cast(offset >> 32); + DWORD n; + if (!ReadFile(fd, buf.data(), static_cast(limit), &n, &ov)) + throw nix::windows::WinError("ReadFile of %1% bytes at offset %2%", left, offset); + if (n == 0) + throw EndOfFile("unexpected end-of-file"); + assert(static_cast(n) <= left); + sink(std::string_view(buf.data(), n)); + offset += n; + left -= n; + } +} + ////////////////////////////////////////////////////////////////////// void Pipe::create() From 4db68c28c1d01b48fa6860f9c3763cac2bda683b Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Mon, 19 Jan 2026 00:49:41 +0300 Subject: [PATCH 4/4] treewide: Add missing overrides of streaming readFile, make readFile non-virtual This makes all addToStore operations that use these source accessors constant memory regardless of file sizes. Also make the other overload altogether and relegate it to the base class as a non-virtual method to avoid such mistakes. --- src/libfetchers/filtering-source-accessor.cc | 6 ------ src/libfetchers/git-utils.cc | 20 +++++++++++++------ .../nix/fetchers/filtering-source-accessor.hh | 2 +- src/libstore/dummy-store.cc | 6 ------ .../include/nix/store/remote-fs-accessor.hh | 4 +++- src/libstore/remote-fs-accessor.cc | 4 ++-- .../nix/util/memory-source-accessor.hh | 4 +++- .../include/nix/util/posix-source-accessor.hh | 2 ++ .../include/nix/util/source-accessor.hh | 2 +- src/libutil/memory-source-accessor.cc | 10 ++++++---- src/libutil/mounted-source-accessor.cc | 4 ++-- src/libutil/union-source-accessor.cc | 8 +++++--- 12 files changed, 39 insertions(+), 33 deletions(-) diff --git a/src/libfetchers/filtering-source-accessor.cc b/src/libfetchers/filtering-source-accessor.cc index 8f1b50eb937..29f0be3080e 100644 --- a/src/libfetchers/filtering-source-accessor.cc +++ b/src/libfetchers/filtering-source-accessor.cc @@ -10,12 +10,6 @@ std::optional FilteringSourceAccessor::getPhysicalPath(co return next->getPhysicalPath(prefix / path); } -std::string FilteringSourceAccessor::readFile(const CanonPath & path) -{ - checkAccess(path); - return next->readFile(prefix / path); -} - void FilteringSourceAccessor::readFile(const CanonPath & path, Sink & sink, std::function sizeCallback) { checkAccess(path); diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc index 4ad90a63689..9f799c77e1b 100644 --- a/src/libfetchers/git-utils.cc +++ b/src/libfetchers/git-utils.cc @@ -766,7 +766,7 @@ struct GitSourceAccessor : SourceAccessor { } - std::string readBlob(const CanonPath & path, bool symlink) + void readBlob(const CanonPath & path, bool symlink, Sink & sink, std::function sizeCallback) { auto state(state_.lock()); @@ -785,16 +785,22 @@ struct GitSourceAccessor : SourceAccessor e.addTrace({}, "while smudging git-lfs file '%s'", path); throw; } - return s.s; + sizeCallback(s.s.size()); + StringSource source{s.s}; + source.drainInto(sink); + return; } } - return std::string((const char *) git_blob_rawcontent(blob.get()), git_blob_rawsize(blob.get())); + auto view = std::string_view((const char *) git_blob_rawcontent(blob.get()), git_blob_rawsize(blob.get())); + sizeCallback(view.size()); + StringSource source{view}; + source.drainInto(sink); } - std::string readFile(const CanonPath & path) override + void readFile(const CanonPath & path, Sink & sink, std::function sizeCallback) override { - return readBlob(path, false); + return readBlob(path, false, sink, sizeCallback); } bool pathExists(const CanonPath & path) override @@ -861,7 +867,9 @@ struct GitSourceAccessor : SourceAccessor std::string readLink(const CanonPath & path) override { - return readBlob(path, true); + StringSink s; + readBlob(path, true, s, [&](uint64_t size) { s.s.reserve(size); }); + return std::move(s.s); } /** diff --git a/src/libfetchers/include/nix/fetchers/filtering-source-accessor.hh b/src/libfetchers/include/nix/fetchers/filtering-source-accessor.hh index 5e98caa5816..396a184fe72 100644 --- a/src/libfetchers/include/nix/fetchers/filtering-source-accessor.hh +++ b/src/libfetchers/include/nix/fetchers/filtering-source-accessor.hh @@ -34,7 +34,7 @@ struct FilteringSourceAccessor : SourceAccessor std::optional getPhysicalPath(const CanonPath & path) override; - std::string readFile(const CanonPath & path) override; + using SourceAccessor::readFile; void readFile(const CanonPath & path, Sink & sink, std::function sizeCallback) override; diff --git a/src/libstore/dummy-store.cc b/src/libstore/dummy-store.cc index 375ed7b2d24..ba42c773bd5 100644 --- a/src/libstore/dummy-store.cc +++ b/src/libstore/dummy-store.cc @@ -80,12 +80,6 @@ class WholeStoreViewAccessor : public SourceAccessor subdirs.emplace(baseName, std::move(accessor)); } - std::string readFile(const CanonPath & path) override - { - return callWithAccessorForPath( - path, [](SourceAccessor & accessor, const CanonPath & path) { return accessor.readFile(path); }); - } - void readFile(const CanonPath & path, Sink & sink, std::function sizeCallback) override { return callWithAccessorForPath(path, [&](SourceAccessor & accessor, const CanonPath & path) { diff --git a/src/libstore/include/nix/store/remote-fs-accessor.hh b/src/libstore/include/nix/store/remote-fs-accessor.hh index 479a7393207..ef2882e6d99 100644 --- a/src/libstore/include/nix/store/remote-fs-accessor.hh +++ b/src/libstore/include/nix/store/remote-fs-accessor.hh @@ -45,7 +45,9 @@ public: DirEntries readDirectory(const CanonPath & path) override; - std::string readFile(const CanonPath & path) override; + void readFile(const CanonPath & path, Sink & sink, std::function sizeCallback) override; + + using SourceAccessor::readFile; std::string readLink(const CanonPath & path) override; }; diff --git a/src/libstore/remote-fs-accessor.cc b/src/libstore/remote-fs-accessor.cc index 7297723e16b..5c46912121d 100644 --- a/src/libstore/remote-fs-accessor.cc +++ b/src/libstore/remote-fs-accessor.cc @@ -108,10 +108,10 @@ SourceAccessor::DirEntries RemoteFSAccessor::readDirectory(const CanonPath & pat return res.first->readDirectory(res.second); } -std::string RemoteFSAccessor::readFile(const CanonPath & path) +void RemoteFSAccessor::readFile(const CanonPath & path, Sink & sink, std::function sizeCallback) { auto res = fetch(path); - return res.first->readFile(res.second); + res.first->readFile(res.second, sink, sizeCallback); } std::string RemoteFSAccessor::readLink(const CanonPath & path) diff --git a/src/libutil/include/nix/util/memory-source-accessor.hh b/src/libutil/include/nix/util/memory-source-accessor.hh index fc00f34d9c0..2fe3e7dfb8c 100644 --- a/src/libutil/include/nix/util/memory-source-accessor.hh +++ b/src/libutil/include/nix/util/memory-source-accessor.hh @@ -119,7 +119,9 @@ struct MemorySourceAccessor : virtual SourceAccessor return root < other.root; } - std::string readFile(const CanonPath & path) override; + void readFile(const CanonPath & path, Sink & sink, std::function sizeCallback) override; + using SourceAccessor::readFile; + bool pathExists(const CanonPath & path) override; std::optional maybeLstat(const CanonPath & path) override; DirEntries readDirectory(const CanonPath & path) override; diff --git a/src/libutil/include/nix/util/posix-source-accessor.hh b/src/libutil/include/nix/util/posix-source-accessor.hh index 29561a3daaf..2edfd8f3cb0 100644 --- a/src/libutil/include/nix/util/posix-source-accessor.hh +++ b/src/libutil/include/nix/util/posix-source-accessor.hh @@ -33,6 +33,8 @@ public: void readFile(const CanonPath & path, Sink & sink, std::function sizeCallback) override; + using SourceAccessor::readFile; + bool pathExists(const CanonPath & path) override; std::optional maybeLstat(const CanonPath & path) override; diff --git a/src/libutil/include/nix/util/source-accessor.hh b/src/libutil/include/nix/util/source-accessor.hh index 808fcf622b6..ee09b182b7b 100644 --- a/src/libutil/include/nix/util/source-accessor.hh +++ b/src/libutil/include/nix/util/source-accessor.hh @@ -62,7 +62,7 @@ struct SourceAccessor : std::enable_shared_from_this * targets of symlinks should only occasionally be done, and only * with care. */ - virtual std::string readFile(const CanonPath & path); + std::string readFile(const CanonPath & path); /** * Write the contents of a file as a sink. `sizeCallback` must be diff --git a/src/libutil/memory-source-accessor.cc b/src/libutil/memory-source-accessor.cc index fee0454fda5..5b9708dc2b8 100644 --- a/src/libutil/memory-source-accessor.cc +++ b/src/libutil/memory-source-accessor.cc @@ -53,14 +53,16 @@ MemorySourceAccessor::File * MemorySourceAccessor::open(const CanonPath & path, return cur; } -std::string MemorySourceAccessor::readFile(const CanonPath & path) +void MemorySourceAccessor::readFile(const CanonPath & path, Sink & sink, std::function sizeCallback) { auto * f = open(path, std::nullopt); if (!f) throw FileNotFound("file '%s' does not exist", showPath(path)); - if (auto * r = std::get_if(&f->raw)) - return r->contents; - else + if (auto * r = std::get_if(&f->raw)) { + sizeCallback(r->contents.size()); + StringSource source{r->contents}; + source.drainInto(sink); + } else throw NotARegularFile("file '%s' is not a regular file", showPath(path)); } diff --git a/src/libutil/mounted-source-accessor.cc b/src/libutil/mounted-source-accessor.cc index d9398045cc5..4122fc78c12 100644 --- a/src/libutil/mounted-source-accessor.cc +++ b/src/libutil/mounted-source-accessor.cc @@ -21,10 +21,10 @@ struct MountedSourceAccessorImpl : MountedSourceAccessor // FIXME: return dummy parent directories automatically? } - std::string readFile(const CanonPath & path) override + void readFile(const CanonPath & path, Sink & sink, std::function sizeCallback) override { auto [accessor, subpath] = resolve(path); - return accessor->readFile(subpath); + return accessor->readFile(subpath, sink, sizeCallback); } Stat lstat(const CanonPath & path) override diff --git a/src/libutil/union-source-accessor.cc b/src/libutil/union-source-accessor.cc index ca239563c66..d19400303e2 100644 --- a/src/libutil/union-source-accessor.cc +++ b/src/libutil/union-source-accessor.cc @@ -12,12 +12,14 @@ struct UnionSourceAccessor : SourceAccessor displayPrefix.clear(); } - std::string readFile(const CanonPath & path) override + void readFile(const CanonPath & path, Sink & sink, std::function sizeCallback) override { for (auto & accessor : accessors) { auto st = accessor->maybeLstat(path); - if (st) - return accessor->readFile(path); + if (st) { + accessor->readFile(path, sink, sizeCallback); + return; + } } throw FileNotFound("path '%s' does not exist", showPath(path)); }