Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 12 additions & 11 deletions src/libfetchers/cache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -125,10 +125,14 @@ struct CacheImpl : Cache
const Attrs & inAttrs) override
{
if (auto res = lookupExpired(store, inAttrs)) {
if (!res->expired)
if (!res->expired && res->storePathValid)
return std::make_pair(std::move(res->infoAttrs), std::move(res->storePath));
debug("ignoring expired cache entry '%s'",
attrsToJSON(inAttrs).dump());
if (!res->storePathValid) {
auto inAttrsJSON = attrsToJSON(inAttrs).dump();
debug("ignoring disappeared cache entry '%s'", inAttrsJSON);
} else
debug("ignoring expired cache entry '%s'",
attrsToJSON(inAttrs).dump());
}
return {};
}
Expand All @@ -153,19 +157,16 @@ struct CacheImpl : Cache
auto timestamp = stmt.getInt(3);

store.addTempRoot(storePath);
if (!store.isValidPath(storePath)) {
// FIXME: we could try to substitute 'storePath'.
debug("ignoring disappeared cache entry '%s'", inAttrsJSON);
return {};
}
auto storePathValid = store.isValidPath(storePath);

debug("using cache entry '%s' -> '%s', '%s'",
inAttrsJSON, infoJSON, store.printStorePath(storePath));
debug("using cache entry '%s' -> '%s', '%s', is valid: %s",
inAttrsJSON, infoJSON, store.printStorePath(storePath), storePathValid);

return Result {
.expired = !locked && (settings.tarballTtl.get() == 0 || timestamp + settings.tarballTtl < time(0)),
.storePathValid = storePathValid,
.infoAttrs = jsonToAttrs(nlohmann::json::parse(infoJSON)),
.storePath = std::move(storePath)
.storePath = std::move(storePath),
};
}
};
Expand Down
1 change: 1 addition & 0 deletions src/libfetchers/cache.hh
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ struct Cache
// Outcome of a fetcher-cache lookup that may also return stale entries.
// Field order matters: the cache implementation builds this with designated
// initializers, which must follow declaration order.
struct Result
{
// True when the entry is not locked and is past the tarball TTL (or the TTL is 0).
bool expired = false;
// Whether `storePath` was a valid store path at lookup time (result of
// `store.isValidPath(storePath)`). Callers must check this before using `storePath`.
// NOTE(review): unlike `expired`, this has no default member initializer — confirm
// every construction site sets it explicitly.
bool storePathValid;
// Attributes decoded from the JSON blob stored with the cache entry.
Attrs infoAttrs;
// Store path recorded for the entry; may no longer exist in the store
// (see `storePathValid`).
StorePath storePath;
};
Expand Down
24 changes: 18 additions & 6 deletions src/libfetchers/tarball.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ DownloadFileResult downloadFile(
const std::string & url,
const std::string & name,
bool locked,
const Headers & headers)
const Headers & headers,
bool onlyIfChanged)
{
// FIXME: check store

Expand All @@ -32,25 +33,27 @@ DownloadFileResult downloadFile(
auto useCached = [&]() -> DownloadFileResult
{
return {
.storePathValid = cached->storePathValid,
.storePath = std::move(cached->storePath),
.etag = getStrAttr(cached->infoAttrs, "etag"),
.effectiveUrl = getStrAttr(cached->infoAttrs, "url"),
.immutableUrl = maybeGetStrAttr(cached->infoAttrs, "immutableUrl"),
};
};

if (cached && !cached->expired)
bool storePathUseable = cached && (cached->storePathValid || onlyIfChanged);
if (storePathUseable && !cached->expired)
return useCached();

FileTransferRequest request(url);
request.headers = headers;
if (cached)
if (storePathUseable)
request.expectedETag = getStrAttr(cached->infoAttrs, "etag");
FileTransferResult res;
try {
res = getFileTransfer()->download(request);
} catch (FileTransferError & e) {
if (cached) {
if (storePathUseable) {
warn("%s; using cached version", e.msg());
return useCached();
} else
Expand All @@ -69,7 +72,7 @@ DownloadFileResult downloadFile(
std::optional<StorePath> storePath;

if (res.cached) {
assert(cached);
assert(storePathUseable);
storePath = std::move(cached->storePath);
} else {
StringSink sink;
Expand Down Expand Up @@ -111,6 +114,7 @@ DownloadFileResult downloadFile(
locked);

return {
.storePathValid = true,
.storePath = std::move(*storePath),
.etag = res.etag,
.effectiveUrl = res.effectiveUri,
Expand All @@ -132,6 +136,8 @@ DownloadTarballResult downloadTarball(
});

auto cached = getCache()->lookupExpired(*store, inAttrs);
if (!cached->storePathValid)
cached = std::nullopt;

if (cached && !cached->expired)
return {
Expand All @@ -140,7 +146,7 @@ DownloadTarballResult downloadTarball(
.immutableUrl = maybeGetStrAttr(cached->infoAttrs, "immutableUrl"),
};

auto res = downloadFile(store, url, name, locked, headers);
auto res = downloadFile(store, url, name, locked, headers, cached.has_value());

std::optional<StorePath> unpackedStorePath;
time_t lastModified;
Expand All @@ -151,6 +157,11 @@ DownloadTarballResult downloadTarball(
} else {
Path tmpDir = createTempDir();
AutoDelete autoDelete(tmpDir, true);
if (!res.storePathValid) {
debug("source etag didn't match unpacked etag, or server returned 304 with a different etag.");
res = downloadFile(store, url, name, locked);
assert(res.storePathValid);
}
unpackTarfile(store->toRealPath(res.storePath), tmpDir);
auto members = readDirectory(tmpDir);
if (members.size() != 1)
Expand Down Expand Up @@ -278,6 +289,7 @@ struct FileInputScheme : CurlInputScheme
std::pair<StorePath, Input> fetch(ref<Store> store, const Input & input) override
{
auto file = downloadFile(store, getStrAttr(input.attrs, "url"), input.getName(), false);
assert(file.storePathValid);
return {std::move(file.storePath), input};
}
};
Expand Down
4 changes: 3 additions & 1 deletion src/libfetchers/tarball.hh
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ namespace nix::fetchers {

struct DownloadFileResult
{
bool storePathValid;
StorePath storePath;
std::string etag;
std::string effectiveUrl;
Expand All @@ -24,7 +25,8 @@ DownloadFileResult downloadFile(
const std::string & url,
const std::string & name,
bool locked,
const Headers & headers = {});
const Headers & headers = {},
bool onlyIfChanged = false);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When do we want to download if not changed?

Copy link
Contributor Author

@yshui yshui Jan 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Errr, it's fully explained in my comments above

Assuming the file at the source URL hasn't changed: when I run nix flake update, if both paths exist, all is good. nix will fetch the source URL with an etag, get a 304 response, and do nothing. But if the source file has been deleted from the store, then nix will redownload the source file, regardless of whether the unpacked store path is still valid.

to summarize, if the unpacked tarball is available in store, we don't need to download the source tarball if not changed.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Err, sorry, I might have gotten the negation backwards. What you propose — downloading if and only if needed — sounds reasonable. I am asking: when would one not want the semantics you proposed? E.g., when would one want to download again if we already have the file?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

personally i can't come up with a scenario where re-downloading would be preferable. do you have something in mind?

Copy link
Contributor Author

@yshui yshui Jan 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

errrr, did i misunderstand your question?

in code, onlyIfChanged would be false for a "normal" downloadFile call. so say the user calls builtins.fetchurl then the file will be downloaded if it's unchanged but not in store.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@yshui Sorry I am still a bit confused. It seems in both cases you are saying "if we have it in the store and it didn't change we don't need to redownload it". I am missing something --- maybe missing that nix flake update need not care what is in the store?

Copy link
Contributor Author

@yshui yshui Jan 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am missing something

Fine. let me just list all the possible cases here.

So, there's two blobs involved for fetchTarball.

tarball.tar.gz, and unpacked-tarball/. For our purposes, we assume both have an entry in the fetcher cache. Now, let's consider the different cases where neither, one, or both of them are in the store.

  1. both tarball.tar.gz and unpacked-tarball/ are in store.
    fetchTarball fetches tarball.tar.gz with etag from cache, gets 304, stops. Which is good.
  2. only tarball.tar.gz is in store.
    fetchTarball fetches tarball.tar.gz with etag from cache, gets 304, unpacks the tarball already in store. This is good too.
  3. neither is in store.
    fetchTarball ignores cache entry for tarball.tar.gz, redownloads it, unpacks it. Good.
  4. only unpacked-tarball/ is in store.
    a. tarball.tar.gz is changed at the specified url
    fetchTarball ignores cache entry for tarball.tar.gz, redownloads it, unpacks it. Good.
    b. tarball.tar.gz is unchanged at the specified url
    fetchTarball ignores cache entry for tarball.tar.gz, redownloads it, then reuses the unpacked-tarball/ in store. NOT good.

This PR avoids redownloading tarball.tar.gz in case (4.b)

OTOH, fetchurl only puts one blob into the store. this is the case where downloadFile is called with onlyIfChanged = false. and the cases for fetchurl are:

  1. file is not in store
    doesn't matter if the file at the source url is changed or not, downloadFile needs to download it. this is why onlyIfChanged should be false.
  2. file is in store
    downloadFile downloads the file if it has changed at the source url. this is the current behavior and is unchanged.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks very much @yshui. The big thing I was forgetting was that there are both the unpacked and packed versions.

@edolstra has a PR that changes how the unpacked stuff is downloaded that we are merging soon. I suspect we should land that first.

My only question left is, why does builtins.fetchurl use the tarball fetcher at all? Shouldn't it just use the file fetcher?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why does builtins.fetchurl use the tarball fetcher at all?

Maybe the way I phrased it caused some confusion, but builtins.fetchurl doesn't call downloadTarball, it calls downloadFile.

Whereas builtins.fetchTarball will call downloadTarball, and that in turn calls downloadFile


struct DownloadTarballResult
{
Expand Down