Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions doc/manual/rl-next/c-api-recoverable-errors.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
---
synopsis: "C API: Errors returned from your primops are not treated as recoverable by default"
prs: [13930]
---

Nix 2.32 by default remembers the error in the thunk that triggered it.

Previously the following sequence of events worked:

1. Have a thunk that invokes a primop that's defined through the C API
2. The primop returns an error
3. Force the thunk again
4. The primop returns a value
5. The thunk evaluated successfully

**Resolution**

C API consumers that rely on this must change their recoverable error calls:

```diff
-nix_set_err_msg(context, NIX_ERR_*, msg);
+nix_set_err_msg(context, NIX_ERR_RECOVERABLE, msg);
```
12 changes: 10 additions & 2 deletions src/libexpr-c/nix_api_value.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "nix/expr/attr-set.hh"
#include "nix/expr/eval-error.hh"
#include "nix/util/configuration.hh"
#include "nix/expr/eval.hh"
#include "nix/store/globals.hh"
Expand Down Expand Up @@ -89,8 +90,13 @@ static void nix_c_primop_wrapper(
f(userdata, &ctx, (EvalState *) &state, (nix_value **) args, (nix_value *) &vTmp);

if (ctx.last_err_code != NIX_OK) {
/* TODO: Throw different errors depending on the error code */
state.error<nix::EvalError>("Error from custom function: %s", *ctx.last_err).atPos(pos).debugThrow();
if (ctx.last_err_code == NIX_ERR_RECOVERABLE) {
state.error<nix::RecoverableEvalError>("Recoverable error from custom function: %s", *ctx.last_err)
.atPos(pos)
.debugThrow();
} else {
state.error<nix::EvalError>("Error from custom function: %s", *ctx.last_err).atPos(pos).debugThrow();
}
}

if (!vTmp.isValid()) {
Expand Down Expand Up @@ -177,6 +183,8 @@ ValueType nix_get_type(nix_c_context * context, const nix_value * value)
switch (v.type()) {
case nThunk:
return NIX_TYPE_THUNK;
case nFailed:
return NIX_TYPE_FAILED;
case nInt:
return NIX_TYPE_INT;
case nFloat:
Expand Down
3 changes: 2 additions & 1 deletion src/libexpr-c/nix_api_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ typedef enum {
NIX_TYPE_ATTRS,
NIX_TYPE_LIST,
NIX_TYPE_FUNCTION,
NIX_TYPE_EXTERNAL
NIX_TYPE_EXTERNAL,
NIX_TYPE_FAILED,
} ValueType;

// forward declarations
Expand Down
101 changes: 101 additions & 0 deletions src/libexpr-tests/nix_api_expr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -437,4 +437,105 @@ TEST_F(nix_api_expr_test, nix_value_call_multi_no_args)
assert_ctx_ok();
ASSERT_EQ(3, rInt);
}

// The following is a test case for retryable thunks.
// This is a requirement for the current way in which NixOps4 evaluates its deployment expressions.
// An alternative strategy could be implemented, but unwinding the stack may be a more efficient way to deal with many
// suspensions/resumptions, compared to e.g. using a thread or coroutine stack for each suspended dependency. This test
// models the essential bits of a deployment tool that uses such a strategy.

// State for the retryable primop - simulates deployment resource availability
struct DeploymentResourceState
{
bool vm_created = false;
};

static void primop_load_resource_input(
void * user_data, nix_c_context * context, EvalState * state, nix_value ** args, nix_value * ret)
{
assert(context);
assert(state);
auto * resource_state = static_cast<DeploymentResourceState *>(user_data);

// Get the resource input name argument
std::string input_name;
if (nix_get_string(context, args[0], OBSERVE_STRING(input_name)) != NIX_OK)
return;

// Only handle "vm_id" input - throw for anything else
if (input_name != "vm_id") {
std::string error_msg = "unknown resource input: " + input_name;
nix_set_err_msg(context, NIX_ERR_NIX_ERROR, error_msg.c_str());
return;
}

if (resource_state->vm_created) {
// VM has been created, return the ID
nix_init_string(context, ret, "vm-12345");
} else {
// VM not created yet, fail with dependency error
nix_set_err_msg(context, NIX_ERR_RECOVERABLE, "VM not yet created");
}
}

TEST_F(nix_api_expr_test, nix_expr_thunk_re_evaluation_after_deployment)
{
// This test demonstrates NixOps4's requirement: a thunk calling a primop should be
// re-evaluable when deployment resources become available that were not available initially.

DeploymentResourceState resource_state;

PrimOp * primop = nix_alloc_primop(
ctx,
primop_load_resource_input,
1,
"loadResourceInput",
nullptr,
"load a deployment resource input",
&resource_state);
assert_ctx_ok();

nix_value * primopValue = nix_alloc_value(ctx, state);
assert_ctx_ok();
nix_init_primop(ctx, primopValue, primop);
assert_ctx_ok();

nix_value * inputName = nix_alloc_value(ctx, state);
assert_ctx_ok();
nix_init_string(ctx, inputName, "vm_id");
assert_ctx_ok();

// Create a single thunk by using nix_init_apply instead of nix_value_call
// This creates a lazy application that can be forced multiple times
nix_value * thunk = nix_alloc_value(ctx, state);
assert_ctx_ok();
nix_init_apply(ctx, thunk, primopValue, inputName);
assert_ctx_ok();

// First force: VM not created yet, should fail
nix_value_force(ctx, state, thunk);
ASSERT_EQ(NIX_ERR_NIX_ERROR, nix_err_code(ctx));
ASSERT_THAT(nix_err_msg(nullptr, ctx, nullptr), testing::HasSubstr("VM not yet created"));

// Clear the error context for the next attempt
nix_c_context_free(ctx);
ctx = nix_c_context_create();

// Simulate deployment process: VM gets created
resource_state.vm_created = true;

// Second force of the SAME thunk: this is where the "failed" value issue appears
// With failed value caching, this should fail because the thunk is marked as permanently failed
// Without failed value caching (or with retryable failures), this should succeed
nix_value_force(ctx, state, thunk);

// If we get here without error, the thunk was successfully re-evaluated
assert_ctx_ok();

std::string result;
nix_get_string(ctx, thunk, OBSERVE_STRING(result));
assert_ctx_ok();
ASSERT_STREQ("vm-12345", result.c_str());
}

} // namespace nixC
1 change: 1 addition & 0 deletions src/libexpr/eval-error.cc
Original file line number Diff line number Diff line change
Expand Up @@ -113,5 +113,6 @@ template class EvalErrorBuilder<MissingArgumentError>;
template class EvalErrorBuilder<InfiniteRecursionError>;
template class EvalErrorBuilder<InvalidPathError>;
template class EvalErrorBuilder<IFDError>;
template class EvalErrorBuilder<RecoverableEvalError>;

} // namespace nix
69 changes: 64 additions & 5 deletions src/libexpr/eval.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "nix/expr/eval.hh"
#include "nix/expr/eval-error.hh"
#include "nix/expr/eval-settings.hh"
#include "nix/expr/primops.hh"
#include "nix/expr/print-options.hh"
Expand Down Expand Up @@ -27,6 +28,7 @@
#include "parser-tab.hh"

#include <algorithm>
#include <exception>
#include <iostream>
#include <sstream>
#include <cstring>
Expand Down Expand Up @@ -124,6 +126,8 @@ std::string_view showType(ValueType type, bool withArticle)
return WA("a", "float");
case nThunk:
return WA("a", "thunk");
case nFailed:
return WA("a", "failure");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the term failed is useful and distinct internally, but normally, for users, we used to call them errors.

Suggested change
return WA("a", "failure");
return WA("an", "error");

}
unreachable();
}
Expand Down Expand Up @@ -2060,6 +2064,54 @@ void ExprBlackHole::eval(EvalState & state, [[maybe_unused]] Env & env, Value &
// always force this to be separate, otherwise forceValue may inline it and take
// a massive perf hit
[[gnu::noinline]]
void EvalState::handleEvalExceptionForThunk(Env * env, Expr * expr, Value & v, const PosIdx pos)
{
v.mkThunk(env, expr);
tryFixupBlackHolePos(v, pos);

auto e = std::current_exception();
Value * recovery = nullptr;
try {
std::rethrow_exception(e);
} catch (const RecoverableEvalError & e) {
recovery = allocValue();
} catch (...) {
}
if (recovery) {
recovery->mkThunk(env, expr);
}
v.mkFailed(e, recovery);
}

[[gnu::noinline]]
void EvalState::handleEvalExceptionForApp(Value & v)
{
auto e = std::current_exception();
Value * recovery = nullptr;
try {
std::rethrow_exception(e);
} catch (const RecoverableEvalError & e) {
recovery = allocValue();
} catch (...) {
}
if (recovery) {
*recovery = v;
}
v.mkFailed(e, recovery);
}

[[gnu::noinline]]
void EvalState::handleEvalFailed(Value & v, const PosIdx pos)
{
assert(v.isFailed());
if (auto recoveryValue = v.failed()->recoveryValue) {
v = *recoveryValue;
forceValue(v, pos);
} else {
std::rethrow_exception(v.failed()->ex);
}
}

void EvalState::tryFixupBlackHolePos(Value & v, PosIdx pos)
{
if (!v.isBlackhole())
Expand All @@ -2068,7 +2120,8 @@ void EvalState::tryFixupBlackHolePos(Value & v, PosIdx pos)
try {
std::rethrow_exception(e);
} catch (InfiniteRecursionError & e) {
e.atPos(positions[pos]);
if (!e.hasPos())
e.atPos(positions[pos]);
} catch (...) {
}
}
Expand Down Expand Up @@ -2693,8 +2746,11 @@ void EvalState::assertEqValues(Value & v1, Value & v2, const PosIdx pos, std::st
}
return;

case nThunk: // Must not be left by forceValue
assert(false);
// Cannot be returned by forceValue().
case nThunk:
case nFailed:
unreachable();

default: // Note that we pass compiler flags that should make `default:` unreachable.
// Also note that this probably ran after `eqValues`, which implements
// the same logic more efficiently (without having to unwind stacks),
Expand Down Expand Up @@ -2786,8 +2842,11 @@ bool EvalState::eqValues(Value & v1, Value & v2, const PosIdx pos, std::string_v
// !!!
return v1.fpoint() == v2.fpoint();

case nThunk: // Must not be left by forceValue
assert(false);
// Cannot be returned by forceValue().
case nThunk:
case nFailed:
unreachable();

default: // Note that we pass compiler flags that should make `default:` unreachable.
error<EvalError>("eqValues: cannot compare %1% with %2%", showType(v1), showType(v2))
.withTrace(pos, errorCtx)
Expand Down
8 changes: 8 additions & 0 deletions src/libexpr/include/nix/expr/eval-error.hh
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,14 @@ MakeError(MissingArgumentError, EvalError);
MakeError(InfiniteRecursionError, EvalError);
MakeError(IFDError, EvalBaseError);

/**
* An evaluation error which should be retried instead of rethrown
*
* A RecoverableEvalError is not an EvalError, because we shouldn't cache it in the eval cache, as it should be retried
* anyway.
*/
MakeError(RecoverableEvalError, EvalBaseError);

struct InvalidPathError : public EvalError
{
public:
Expand Down
16 changes: 12 additions & 4 deletions src/libexpr/include/nix/expr/eval-inline.hh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "nix/expr/eval.hh"
#include "nix/expr/eval-error.hh"
#include "nix/expr/eval-settings.hh"
#include <exception>

namespace nix {

Expand Down Expand Up @@ -97,12 +98,19 @@ void EvalState::forceValue(Value & v, const PosIdx pos)
else
ExprBlackHole::throwInfiniteRecursionError(*this, v);
} catch (...) {
v.mkThunk(env, expr);
tryFixupBlackHolePos(v, pos);
handleEvalExceptionForThunk(env, expr, v, pos);
throw;
}
} else if (v.isApp())
callFunction(*v.app().left, *v.app().right, v, pos);
} else if (v.isApp()) {
try {
callFunction(*v.app().left, *v.app().right, v, pos);
} catch (...) {
handleEvalExceptionForApp(v);
throw;
}
} else if (v.isFailed()) {
handleEvalFailed(v, pos);
}
}

[[gnu::always_inline]]
Expand Down
20 changes: 20 additions & 0 deletions src/libexpr/include/nix/expr/eval.hh
Original file line number Diff line number Diff line change
Expand Up @@ -610,8 +610,28 @@ public:
*/
inline void forceValue(Value & v, const PosIdx pos);

private:

/**
* Internal support function for forceValue
*
* This code is factored out so that it's not in the heavily inlined hot path.
*/
void handleEvalExceptionForThunk(Env * env, Expr * expr, Value & v, const PosIdx pos);

/**
* Internal support function for forceValue
*
* This code is factored out so that it's not in the heavily inlined hot path.
*/
void handleEvalExceptionForApp(Value & v);

void handleEvalFailed(Value & v, PosIdx pos);

void tryFixupBlackHolePos(Value & v, PosIdx pos);

public:

/**
* Force a value, then recursively force list elements and
* attributes.
Expand Down
Loading
Loading