NixOS · xokdvium · Feb 21, 2026 · Feb 18, 2026 · Sep 7, 2025 · Sep 9, 2025
@@ -0,0 +1,23 @@
+---
+synopsis: "C API: Errors returned from your primops are not treated as recoverable by default"
+prs: [15286, 13930]
+---
+
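+As of Nix 2.34, an error raised while forcing a thunk is by default remembered in that thunk, so forcing the thunk again rethrows the same error instead of evaluating it again.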
+
+Previously the following sequence of events worked:
+
+1. Have a thunk that invokes a primop that's defined through the C API
+2. The primop returns an error
+3. Force the thunk again
+4. The primop returns a value
+5. The thunk evaluates successfully
+
+**Resolution**
+
+C API consumers that rely on this behavior must report such errors with the recoverable error code:
+
+```diff
+-nix_set_err_msg(context, NIX_ERR_*, msg);
++nix_set_err_msg(context, NIX_ERR_RECOVERABLE, msg);
+```
@@ -1,4 +1,5 @@
 #include "nix/expr/attr-set.hh"
+#include "nix/expr/eval-error.hh"
 #include "nix/util/configuration.hh"
 #include "nix/expr/eval.hh"
 #include "nix/store/globals.hh"
@@ -107,8 +108,13 @@ static void nix_c_primop_wrapper(
     f(userdata, &ctx, (EvalState *) &state, external_args.data(), vTmpPtr);
 
     if (ctx.last_err_code != NIX_OK) {
-        /* TODO: Throw different errors depending on the error code */
-        state.error<nix::EvalError>("Error from custom function: %s", *ctx.last_err).atPos(pos).debugThrow();
+        if (ctx.last_err_code == NIX_ERR_RECOVERABLE) {
+            state.error<nix::RecoverableEvalError>("Recoverable error from custom function: %s", *ctx.last_err)
+                .atPos(pos)
+                .debugThrow();
+        } else {
+            state.error<nix::EvalError>("Error from custom function: %s", *ctx.last_err).atPos(pos).debugThrow();
+        }
     }
 
     if (!vTmp.isValid()) {
@@ -194,6 +200,8 @@ ValueType nix_get_type(nix_c_context * context, const nix_value * value)
         switch (v.type()) {
         case nThunk:
             return NIX_TYPE_THUNK;
+        case nFailed:
+            return NIX_TYPE_FAILED;
         case nInt:
             return NIX_TYPE_INT;
         case nFloat:

@@ -100,7 +100,8 @@ typedef enum {
     /** @brief External value from C++ plugins or C API
      * @see Externals
      */
-    NIX_TYPE_EXTERNAL
+    NIX_TYPE_EXTERNAL,
+    NIX_TYPE_FAILED,
 } ValueType;
 
 // forward declarations

@@ -517,4 +517,106 @@ TEST_F(nix_api_expr_test, nix_expr_attrset_update)
     assert_ctx_ok();
 }
 
+// The following is a test case for retryable thunks. This is a requirement
+// for the current way in which NixOps4 evaluates its deployment expressions.
+// An alternative strategy could be implemented, but unwinding the stack may
+// be a more efficient way to deal with many suspensions/resumptions, compared
+// to e.g. using a thread or coroutine stack for each suspended dependency.
+// This test models the essential bits of a deployment tool that uses such
+// a strategy.
+
+// Shared state for the retryable primop below; stands in for whether a deployment resource is available yet.
+struct DeploymentResourceState
+{
+    bool vm_created = false; // starts out false; the test flips it once the simulated VM "exists"
+};
+
+/**
+ * Test primop: resolves a deployment resource input by name.
+ *
+ * `user_data` must point to a `DeploymentResourceState`. Only the input
+ * "vm_id" is known: it yields the string "vm-12345" once `vm_created` is set,
+ * and a `NIX_ERR_RECOVERABLE` error before that. Any other name is reported
+ * with `NIX_ERR_NIX_ERROR`.
+ */
+static void primop_load_resource_input(
+    void * user_data, nix_c_context * context, EvalState * state, nix_value ** args, nix_value * ret)
+{
+    assert(context);
+    assert(state);
+    auto * deployment = static_cast<DeploymentResourceState *>(user_data);
+
+    // args[0] carries the name of the requested resource input.
+    std::string requested;
+    const auto status = nix_get_string(context, args[0], OBSERVE_STRING(requested));
+    if (status != NIX_OK)
+        return; // reading the argument failed; nothing more to do here
+
+    if (requested != "vm_id") {
+        // Anything other than "vm_id" is rejected with a non-recoverable error code.
+        const std::string complaint = "unknown resource input: " + requested;
+        nix_set_err_msg(context, NIX_ERR_NIX_ERROR, complaint.c_str());
+        return;
+    }
+
+    if (!deployment->vm_created) {
+        // VM not created yet: fail with the recoverable error code.
+        nix_set_err_msg(context, NIX_ERR_RECOVERABLE, "VM not yet created");
+        return;
+    }
+
+    // The VM has been created; hand back its ID.
+    nix_init_string(context, ret, "vm-12345");
+}
+
+TEST_F(nix_api_expr_test, nix_expr_thunk_re_evaluation_after_deployment)
+{
+    // Models NixOps4's requirement: a thunk that calls a primop must be re-evaluable
+    // once a deployment resource that was initially unavailable becomes available.
+
+    DeploymentResourceState resource_state;
+
+    PrimOp * primop = nix_alloc_primop(
+        ctx,
+        primop_load_resource_input,
+        1,
+        "loadResourceInput",
+        nullptr,
+        "load a deployment resource input",
+        &resource_state);
+    assert_ctx_ok();
+
+    nix_value * primopValue = nix_alloc_value(ctx, state);
+    assert_ctx_ok();
+    nix_init_primop(ctx, primopValue, primop);
+    assert_ctx_ok();
+
+    nix_value * inputName = nix_alloc_value(ctx, state);
+    assert_ctx_ok();
+    nix_init_string(ctx, inputName, "vm_id");
+    assert_ctx_ok();
+
+    // Build one lazy application with nix_init_apply (rather than nix_value_call),
+    // so that the very same thunk can be forced more than once.
+    nix_value * thunk = nix_alloc_value(ctx, state);
+    assert_ctx_ok();
+    nix_init_apply(ctx, thunk, primopValue, inputName);
+    assert_ctx_ok();
+
+    // First force: VM not created yet, so forcing fails; the context reports NIX_ERR_NIX_ERROR
+    nix_value_force(ctx, state, thunk);
+    ASSERT_EQ(NIX_ERR_NIX_ERROR, nix_err_code(ctx));
+    ASSERT_THAT(nix_err_msg(nullptr, ctx, nullptr), testing::HasSubstr("VM not yet created"));
+
+    // Replace the context with a fresh one so the first failure is cleared before retrying
+    nix_c_context_free(ctx);
+    ctx = nix_c_context_create();
+
+    // Simulate deployment process: VM gets created
+    resource_state.vm_created = true;
+
+    // Second force of the SAME thunk. If the first failure had been cached as a
+    // permanent one, this would fail again; because the primop reported it with
+    // NIX_ERR_RECOVERABLE, the thunk is expected to be re-evaluated and succeed.
+    nix_value_force(ctx, state, thunk);
+
+    // No error at this point means the thunk was successfully re-evaluated
+    assert_ctx_ok();
+
+    std::string result;
+    nix_get_string(ctx, thunk, OBSERVE_STRING(result));
+    assert_ctx_ok();
+    ASSERT_STREQ("vm-12345", result.c_str());
+}
+
 } // namespace nixC
@@ -188,6 +188,22 @@ TEST_F(ValuePrintingTests, vBlackhole)
     test(vBlackhole, "«potential infinite recursion»");
 }
 
+TEST_F(ValuePrintingTests, vFailed)
+{
+    // Build a failed value from a real in-flight exception, without a recovery value.
+    Value failedValue;
+    try {
+        throw Error("nope");
+    } catch (...) {
+        failedValue.mkFailed(std::current_exception(), nullptr);
+    }
+
+    // Historically, a value that was tried and then ignored (e.g. through
+    // tryEval) was reverted to the original thunk, so a failed value is
+    // expected to print exactly like a thunk, with and without colors.
+
+    test(failedValue, "«thunk»");
+    test(failedValue, ANSI_MAGENTA "«thunk»" ANSI_NORMAL, PrintOptions{.ansiColors = true});
+}
+
 TEST_F(ValuePrintingTests, depthAttrs)
 {
     Value vOne;

@@ -11,7 +11,7 @@
 namespace nix::eval_cache {
 
 CachedEvalError::CachedEvalError(ref<AttrCursor> cursor, Symbol attr)
-    : EvalError(cursor->root->state, "cached failure of attribute '%s'", cursor->getAttrPathStr(attr))
+    : CloneableError(cursor->root->state, "cached failure of attribute '%s'", cursor->getAttrPathStr(attr))
     , cursor(cursor)
     , attr(attr)
 {

@@ -114,5 +114,6 @@ template class EvalErrorBuilder<InfiniteRecursionError>;
 template class EvalErrorBuilder<StackOverflowError>;
 template class EvalErrorBuilder<InvalidPathError>;
 template class EvalErrorBuilder<IFDError>;
+template class EvalErrorBuilder<RecoverableEvalError>;
 
 } // namespace nix
@@ -1,4 +1,5 @@
 #include "nix/expr/eval.hh"
+#include "nix/expr/eval-error.hh"
 #include "nix/expr/eval-settings.hh"
 #include "nix/expr/primops.hh"
 #include "nix/expr/print-options.hh"
@@ -31,6 +32,7 @@
 #include <algorithm>
 #include <cstddef>
 #include <cstdlib>
+#include <exception>
 #include <iostream>
 #include <sstream>
 #include <cstring>
@@ -155,6 +157,8 @@ std::string_view showType(ValueType type, bool withArticle)
         return WA("a", "float");
     case nThunk:
         return WA("a", "thunk");
+    case nFailed:
+        return WA("an", "error");
     }
     unreachable();
 }
@@ -2178,6 +2182,54 @@ void ExprBlackHole::eval(EvalState & state, [[maybe_unused]] Env & env, Value &
 // always force this to be separate, otherwise forceValue may inline it and take
 // a massive perf hit
 [[gnu::noinline]]
+/**
+ * Record the exception currently being handled as the failed state of `v`.
+ *
+ * Must be called while an exception is in flight. When that exception is a
+ * RecoverableEvalError, a fresh thunk over (`env`, `expr`) is stored alongside
+ * it as the recovery value; for any other exception no recovery value is kept.
+ */
+void EvalState::handleEvalExceptionForThunk(Env * env, Expr * expr, Value & v, const PosIdx pos)
+{
+    // Without an env, let tryFixupBlackHolePos attach `pos` if `v` is a black hole.
+    if (env == nullptr)
+        tryFixupBlackHolePos(v, pos);
+
+    // Classify the in-flight exception by rethrowing it locally.
+    auto failure = std::current_exception();
+    bool retryable = false;
+    try {
+        std::rethrow_exception(failure);
+    } catch (const RecoverableEvalError &) {
+        retryable = true;
+    } catch (...) {
+        // Every other exception type is stored without a recovery value.
+    }
+
+    Value * retryThunk = nullptr;
+    if (retryable) {
+        retryThunk = allocValue();
+        retryThunk->mkThunk(env, expr);
+    }
+    v.mkFailed(failure, retryThunk);
+}
+
+/**
+ * Record the exception currently being handled as the failed state of `v`.
+ *
+ * Must be called while an exception is in flight. When that exception is a
+ * RecoverableEvalError, a copy of `savedApp` is stored alongside it as the
+ * recovery value; for any other exception no recovery value is kept.
+ */
+[[gnu::noinline]]
+void EvalState::handleEvalExceptionForApp(Value & v, const Value & savedApp)
+{
+    // Classify the in-flight exception by rethrowing it locally.
+    auto failure = std::current_exception();
+    bool retryable = false;
+    try {
+        std::rethrow_exception(failure);
+    } catch (const RecoverableEvalError &) {
+        retryable = true;
+    } catch (...) {
+        // Every other exception type is stored without a recovery value.
+    }
+
+    Value * retryApp = nullptr;
+    if (retryable) {
+        retryApp = allocValue();
+        *retryApp = savedApp;
+    }
+    v.mkFailed(failure, retryApp);
+}
+
+/**
+ * Handle forcing of a value that is in the failed state.
+ *
+ * If the failure carries a recovery value, `v` is overwritten with it and
+ * forced again at `pos`; otherwise the stored failure is rethrown.
+ */
+[[gnu::noinline]]
+void EvalState::handleEvalFailed(Value & v, const PosIdx pos)
+{
+    assert(v.isFailed());
+
+    const auto retryValue = v.failed().recoveryValue;
+    if (!retryValue) {
+        // Nothing to retry with: surface the remembered error again.
+        v.failed().rethrow();
+    } else {
+        // Put the saved value back in place of the failure and evaluate it afresh.
+        v = *retryValue;
+        forceValue(v, pos);
+    }
+}
+
 void EvalState::tryFixupBlackHolePos(Value & v, PosIdx pos)
 {
     if (!v.isBlackhole())
@@ -2186,7 +2238,8 @@ void EvalState::tryFixupBlackHolePos(Value & v, PosIdx pos)
     try {
         std::rethrow_exception(e);
     } catch (InfiniteRecursionError & e) {
-        e.atPos(positions[pos]);
+        if (!e.hasPos())
+            e.atPos(positions[pos]);
     } catch (...) {
     }
 }
@@ -2825,8 +2878,11 @@ void EvalState::assertEqValues(Value & v1, Value & v2, const PosIdx pos, std::st
         }
         return;
 
-    case nThunk: // Must not be left by forceValue
-        assert(false);
+    // Cannot be returned by forceValue().
+    case nThunk:
+    case nFailed:
+        unreachable();
+
     default: // Note that we pass compiler flags that should make `default:` unreachable.
         // Also note that this probably ran after `eqValues`, which implements
         // the same logic more efficiently (without having to unwind stacks),
@@ -2920,8 +2976,11 @@ bool EvalState::eqValues(Value & v1, Value & v2, const PosIdx pos, std::string_v
         // !!!
         return v1.fpoint() == v2.fpoint();
 
-    case nThunk: // Must not be left by forceValue
-        assert(false);
+    // Cannot be returned by forceValue().
+    case nThunk:
+    case nFailed:
+        unreachable();
+
     default: // Note that we pass compiler flags that should make `default:` unreachable.
         error<EvalError>("eqValues: cannot compare %1% with %2%", showType(v1), showType(v2))
             .withTrace(pos, errorCtx)

@@ -14,7 +14,7 @@ namespace nix::eval_cache {
 struct AttrDb;
 class AttrCursor;
 
-struct CachedEvalError : EvalError
+struct CachedEvalError : CloneableError<CachedEvalError, EvalError>
 {
     const ref<AttrCursor> cursor;
     const Symbol attr;

@@ -18,22 +18,22 @@ class EvalErrorBuilder;
  *
  * Most subclasses should inherit from `EvalError` instead of this class.
  */
-class EvalBaseError : public Error
+class EvalBaseError : public CloneableError<EvalBaseError, Error>
 {
     template<class T>
     friend class EvalErrorBuilder;
 public:
     EvalState & state;
 
     EvalBaseError(EvalState & state, ErrorInfo && errorInfo)
-        : Error(errorInfo)
+        : CloneableError(errorInfo)
         , state(state)
     {
     }
 
     template<typename... Args>
     explicit EvalBaseError(EvalState & state, const std::string & formatString, const Args &... formatArgs)
-        : Error(formatString, formatArgs...)
+        : CloneableError(formatString, formatArgs...)
         , state(state)
     {
     }
@@ -60,23 +60,31 @@ MakeError(InfiniteRecursionError, EvalError);
  * Inherits from EvalBaseError (not EvalError) because resource exhaustion
  * should not be cached.
  */
-struct StackOverflowError : public EvalBaseError
+struct StackOverflowError : public CloneableError<StackOverflowError, EvalBaseError>
 {
     StackOverflowError(EvalState & state)
-        : EvalBaseError(state, "stack overflow; max-call-depth exceeded")
+        : CloneableError(state, "stack overflow; max-call-depth exceeded")
     {
     }
 };
 
 MakeError(IFDError, EvalBaseError);
 
-struct InvalidPathError : public EvalError
+/**
+ * An evaluation error which should be retried instead of rethrown.
+ *
+ * A RecoverableEvalError is not an EvalError, because we shouldn't cache it in
+ * the eval cache, as it should be retried anyway.
+ */
+MakeError(RecoverableEvalError, EvalBaseError);
+
+struct InvalidPathError : public CloneableError<InvalidPathError, EvalError>
 {
 public:
     Path path;
 
     InvalidPathError(EvalState & state, const Path & path)
-        : EvalError(state, "path '%s' is not valid", path)
+        : CloneableError(state, "path '%s' is not valid", path)
     {
     }
 };