diff --git a/lib/tbot/services/workloadidentity/workload_api_test.go b/lib/tbot/services/workloadidentity/workload_api_test.go
index aa8d1c85ed0ab..19fa40ebd33ec 100644
--- a/lib/tbot/services/workloadidentity/workload_api_test.go
+++ b/lib/tbot/services/workloadidentity/workload_api_test.go
@@ -43,6 +43,7 @@ import (
 	"github.com/gravitational/teleport/lib/tbot/bot"
 	"github.com/gravitational/teleport/lib/tbot/bot/connection"
 	"github.com/gravitational/teleport/lib/tbot/workloadidentity"
+	"github.com/gravitational/teleport/lib/tbot/workloadidentity/workloadattest"
 	"github.com/gravitational/teleport/lib/utils/log/logtest"
 	"github.com/gravitational/teleport/tool/teleport/testenv"
 )
@@ -138,6 +139,11 @@ func TestBotWorkloadIdentityAPI(t *testing.T) {
 			Name: workloadIdentity.GetMetadata().GetName(),
 		},
 		Listen: listenAddr.String(),
+		Attestors: workloadattest.Config{
+			Unix: workloadattest.UnixAttestorConfig{
+				BinaryHashMaxSizeBytes: workloadattest.TestBinaryHashMaxBytes,
+			},
+		},
 	},
 	trustBundleCache,
 	crlCache,
diff --git a/lib/tbot/workloadidentity/workloadattest/unix.go b/lib/tbot/workloadidentity/workloadattest/unix.go
index a7b0a2aaa8b04..6e12de5d16c5c 100644
--- a/lib/tbot/workloadidentity/workloadattest/unix.go
+++ b/lib/tbot/workloadidentity/workloadattest/unix.go
@@ -25,6 +25,7 @@ import (
 	"errors"
 	"io"
 	"log/slog"
+	"time"
 
 	"github.com/gravitational/trace"
 	"github.com/shirou/gopsutil/v4/process"
@@ -35,6 +36,18 @@ import (
 // DefaultBinaryHashMaxBytes is default value for BinaryHashMaxSizeBytes.
 const DefaultBinaryHashMaxBytes = 1 << 30 // 1GiB
 
+// TestBinaryHashMaxBytes is a more suitable value for BinaryHashMaxSizeBytes
+// in tests. Reading `/proc/<pid>/exe` relies on inode stability, and in GitHub
+// Actions we've observed read stalls likely caused by overlayfs or debugfs.
+const TestBinaryHashMaxBytes = 1 << 12 // 4KiB
+
+// BinaryHashReadTimeout is the maximum amount of time we will wait to read
+// a process's executable to calculate its checksum. In normal circumstances
+// we should never reach this timeout, but reading `/proc/<pid>/exe` depends
+// on inode stability, so it's possible to encounter read stalls if there is
+// a network or overlay filesystem involved.
+const BinaryHashReadTimeout = 15 * time.Second
+
 // UnixAttestorConfig holds the configuration for the Unix workload attestor.
 type UnixAttestorConfig struct {
 	// BinaryHashMaxSize is the maximum number of bytes that will be read from
@@ -149,22 +162,51 @@ func (a *UnixAttestor) Attest(ctx context.Context, pid int) (*workloadidentityv1
 		att.BinaryPath = &path
 	}
 
-	exe, err := a.os.OpenExe(ctx, p)
+	if hash := a.hashBinary(ctx, p); hash != "" {
+		att.BinaryHash = &hash
+	}
+
+	return att, nil
+}
+
+// hashBinary opens the process's executable and returns its SHA-256 checksum
+// as a hex string. Failures and timeouts are logged and reported as an empty
+// string so that attestation can proceed without a binary hash.
+func (a *UnixAttestor) hashBinary(ctx context.Context, proc *process.Process) string {
+	exe, err := a.os.OpenExe(ctx, proc)
 	if err != nil {
 		a.log.ErrorContext(ctx, "Failed to open workload executable for hashing", "error", err)
-		return att, nil
+		return ""
 	}
 	defer func() { _ = exe.Close() }()
 
-	hash := sha256.New()
-	if _, err := copyAtMost(hash, exe, a.cfg.BinaryHashMaxSizeBytes); err != nil {
-		a.log.ErrorContext(ctx, "Failed to hash workload executable", "error", err)
-		return att, nil
+	// Read the workload executable to calculate a checksum. We do this in a
+	// goroutine in case of read stalls (e.g. due to the executable being on
+	// a network or overlay filesystem). If this happens, the goroutine will
+	// terminate when we close the file descriptor (see defer statement above).
+	type sumResult struct {
+		sum string
+		err error
+	}
+	// Buffered so the goroutine can always deliver its result and exit, even
+	// after we have given up waiting on the timeout or context branches.
+	resCh := make(chan sumResult, 1)
+	go func() {
+		hash := sha256.New()
+		if _, err := copyAtMost(hash, exe, a.cfg.BinaryHashMaxSizeBytes); err == nil {
+			resCh <- sumResult{sum: hex.EncodeToString(hash.Sum(nil))}
+		} else {
+			resCh <- sumResult{err: err}
+		}
+	}()
+
+	select {
+	case res := <-resCh:
+		if res.err != nil {
+			// Log the read/hash error carried in the result, not the stale
+			// (nil) error left over from OpenExe above.
+			a.log.ErrorContext(ctx, "Failed to hash workload executable", "error", res.err)
+		}
+		return res.sum
+	case <-time.After(BinaryHashReadTimeout):
+		a.log.ErrorContext(ctx, "Timeout reading workload executable. If this happens frequently, it could be due to the workload executable being on a network or overlay filesystem, you may also consider lowering `attestors.unix.binary_hash_max_size_bytes`.")
+	case <-ctx.Done():
 	}
 
-	sum := hex.EncodeToString(hash.Sum(nil))
-	att.BinaryHash = &sum
-	return att, nil
+	return ""
 }
 
 // copyAtMost copies at most n bytes from src to dst. If src contains more than