From bedc908e19f3a37dbfb9b699f32ae11161314b52 Mon Sep 17 00:00:00 2001 From: joerger Date: Tue, 13 May 2025 17:13:28 -0700 Subject: [PATCH 01/14] Refactor connection sharing logic to allow reconnection logic. --- api/utils/keys/piv/yubikey.go | 447 ++++++++++++++++++---------------- 1 file changed, 236 insertions(+), 211 deletions(-) diff --git a/api/utils/keys/piv/yubikey.go b/api/utils/keys/piv/yubikey.go index a1ba9570f6b56..f4c401216100f 100644 --- a/api/utils/keys/piv/yubikey.go +++ b/api/utils/keys/piv/yubikey.go @@ -29,9 +29,11 @@ import ( "errors" "fmt" "io" + "log/slog" "math/big" "strings" "sync" + "sync/atomic" "time" "github.com/go-piv/piv-go/piv" @@ -185,102 +187,98 @@ func (y *YubiKey) sign(ctx context.Context, ref *hardwarekey.PrivateKeyRef, keyI // process. Without this, the connection will be released, // leading to a failure when providing PIN or touch input: // "verify pin: transmitting request: the supplied handle was invalid". - release, err := y.conn.connect() - if err != nil { - return nil, trace.Wrap(err) - } - defer release() - - var touchPromptDelayTimer *time.Timer - if ref.Policy.TouchRequired { - touchPromptDelayTimer = time.NewTimer(signTouchPromptDelay) - defer touchPromptDelayTimer.Stop() - - go func() { - select { - case <-touchPromptDelayTimer.C: - // Prompt for touch after a delay, in case the function succeeds without touch due to a cached touch. - err := prompt.Touch(ctx, keyInfo) - if err != nil { - // Cancel the entire function when an error occurs. - // This is typically used for aborting the prompt. - cancel(trace.Wrap(err)) + return doWithSharedConn(y.conn, func(yk *piv.YubiKey) ([]byte, error) { + var touchPromptDelayTimer *time.Timer + if ref.Policy.TouchRequired { + touchPromptDelayTimer = time.NewTimer(signTouchPromptDelay) + defer touchPromptDelayTimer.Stop() + + go func() { + select { + case <-touchPromptDelayTimer.C: + // Prompt for touch after a delay, in case the function succeeds without touch due to a cached touch. + err := prompt.Touch(ctx, keyInfo) + if err != nil { + // Cancel the entire function when an error occurs. + // This is typically used for aborting the prompt. + cancel(trace.Wrap(err)) + } + return + case <-ctx.Done(): + // touch cached, skip prompt. + return } - return - case <-ctx.Done(): - // touch cached, skip prompt. - return - } - }() - } - - promptPIN := func() (string, error) { - // touch prompt delay is disrupted by pin prompts. To prevent misfired - // touch prompts, pause the timer for the duration of the pin prompt. - if touchPromptDelayTimer != nil { - if touchPromptDelayTimer.Stop() { - defer touchPromptDelayTimer.Reset(signTouchPromptDelay) - } + }() } - return y.promptPIN(ctx, prompt, hardwarekey.PINRequired, keyInfo, ref.PINCacheTTL) - } - - pinPolicy := piv.PINPolicyNever - if ref.Policy.PINRequired { - pinPolicy = piv.PINPolicyOnce - } - - auth := piv.KeyAuth{ - PINPrompt: promptPIN, - PINPolicy: pinPolicy, - } + promptPIN := func() (string, error) { + // touch prompt delay is disrupted by pin prompts. To prevent misfired + // touch prompts, pause the timer for the duration of the pin prompt. + if touchPromptDelayTimer != nil { + if touchPromptDelayTimer.Stop() { + defer touchPromptDelayTimer.Reset(signTouchPromptDelay) + } + } - // YubiKeys with firmware version 5.3.1 have a bug where insVerify(0x20, 0x00, 0x80, nil) - // clears the PIN cache instead of performing a non-mutable check. This causes the signature - // with pin policy "once" to fail unless PIN is provided for each call. We can avoid this bug - // by skipping the insVerify check and instead manually retrying with a PIN prompt only when - // the signature fails. - manualRetryWithPIN := false - fw531 := piv.Version{Major: 5, Minor: 3, Patch: 1} - if auth.PINPolicy == piv.PINPolicyOnce && y.conn.conn.Version() == fw531 { - // Set the keys PIN policy to never to skip the insVerify check. If PIN was provided in - // a previous recent call, the signature will succeed as expected of the "once" policy. - auth.PINPolicy = piv.PINPolicyNever - manualRetryWithPIN = true - } + return y.promptPIN(ctx, prompt, hardwarekey.PINRequired, keyInfo, ref.PINCacheTTL) + } - privateKey, err := y.conn.privateKey(pivSlot, ref.PublicKey, auth) - if err != nil { - return nil, trace.Wrap(err) - } + pinPolicy := piv.PINPolicyNever + if ref.Policy.PINRequired { + pinPolicy = piv.PINPolicyOnce + } - signer, ok := privateKey.(crypto.Signer) - if !ok { - return nil, trace.BadParameter("private key type %T does not implement crypto.Signer", privateKey) - } + auth := piv.KeyAuth{ + PINPrompt: promptPIN, + PINPolicy: pinPolicy, + } - // For generic auth errors, such as when PIN is not provided, the smart card returns the error code 0x6982. - // The piv-go library wraps error codes like this with a user readable message: "security status not satisfied". - const pivGenericAuthErrCodeString = "6982" + // YubiKeys with firmware version 5.3.1 have a bug where insVerify(0x20, 0x00, 0x80, nil) + // clears the PIN cache instead of performing a non-mutable check. This causes the signature + // with pin policy "once" to fail unless PIN is provided for each call. We can avoid this bug + // by skipping the insVerify check and instead manually retrying with a PIN prompt only when + // the signature fails. + manualRetryWithPIN := false + fw531 := piv.Version{Major: 5, Minor: 3, Patch: 1} + if auth.PINPolicy == piv.PINPolicyOnce && y.version == fw531 { + // Set the keys PIN policy to never to skip the insVerify check. If PIN was provided in + // a previous recent call, the signature will succeed as expected of the "once" policy. + auth.PINPolicy = piv.PINPolicyNever + manualRetryWithPIN = true + } - signature, err := abandonableSign(ctx, signer, rand, digest, opts) - switch { - case err == nil: - return signature, nil - case manualRetryWithPIN && strings.Contains(err.Error(), pivGenericAuthErrCodeString): - pin, err := promptPIN() + privateKey, err := y.conn.privateKey(pivSlot, ref.PublicKey, auth) if err != nil { return nil, trace.Wrap(err) } - if err := y.conn.verifyPIN(pin); err != nil { - return nil, trace.Wrap(err) + + signer, ok := privateKey.(crypto.Signer) + if !ok { + return nil, trace.BadParameter("private key type %T does not implement crypto.Signer", privateKey) } + + // For generic auth errors, such as when PIN is not provided, the smart card returns the error code 0x6982. + // The piv-go library wraps error codes like this with a user readable message: "security status not satisfied". + const pivGenericAuthErrCodeString = "6982" + signature, err := abandonableSign(ctx, signer, rand, digest, opts) - return signature, trace.Wrap(err) - default: - return nil, trace.Wrap(err) - } + switch { + case err == nil: + return signature, nil + case manualRetryWithPIN && strings.Contains(err.Error(), pivGenericAuthErrCodeString): + pin, err := promptPIN() + if err != nil { + return nil, trace.Wrap(err) + } + if err := y.conn.verifyPIN(pin); err != nil { + return nil, trace.Wrap(err) + } + signature, err := abandonableSign(ctx, signer, rand, digest, opts) + return signature, trace.Wrap(err) + default: + return nil, trace.Wrap(err) + } + }) } // abandonableSign is a wrapper around signer.Sign. @@ -558,38 +556,89 @@ type sharedPIVConnection struct { // This value may change between OS's, or with other system changes. card string - // conn is the shared PIV connection. - conn *piv.YubiKey - mu sync.Mutex - activeConnections int + // conn is a shared PIV connection. + conn *piv.YubiKey + // connMu is a RW mutex that protects conn. The conn is only opened/closed while under a full + // lock, meaning that multiple callers can utilize the connection concurrently while under a + // read lock without the risk of the connection being closed partway through a PIV command. + connMu sync.RWMutex + // connHolds is the number of active holds on the connection. It should be modified under + // connMu.RLock to ensure a new hold isn't added for a closing connection. + connHolds atomic.Int32 } -// connect establishes a connection to a YubiKey PIV module and returns a release function. -// The release function should be called to properly close the shared connection. -// The connection is not immediately terminated, allowing other callers to -// use it before it's released. -// The YubiKey PIV module itself takes some additional time to handle closed -// connections, so we use a retry loop to give the PIV module time to close prior connections. -func (c *sharedPIVConnection) connect() (func(), error) { - c.mu.Lock() - defer c.mu.Unlock() +func doWithSharedConn[T any](c *sharedPIVConnection, do func(*piv.YubiKey) (T, error)) (T, error) { + nilT := *new(T) + + conn, err := c.holdConn() + if err != nil { + return nilT, trace.Wrap(err) + } + defer c.releaseConn() + + // TODO: retry on pcsc reset err. + return do(conn) +} + +// holdConn holds an existing shared connection, or opens and holds a new shared connection. +// Unless holdConn returns an error, it must be followed by a call to releaseConn to ensure +// the connection is closed once there are no remaining holds. +func (c *sharedPIVConnection) holdConn() (*piv.YubiKey, error) { + c.connMu.RLock() + defer c.connMu.RUnlock() + + if c.conn == nil { + c.connMu.RUnlock() + if err := c.connect(); err != nil { + return nil, trace.Wrap(err) + } + c.connMu.RLock() + } + + c.connHolds.Add(1) + return c.conn, nil +} + +// releaseConn releases a hold on a shared connection and, +// if there are no remaining holds, closes the connection. +func (c *sharedPIVConnection) releaseConn() { + c.connMu.RLock() + defer c.connMu.RUnlock() + + remaining := c.connHolds.Add(-1) - release := func() { - c.mu.Lock() - defer c.mu.Unlock() + // If there are no remaining holds on the connection, close it. + if remaining == 0 { + c.connMu.RUnlock() + defer c.connMu.RLock() - c.activeConnections-- - if c.activeConnections == 0 { + c.connMu.Lock() + defer c.connMu.Unlock() + + // Double check that a new hold wasn't added while waiting for the full lock. + if c.connHolds.Load() != 0 { c.conn.Close() c.conn = nil } } +} + +// connect establishes a connection to a YubiKey PIV module and returns a release function. +// The release function should be called to properly close the shared connection. +// The connection is not immediately terminated, allowing other callers to +// use it before it's released. +// The YubiKey PIV module itself takes some additional time to handle closed +// connections, so we use a retry loop to give the PIV module time to close prior connections. +func (c *sharedPIVConnection) connect() error { + c.connMu.Lock() + defer c.connMu.Unlock() + // Check if another goroutine was able to open a connection first. if c.conn != nil { - c.activeConnections++ - return release, nil + return nil } + ctx := context.Background() linearRetry, err := retryutils.NewLinear(retryutils.LinearConfig{ // If a PIV connection has just been closed, it take ~5 ms to become // available to new connections. For this reason, we initially wait a @@ -602,164 +651,140 @@ func (c *sharedPIVConnection) connect() (func(), error) { Max: time.Millisecond * 50, }) if err != nil { - return nil, trace.Wrap(err) + return trace.Wrap(err) } - // Backoff and retry for up to 1 second. - retryCtx, cancel := context.WithTimeout(context.Background(), time.Second) - defer cancel() + isRetryError := func(err error) bool { + const retryError = "connecting to smart card: the smart card cannot be accessed because of other connections outstanding" + return strings.Contains(err.Error(), retryError) + } - err = linearRetry.For(retryCtx, func() error { + tryConnect := func() error { c.conn, err = piv.Open(c.card) if err != nil && !isRetryError(err) { return retryutils.PermanentRetryError(err) } return trace.Wrap(err) - }) + } + + // Backoff and retry for up to 1 second. + retryCtx, cancel := context.WithTimeout(ctx, time.Second) + defer cancel() + + err = linearRetry.For(retryCtx, tryConnect) + + // Using PIV synchronously causes issues since only one connection is allowed at a time. + // This shouldn't be an issue for `tsh` which primarily runs consecutively, but Teleport + // Connect works through callbacks, etc. and may try to open multiple connections at a time. + // If this error is being emitted more than rarely, the 1 second timeout may need to be increased. + // + // It's also possible that the user is running another PIV program, which may hold the PIV + // connection indefinitely (yubikey-agent). In this case, user action is necessary, so we + // alert them with this issue. if trace.IsLimitExceeded(err) { - // Using PIV synchronously causes issues since only one connection is allowed at a time. - // This shouldn't be an issue for `tsh` which primarily runs consecutively, but Teleport - // Connect works through callbacks, etc. and may try to open multiple connections at a time. - // If this error is being emitted more than rarely, the 1 second timeout may need to be increased. - // - // It's also possible that the user is running another PIV program, which may hold the PIV - // connection indefinitely (yubikey-agent). In this case, user action is necessary, so we - // alert them with this issue. - return nil, trace.LimitExceeded("could not connect to YubiKey as another application is using it. Please try again once the program that uses the YubiKey, such as yubikey-agent is closed") - } else if err != nil { - return nil, trace.Wrap(err) + slog.WarnContext(ctx, "failed to connect to YubiKey as it is currently in use by another process. "+ + "This can occur when running multiple Teleport clients simultaneously, or running long lived PIV "+ + "applications like yubikey-agent. Try again once other PIV processes have completed.") + return trace.LimitExceeded("failed to connect to YubiKey as it is currently in use by another process") } - c.activeConnections++ - return release, nil + return trace.Wrap(err) } func (c *sharedPIVConnection) privateKey(slot piv.Slot, public crypto.PublicKey, auth piv.KeyAuth) (crypto.PrivateKey, error) { - release, err := c.connect() - if err != nil { - return nil, trace.Wrap(err) - } - defer release() - privateKey, err := c.conn.PrivateKey(slot, public, auth) - return privateKey, trace.Wrap(err) + return doWithSharedConn(c, func(yk *piv.YubiKey) (crypto.PrivateKey, error) { + priv, err := yk.PrivateKey(slot, public, auth) + return priv, trace.Wrap(err) + }) } func (c *sharedPIVConnection) getSerialNumber() (uint32, error) { - release, err := c.connect() - if err != nil { - return 0, trace.Wrap(err) - } - defer release() - serial, err := c.conn.Serial() - return serial, trace.Wrap(err) + return doWithSharedConn(c, func(yk *piv.YubiKey) (uint32, error) { + serial, err := yk.Serial() + return serial, trace.Wrap(err) + }) } func (c *sharedPIVConnection) getVersion() (piv.Version, error) { - release, err := c.connect() - if err != nil { - return piv.Version{}, trace.Wrap(err) - } - defer release() - return c.conn.Version(), nil + return doWithSharedConn(c, func(yk *piv.YubiKey) (piv.Version, error) { + return yk.Version(), nil + }) } func (c *sharedPIVConnection) reset() error { - release, err := c.connect() - if err != nil { - return trace.Wrap(err) - } - defer release() - return trace.Wrap(c.conn.Reset()) + _, err := doWithSharedConn(c, func(yk *piv.YubiKey) (any, error) { + err := yk.Reset() + return nil, trace.Wrap(err) + }) + return trace.Wrap(err) } func (c *sharedPIVConnection) setCertificate(key [24]byte, slot piv.Slot, cert *x509.Certificate) error { - release, err := c.connect() - if err != nil { - return trace.Wrap(err) - } - defer release() - return trace.Wrap(c.conn.SetCertificate(key, slot, cert)) + _, err := doWithSharedConn(c, func(yk *piv.YubiKey) (any, error) { + err := yk.SetCertificate(key, slot, cert) + return nil, trace.Wrap(err) + }) + return trace.Wrap(err) } func (c *sharedPIVConnection) certificate(slot piv.Slot) (*x509.Certificate, error) { - release, err := c.connect() - if err != nil { - return nil, trace.Wrap(err) - } - defer release() - cert, err := c.conn.Certificate(slot) - return cert, trace.Wrap(err) + return doWithSharedConn(c, func(yk *piv.YubiKey) (*x509.Certificate, error) { + cert, err := yk.Certificate(slot) + return cert, trace.Wrap(err) + }) } func (c *sharedPIVConnection) generateKey(key [24]byte, slot piv.Slot, opts piv.Key) (crypto.PublicKey, error) { - release, err := c.connect() - if err != nil { - return nil, trace.Wrap(err) - } - defer release() - pubKey, err := c.conn.GenerateKey(key, slot, opts) - return pubKey, trace.Wrap(err) + return doWithSharedConn(c, func(yk *piv.YubiKey) (crypto.PublicKey, error) { + pub, err := yk.GenerateKey(key, slot, opts) + return pub, trace.Wrap(err) + }) } func (c *sharedPIVConnection) attest(slot piv.Slot) (*x509.Certificate, error) { - release, err := c.connect() - if err != nil { - return nil, trace.Wrap(err) - } - defer release() - cert, err := c.conn.Attest(slot) - return cert, trace.Wrap(err) + return doWithSharedConn(c, func(yk *piv.YubiKey) (*x509.Certificate, error) { + cert, err := yk.Attest(slot) + return cert, trace.Wrap(err) + }) } func (c *sharedPIVConnection) attestationCertificate() (*x509.Certificate, error) { - release, err := c.connect() - if err != nil { - return nil, trace.Wrap(err) - } - defer release() - cert, err := c.conn.AttestationCertificate() - return cert, trace.Wrap(err) + return doWithSharedConn(c, func(yk *piv.YubiKey) (*x509.Certificate, error) { + cert, err := yk.AttestationCertificate() + return cert, trace.Wrap(err) + }) } func (c *sharedPIVConnection) setPIN(oldPIN string, newPIN string) error { - release, err := c.connect() - if err != nil { - return trace.Wrap(err) - } - defer release() - return trace.Wrap(c.conn.SetPIN(oldPIN, newPIN)) + _, err := doWithSharedConn(c, func(yk *piv.YubiKey) (any, error) { + err := yk.SetPIN(oldPIN, newPIN) + return nil, trace.Wrap(err) + }) + return trace.Wrap(err) } func (c *sharedPIVConnection) setPUK(oldPUK string, newPUK string) error { - release, err := c.connect() - if err != nil { - return trace.Wrap(err) - } - defer release() - return trace.Wrap(c.conn.SetPUK(oldPUK, newPUK)) + _, err := doWithSharedConn(c, func(yk *piv.YubiKey) (any, error) { + err := yk.SetPUK(oldPUK, newPUK) + return nil, trace.Wrap(err) + }) + return trace.Wrap(err) } func (c *sharedPIVConnection) unblock(puk string, newPIN string) error { - release, err := c.connect() - if err != nil { - return trace.Wrap(err) - } - defer release() - return trace.Wrap(c.conn.Unblock(puk, newPIN)) + _, err := doWithSharedConn(c, func(yk *piv.YubiKey) (any, error) { + err := yk.Unblock(puk, newPIN) + return nil, trace.Wrap(err) + }) + return trace.Wrap(err) } func (c *sharedPIVConnection) verifyPIN(pin string) error { - release, err := c.connect() - if err != nil { - return trace.Wrap(err) - } - defer release() - return trace.Wrap(c.conn.VerifyPIN(pin)) -} - -func isRetryError(err error) bool { - const retryError = "connecting to smart card: the smart card cannot be accessed because of other connections outstanding" - return strings.Contains(err.Error(), retryError) + _, err := doWithSharedConn(c, func(yk *piv.YubiKey) (any, error) { + err := yk.VerifyPIN(pin) + return nil, trace.Wrap(err) + }) + return trace.Wrap(err) } func parsePIVSlot(slotKey hardwarekey.PIVSlotKey) (piv.Slot, error) { From 01693e82708f993940e5e7f45bf12337375dc726 Mon Sep 17 00:00:00 2001 From: joerger Date: Tue, 13 May 2025 17:47:34 -0700 Subject: [PATCH 02/14] Refactor signature method into organized parts; Move signPromptMu further down the callstack to allow better concurrent access; Add attestMu to prevent certificate corruption. --- api/utils/keys/piv/service.go | 29 +++- api/utils/keys/piv/yubikey.go | 319 ++++++++++++++++++---------------- 2 files changed, 200 insertions(+), 148 deletions(-) diff --git a/api/utils/keys/piv/service.go b/api/utils/keys/piv/service.go index 8f2f09b81cfed..8ba0a6453eb08 100644 --- a/api/utils/keys/piv/service.go +++ b/api/utils/keys/piv/service.go @@ -188,10 +188,33 @@ func (s *YubiKeyService) Sign(ctx context.Context, ref *hardwarekey.PrivateKeyRe return nil, trace.Wrap(err) } - s.signMu.Lock() - defer s.signMu.Unlock() + pivSlot, err := parsePIVSlot(ref.SlotKey) + if err != nil { + return nil, trace.Wrap(err) + } + + // Check that the public key in the slot matches our record. + publicKey, err := y.getPublicKey(pivSlot) + if err != nil { + return nil, trace.Wrap(err) + } + + if !publicKey.Equal(ref.PublicKey) { + return nil, trace.CompareFailed("public key mismatch on PIV slot 0x%x", pivSlot.Key) + } + + // If the sign request is for an unknown agent key, ensure that the requested PIV slot was + // configured with a self-signed Teleport metadata certificate. + if keyInfo.AgentKeyInfo.UnknownAgentKey { + switch err := y.checkCertificate(pivSlot); { + case trace.IsNotFound(err), errors.As(err, &nonTeleportCertError{}): + return nil, trace.Wrap(err, agentRequiresTeleportCertMessage) + case err != nil: + return nil, trace.Wrap(err) + } + } - return y.sign(ctx, ref, keyInfo, s.getPrompt(), rand, digest, opts) + return y.signWithRetry(ctx, ref, keyInfo, s.getPrompt(), rand, digest, opts) } // TODO(Joerger): Re-attesting the key every time we decode a hardware key signer is very resource diff --git a/api/utils/keys/piv/yubikey.go b/api/utils/keys/piv/yubikey.go index f4c401216100f..dc27c469e9a51 100644 --- a/api/utils/keys/piv/yubikey.go +++ b/api/utils/keys/piv/yubikey.go @@ -62,6 +62,11 @@ type YubiKey struct { version piv.Version // pinCache can be used to skip PIN prompts for keys that have PIN caching enabled. pinCache *pinCache + + // signPromptMu prevents prompting for PIN/touch repeatedly for concurrent signatures. + // TODO(Joerger): Rather than preventing concurrent signatures, we can make the + // PIN and touch prompts durable to concurrent signatures. + signPromptMu sync.Mutex } // FindYubiKey finds a YubiKey PIV card by serial number. If the provided @@ -149,168 +154,115 @@ const ( signTouchPromptDelay = time.Millisecond * 200 ) -func (y *YubiKey) sign(ctx context.Context, ref *hardwarekey.PrivateKeyRef, keyInfo hardwarekey.ContextualKeyInfo, prompt hardwarekey.Prompt, rand io.Reader, digest []byte, opts crypto.SignerOpts) ([]byte, error) { - pivSlot, err := parsePIVSlot(ref.SlotKey) - if err != nil { - return nil, trace.Wrap(err) - } +const ( + // For generic auth errors, such as when PIN is not provided, the smart card returns the error code 0x6982. + // The piv-go library wraps error codes like this with a user readable message: "security status not satisfied". + pivGenericAuthErrCodeString = "6982" - // Check that the public key in the slot matches our record. - slotCert, err := y.conn.attest(pivSlot) - if err != nil { - return nil, trace.Wrap(err) - } - type cryptoPublicKeyI interface { - Equal(x crypto.PublicKey) bool + // If a pcsc transaction is closed by the pcsc daemon, all operations will result in the SCARD_W_RESET_CARD error code, + // which the piv-go library replaces with the following error message. This error can be handled by starting a new + // transactions or reconnecting. + // + // See https://github.com/go-piv/piv-go/pull/173 for more details. + // + // TODO(Joerger): Once ^ is merged and released upstream, remove this adhoc retry. + pcscResetCardErrMessage = "the smart card has been reset, so any shared state information is invalid" +) + +func (y *YubiKey) signWithRetry(ctx context.Context, ref *hardwarekey.PrivateKeyRef, keyInfo hardwarekey.ContextualKeyInfo, prompt hardwarekey.Prompt, rand io.Reader, digest []byte, opts crypto.SignerOpts) ([]byte, error) { + pinPolicy := piv.PINPolicyNever + if ref.Policy.PINRequired { + pinPolicy = piv.PINPolicyOnce } - if slotPub, ok := slotCert.PublicKey.(cryptoPublicKeyI); !ok { - return nil, trace.BadParameter("expected crypto.PublicKey but got %T", slotCert.PublicKey) - } else if !slotPub.Equal(ref.PublicKey) { - return nil, trace.CompareFailed("public key mismatch on PIV slot 0x%x", pivSlot.Key) + + // YubiKeys with firmware version 5.3.1 have a bug where insVerify(0x20, 0x00, 0x80, nil) + // clears the PIN cache instead of performing a non-mutable check. This causes the signature + // with pin policy "once" to fail unless PIN is provided for each call. We can avoid this bug + // by skipping the insVerify check and instead manually retrying with a PIN prompt only when + // the signature fails. + fw531 := piv.Version{Major: 5, Minor: 3, Patch: 1} + if pinPolicy == piv.PINPolicyOnce && y.version == fw531 { + // Set the keys PIN policy to never to skip the insVerify check. If PIN was provided in + // a previous recent call, the signature will succeed as expected of the "once" policy. + pinPolicy = piv.PINPolicyNever } - // If the sign request is for an unknown agent key, ensure that the requested PIV slot was - // configured with a self-signed Teleport metadata certificate. - if keyInfo.AgentKeyInfo.UnknownAgentKey { - switch err := y.checkCertificate(pivSlot); { - case trace.IsNotFound(err), errors.As(err, &nonTeleportCertError{}): - return nil, trace.Wrap(err, agentRequiresTeleportCertMessage) - case err != nil: - return nil, trace.Wrap(err) - } + signature, err := y.sign(ctx, ref, keyInfo, prompt, pinPolicy, rand, digest, opts) + switch { + case err == nil: + return signature, nil + case strings.Contains(err.Error(), pivGenericAuthErrCodeString): + // Force the signature to prompt for PIN, as something likely went wrong for the + // card to skip PIN verification in the first place (e.g. firmware bug). + pinPolicy = piv.PINPolicyAlways + return y.sign(ctx, ref, keyInfo, prompt, pinPolicy, rand, digest, opts) + case strings.Contains(err.Error(), pcscResetCardErrMessage): + // Usually this error occurs on Windows, which times out exclusive transactions after 5 seconds without any activity, + // giving users only 5 seconds to answer PIN prompts. The PIN should now be cached locally, so we simply retry. + return y.sign(ctx, ref, keyInfo, prompt, pinPolicy, rand, digest, opts) + default: + return nil, trace.Wrap(err) } +} +func (y *YubiKey) sign(ctx context.Context, ref *hardwarekey.PrivateKeyRef, keyInfo hardwarekey.ContextualKeyInfo, prompt hardwarekey.Prompt, pinPolicy piv.PINPolicy, rand io.Reader, digest []byte, opts crypto.SignerOpts) ([]byte, error) { ctx, cancel := context.WithCancelCause(ctx) defer cancel(nil) - // Lock the connection for the entire duration of the sign - // process. Without this, the connection will be released, - // leading to a failure when providing PIN or touch input: - // "verify pin: transmitting request: the supplied handle was invalid". - return doWithSharedConn(y.conn, func(yk *piv.YubiKey) ([]byte, error) { - var touchPromptDelayTimer *time.Timer - if ref.Policy.TouchRequired { - touchPromptDelayTimer = time.NewTimer(signTouchPromptDelay) - defer touchPromptDelayTimer.Stop() - - go func() { - select { - case <-touchPromptDelayTimer.C: - // Prompt for touch after a delay, in case the function succeeds without touch due to a cached touch. - err := prompt.Touch(ctx, keyInfo) - if err != nil { - // Cancel the entire function when an error occurs. - // This is typically used for aborting the prompt. - cancel(trace.Wrap(err)) - } - return - case <-ctx.Done(): - // touch cached, skip prompt. - return - } - }() - } + promptPIN := func() (string, error) { + return y.promptPIN(ctx, prompt, hardwarekey.PINRequired, keyInfo, ref.PINCacheTTL) + } - promptPIN := func() (string, error) { - // touch prompt delay is disrupted by pin prompts. To prevent misfired - // touch prompts, pause the timer for the duration of the pin prompt. + auth := piv.KeyAuth{ + PINPrompt: promptPIN, + PINPolicy: pinPolicy, + } + + y.signPromptMu.Lock() + defer y.signPromptMu.Unlock() + + var touchPromptDelayTimer *time.Timer + if ref.Policy.TouchRequired { + // touch prompt delay is disrupted by pin prompts. To prevent misfired + // touch prompts, pause the timer for the duration of the pin prompt. + // + // TODO(Joerger): Once https://github.com/go-piv/piv-go/pull/174 is merged and we can verify PIN + // before even attempting to sign, this timer logic can be removed in favor of simple time.After. + touchPromptDelayTimer = time.NewTimer(signTouchPromptDelay) + defer touchPromptDelayTimer.Stop() + + auth.PINPrompt = func() (pin string, err error) { if touchPromptDelayTimer != nil { if touchPromptDelayTimer.Stop() { + // TODO(Joerger): Does the pin prompt delay the touch prompt even more than this? defer touchPromptDelayTimer.Reset(signTouchPromptDelay) } } - return y.promptPIN(ctx, prompt, hardwarekey.PINRequired, keyInfo, ref.PINCacheTTL) - } - - pinPolicy := piv.PINPolicyNever - if ref.Policy.PINRequired { - pinPolicy = piv.PINPolicyOnce - } - - auth := piv.KeyAuth{ - PINPrompt: promptPIN, - PINPolicy: pinPolicy, + return promptPIN() } - // YubiKeys with firmware version 5.3.1 have a bug where insVerify(0x20, 0x00, 0x80, nil) - // clears the PIN cache instead of performing a non-mutable check. This causes the signature - // with pin policy "once" to fail unless PIN is provided for each call. We can avoid this bug - // by skipping the insVerify check and instead manually retrying with a PIN prompt only when - // the signature fails. - manualRetryWithPIN := false - fw531 := piv.Version{Major: 5, Minor: 3, Patch: 1} - if auth.PINPolicy == piv.PINPolicyOnce && y.version == fw531 { - // Set the keys PIN policy to never to skip the insVerify check. If PIN was provided in - // a previous recent call, the signature will succeed as expected of the "once" policy. - auth.PINPolicy = piv.PINPolicyNever - manualRetryWithPIN = true - } - - privateKey, err := y.conn.privateKey(pivSlot, ref.PublicKey, auth) - if err != nil { - return nil, trace.Wrap(err) - } - - signer, ok := privateKey.(crypto.Signer) - if !ok { - return nil, trace.BadParameter("private key type %T does not implement crypto.Signer", privateKey) - } - - // For generic auth errors, such as when PIN is not provided, the smart card returns the error code 0x6982. - // The piv-go library wraps error codes like this with a user readable message: "security status not satisfied". - const pivGenericAuthErrCodeString = "6982" - - signature, err := abandonableSign(ctx, signer, rand, digest, opts) - switch { - case err == nil: - return signature, nil - case manualRetryWithPIN && strings.Contains(err.Error(), pivGenericAuthErrCodeString): - pin, err := promptPIN() - if err != nil { - return nil, trace.Wrap(err) - } - if err := y.conn.verifyPIN(pin); err != nil { - return nil, trace.Wrap(err) + // There is no built in mechanism to prompt for touch on demand, so we simply prompt for touch after + // a short duration in hopes of lining up with the actual YubiKey touch prompt (flashing key). In the + // case where touch is cached, the delay prevents the prompt from firing when it isn't needed. + go func() { + select { + case <-touchPromptDelayTimer.C: + err := prompt.Touch(ctx, keyInfo) + if err != nil { + // Cancel the entire function when an error occurs. + // This is typically used for aborting the prompt. + cancel(trace.Wrap(err)) + } + return + case <-ctx.Done(): + // touch cached, skip prompt. + return } - signature, err := abandonableSign(ctx, signer, rand, digest, opts) - return signature, trace.Wrap(err) - default: - return nil, trace.Wrap(err) - } - }) -} - -// abandonableSign is a wrapper around signer.Sign. -// It enhances the functionality of signer.Sign by allowing the caller to stop -// waiting for the result if the provided context is canceled. -// It is especially important for WarmupHardwareKey, -// where waiting for the user providing a PIN/touch could block program termination. -// Important: this function only abandons the signer.Sign result, doesn't cancel it. -func abandonableSign(ctx context.Context, signer crypto.Signer, rand io.Reader, digest []byte, opts crypto.SignerOpts) ([]byte, error) { - type signResult struct { - signature []byte - err error + }() } - signResultCh := make(chan signResult) - go func() { - if err := ctx.Err(); err != nil { - return - } - signature, err := signer.Sign(rand, digest, opts) - select { - case <-ctx.Done(): - case signResultCh <- signResult{signature: signature, err: trace.Wrap(err)}: - } - }() - - select { - case <-ctx.Done(): - return nil, ctx.Err() - case result := <-signResultCh: - return result.signature, trace.Wrap(result.err) - } + return y.conn.sign(ctx, ref, auth, rand, digest, opts) } // Reset resets the YubiKey PIV module to default settings. @@ -405,6 +357,25 @@ func (y *YubiKey) checkCertificate(slot piv.Slot) error { return nil } +type cryptoPublicKeyI interface { + Equal(x crypto.PublicKey) bool +} + +// getPublicKey gets a public key from the given PIV slot. +func (y *YubiKey) getPublicKey(slot piv.Slot) (cryptoPublicKeyI, error) { + slotCert, err := y.conn.attest(slot) + if err != nil { + return nil, trace.Wrap(err, "failed to get slot cert on PIV slot 0x%x", slot.Key) + } + + slotPub, ok := slotCert.PublicKey.(cryptoPublicKeyI) + if !ok { + return nil, trace.BadParameter("expected crypto.PublicKey but got %T", slotCert.PublicKey) + } + + return slotPub, nil +} + // attestKey attests the key in the given PIV slot. // The key's public key can be found in the returned slotCert. func (y *YubiKey) attestKey(slot piv.Slot) (slotCert *x509.Certificate, attCert *x509.Certificate, att *piv.Attestation, err error) { @@ -565,6 +536,10 @@ type sharedPIVConnection struct { // connHolds is the number of active holds on the connection. It should be modified under // connMu.RLock to ensure a new hold isn't added for a closing connection. connHolds atomic.Int32 + + // attestMu prevents signatures from occurring concurrently with an attestation + // request, which would corrupt the resulting certificate. + attestMu sync.RWMutex } func doWithSharedConn[T any](c *sharedPIVConnection, do func(*piv.YubiKey) (T, error)) (T, error) { @@ -691,13 +666,64 @@ func (c *sharedPIVConnection) connect() error { return trace.Wrap(err) } -func (c *sharedPIVConnection) privateKey(slot piv.Slot, public crypto.PublicKey, auth piv.KeyAuth) (crypto.PrivateKey, error) { - return doWithSharedConn(c, func(yk *piv.YubiKey) (crypto.PrivateKey, error) { - priv, err := yk.PrivateKey(slot, public, auth) - return priv, trace.Wrap(err) +func (c *sharedPIVConnection) sign(ctx context.Context, ref *hardwarekey.PrivateKeyRef, auth piv.KeyAuth, rand io.Reader, digest []byte, opts crypto.SignerOpts) ([]byte, error) { + pivSlot, err := parsePIVSlot(ref.SlotKey) + if err != nil { + return nil, trace.Wrap(err) + } + + c.attestMu.RLock() + defer c.attestMu.RUnlock() + + return doWithSharedConn(c, func(yk *piv.YubiKey) ([]byte, error) { + // Prepare the key and perform the signature with the same connection. + // Closing the connection in between breaks the underlying PIV handle. + priv, err := yk.PrivateKey(pivSlot, ref.PublicKey, auth) + if err != nil { + return nil, trace.Wrap(err) + } + + signer, ok := priv.(crypto.Signer) + if !ok { + return nil, trace.BadParameter("private key type %T does not implement crypto.Signer", priv) + } + + return abandonableSign(ctx, signer, rand, digest, opts) }) } +// abandonableSign is a wrapper around signer.Sign. +// It enhances the functionality of signer.Sign by allowing the caller to stop +// waiting for the result if the provided context is canceled. +// It is especially important for WarmupHardwareKey, +// where waiting for the user providing a PIN/touch could block program termination. +// Important: this function only abandons the signer.Sign result, doesn't cancel it. +func abandonableSign(ctx context.Context, signer crypto.Signer, rand io.Reader, digest []byte, opts crypto.SignerOpts) ([]byte, error) { + type signResult struct { + signature []byte + err error + } + + signResultCh := make(chan signResult) + go func() { + if err := ctx.Err(); err != nil { + return + } + signature, err := signer.Sign(rand, digest, opts) + select { + case <-ctx.Done(): + case signResultCh <- signResult{signature: signature, err: trace.Wrap(err)}: + } + }() + + select { + case <-ctx.Done(): + return nil, ctx.Err() + case result := <-signResultCh: + return result.signature, trace.Wrap(result.err) + } +} + func (c *sharedPIVConnection) getSerialNumber() (uint32, error) { return doWithSharedConn(c, func(yk *piv.YubiKey) (uint32, error) { serial, err := yk.Serial() @@ -742,6 +768,9 @@ func (c *sharedPIVConnection) generateKey(key [24]byte, slot piv.Slot, opts piv. } func (c *sharedPIVConnection) attest(slot piv.Slot) (*x509.Certificate, error) { + c.attestMu.Lock() + defer c.attestMu.Unlock() + return doWithSharedConn(c, func(yk *piv.YubiKey) (*x509.Certificate, error) { cert, err := yk.Attest(slot) return cert, trace.Wrap(err) From 8315252427fa040342d145ab42d67d194c88a657 Mon Sep 17 00:00:00 2001 From: joerger Date: Tue, 13 May 2025 19:10:36 -0700 Subject: [PATCH 03/14] Handle PIV reconnection within doWithSharedConn. --- api/utils/keys/piv/yubikey.go | 101 ++++++++++++++++++++++------------ 1 file changed, 65 insertions(+), 36 deletions(-) diff --git a/api/utils/keys/piv/yubikey.go b/api/utils/keys/piv/yubikey.go index dc27c469e9a51..1748389e7b7a5 100644 --- a/api/utils/keys/piv/yubikey.go +++ b/api/utils/keys/piv/yubikey.go @@ -196,10 +196,6 @@ func (y *YubiKey) signWithRetry(ctx context.Context, ref *hardwarekey.PrivateKey // card to skip PIN verification in the first place (e.g. firmware bug). pinPolicy = piv.PINPolicyAlways return y.sign(ctx, ref, keyInfo, prompt, pinPolicy, rand, digest, opts) - case strings.Contains(err.Error(), pcscResetCardErrMessage): - // Usually this error occurs on Windows, which times out exclusive transactions after 5 seconds without any activity, - // giving users only 5 seconds to answer PIN prompts. The PIN should now be cached locally, so we simply retry. - return y.sign(ctx, ref, keyInfo, prompt, pinPolicy, rand, digest, opts) default: return nil, trace.Wrap(err) } @@ -527,7 +523,8 @@ type sharedPIVConnection struct { // This value may change between OS's, or with other system changes. card string - // conn is a shared PIV connection. + // conn is a shared PIV connection. This connection is only guaranteed to be non-nil + // and concurrency safe within a call to [doWithSharedConn]. conn *piv.YubiKey // connMu is a RW mutex that protects conn. The conn is only opened/closed while under a full // lock, meaning that multiple callers can utilize the connection concurrently while under a @@ -536,50 +533,64 @@ type sharedPIVConnection struct { // connHolds is the number of active holds on the connection. It should be modified under // connMu.RLock to ensure a new hold isn't added for a closing connection. connHolds atomic.Int32 + // connHealthy signals whether the connection is healthy or needs to be reconnected. + connHealthy atomic.Bool // attestMu prevents signatures from occurring concurrently with an attestation // request, which would corrupt the resulting certificate. attestMu sync.RWMutex } +// doWithSharedConn holds a shared connection to perform the given function. func doWithSharedConn[T any](c *sharedPIVConnection, do func(*piv.YubiKey) (T, error)) (T, error) { nilT := *new(T) - conn, err := c.holdConn() - if err != nil { + c.connMu.RLock() + defer c.connMu.RUnlock() + + if err := c.holdConn(); err != nil { return nilT, trace.Wrap(err) } defer c.releaseConn() - // TODO: retry on pcsc reset err. - return do(conn) + t, err := do(c.conn) + + // Usually this error occurs on Windows, which times out exclusive transactions after 5 seconds without any activity, + // giving users only 5 seconds to answer PIN prompts. The PIN should now be cached locally, so we simply retry. + if err != nil && strings.Contains(err.Error(), pcscResetCardErrMessage) { + if err := c.reconnect(); err != nil { + return nilT, trace.Wrap(err) + } + + t, err = do(c.conn) + } + + return t, trace.Wrap(err) } // holdConn holds an existing shared connection, or opens and holds a new shared connection. // Unless holdConn returns an error, it must be followed by a call to releaseConn to ensure // the connection is closed once there are no remaining holds. -func (c *sharedPIVConnection) holdConn() (*piv.YubiKey, error) { - c.connMu.RLock() - defer c.connMu.RUnlock() - - if c.conn == nil { +// +// Must be called under [sharedPIVConnection.connMu.RLock]. +func (c *sharedPIVConnection) holdConn() error { + if c.conn == nil || !c.connHealthy.Load() { c.connMu.RUnlock() if err := c.connect(); err != nil { - return nil, trace.Wrap(err) + return trace.Wrap(err) } c.connMu.RLock() } c.connHolds.Add(1) - return c.conn, nil + return nil } // releaseConn releases a hold on a shared connection and, // if there are no remaining holds, closes the connection. +// +// Must be called under [sharedPIVConnection.connMu.RLock]. func (c *sharedPIVConnection) releaseConn() { - c.connMu.RLock() - defer c.connMu.RUnlock() - remaining := c.connHolds.Add(-1) // If there are no remaining holds on the connection, close it. @@ -598,6 +609,21 @@ func (c *sharedPIVConnection) releaseConn() { } } +// reconnect marks the connection as unhealthy and waits for a new connection. +// A new connection will not be created until all consumers of the unhealthy +// connection complete. reconnect supports multiple concurrent callers. +// +// Must be called under [sharedPIVConnection.connMu.RLock]. +func (c *sharedPIVConnection) reconnect() error { + // Prevent new callers from holding the unhealthy connection. + c.connHealthy.Store(false) + + c.connMu.RUnlock() + defer c.connMu.RLock() + + return c.connect() +} + // connect establishes a connection to a YubiKey PIV module and returns a release function. // The release function should be called to properly close the shared connection. // The connection is not immediately terminated, allowing other callers to @@ -608,8 +634,8 @@ func (c *sharedPIVConnection) connect() error { c.connMu.Lock() defer c.connMu.Unlock() - // Check if another goroutine was able to open a connection first. - if c.conn != nil { + // Check if there is an existing, healthy connection. + if c.conn != nil && c.connHealthy.Load() { return nil } @@ -646,24 +672,27 @@ func (c *sharedPIVConnection) connect() error { retryCtx, cancel := context.WithTimeout(ctx, time.Second) defer cancel() - err = linearRetry.For(retryCtx, tryConnect) + if err = linearRetry.For(retryCtx, tryConnect); err != nil { + // Using PIV synchronously causes issues since only one connection is allowed at a time. + // This shouldn't be an issue for `tsh` which primarily runs consecutively, but Teleport + // Connect works through callbacks, etc. and may try to open multiple connections at a time. + // If this error is being emitted more than rarely, the 1 second timeout may need to be increased. + // + // It's also possible that the user is running another PIV program, which may hold the PIV + // connection indefinitely (yubikey-agent). In this case, user action is necessary, so we + // alert them with this issue. + if trace.IsLimitExceeded(err) { + slog.WarnContext(ctx, "failed to connect to YubiKey as it is currently in use by another process. "+ + "This can occur when running multiple Teleport clients simultaneously, or running long lived PIV "+ + "applications like yubikey-agent. Try again once other PIV processes have completed.") + return trace.LimitExceeded("failed to connect to YubiKey as it is currently in use by another process") + } - // Using PIV synchronously causes issues since only one connection is allowed at a time. - // This shouldn't be an issue for `tsh` which primarily runs consecutively, but Teleport - // Connect works through callbacks, etc. and may try to open multiple connections at a time. - // If this error is being emitted more than rarely, the 1 second timeout may need to be increased. - // - // It's also possible that the user is running another PIV program, which may hold the PIV - // connection indefinitely (yubikey-agent). In this case, user action is necessary, so we - // alert them with this issue. - if trace.IsLimitExceeded(err) { - slog.WarnContext(ctx, "failed to connect to YubiKey as it is currently in use by another process. "+ - "This can occur when running multiple Teleport clients simultaneously, or running long lived PIV "+ - "applications like yubikey-agent. Try again once other PIV processes have completed.") - return trace.LimitExceeded("failed to connect to YubiKey as it is currently in use by another process") + return trace.Wrap(err) } - return trace.Wrap(err) + c.connHealthy.Store(true) + return nil } func (c *sharedPIVConnection) sign(ctx context.Context, ref *hardwarekey.PrivateKeyRef, auth piv.KeyAuth, rand io.Reader, digest []byte, opts crypto.SignerOpts) ([]byte, error) { From 1157cef26afc4d729ff05d052ee70b1bebfa8dc9 Mon Sep 17 00:00:00 2001 From: joerger Date: Tue, 13 May 2025 19:38:32 -0700 Subject: [PATCH 04/14] Add test; Fix issues. --- api/utils/keys/piv/service_test.go | 43 ++++++++++++++++++++++++++++++ api/utils/keys/piv/yubikey.go | 3 ++- 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/api/utils/keys/piv/service_test.go b/api/utils/keys/piv/service_test.go index 981707470a6d2..471a42842f7ab 100644 --- a/api/utils/keys/piv/service_test.go +++ b/api/utils/keys/piv/service_test.go @@ -22,11 +22,13 @@ import ( "crypto/x509/pkix" "fmt" "os" + "sync" "testing" "time" pivgo "github.com/go-piv/piv-go/piv" "github.com/gravitational/trace" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/gravitational/teleport/api/utils/keys" @@ -291,6 +293,47 @@ func TestPINCaching(t *testing.T) { require.Error(t, err) } +func TestConcurrentSignature(t *testing.T) { + // This test will overwrite any PIV data on the yubiKey. + if os.Getenv("TELEPORT_TEST_YUBIKEY_PIV") == "" { + t.Skipf("Skipping TestGenerateYubiKeyPrivateKey because TELEPORT_TEST_YUBIKEY_PIV is not set") + } + + ctx := context.Background() + promptReader := prompt.NewFakeReader() + prompt := hardwarekey.NewCLIPrompt(os.Stderr, promptReader) + s := piv.NewYubiKeyService(prompt) + + y, err := piv.FindYubiKey(0) + require.NoError(t, err) + + resetYubikey(t, y) + t.Cleanup(func() { resetYubikey(t, y) }) + + // Set pin. + const testPIN = "123123" + require.NoError(t, y.SetPIN(pivgo.DefaultPIN, testPIN)) + + promptReader.AddString(testPIN) + priv, err := keys.NewHardwarePrivateKey(ctx, s, hardwarekey.PrivateKeyConfig{ + // Use PIN policy to slow down the signatures a bit so that they are concurrent. + Policy: hardwarekey.PromptPolicyPIN, + }) + require.NoError(t, err) + + var wg sync.WaitGroup + for range 5 { + wg.Add(1) + go func() { + defer wg.Done() + err = priv.WarmupHardwareKey(ctx) + assert.NoError(t, err) + }() + } + + wg.Wait() +} + // resetYubikey connects to the first yubiKey and resets it to defaults. func resetYubikey(t *testing.T, y *piv.YubiKey) { t.Helper() diff --git a/api/utils/keys/piv/yubikey.go b/api/utils/keys/piv/yubikey.go index 1748389e7b7a5..6faa718e59319 100644 --- a/api/utils/keys/piv/yubikey.go +++ b/api/utils/keys/piv/yubikey.go @@ -577,6 +577,7 @@ func (c *sharedPIVConnection) holdConn() error { if c.conn == nil || !c.connHealthy.Load() { c.connMu.RUnlock() if err := c.connect(); err != nil { + c.connMu.RLock() return trace.Wrap(err) } c.connMu.RLock() @@ -602,7 +603,7 @@ func (c *sharedPIVConnection) releaseConn() { defer c.connMu.Unlock() // Double check that a new hold wasn't added while waiting for the full lock. - if c.connHolds.Load() != 0 { + if c.connHolds.Load() == 0 { c.conn.Close() c.conn = nil } From 2495d0003b0473871a0a4225651b8b4f72103812 Mon Sep 17 00:00:00 2001 From: joerger Date: Tue, 13 May 2025 19:51:31 -0700 Subject: [PATCH 05/14] Add debug log. --- api/utils/keys/piv/yubikey.go | 1 + 1 file changed, 1 insertion(+) diff --git a/api/utils/keys/piv/yubikey.go b/api/utils/keys/piv/yubikey.go index 6faa718e59319..3a55a66a8e985 100644 --- a/api/utils/keys/piv/yubikey.go +++ b/api/utils/keys/piv/yubikey.go @@ -558,6 +558,7 @@ func doWithSharedConn[T any](c *sharedPIVConnection, do func(*piv.YubiKey) (T, e // Usually this error occurs on Windows, which times out exclusive transactions after 5 seconds without any activity, // giving users only 5 seconds to answer PIN prompts. The PIN should now be cached locally, so we simply retry. if err != nil && strings.Contains(err.Error(), pcscResetCardErrMessage) { + slog.DebugContext(context.Background(), "smart card connection timed out, reconnecting", "error", err) if err := c.reconnect(); err != nil { return nilT, trace.Wrap(err) } From 670ce8fe5f5be59071783e66dbe6b8d393b14d3e Mon Sep 17 00:00:00 2001 From: joerger Date: Wed, 14 May 2025 11:12:39 -0700 Subject: [PATCH 06/14] Clean up locks. --- api/utils/keys/piv/yubikey.go | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/api/utils/keys/piv/yubikey.go b/api/utils/keys/piv/yubikey.go index 3a55a66a8e985..0f5d184d89bee 100644 --- a/api/utils/keys/piv/yubikey.go +++ b/api/utils/keys/piv/yubikey.go @@ -576,12 +576,9 @@ func doWithSharedConn[T any](c *sharedPIVConnection, do func(*piv.YubiKey) (T, e // Must be called under [sharedPIVConnection.connMu.RLock]. func (c *sharedPIVConnection) holdConn() error { if c.conn == nil || !c.connHealthy.Load() { - c.connMu.RUnlock() if err := c.connect(); err != nil { - c.connMu.RLock() return trace.Wrap(err) } - c.connMu.RLock() } c.connHolds.Add(1) @@ -598,16 +595,16 @@ func (c *sharedPIVConnection) releaseConn() { // If there are no remaining holds on the connection, close it. if remaining == 0 { c.connMu.RUnlock() - defer c.connMu.RLock() - c.connMu.Lock() - defer c.connMu.Unlock() // Double check that a new hold wasn't added while waiting for the full lock. if c.connHolds.Load() == 0 { c.conn.Close() c.conn = nil } + + c.connMu.Unlock() + c.connMu.RLock() } } @@ -619,10 +616,6 @@ func (c *sharedPIVConnection) releaseConn() { func (c *sharedPIVConnection) reconnect() error { // Prevent new callers from holding the unhealthy connection. c.connHealthy.Store(false) - - c.connMu.RUnlock() - defer c.connMu.RLock() - return c.connect() } @@ -632,8 +625,12 @@ func (c *sharedPIVConnection) reconnect() error { // use it before it's released. // The YubiKey PIV module itself takes some additional time to handle closed // connections, so we use a retry loop to give the PIV module time to close prior connections. +// +// Must be called under [sharedPIVConnection.connMu.RLock]. func (c *sharedPIVConnection) connect() error { + c.connMu.RUnlock() c.connMu.Lock() + defer c.connMu.RLock() defer c.connMu.Unlock() // Check if there is an existing, healthy connection. From 0633cfdc08dc1ebf1d0a4ecccb578545d58f8d05 Mon Sep 17 00:00:00 2001 From: joerger Date: Wed, 14 May 2025 19:05:48 -0700 Subject: [PATCH 07/14] Only prompt PIN on retry; Clean up prompt logic. --- api/utils/keys/piv/service.go | 2 +- api/utils/keys/piv/yubikey.go | 141 ++++++++++++++++------------------ 2 files changed, 69 insertions(+), 74 deletions(-) diff --git a/api/utils/keys/piv/service.go b/api/utils/keys/piv/service.go index 8ba0a6453eb08..0e2926d45f389 100644 --- a/api/utils/keys/piv/service.go +++ b/api/utils/keys/piv/service.go @@ -214,7 +214,7 @@ func (s *YubiKeyService) Sign(ctx context.Context, ref *hardwarekey.PrivateKeyRe } } - return y.signWithRetry(ctx, ref, keyInfo, s.getPrompt(), rand, digest, opts) + return y.signWithPINRetry(ctx, ref, keyInfo, s.getPrompt(), rand, digest, opts) } // TODO(Joerger): Re-attesting the key every time we decode a hardware key signer is very resource diff --git a/api/utils/keys/piv/yubikey.go b/api/utils/keys/piv/yubikey.go index 0f5d184d89bee..ef1480b1a32fa 100644 --- a/api/utils/keys/piv/yubikey.go +++ b/api/utils/keys/piv/yubikey.go @@ -63,10 +63,10 @@ type YubiKey struct { // pinCache can be used to skip PIN prompts for keys that have PIN caching enabled. pinCache *pinCache - // signPromptMu prevents prompting for PIN/touch repeatedly for concurrent signatures. + // promptMu prevents prompting for PIN/touch repeatedly for concurrent signatures. // TODO(Joerger): Rather than preventing concurrent signatures, we can make the // PIN and touch prompts durable to concurrent signatures. - signPromptMu sync.Mutex + promptMu sync.Mutex } // FindYubiKey finds a YubiKey PIV card by serial number. If the provided @@ -169,96 +169,87 @@ const ( pcscResetCardErrMessage = "the smart card has been reset, so any shared state information is invalid" ) -func (y *YubiKey) signWithRetry(ctx context.Context, ref *hardwarekey.PrivateKeyRef, keyInfo hardwarekey.ContextualKeyInfo, prompt hardwarekey.Prompt, rand io.Reader, digest []byte, opts crypto.SignerOpts) ([]byte, error) { - pinPolicy := piv.PINPolicyNever - if ref.Policy.PINRequired { - pinPolicy = piv.PINPolicyOnce - } - - // YubiKeys with firmware version 5.3.1 have a bug where insVerify(0x20, 0x00, 0x80, nil) - // clears the PIN cache instead of performing a non-mutable check. This causes the signature - // with pin policy "once" to fail unless PIN is provided for each call. We can avoid this bug - // by skipping the insVerify check and instead manually retrying with a PIN prompt only when - // the signature fails. - fw531 := piv.Version{Major: 5, Minor: 3, Patch: 1} - if pinPolicy == piv.PINPolicyOnce && y.version == fw531 { - // Set the keys PIN policy to never to skip the insVerify check. If PIN was provided in - // a previous recent call, the signature will succeed as expected of the "once" policy. - pinPolicy = piv.PINPolicyNever +func (y *YubiKey) signWithPINRetry(ctx context.Context, ref *hardwarekey.PrivateKeyRef, keyInfo hardwarekey.ContextualKeyInfo, prompt hardwarekey.Prompt, rand io.Reader, digest []byte, opts crypto.SignerOpts) ([]byte, error) { + // When using [piv.PINPolicyOnce], PIN is only required when it isn't cached in the PCSC + // transaction internally. The piv-go prompt logic attempts to check this requirement + // before prompting, which is generally workable. However, the PIN prompt logic is not + // flexible enough for the retry and PIN caching mechanisms supported in Teleport. As a + // result, we must first try signature without PIN and only prompt for PIN when we get a + // "security status not satisfied" error ([pcscResetCardErrMessage]). + // + // TODO(Joerger): Once https://github.com/go-piv/piv-go/pull/174 is merged upstream, we can + // check if PIN is required and verify PIN before attempting the signature. This is a more + // reliable method of checking the PIN requirement than the somewhat general auth error + // returned by the failed signature. + // IMPORTANT: Maintain the signature retry flow for firmware version 5.3.1, which has a bug + // with checking the PIN requirement - https://github.com/gravitational/teleport/pull/36427. + auth := piv.KeyAuth{ + PINPolicy: piv.PINPolicyNever, } - signature, err := y.sign(ctx, ref, keyInfo, prompt, pinPolicy, rand, digest, opts) + signature, err := y.sign(ctx, ref, keyInfo, prompt, auth, rand, digest, opts) switch { case err == nil: return signature, nil - case strings.Contains(err.Error(), pivGenericAuthErrCodeString): - // Force the signature to prompt for PIN, as something likely went wrong for the - // card to skip PIN verification in the first place (e.g. firmware bug). - pinPolicy = piv.PINPolicyAlways - return y.sign(ctx, ref, keyInfo, prompt, pinPolicy, rand, digest, opts) + case strings.Contains(err.Error(), pivGenericAuthErrCodeString) && ref.Policy.PINRequired: + pin, err := y.promptPIN(ctx, prompt, hardwarekey.PINRequired, keyInfo, ref.PINCacheTTL) + if err != nil { + return nil, trace.Wrap(err) + } + + // Setting the [piv.PINPolicyAlways] ensures that the PIN is used and skips + // the required check usually used with [piv.PINPolicyOnce]. + auth.PINPolicy = piv.PINPolicyAlways + auth.PIN = pin + return y.sign(ctx, ref, keyInfo, prompt, auth, rand, digest, opts) default: return nil, trace.Wrap(err) } } -func (y *YubiKey) sign(ctx context.Context, ref *hardwarekey.PrivateKeyRef, keyInfo hardwarekey.ContextualKeyInfo, prompt hardwarekey.Prompt, pinPolicy piv.PINPolicy, rand io.Reader, digest []byte, opts crypto.SignerOpts) ([]byte, error) { - ctx, cancel := context.WithCancelCause(ctx) - defer cancel(nil) +func (y *YubiKey) sign(ctx context.Context, ref *hardwarekey.PrivateKeyRef, keyInfo hardwarekey.ContextualKeyInfo, prompt hardwarekey.Prompt, auth piv.KeyAuth, rand io.Reader, digest []byte, opts crypto.SignerOpts) ([]byte, error) { + ctx, cancel := context.WithCancel(ctx) + defer cancel() - promptPIN := func() (string, error) { - return y.promptPIN(ctx, prompt, hardwarekey.PINRequired, keyInfo, ref.PINCacheTTL) + if ref.Policy.TouchRequired { + ctx = y.promptTouch(ctx, prompt, keyInfo) } - auth := piv.KeyAuth{ - PINPrompt: promptPIN, - PINPolicy: pinPolicy, - } + return y.conn.sign(ctx, ref, auth, rand, digest, opts) +} - y.signPromptMu.Lock() - defer y.signPromptMu.Unlock() +// promptTouch starts the touch prompt. The context returned is tied to the touch +// prompt so that if the user cancels the touch prompt it cancels the sign request. +func (y *YubiKey) promptTouch(ctx context.Context, prompt hardwarekey.Prompt, keyInfo hardwarekey.ContextualKeyInfo) context.Context { + ctx, cancel := context.WithCancelCause(ctx) - var touchPromptDelayTimer *time.Timer - if ref.Policy.TouchRequired { - // touch prompt delay is disrupted by pin prompts. To prevent misfired - // touch prompts, pause the timer for the duration of the pin prompt. - // - // TODO(Joerger): Once https://github.com/go-piv/piv-go/pull/174 is merged and we can verify PIN - // before even attempting to sign, this timer logic can be removed in favor of simple time.After. - touchPromptDelayTimer = time.NewTimer(signTouchPromptDelay) - defer touchPromptDelayTimer.Stop() - - auth.PINPrompt = func() (pin string, err error) { - if touchPromptDelayTimer != nil { - if touchPromptDelayTimer.Stop() { - // TODO(Joerger): Does the pin prompt delay the touch prompt even more than this? - defer touchPromptDelayTimer.Reset(signTouchPromptDelay) - } - } + // There is no built in mechanism to prompt for touch on demand, so we simply prompt for touch after + // a short duration in hopes of lining up with the actual YubiKey touch prompt (flashing key). In the + // case where touch is cached, the delay prevents the prompt from firing when it isn't needed. + go func() { + defer cancel(nil) - return promptPIN() - } + // Wait for any concurrent prompts to complete. If there is a concurrent touch prompt, + // or touch is otherwise provided in the meantime, we can skip the prompt below. + y.promptMu.Lock() + defer y.promptMu.Unlock() - // There is no built in mechanism to prompt for touch on demand, so we simply prompt for touch after - // a short duration in hopes of lining up with the actual YubiKey touch prompt (flashing key). In the - // case where touch is cached, the delay prevents the prompt from firing when it isn't needed. - go func() { - select { - case <-touchPromptDelayTimer.C: - err := prompt.Touch(ctx, keyInfo) - if err != nil { - // Cancel the entire function when an error occurs. - // This is typically used for aborting the prompt. - cancel(trace.Wrap(err)) - } - return - case <-ctx.Done(): - // touch cached, skip prompt. - return + select { + case <-time.After(signTouchPromptDelay): + err := prompt.Touch(ctx, keyInfo) + if err != nil { + // Cancel the entire function when an error occurs. + // This is typically used for aborting the prompt. + cancel(trace.Wrap(err)) } - }() - } + return + case <-ctx.Done(): + // touch cached, skip prompt. + return + } + }() - return y.conn.sign(ctx, ref, auth, rand, digest, opts) + return ctx } // Reset resets the YubiKey PIV module to default settings. @@ -454,6 +445,7 @@ func (y *YubiKey) checkOrSetPIN(ctx context.Context, prompt hardwarekey.Prompt, // the pin cache mutex or the exclusive PC/SC transaction. const pinPromptTimeout = time.Minute +// promptPIN prompts for PIN. If PIN caching is enabled, it verifies and caches the PIN for future calls. func (y *YubiKey) promptPIN(ctx context.Context, prompt hardwarekey.Prompt, requirement hardwarekey.PINPromptRequirement, keyInfo hardwarekey.ContextualKeyInfo, pinCacheTTL time.Duration) (string, error) { y.pinCache.mu.Lock() defer y.pinCache.mu.Unlock() @@ -466,6 +458,9 @@ func (y *YubiKey) promptPIN(ctx context.Context, prompt hardwarekey.Prompt, requ ctx, cancel := context.WithTimeout(ctx, pinPromptTimeout) defer cancel() + y.promptMu.Lock() + defer y.promptMu.Unlock() + pin, err := prompt.AskPIN(ctx, requirement, keyInfo) if err != nil { return "", trace.Wrap(err) From 4c28d5d5175f2488aa44ba668e102182d0e26828 Mon Sep 17 00:00:00 2001 From: joerger Date: Wed, 14 May 2025 19:13:53 -0700 Subject: [PATCH 08/14] Add todo for upstream PR. --- api/utils/keys/piv/yubikey.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/api/utils/keys/piv/yubikey.go b/api/utils/keys/piv/yubikey.go index ef1480b1a32fa..a436e0bbda784 100644 --- a/api/utils/keys/piv/yubikey.go +++ b/api/utils/keys/piv/yubikey.go @@ -552,6 +552,10 @@ func doWithSharedConn[T any](c *sharedPIVConnection, do func(*piv.YubiKey) (T, e // Usually this error occurs on Windows, which times out exclusive transactions after 5 seconds without any activity, // giving users only 5 seconds to answer PIN prompts. The PIN should now be cached locally, so we simply retry. + // + // TODO(Joerger): Once https://github.com/go-piv/piv-go/pull/173 is merged, this reconnection will happen internally + // to the piv-go library. Since it simply reconnects instead of closing and re-opening the transaction from scratch, + // the internal reconnection should be concurrency safe. if err != nil && strings.Contains(err.Error(), pcscResetCardErrMessage) { slog.DebugContext(context.Background(), "smart card connection timed out, reconnecting", "error", err) if err := c.reconnect(); err != nil { From d84f90abd6ade9154f98f25aeeb7c5a197027f11 Mon Sep 17 00:00:00 2001 From: joerger Date: Fri, 16 May 2025 12:26:17 -0700 Subject: [PATCH 09/14] Prevent PIV operations that don't support concurrency from being run concurrently; add test. --- api/utils/keys/piv/yubikey.go | 199 ++++++++++++++++++++++------- api/utils/keys/piv/yubikey_test.go | 139 ++++++++++++++++++++ 2 files changed, 289 insertions(+), 49 deletions(-) create mode 100644 api/utils/keys/piv/yubikey_test.go diff --git a/api/utils/keys/piv/yubikey.go b/api/utils/keys/piv/yubikey.go index a436e0bbda784..2047b583c9476 100644 --- a/api/utils/keys/piv/yubikey.go +++ b/api/utils/keys/piv/yubikey.go @@ -530,18 +530,17 @@ type sharedPIVConnection struct { connHolds atomic.Int32 // connHealthy signals whether the connection is healthy or needs to be reconnected. connHealthy atomic.Bool - - // attestMu prevents signatures from occurring concurrently with an attestation - // request, which would corrupt the resulting certificate. - attestMu sync.RWMutex } -// doWithSharedConn holds a shared connection to perform the given function. -func doWithSharedConn[T any](c *sharedPIVConnection, do func(*piv.YubiKey) (T, error)) (T, error) { +// doWithExclusiveConn holds a shared connection with an exclusive lock to perform the given function. +// This is used for operations which cannot be run concurrently with other operations. +func doWithExclusiveConn[T any](c *sharedPIVConnection, do func(*piv.YubiKey) (T, error)) (T, error) { nilT := *new(T) - c.connMu.RLock() - defer c.connMu.RUnlock() + // Hold the shared connection mutex so that we are not attempting to open an + // exclusive connection when the connection is currently being used in a shared context. + c.connMu.Lock() + defer c.connMu.Unlock() if err := c.holdConn(); err != nil { return nilT, trace.Wrap(err) @@ -558,7 +557,7 @@ func doWithSharedConn[T any](c *sharedPIVConnection, do func(*piv.YubiKey) (T, e // the internal reconnection should be concurrency safe. if err != nil && strings.Contains(err.Error(), pcscResetCardErrMessage) { slog.DebugContext(context.Background(), "smart card connection timed out, reconnecting", "error", err) - if err := c.reconnect(); err != nil { + if err := c.reconnectConn(); err != nil { return nilT, trace.Wrap(err) } @@ -569,10 +568,10 @@ func doWithSharedConn[T any](c *sharedPIVConnection, do func(*piv.YubiKey) (T, e } // holdConn holds an existing shared connection, or opens and holds a new shared connection. -// Unless holdConn returns an error, it must be followed by a call to releaseConn to ensure +// Unless holdSharedConn returns an error, it must be followed by a call to releaseConn to ensure // the connection is closed once there are no remaining holds. // -// Must be called under [sharedPIVConnection.connMu.RLock]. +// Must be called under [sharedPIVConnection.connMu.Lock]. func (c *sharedPIVConnection) holdConn() error { if c.conn == nil || !c.connHealthy.Load() { if err := c.connect(); err != nil { @@ -587,56 +586,164 @@ func (c *sharedPIVConnection) holdConn() error { // releaseConn releases a hold on a shared connection and, // if there are no remaining holds, closes the connection. // -// Must be called under [sharedPIVConnection.connMu.RLock]. +// # Must be called after each call to +// +// Must be called under [sharedPIVConnection.connMu.Lock]. func (c *sharedPIVConnection) releaseConn() { remaining := c.connHolds.Add(-1) // If there are no remaining holds on the connection, close it. if remaining == 0 { - c.connMu.RUnlock() - c.connMu.Lock() - // Double check that a new hold wasn't added while waiting for the full lock. - if c.connHolds.Load() == 0 { + if c.connHolds.Load() == 0 && c.conn != nil { c.conn.Close() c.conn = nil } - - c.connMu.Unlock() - c.connMu.RLock() } } -// reconnect marks the connection as unhealthy and waits for a new connection. -// A new connection will not be created until all consumers of the unhealthy -// connection complete. reconnect supports multiple concurrent callers. +// reconnectConn marks the connection as unhealthy and attempts to open a new connection. // -// Must be called under [sharedPIVConnection.connMu.RLock]. -func (c *sharedPIVConnection) reconnect() error { - // Prevent new callers from holding the unhealthy connection. +// Must be called under [sharedPIVConnection.connMu.Lock]. +func (c *sharedPIVConnection) reconnectConn() error { c.connHealthy.Store(false) return c.connect() } -// connect establishes a connection to a YubiKey PIV module and returns a release function. -// The release function should be called to properly close the shared connection. -// The connection is not immediately terminated, allowing other callers to -// use it before it's released. -// The YubiKey PIV module itself takes some additional time to handle closed -// connections, so we use a retry loop to give the PIV module time to close prior connections. +// doWithSharedConn holds a shared connection to perform the given function. +func doWithSharedConn[T any](c *sharedPIVConnection, do func(*piv.YubiKey) (T, error)) (T, error) { + nilT := *new(T) + + c.connMu.RLock() + defer c.connMu.RUnlock() + + if err := c.holdSharedConn(); err != nil { + return nilT, trace.Wrap(err) + } + defer func() { + c.releaseSharedConn() + }() + + t, err := do(c.conn) + + // Usually this error occurs on Windows, which times out exclusive transactions after 5 seconds without any activity, + // giving users only 5 seconds to answer PIN prompts. The PIN should now be cached locally, so we simply retry. + // + // TODO(Joerger): Once https://github.com/go-piv/piv-go/pull/173 is merged, this reconnection will happen internally + // to the piv-go library. Since it simply reconnects instead of closing and re-opening the transaction from scratch, + // the internal reconnection should be concurrency safe. + if err != nil && strings.Contains(err.Error(), pcscResetCardErrMessage) { + slog.DebugContext(context.Background(), "smart card connection timed out, reconnecting", "error", err) + if err := c.reconnectSharedConn(); err != nil { + return nilT, trace.Wrap(err) + } + + t, err = do(c.conn) + } + + return t, trace.Wrap(err) +} + +// holdSharedConn holds an existing shared connection, or opens and holds a new shared connection. +// Unless holdSharedConn returns an error, it must be followed by a call to releaseConn to ensure +// the connection is closed once there are no remaining holds. // // Must be called under [sharedPIVConnection.connMu.RLock]. -func (c *sharedPIVConnection) connect() error { +func (c *sharedPIVConnection) holdSharedConn() error { + c.connHolds.Add(1) + + // If there is not an open, healthy connection, open one. + if c.conn == nil || !c.connHealthy.Load() { + // Exchange RLock for Lock. + c.connMu.RUnlock() + c.connMu.Lock() + defer c.connMu.RLock() + defer c.connMu.Unlock() + + if err := c.connect(); err != nil { + return trace.Wrap(err) + } + } + + return nil +} + +// releaseConn releases a hold on a shared connection and, +// if there are no remaining holds, closes the connection. +// +// # Must be called after each call to +// +// Must be called under [sharedPIVConnection.connMu.Lock]. +func (c *sharedPIVConnection) releaseSharedConn() { + remaining := c.connHolds.Add(-1) + + // If there are no remaining holds on the connection, close it. + if remaining == 0 { + // Exchange RLock for Lock. + c.connMu.RUnlock() + c.connMu.Lock() + defer c.connMu.RLock() + defer c.connMu.Unlock() + + // Double check that a new hold wasn't added while waiting for the full lock + // or that another release didn't already close the connection. + if c.connHolds.Load() == 0 && c.conn != nil { + c.conn.Close() + c.conn = nil + } + } +} + +// reconnectSharedConn marks the connection as unhealthy and waits for a new connection. +// A new connection will not be created until all consumers of the unhealthy +// connection complete. reconnectSharedConn supports multiple concurrent callers. +// +// Must be called under [sharedPIVConnection.connMu.RLock]. +func (c *sharedPIVConnection) reconnectSharedConn() error { + // Prevent concurrent shared conn callers from holding the unhealthy connection. + c.connHealthy.Store(false) + + // Exchange RLock for Lock. c.connMu.RUnlock() c.connMu.Lock() defer c.connMu.RLock() defer c.connMu.Unlock() + return c.connect() +} + +// connect opens a new shared connection to a YubiKey PIV module. +// +// A call to connect must be followed by a call to [sharedPIVConnection.releaseConnection] +// in order to ensure the connection is freed for other PIV processes when not in use by this +// process. +// +// Must be called under [sharedPIVConnection.connMu.Lock]. +func (c *sharedPIVConnection) connect() error { // Check if there is an existing, healthy connection. if c.conn != nil && c.connHealthy.Load() { return nil } + conn, err := openYubiKey(c.card) + if err != nil { + return trace.Wrap(err) + } + + c.conn = conn + c.connHealthy.Store(true) + return nil +} + +// openYubiKey opens an exclusive connection to the YubiKey PIV module. + +// connect establishes a connection to a YubiKey PIV module and returns a release function. +// The release function should be called to properly close the shared connection. +// The connection is not immediately terminated, allowing other callers to +// use it before it's released. +// The YubiKey PIV module itself takes some additional time to handle closed +// connections, so we use a retry loop to give the PIV module time to close prior connections. +func openYubiKey(card string) (*piv.YubiKey, error) { ctx := context.Background() linearRetry, err := retryutils.NewLinear(retryutils.LinearConfig{ // If a PIV connection has just been closed, it take ~5 ms to become @@ -650,7 +757,7 @@ func (c *sharedPIVConnection) connect() error { Max: time.Millisecond * 50, }) if err != nil { - return trace.Wrap(err) + return nil, trace.Wrap(err) } isRetryError := func(err error) bool { @@ -658,8 +765,9 @@ func (c *sharedPIVConnection) connect() error { return strings.Contains(err.Error(), retryError) } + var conn *piv.YubiKey tryConnect := func() error { - c.conn, err = piv.Open(c.card) + conn, err = piv.Open(card) if err != nil && !isRetryError(err) { return retryutils.PermanentRetryError(err) } @@ -683,14 +791,13 @@ func (c *sharedPIVConnection) connect() error { slog.WarnContext(ctx, "failed to connect to YubiKey as it is currently in use by another process. "+ "This can occur when running multiple Teleport clients simultaneously, or running long lived PIV "+ "applications like yubikey-agent. Try again once other PIV processes have completed.") - return trace.LimitExceeded("failed to connect to YubiKey as it is currently in use by another process") + return nil, trace.LimitExceeded("failed to connect to YubiKey as it is currently in use by another process") } - return trace.Wrap(err) + return nil, trace.Wrap(err) } - c.connHealthy.Store(true) - return nil + return conn, nil } func (c *sharedPIVConnection) sign(ctx context.Context, ref *hardwarekey.PrivateKeyRef, auth piv.KeyAuth, rand io.Reader, digest []byte, opts crypto.SignerOpts) ([]byte, error) { @@ -699,9 +806,6 @@ func (c *sharedPIVConnection) sign(ctx context.Context, ref *hardwarekey.Private return nil, trace.Wrap(err) } - c.attestMu.RLock() - defer c.attestMu.RUnlock() - return doWithSharedConn(c, func(yk *piv.YubiKey) ([]byte, error) { // Prepare the key and perform the signature with the same connection. // Closing the connection in between breaks the underlying PIV handle. @@ -773,7 +877,7 @@ func (c *sharedPIVConnection) reset() error { } func (c *sharedPIVConnection) setCertificate(key [24]byte, slot piv.Slot, cert *x509.Certificate) error { - _, err := doWithSharedConn(c, func(yk *piv.YubiKey) (any, error) { + _, err := doWithExclusiveConn(c, func(yk *piv.YubiKey) (any, error) { err := yk.SetCertificate(key, slot, cert) return nil, trace.Wrap(err) }) @@ -781,31 +885,28 @@ func (c *sharedPIVConnection) setCertificate(key [24]byte, slot piv.Slot, cert * } func (c *sharedPIVConnection) certificate(slot piv.Slot) (*x509.Certificate, error) { - return doWithSharedConn(c, func(yk *piv.YubiKey) (*x509.Certificate, error) { + return doWithExclusiveConn(c, func(yk *piv.YubiKey) (*x509.Certificate, error) { cert, err := yk.Certificate(slot) return cert, trace.Wrap(err) }) } func (c *sharedPIVConnection) generateKey(key [24]byte, slot piv.Slot, opts piv.Key) (crypto.PublicKey, error) { - return doWithSharedConn(c, func(yk *piv.YubiKey) (crypto.PublicKey, error) { + return doWithExclusiveConn(c, func(yk *piv.YubiKey) (crypto.PublicKey, error) { pub, err := yk.GenerateKey(key, slot, opts) return pub, trace.Wrap(err) }) } func (c *sharedPIVConnection) attest(slot piv.Slot) (*x509.Certificate, error) { - c.attestMu.Lock() - defer c.attestMu.Unlock() - - return doWithSharedConn(c, func(yk *piv.YubiKey) (*x509.Certificate, error) { + return doWithExclusiveConn(c, func(yk *piv.YubiKey) (*x509.Certificate, error) { cert, err := yk.Attest(slot) return cert, trace.Wrap(err) }) } func (c *sharedPIVConnection) attestationCertificate() (*x509.Certificate, error) { - return doWithSharedConn(c, func(yk *piv.YubiKey) (*x509.Certificate, error) { + return doWithExclusiveConn(c, func(yk *piv.YubiKey) (*x509.Certificate, error) { cert, err := yk.AttestationCertificate() return cert, trace.Wrap(err) }) diff --git a/api/utils/keys/piv/yubikey_test.go b/api/utils/keys/piv/yubikey_test.go new file mode 100644 index 0000000000000..1f90935ad11e7 --- /dev/null +++ b/api/utils/keys/piv/yubikey_test.go @@ -0,0 +1,139 @@ +//go:build piv + +// Copyright 2025 Gravitational, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package piv + +import ( + "context" + "crypto" + "crypto/x509/pkix" + "os" + "sync" + "testing" + + "github.com/go-piv/piv-go/piv" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/gravitational/teleport/api/utils/keys/hardwarekey" +) + +func TestConcurrentOperations(t *testing.T) { + // This test will overwrite any PIV data on the yubiKey. + if os.Getenv("TELEPORT_TEST_YUBIKEY_PIV") == "" { + t.Skipf("Skipping TestGenerateYubiKeyPrivateKey because TELEPORT_TEST_YUBIKEY_PIV is not set") + } + + y, err := FindYubiKey(0) + require.NoError(t, err) + + y.Reset() + t.Cleanup(func() { y.Reset() }) + + usedSlot := piv.SlotAuthentication + ref, err := y.generatePrivateKey(usedSlot, hardwarekey.PromptPolicyNone, hardwarekey.SignatureAlgorithmEC256, 0) + require.NoError(t, err) + require.NotNil(t, ref) + + unusedSlot := piv.SlotCardAuthentication + cert, err := SelfSignedMetadataCertificate(pkix.Name{}) + require.NoError(t, err) + + // Run each PIV command several times concurrently to ensure the concurrency + // protections in place properly protect each operations, especially those + // which do not support concurrency. + var wg sync.WaitGroup + for range 5 { + wg.Add(1) + go func() { + defer wg.Done() + _, err := y.conn.getSerialNumber() + assert.NoError(t, err, "getSerialNumber") + }() + wg.Add(1) + go func() { + defer wg.Done() + _, err := y.conn.sign(context.Background(), ref, piv.KeyAuth{PINPolicy: piv.PINPolicyNever}, nil, make([]byte, 100), crypto.Hash(0)) + assert.NoError(t, err, "sign") + }() + wg.Add(1) + go func() { + defer wg.Done() + _, err := y.conn.getVersion() + assert.NoError(t, err, "getVersion") + }() + wg.Add(1) + go func() { + defer wg.Done() + err := y.conn.setCertificate(piv.DefaultManagementKey, unusedSlot, cert) + assert.NoError(t, err, "setCertificate") + }() + wg.Add(1) + go func() { + defer wg.Done() + _, err := y.conn.certificate(usedSlot) + assert.NoError(t, err, "certificate") + }() + wg.Add(1) + go func() { + defer wg.Done() + _, err := y.conn.generateKey(piv.DefaultManagementKey, unusedSlot, piv.Key{ + Algorithm: piv.AlgorithmEC256, + TouchPolicy: piv.TouchPolicyNever, + PINPolicy: piv.PINPolicyNever, + }) + assert.NoError(t, err, "generateKey") + }() + wg.Add(1) + go func() { + defer wg.Done() + _, err := y.conn.attest(usedSlot) + assert.NoError(t, err, "attest") + }() + wg.Add(1) + go func() { + defer wg.Done() + _, err := y.conn.attestationCertificate() + assert.NoError(t, err, "attestationCertificate") + }() + wg.Add(1) + go func() { + defer wg.Done() + err := y.conn.setPIN(piv.DefaultPIN, piv.DefaultPIN) + assert.NoError(t, err, "setPIN") + }() + wg.Add(1) + go func() { + defer wg.Done() + err := y.conn.setPUK(piv.DefaultPUK, piv.DefaultPUK) + assert.NoError(t, err, "setPUK") + }() + wg.Add(1) + go func() { + defer wg.Done() + err := y.conn.unblock(piv.DefaultPUK, piv.DefaultPIN) + assert.NoError(t, err, "unblock") + }() + wg.Add(1) + go func() { + defer wg.Done() + err := y.conn.verifyPIN(piv.DefaultPIN) + assert.NoError(t, err, "verifyPIN") + }() + } + + wg.Wait() +} From 91846dac76647380414b1a1897a32032f2c94251 Mon Sep 17 00:00:00 2001 From: joerger Date: Fri, 16 May 2025 12:28:51 -0700 Subject: [PATCH 10/14] - Revert connection sharing refactors. - Add exclusiveOperationMu mutex and test. --- api/utils/keys/piv/yubikey.go | 491 +++++++++++++--------------------- 1 file changed, 180 insertions(+), 311 deletions(-) diff --git a/api/utils/keys/piv/yubikey.go b/api/utils/keys/piv/yubikey.go index 2047b583c9476..132cc71f49614 100644 --- a/api/utils/keys/piv/yubikey.go +++ b/api/utils/keys/piv/yubikey.go @@ -29,11 +29,9 @@ import ( "errors" "fmt" "io" - "log/slog" "math/big" "strings" "sync" - "sync/atomic" "time" "github.com/go-piv/piv-go/piv" @@ -158,15 +156,6 @@ const ( // For generic auth errors, such as when PIN is not provided, the smart card returns the error code 0x6982. // The piv-go library wraps error codes like this with a user readable message: "security status not satisfied". pivGenericAuthErrCodeString = "6982" - - // If a pcsc transaction is closed by the pcsc daemon, all operations will result in the SCARD_W_RESET_CARD error code, - // which the piv-go library replaces with the following error message. This error can be handled by starting a new - // transactions or reconnecting. - // - // See https://github.com/go-piv/piv-go/pull/173 for more details. - // - // TODO(Joerger): Once ^ is merged and released upstream, remove this adhoc retry. - pcscResetCardErrMessage = "the smart card has been reset, so any shared state information is invalid" ) func (y *YubiKey) signWithPINRetry(ctx context.Context, ref *hardwarekey.PrivateKeyRef, keyInfo hardwarekey.ContextualKeyInfo, prompt hardwarekey.Prompt, rand io.Reader, digest []byte, opts crypto.SignerOpts) ([]byte, error) { @@ -175,7 +164,7 @@ func (y *YubiKey) signWithPINRetry(ctx context.Context, ref *hardwarekey.Private // before prompting, which is generally workable. However, the PIN prompt logic is not // flexible enough for the retry and PIN caching mechanisms supported in Teleport. As a // result, we must first try signature without PIN and only prompt for PIN when we get a - // "security status not satisfied" error ([pcscResetCardErrMessage]). + // "security status not satisfied" error ([pivGenericAuthErrCodeString]). // // TODO(Joerger): Once https://github.com/go-piv/piv-go/pull/174 is merged upstream, we can // check if PIN is required and verify PIN before attempting the signature. This is a more @@ -518,233 +507,42 @@ type sharedPIVConnection struct { // This value may change between OS's, or with other system changes. card string - // conn is a shared PIV connection. This connection is only guaranteed to be non-nil - // and concurrency safe within a call to [doWithSharedConn]. - conn *piv.YubiKey - // connMu is a RW mutex that protects conn. The conn is only opened/closed while under a full - // lock, meaning that multiple callers can utilize the connection concurrently while under a - // read lock without the risk of the connection being closed partway through a PIV command. - connMu sync.RWMutex - // connHolds is the number of active holds on the connection. It should be modified under - // connMu.RLock to ensure a new hold isn't added for a closing connection. - connHolds atomic.Int32 - // connHealthy signals whether the connection is healthy or needs to be reconnected. - connHealthy atomic.Bool -} - -// doWithExclusiveConn holds a shared connection with an exclusive lock to perform the given function. -// This is used for operations which cannot be run concurrently with other operations. -func doWithExclusiveConn[T any](c *sharedPIVConnection, do func(*piv.YubiKey) (T, error)) (T, error) { - nilT := *new(T) - - // Hold the shared connection mutex so that we are not attempting to open an - // exclusive connection when the connection is currently being used in a shared context. - c.connMu.Lock() - defer c.connMu.Unlock() - - if err := c.holdConn(); err != nil { - return nilT, trace.Wrap(err) - } - defer c.releaseConn() - - t, err := do(c.conn) - - // Usually this error occurs on Windows, which times out exclusive transactions after 5 seconds without any activity, - // giving users only 5 seconds to answer PIN prompts. The PIN should now be cached locally, so we simply retry. - // - // TODO(Joerger): Once https://github.com/go-piv/piv-go/pull/173 is merged, this reconnection will happen internally - // to the piv-go library. Since it simply reconnects instead of closing and re-opening the transaction from scratch, - // the internal reconnection should be concurrency safe. - if err != nil && strings.Contains(err.Error(), pcscResetCardErrMessage) { - slog.DebugContext(context.Background(), "smart card connection timed out, reconnecting", "error", err) - if err := c.reconnectConn(); err != nil { - return nilT, trace.Wrap(err) - } - - t, err = do(c.conn) - } - - return t, trace.Wrap(err) -} - -// holdConn holds an existing shared connection, or opens and holds a new shared connection. -// Unless holdSharedConn returns an error, it must be followed by a call to releaseConn to ensure -// the connection is closed once there are no remaining holds. -// -// Must be called under [sharedPIVConnection.connMu.Lock]. -func (c *sharedPIVConnection) holdConn() error { - if c.conn == nil || !c.connHealthy.Load() { - if err := c.connect(); err != nil { - return trace.Wrap(err) - } - } + // conn is the shared PIV connection. + conn *piv.YubiKey + mu sync.Mutex + activeConnections int - c.connHolds.Add(1) - return nil + // exclusiveOperationMu is used to ensure that PIV operations that don't + // support concurrency are not run concurrently. + exclusiveOperationMu sync.RWMutex } -// releaseConn releases a hold on a shared connection and, -// if there are no remaining holds, closes the connection. -// -// # Must be called after each call to -// -// Must be called under [sharedPIVConnection.connMu.Lock]. -func (c *sharedPIVConnection) releaseConn() { - remaining := c.connHolds.Add(-1) - - // If there are no remaining holds on the connection, close it. - if remaining == 0 { - // Double check that a new hold wasn't added while waiting for the full lock. - if c.connHolds.Load() == 0 && c.conn != nil { - c.conn.Close() - c.conn = nil - } - } -} - -// reconnectConn marks the connection as unhealthy and attempts to open a new connection. -// -// Must be called under [sharedPIVConnection.connMu.Lock]. -func (c *sharedPIVConnection) reconnectConn() error { - c.connHealthy.Store(false) - return c.connect() -} - -// doWithSharedConn holds a shared connection to perform the given function. -func doWithSharedConn[T any](c *sharedPIVConnection, do func(*piv.YubiKey) (T, error)) (T, error) { - nilT := *new(T) - - c.connMu.RLock() - defer c.connMu.RUnlock() - - if err := c.holdSharedConn(); err != nil { - return nilT, trace.Wrap(err) - } - defer func() { - c.releaseSharedConn() - }() - - t, err := do(c.conn) - - // Usually this error occurs on Windows, which times out exclusive transactions after 5 seconds without any activity, - // giving users only 5 seconds to answer PIN prompts. The PIN should now be cached locally, so we simply retry. - // - // TODO(Joerger): Once https://github.com/go-piv/piv-go/pull/173 is merged, this reconnection will happen internally - // to the piv-go library. Since it simply reconnects instead of closing and re-opening the transaction from scratch, - // the internal reconnection should be concurrency safe. - if err != nil && strings.Contains(err.Error(), pcscResetCardErrMessage) { - slog.DebugContext(context.Background(), "smart card connection timed out, reconnecting", "error", err) - if err := c.reconnectSharedConn(); err != nil { - return nilT, trace.Wrap(err) - } - - t, err = do(c.conn) - } - - return t, trace.Wrap(err) -} - -// holdSharedConn holds an existing shared connection, or opens and holds a new shared connection. -// Unless holdSharedConn returns an error, it must be followed by a call to releaseConn to ensure -// the connection is closed once there are no remaining holds. -// -// Must be called under [sharedPIVConnection.connMu.RLock]. -func (c *sharedPIVConnection) holdSharedConn() error { - c.connHolds.Add(1) - - // If there is not an open, healthy connection, open one. - if c.conn == nil || !c.connHealthy.Load() { - // Exchange RLock for Lock. - c.connMu.RUnlock() - c.connMu.Lock() - defer c.connMu.RLock() - defer c.connMu.Unlock() - - if err := c.connect(); err != nil { - return trace.Wrap(err) - } - } +// connect establishes a connection to a YubiKey PIV module and returns a release function. +// The release function should be called to properly close the shared connection. +// The connection is not immediately terminated, allowing other callers to +// use it before it's released. +// The YubiKey PIV module itself takes some additional time to handle closed +// connections, so we use a retry loop to give the PIV module time to close prior connections. +func (c *sharedPIVConnection) connect() (func(), error) { + c.mu.Lock() + defer c.mu.Unlock() - return nil -} + release := func() { + c.mu.Lock() + defer c.mu.Unlock() -// releaseConn releases a hold on a shared connection and, -// if there are no remaining holds, closes the connection. -// -// # Must be called after each call to -// -// Must be called under [sharedPIVConnection.connMu.Lock]. -func (c *sharedPIVConnection) releaseSharedConn() { - remaining := c.connHolds.Add(-1) - - // If there are no remaining holds on the connection, close it. - if remaining == 0 { - // Exchange RLock for Lock. - c.connMu.RUnlock() - c.connMu.Lock() - defer c.connMu.RLock() - defer c.connMu.Unlock() - - // Double check that a new hold wasn't added while waiting for the full lock - // or that another release didn't already close the connection. - if c.connHolds.Load() == 0 && c.conn != nil { + c.activeConnections-- + if c.activeConnections == 0 { c.conn.Close() c.conn = nil } } -} - -// reconnectSharedConn marks the connection as unhealthy and waits for a new connection. -// A new connection will not be created until all consumers of the unhealthy -// connection complete. reconnectSharedConn supports multiple concurrent callers. -// -// Must be called under [sharedPIVConnection.connMu.RLock]. -func (c *sharedPIVConnection) reconnectSharedConn() error { - // Prevent concurrent shared conn callers from holding the unhealthy connection. - c.connHealthy.Store(false) - - // Exchange RLock for Lock. - c.connMu.RUnlock() - c.connMu.Lock() - defer c.connMu.RLock() - defer c.connMu.Unlock() - - return c.connect() -} - -// connect opens a new shared connection to a YubiKey PIV module. -// -// A call to connect must be followed by a call to [sharedPIVConnection.releaseConnection] -// in order to ensure the connection is freed for other PIV processes when not in use by this -// process. -// -// Must be called under [sharedPIVConnection.connMu.Lock]. -func (c *sharedPIVConnection) connect() error { - // Check if there is an existing, healthy connection. - if c.conn != nil && c.connHealthy.Load() { - return nil - } - conn, err := openYubiKey(c.card) - if err != nil { - return trace.Wrap(err) + if c.conn != nil { + c.activeConnections++ + return release, nil } - c.conn = conn - c.connHealthy.Store(true) - return nil -} - -// openYubiKey opens an exclusive connection to the YubiKey PIV module. - -// connect establishes a connection to a YubiKey PIV module and returns a release function. -// The release function should be called to properly close the shared connection. -// The connection is not immediately terminated, allowing other callers to -// use it before it's released. -// The YubiKey PIV module itself takes some additional time to handle closed -// connections, so we use a retry loop to give the PIV module time to close prior connections. -func openYubiKey(card string) (*piv.YubiKey, error) { - ctx := context.Background() linearRetry, err := retryutils.NewLinear(retryutils.LinearConfig{ // If a PIV connection has just been closed, it take ~5 ms to become // available to new connections. For this reason, we initially wait a @@ -760,25 +558,23 @@ func openYubiKey(card string) (*piv.YubiKey, error) { return nil, trace.Wrap(err) } + // Backoff and retry for up to 1 second. + retryCtx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + isRetryError := func(err error) bool { const retryError = "connecting to smart card: the smart card cannot be accessed because of other connections outstanding" return strings.Contains(err.Error(), retryError) } - var conn *piv.YubiKey - tryConnect := func() error { - conn, err = piv.Open(card) + err = linearRetry.For(retryCtx, func() error { + c.conn, err = piv.Open(c.card) if err != nil && !isRetryError(err) { return retryutils.PermanentRetryError(err) } return trace.Wrap(err) - } - - // Backoff and retry for up to 1 second. - retryCtx, cancel := context.WithTimeout(ctx, time.Second) - defer cancel() - - if err = linearRetry.For(retryCtx, tryConnect); err != nil { + }) + if trace.IsLimitExceeded(err) { // Using PIV synchronously causes issues since only one connection is allowed at a time. // This shouldn't be an issue for `tsh` which primarily runs consecutively, but Teleport // Connect works through callbacks, etc. and may try to open multiple connections at a time. @@ -787,17 +583,13 @@ func openYubiKey(card string) (*piv.YubiKey, error) { // It's also possible that the user is running another PIV program, which may hold the PIV // connection indefinitely (yubikey-agent). In this case, user action is necessary, so we // alert them with this issue. - if trace.IsLimitExceeded(err) { - slog.WarnContext(ctx, "failed to connect to YubiKey as it is currently in use by another process. "+ - "This can occur when running multiple Teleport clients simultaneously, or running long lived PIV "+ - "applications like yubikey-agent. Try again once other PIV processes have completed.") - return nil, trace.LimitExceeded("failed to connect to YubiKey as it is currently in use by another process") - } - + return nil, trace.LimitExceeded("could not connect to YubiKey as another application is using it. Please try again once the program that uses the YubiKey, such as yubikey-agent is closed") + } else if err != nil { return nil, trace.Wrap(err) } - return conn, nil + c.activeConnections++ + return release, nil } func (c *sharedPIVConnection) sign(ctx context.Context, ref *hardwarekey.PrivateKeyRef, auth piv.KeyAuth, rand io.Reader, digest []byte, opts crypto.SignerOpts) ([]byte, error) { @@ -806,21 +598,28 @@ func (c *sharedPIVConnection) sign(ctx context.Context, ref *hardwarekey.Private return nil, trace.Wrap(err) } - return doWithSharedConn(c, func(yk *piv.YubiKey) ([]byte, error) { - // Prepare the key and perform the signature with the same connection. - // Closing the connection in between breaks the underlying PIV handle. - priv, err := yk.PrivateKey(pivSlot, ref.PublicKey, auth) - if err != nil { - return nil, trace.Wrap(err) - } + release, err := c.connect() + if err != nil { + return nil, trace.Wrap(err) + } + defer release() - signer, ok := priv.(crypto.Signer) - if !ok { - return nil, trace.BadParameter("private key type %T does not implement crypto.Signer", priv) - } + c.exclusiveOperationMu.RLock() + defer c.exclusiveOperationMu.RUnlock() - return abandonableSign(ctx, signer, rand, digest, opts) - }) + // Prepare the key and perform the signature with the same connection. + // Closing the connection in between breaks the underlying PIV handle. + priv, err := c.conn.PrivateKey(pivSlot, ref.PublicKey, auth) + if err != nil { + return nil, trace.Wrap(err) + } + + signer, ok := priv.(crypto.Signer) + if !ok { + return nil, trace.BadParameter("private key type %T does not implement crypto.Signer", priv) + } + + return abandonableSign(ctx, signer, rand, digest, opts) } // abandonableSign is a wrapper around signer.Sign. @@ -856,92 +655,162 @@ func abandonableSign(ctx context.Context, signer crypto.Signer, rand io.Reader, } func (c *sharedPIVConnection) getSerialNumber() (uint32, error) { - return doWithSharedConn(c, func(yk *piv.YubiKey) (uint32, error) { - serial, err := yk.Serial() - return serial, trace.Wrap(err) - }) + release, err := c.connect() + if err != nil { + return 0, trace.Wrap(err) + } + defer release() + + c.exclusiveOperationMu.RLock() + defer c.exclusiveOperationMu.RUnlock() + + serial, err := c.conn.Serial() + return serial, trace.Wrap(err) } func (c *sharedPIVConnection) getVersion() (piv.Version, error) { - return doWithSharedConn(c, func(yk *piv.YubiKey) (piv.Version, error) { - return yk.Version(), nil - }) + release, err := c.connect() + if err != nil { + return piv.Version{}, trace.Wrap(err) + } + defer release() + + // Version only requires an open connection, so we don't need to lock on [c.exclusiveOperationMu]. + return c.conn.Version(), nil } func (c *sharedPIVConnection) reset() error { - _, err := doWithSharedConn(c, func(yk *piv.YubiKey) (any, error) { - err := yk.Reset() - return nil, trace.Wrap(err) - }) - return trace.Wrap(err) + release, err := c.connect() + if err != nil { + return trace.Wrap(err) + } + defer release() + + c.exclusiveOperationMu.Lock() + defer c.exclusiveOperationMu.Unlock() + + return trace.Wrap(c.conn.Reset()) } func (c *sharedPIVConnection) setCertificate(key [24]byte, slot piv.Slot, cert *x509.Certificate) error { - _, err := doWithExclusiveConn(c, func(yk *piv.YubiKey) (any, error) { - err := yk.SetCertificate(key, slot, cert) - return nil, trace.Wrap(err) - }) - return trace.Wrap(err) + release, err := c.connect() + if err != nil { + return trace.Wrap(err) + } + defer release() + + c.exclusiveOperationMu.Lock() + defer c.exclusiveOperationMu.Unlock() + + return trace.Wrap(c.conn.SetCertificate(key, slot, cert)) } func (c *sharedPIVConnection) certificate(slot piv.Slot) (*x509.Certificate, error) { - return doWithExclusiveConn(c, func(yk *piv.YubiKey) (*x509.Certificate, error) { - cert, err := yk.Certificate(slot) - return cert, trace.Wrap(err) - }) + release, err := c.connect() + if err != nil { + return nil, trace.Wrap(err) + } + defer release() + + c.exclusiveOperationMu.Lock() + defer c.exclusiveOperationMu.Unlock() + + cert, err := c.conn.Certificate(slot) + return cert, trace.Wrap(err) } func (c *sharedPIVConnection) generateKey(key [24]byte, slot piv.Slot, opts piv.Key) (crypto.PublicKey, error) { - return doWithExclusiveConn(c, func(yk *piv.YubiKey) (crypto.PublicKey, error) { - pub, err := yk.GenerateKey(key, slot, opts) - return pub, trace.Wrap(err) - }) + release, err := c.connect() + if err != nil { + return nil, trace.Wrap(err) + } + defer release() + + c.exclusiveOperationMu.Lock() + defer c.exclusiveOperationMu.Unlock() + + pubKey, err := c.conn.GenerateKey(key, slot, opts) + return pubKey, trace.Wrap(err) } func (c *sharedPIVConnection) attest(slot piv.Slot) (*x509.Certificate, error) { - return doWithExclusiveConn(c, func(yk *piv.YubiKey) (*x509.Certificate, error) { - cert, err := yk.Attest(slot) - return cert, trace.Wrap(err) - }) + release, err := c.connect() + if err != nil { + return nil, trace.Wrap(err) + } + defer release() + + c.exclusiveOperationMu.Lock() + defer c.exclusiveOperationMu.Unlock() + + cert, err := c.conn.Attest(slot) + return cert, trace.Wrap(err) } func (c *sharedPIVConnection) attestationCertificate() (*x509.Certificate, error) { - return doWithExclusiveConn(c, func(yk *piv.YubiKey) (*x509.Certificate, error) { - cert, err := yk.AttestationCertificate() - return cert, trace.Wrap(err) - }) + release, err := c.connect() + if err != nil { + return nil, trace.Wrap(err) + } + defer release() + + c.exclusiveOperationMu.Lock() + defer c.exclusiveOperationMu.Unlock() + + cert, err := c.conn.AttestationCertificate() + return cert, trace.Wrap(err) } func (c *sharedPIVConnection) setPIN(oldPIN string, newPIN string) error { - _, err := doWithSharedConn(c, func(yk *piv.YubiKey) (any, error) { - err := yk.SetPIN(oldPIN, newPIN) - return nil, trace.Wrap(err) - }) - return trace.Wrap(err) + release, err := c.connect() + if err != nil { + return trace.Wrap(err) + } + defer release() + + c.exclusiveOperationMu.RLock() + defer c.exclusiveOperationMu.RUnlock() + + return trace.Wrap(c.conn.SetPIN(oldPIN, newPIN)) } func (c *sharedPIVConnection) setPUK(oldPUK string, newPUK string) error { - _, err := doWithSharedConn(c, func(yk *piv.YubiKey) (any, error) { - err := yk.SetPUK(oldPUK, newPUK) - return nil, trace.Wrap(err) - }) - return trace.Wrap(err) + release, err := c.connect() + if err != nil { + return trace.Wrap(err) + } + defer release() + + c.exclusiveOperationMu.RLock() + defer c.exclusiveOperationMu.RUnlock() + + return trace.Wrap(c.conn.SetPUK(oldPUK, newPUK)) } func (c *sharedPIVConnection) unblock(puk string, newPIN string) error { - _, err := doWithSharedConn(c, func(yk *piv.YubiKey) (any, error) { - err := yk.Unblock(puk, newPIN) - return nil, trace.Wrap(err) - }) - return trace.Wrap(err) + release, err := c.connect() + if err != nil { + return trace.Wrap(err) + } + defer release() + + c.exclusiveOperationMu.RLock() + defer c.exclusiveOperationMu.RUnlock() + + return trace.Wrap(c.conn.Unblock(puk, newPIN)) } func (c *sharedPIVConnection) verifyPIN(pin string) error { - _, err := doWithSharedConn(c, func(yk *piv.YubiKey) (any, error) { - err := yk.VerifyPIN(pin) - return nil, trace.Wrap(err) - }) - return trace.Wrap(err) + release, err := c.connect() + if err != nil { + return trace.Wrap(err) + } + defer release() + + c.exclusiveOperationMu.RLock() + defer c.exclusiveOperationMu.RUnlock() + + return trace.Wrap(c.conn.VerifyPIN(pin)) } func parsePIVSlot(slotKey hardwarekey.PIVSlotKey) (piv.Slot, error) { From 5931d11f1cb01f58ea23fd4419dfa0c7b5777a6e Mon Sep 17 00:00:00 2001 From: joerger Date: Mon, 19 May 2025 11:35:01 -0700 Subject: [PATCH 11/14] Address comments; Move touch prompt into abandonableSign for clearer synchronous logic. --- api/utils/keys/piv/service.go | 2 +- api/utils/keys/piv/yubikey.go | 119 +++++++++++++++-------------- api/utils/keys/piv/yubikey_test.go | 2 +- 3 files changed, 62 insertions(+), 61 deletions(-) diff --git a/api/utils/keys/piv/service.go b/api/utils/keys/piv/service.go index 0e2926d45f389..157570088e3fd 100644 --- a/api/utils/keys/piv/service.go +++ b/api/utils/keys/piv/service.go @@ -214,7 +214,7 @@ func (s *YubiKeyService) Sign(ctx context.Context, ref *hardwarekey.PrivateKeyRe } } - return y.signWithPINRetry(ctx, ref, keyInfo, s.getPrompt(), rand, digest, opts) + return y.sign(ctx, ref, keyInfo, s.getPrompt(), rand, digest, opts) } // TODO(Joerger): Re-attesting the key every time we decode a hardware key signer is very resource diff --git a/api/utils/keys/piv/yubikey.go b/api/utils/keys/piv/yubikey.go index 132cc71f49614..3a989db8c4871 100644 --- a/api/utils/keys/piv/yubikey.go +++ b/api/utils/keys/piv/yubikey.go @@ -158,7 +158,7 @@ const ( pivGenericAuthErrCodeString = "6982" ) -func (y *YubiKey) signWithPINRetry(ctx context.Context, ref *hardwarekey.PrivateKeyRef, keyInfo hardwarekey.ContextualKeyInfo, prompt hardwarekey.Prompt, rand io.Reader, digest []byte, opts crypto.SignerOpts) ([]byte, error) { +func (y *YubiKey) sign(ctx context.Context, ref *hardwarekey.PrivateKeyRef, keyInfo hardwarekey.ContextualKeyInfo, prompt hardwarekey.Prompt, rand io.Reader, digest []byte, opts crypto.SignerOpts) ([]byte, error) { // When using [piv.PINPolicyOnce], PIN is only required when it isn't cached in the PCSC // transaction internally. The piv-go prompt logic attempts to check this requirement // before prompting, which is generally workable. However, the PIN prompt logic is not @@ -176,7 +176,14 @@ func (y *YubiKey) signWithPINRetry(ctx context.Context, ref *hardwarekey.Private PINPolicy: piv.PINPolicyNever, } - signature, err := y.sign(ctx, ref, keyInfo, prompt, auth, rand, digest, opts) + var promptTouch promptTouch + if ref.Policy.TouchRequired { + promptTouch = func(ctx context.Context) error { + return y.promptTouch(ctx, prompt, keyInfo) + } + } + + signature, err := y.conn.sign(ctx, ref, auth, promptTouch, rand, digest, opts) switch { case err == nil: return signature, nil @@ -190,57 +197,12 @@ func (y *YubiKey) signWithPINRetry(ctx context.Context, ref *hardwarekey.Private // the required check usually used with [piv.PINPolicyOnce]. auth.PINPolicy = piv.PINPolicyAlways auth.PIN = pin - return y.sign(ctx, ref, keyInfo, prompt, auth, rand, digest, opts) + return y.conn.sign(ctx, ref, auth, promptTouch, rand, digest, opts) default: return nil, trace.Wrap(err) } } -func (y *YubiKey) sign(ctx context.Context, ref *hardwarekey.PrivateKeyRef, keyInfo hardwarekey.ContextualKeyInfo, prompt hardwarekey.Prompt, auth piv.KeyAuth, rand io.Reader, digest []byte, opts crypto.SignerOpts) ([]byte, error) { - ctx, cancel := context.WithCancel(ctx) - defer cancel() - - if ref.Policy.TouchRequired { - ctx = y.promptTouch(ctx, prompt, keyInfo) - } - - return y.conn.sign(ctx, ref, auth, rand, digest, opts) -} - -// promptTouch starts the touch prompt. The context returned is tied to the touch -// prompt so that if the user cancels the touch prompt it cancels the sign request. -func (y *YubiKey) promptTouch(ctx context.Context, prompt hardwarekey.Prompt, keyInfo hardwarekey.ContextualKeyInfo) context.Context { - ctx, cancel := context.WithCancelCause(ctx) - - // There is no built in mechanism to prompt for touch on demand, so we simply prompt for touch after - // a short duration in hopes of lining up with the actual YubiKey touch prompt (flashing key). In the - // case where touch is cached, the delay prevents the prompt from firing when it isn't needed. - go func() { - defer cancel(nil) - - // Wait for any concurrent prompts to complete. If there is a concurrent touch prompt, - // or touch is otherwise provided in the meantime, we can skip the prompt below. - y.promptMu.Lock() - defer y.promptMu.Unlock() - - select { - case <-time.After(signTouchPromptDelay): - err := prompt.Touch(ctx, keyInfo) - if err != nil { - // Cancel the entire function when an error occurs. - // This is typically used for aborting the prompt. - cancel(trace.Wrap(err)) - } - return - case <-ctx.Done(): - // touch cached, skip prompt. - return - } - }() - - return ctx -} - // Reset resets the YubiKey PIV module to default settings. func (y *YubiKey) Reset() error { err := y.conn.reset() @@ -333,18 +295,18 @@ func (y *YubiKey) checkCertificate(slot piv.Slot) error { return nil } -type cryptoPublicKeyI interface { +type cryptoPublicKey interface { Equal(x crypto.PublicKey) bool } // getPublicKey gets a public key from the given PIV slot. -func (y *YubiKey) getPublicKey(slot piv.Slot) (cryptoPublicKeyI, error) { +func (y *YubiKey) getPublicKey(slot piv.Slot) (cryptoPublicKey, error) { slotCert, err := y.conn.attest(slot) if err != nil { return nil, trace.Wrap(err, "failed to get slot cert on PIV slot 0x%x", slot.Key) } - slotPub, ok := slotCert.PublicKey.(cryptoPublicKeyI) + slotPub, ok := slotCert.PublicKey.(cryptoPublicKey) if !ok { return nil, trace.BadParameter("expected crypto.PublicKey but got %T", slotCert.PublicKey) } @@ -466,6 +428,13 @@ func (y *YubiKey) promptPIN(ctx context.Context, prompt hardwarekey.Prompt, requ return pin, nil } +func (y *YubiKey) promptTouch(ctx context.Context, prompt hardwarekey.Prompt, keyInfo hardwarekey.ContextualKeyInfo) error { + y.promptMu.Lock() + defer y.promptMu.Unlock() + + return prompt.Touch(ctx, keyInfo) +} + func (y *YubiKey) setPINAndPUKFromDefault(ctx context.Context, prompt hardwarekey.Prompt, keyInfo hardwarekey.ContextualKeyInfo, pinCacheTTL time.Duration) (string, error) { y.pinCache.mu.Lock() defer y.pinCache.mu.Unlock() @@ -592,7 +561,9 @@ func (c *sharedPIVConnection) connect() (func(), error) { return release, nil } -func (c *sharedPIVConnection) sign(ctx context.Context, ref *hardwarekey.PrivateKeyRef, auth piv.KeyAuth, rand io.Reader, digest []byte, opts crypto.SignerOpts) ([]byte, error) { +type promptTouch func(ctx context.Context) error + +func (c *sharedPIVConnection) sign(ctx context.Context, ref *hardwarekey.PrivateKeyRef, auth piv.KeyAuth, promptTouch promptTouch, rand io.Reader, digest []byte, opts crypto.SignerOpts) ([]byte, error) { pivSlot, err := parsePIVSlot(ref.SlotKey) if err != nil { return nil, trace.Wrap(err) @@ -619,16 +590,43 @@ func (c *sharedPIVConnection) sign(ctx context.Context, ref *hardwarekey.Private return nil, trace.BadParameter("private key type %T does not implement crypto.Signer", priv) } - return abandonableSign(ctx, signer, rand, digest, opts) + return abandonableSign(ctx, signer, promptTouch, rand, digest, opts) } -// abandonableSign is a wrapper around signer.Sign. -// It enhances the functionality of signer.Sign by allowing the caller to stop -// waiting for the result if the provided context is canceled. -// It is especially important for WarmupHardwareKey, -// where waiting for the user providing a PIN/touch could block program termination. +// abandonableSign extends [sharedPIVConnection.sign] to handle context, allowing the +// caller to stop waiting for the result if the provided context is canceled. +// +// This is necessary for hardware key signatures which sometimes require touch from the +// user to complete, which can block program termination. +// // Important: this function only abandons the signer.Sign result, doesn't cancel it. -func abandonableSign(ctx context.Context, signer crypto.Signer, rand io.Reader, digest []byte, opts crypto.SignerOpts) ([]byte, error) { +func abandonableSign(ctx context.Context, signer crypto.Signer, promptTouch promptTouch, rand io.Reader, digest []byte, opts crypto.SignerOpts) ([]byte, error) { + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + touchErrC := make(chan error) + if promptTouch != nil { + go func() { + defer close(touchErrC) + + // There is no built in mechanism to prompt for touch on demand, so we simply prompt for touch after + // a short duration in hopes of lining up with the actual YubiKey touch prompt (flashing key). In the + // case where touch is cached, the delay prevents the prompt from firing when it isn't needed. + select { + case <-time.After(signTouchPromptDelay): + err := promptTouch(ctx) + if err != nil { + select { + case <-ctx.Done(): + case touchErrC <- err: + } + } + case <-ctx.Done(): + // prompt cached or signature canceled, skip prompt. + } + }() + } + type signResult struct { signature []byte err error @@ -636,6 +634,7 @@ func abandonableSign(ctx context.Context, signer crypto.Signer, rand io.Reader, signResultCh := make(chan signResult) go func() { + defer close(signResultCh) if err := ctx.Err(); err != nil { return } @@ -651,6 +650,8 @@ func abandonableSign(ctx context.Context, signer crypto.Signer, rand io.Reader, return nil, ctx.Err() case result := <-signResultCh: return result.signature, trace.Wrap(result.err) + case err := <-touchErrC: + return nil, trace.Wrap(err) } } diff --git a/api/utils/keys/piv/yubikey_test.go b/api/utils/keys/piv/yubikey_test.go index 1f90935ad11e7..0715910564efa 100644 --- a/api/utils/keys/piv/yubikey_test.go +++ b/api/utils/keys/piv/yubikey_test.go @@ -66,7 +66,7 @@ func TestConcurrentOperations(t *testing.T) { wg.Add(1) go func() { defer wg.Done() - _, err := y.conn.sign(context.Background(), ref, piv.KeyAuth{PINPolicy: piv.PINPolicyNever}, nil, make([]byte, 100), crypto.Hash(0)) + _, err := y.conn.sign(context.Background(), ref, piv.KeyAuth{PINPolicy: piv.PINPolicyNever}, nil, nil, make([]byte, 100), crypto.Hash(0)) assert.NoError(t, err, "sign") }() wg.Add(1) From 9c434b4a9821f85d46eb674ea44efeabca2c8743 Mon Sep 17 00:00:00 2001 From: joerger Date: Mon, 19 May 2025 13:25:46 -0700 Subject: [PATCH 12/14] Restore cancel cause behavior. --- api/utils/keys/piv/yubikey.go | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/api/utils/keys/piv/yubikey.go b/api/utils/keys/piv/yubikey.go index 3a989db8c4871..f309e97507965 100644 --- a/api/utils/keys/piv/yubikey.go +++ b/api/utils/keys/piv/yubikey.go @@ -601,14 +601,11 @@ func (c *sharedPIVConnection) sign(ctx context.Context, ref *hardwarekey.Private // // Important: this function only abandons the signer.Sign result, doesn't cancel it. func abandonableSign(ctx context.Context, signer crypto.Signer, promptTouch promptTouch, rand io.Reader, digest []byte, opts crypto.SignerOpts) ([]byte, error) { - ctx, cancel := context.WithCancel(ctx) - defer cancel() + ctx, cancel := context.WithCancelCause(ctx) + defer cancel(nil) - touchErrC := make(chan error) if promptTouch != nil { go func() { - defer close(touchErrC) - // There is no built in mechanism to prompt for touch on demand, so we simply prompt for touch after // a short duration in hopes of lining up with the actual YubiKey touch prompt (flashing key). In the // case where touch is cached, the delay prevents the prompt from firing when it isn't needed. @@ -616,10 +613,7 @@ func abandonableSign(ctx context.Context, signer crypto.Signer, promptTouch prom case <-time.After(signTouchPromptDelay): err := promptTouch(ctx) if err != nil { - select { - case <-ctx.Done(): - case touchErrC <- err: - } + cancel(err) } case <-ctx.Done(): // prompt cached or signature canceled, skip prompt. @@ -650,8 +644,6 @@ func abandonableSign(ctx context.Context, signer crypto.Signer, promptTouch prom return nil, ctx.Err() case result := <-signResultCh: return result.signature, trace.Wrap(result.err) - case err := <-touchErrC: - return nil, trace.Wrap(err) } } From b1f60caf355050f2e4a4f82ed5b6a378034802f8 Mon Sep 17 00:00:00 2001 From: joerger Date: Wed, 21 May 2025 09:48:46 -0700 Subject: [PATCH 13/14] Address signature concurrency comments. --- api/utils/keys/piv/yubikey.go | 47 +++++++++++++++++------------------ 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/api/utils/keys/piv/yubikey.go b/api/utils/keys/piv/yubikey.go index f309e97507965..2f59237feaa1a 100644 --- a/api/utils/keys/piv/yubikey.go +++ b/api/utils/keys/piv/yubikey.go @@ -601,8 +601,24 @@ func (c *sharedPIVConnection) sign(ctx context.Context, ref *hardwarekey.Private // // Important: this function only abandons the signer.Sign result, doesn't cancel it. func abandonableSign(ctx context.Context, signer crypto.Signer, promptTouch promptTouch, rand io.Reader, digest []byte, opts crypto.SignerOpts) ([]byte, error) { - ctx, cancel := context.WithCancelCause(ctx) - defer cancel(nil) + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + // Since this function isn't fully synchronous, the goroutines below may outlive + // the function call, especially sign which cannot be stopped once started. We + // use buffered channels to ensure these goroutines can send even with no receiver + // to avoid leaking. + signatureC := make(chan []byte, 1) + errC := make(chan error, 2) + + go func() { + signature, err := signer.Sign(rand, digest, opts) + if err != nil { + errC <- err + return + } + signatureC <- signature + }() if promptTouch != nil { go func() { @@ -611,9 +627,8 @@ func abandonableSign(ctx context.Context, signer crypto.Signer, promptTouch prom // case where touch is cached, the delay prevents the prompt from firing when it isn't needed. select { case <-time.After(signTouchPromptDelay): - err := promptTouch(ctx) - if err != nil { - cancel(err) + if err := promptTouch(ctx); err != nil { + errC <- promptTouch(ctx) } case <-ctx.Done(): // prompt cached or signature canceled, skip prompt. @@ -621,27 +636,11 @@ func abandonableSign(ctx context.Context, signer crypto.Signer, promptTouch prom }() } - type signResult struct { - signature []byte - err error - } - - signResultCh := make(chan signResult) - go func() { - defer close(signResultCh) - if err := ctx.Err(); err != nil { - return - } - signature, err := signer.Sign(rand, digest, opts) - select { - case <-ctx.Done(): - case signResultCh <- signResult{signature: signature, err: trace.Wrap(err)}: - } - }() - select { case <-ctx.Done(): - return nil, ctx.Err() + return nil, trace.Wrap(ctx.Err()) + case err := <-errC: + return nil, trace.Wrap(err) case result := <-signResultCh: return result.signature, trace.Wrap(result.err) } From 5c00ea2cd8fb9835b1e4dc0941c4032086ad2de8 Mon Sep 17 00:00:00 2001 From: joerger Date: Wed, 21 May 2025 12:33:20 -0700 Subject: [PATCH 14/14] Fix signature channel. --- api/utils/keys/piv/yubikey.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/utils/keys/piv/yubikey.go b/api/utils/keys/piv/yubikey.go index 2f59237feaa1a..bb1eccdc0e0fb 100644 --- a/api/utils/keys/piv/yubikey.go +++ b/api/utils/keys/piv/yubikey.go @@ -641,8 +641,8 @@ func abandonableSign(ctx context.Context, signer crypto.Signer, promptTouch prom return nil, trace.Wrap(ctx.Err()) case err := <-errC: return nil, trace.Wrap(err) - case result := <-signResultCh: - return result.signature, trace.Wrap(result.err) + case signature := <-signatureC: + return signature, nil } }