diff --git a/stdlib/public/core/StringIndexValidation.swift b/stdlib/public/core/StringIndexValidation.swift
index 93e363ff0589c..f1f84ee50f955 100644
--- a/stdlib/public/core/StringIndexValidation.swift
+++ b/stdlib/public/core/StringIndexValidation.swift
@@ -400,20 +400,3 @@ extension _StringGuts {
       scalarAlign(validateInclusiveSubscalarIndex_5_7(i)))
   }
 }
-
-// Word index validation (String)
-extension _StringGuts {
-  internal func validateWordIndex(
-    _ i: String.Index
-  ) -> String.Index {
-    return roundDownToNearestWord(scalarAlign(validateSubscalarIndex(i)))
-  }
-
-  internal func validateInclusiveWordIndex(
-    _ i: String.Index
-  ) -> String.Index {
-    return roundDownToNearestWord(
-      scalarAlign(validateInclusiveSubscalarIndex(i))
-    )
-  }
-}
diff --git a/stdlib/public/core/StringWordBreaking.swift b/stdlib/public/core/StringWordBreaking.swift
index bc98e590e5d76..75564907315d2 100644
--- a/stdlib/public/core/StringWordBreaking.swift
+++ b/stdlib/public/core/StringWordBreaking.swift
@@ -1,683 +1,773 @@
 //===----------------------------------------------------------------------===//
 //
+//
 // This source file is part of the Swift.org open source project
 //
-// Copyright (c) 2022 Apple Inc. and the Swift project authors
+// Copyright (c) 2022 - 2025 Apple Inc. and the Swift project authors
 // Licensed under Apache License v2.0 with Runtime Library Exception
 //
 // See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
 //
 //===----------------------------------------------------------------------===//
 
-extension _StringGuts {
-  internal func roundDownToNearestWord(
-    _ i: String.Index
-  ) -> String.Index {
-    _internalInvariant(i._encodedOffset <= count)
-
-    let offset = i._encodedOffset
-
-    if offset == 0 || offset == count {
-      return i
+extension Unicode {
+  /// A state machine for recognizing word boundaries in an arbitrary series of
+  /// Unicode scalars, based on the specification in [Unicode Annex
+  /// #29](https://unicode.org/reports/tr29/#Word_Boundary_Rules).
+  ///
+  /// The text segmentation algorithm is not stable, and it allows implementers
+  /// to tailor it to their needs. Accordingly, reported word boundaries may
+  /// vary in arbitrary ways between Unicode implementations and system
+  /// configurations, including between versions of the Swift Standard Library.
+  ///
+  /// To implement the rules as specified, this low-level construct has built-in
+  /// support to defer making a decision on whether there is a word boundary
+  /// between two Unicode scalars until more scalars are fed to the state
+  /// machine; that is to say, it implements limited lookahead. The API surface
+  /// only allows one such candidate position to exist at any given time -- this
+  /// corresponds to allowing looking ahead up to the next word boundary. (In
+  /// the unlikely case the rules evolve to require looking ahead even further,
+  /// then this interface will need to be modified or replaced accordingly.)
+  ///
+  /// To detect word breaks in a sequence of Unicode scalars, feed each of them
+  /// to the recognizer by calling its `hasBreak(before:)` method. The method
+  /// indicates if there is a word break preceding the given scalar, or at a
+  /// previously reported candidate position. When every scalar in the text has
+  /// been fed to the recognizer, the `hasCandidateBreakAtEnd()` method should
+  /// be called to determine if there is a break at the last reported candidate
+  /// position. There is also an (implicit) word break at the end of text
+  /// position.
+  ///
+  /// Note that `_WordRecognizer` does not take or return actual text positions
+  /// (such as a string index); it is entirely independent of the underlying
+  /// text representation, and it is able to work with any container model. (For
+  /// example, it can be used to incrementally recognize word breaks in UTF-16
+  /// data streamed from a network connection, or iterate over word boundaries
+  /// in piecewise contiguous UTF-8 buffers stored in a rope data structure.) Of
+  /// course, it is also possible to use it to detect word breaks in a standard
+  /// `String` value, such as done by this example function:
+  ///
+  ///     func collectWordBreaks(in string: String) -> [String.Index] {
+  ///        var result: [String.Index] = []
+  ///        var recognizer = Unicode._WordRecognizer()
+  ///        var candidate = string.startIndex
+  ///        for i in string.unicodeScalars.indices {
+  ///           let r = recognizer.hasBreak(before: string.unicodeScalars[i])
+  ///           if r.setCandidate { candidate = i }
+  ///           if r.breakAtCandidate { result.append(candidate) }
+  ///           if r.breakHere { result.append(i) }
+  ///        }
+  ///        if recognizer.hasCandidateBreakAtEnd() {
+  ///           result.append(candidate)
+  ///        }
+  ///        result.append(string.endIndex)
+  ///        return result
+  ///     }
+  ///
+  /// When used this way, the state machine is able to efficiently iterate over
+  /// all breaks within the string by visiting each scalar exactly once, without
+  /// any backtracking.
+  ///
+  /// It is also possible to discard the recognizer after each detected
+  /// boundary, reinitializing it from scratch for each iteration step:
+  ///
+  ///     func wordBreak(
+  ///        after knownBreak: String.Index, in string: String
+  ///     ) -> String.Index {
+  ///        var recognizer = Unicode._WordRecognizer(after: string.unicodeScalars[knownBreak])
+  ///        var i = string.unicodeScalars.index(after: knownBreak)
+  ///        var candidate = i
+  ///        while i < string.endIndex {
+  ///           let r = recognizer.hasBreak(before: string.unicodeScalars[i])
+  ///           if r.setCandidate { candidate = i }
+  ///           if r.breakAtCandidate { return candidate }
+  ///           if r.breakHere { return i }
+  ///           string.unicodeScalars.formIndex(after: &i)
+  ///        }
+  ///        if recognizer.hasCandidateBreakAtEnd() {
+  ///           return candidate
+  ///        }
+  ///        return i
+  ///     }
+  ///
+  /// However, note that iterating this way is less efficient, because it
+  /// discards lookahead information -- some scalars will be processed multiple
+  /// times. The rules are carefully constructed so that the algorithm reports
+  /// the same word boundaries whether or not recognizer state is preserved.
+  @available(StdlibDeploymentTarget 6.3, *)
+  public // Core primitive
+  struct _WordRecognizer: Sendable {
+    // FIXME: We also need proper public API for this
+
+    /// The last scalar that was fed to `hasBreak(before:)`.
+    var _prevScalar: Unicode.Scalar
+    /// The cached word break property of `_prevScalar`.
+    var _prevCategory: _WordBreakProperty
+    /// The word break property of the last preceding scalar that wasn't ignored by rule WB4.
+    var _baseCategory: _WordBreakProperty
+    /// The current state of the recognizer.
+    var _state: _State
+
+    /// Initialize a new word recognizer at the _start of text_ (sot)
+    /// position.
+    ///
+    /// The resulting state machine will report a word break before the first
+    /// scalar that is fed to it.
+    public init() {
+      // To avoid having to handle the empty case specially, we use LF as the
+      // placeholder before the first scalar. Per WB3a, we always produce a break
+      // following a line feed.
+      _baseCategory = .newlineCRLF
+      _prevScalar = Unicode.Scalar(0x0A as UInt8)
+      _prevCategory = .newlineCRLF
+      _state = .ordinary
     }
 
-    let start = previousWordIndex(endingAt: offset)
-    let end = nextWordIndex(startingAt: start)
-    _internalInvariant(offset <= end, "Word breaking inconsistency")
-
-    if offset == end {
-      return i
+    /// Initialize a new word recognizer with a state after a previously
+    /// recognized word boundary.
+    ///
+    /// This enables clients to iterate over word boundaries without maintaining
+    /// a persistent recognizer state. However, iterating this way may result in
+    /// an arbitrarily large amount of duplicate work. (This is because the word
+    /// segmentation algorithm requires looking ahead by as much as a full
+    /// word's worth of Unicode scalars, and when the old recognizer is
+    /// discarded, the information thus collected has to be recreated from
+    /// scratch.) Whenever possible, it is therefore preferable to iterate over
+    /// multiple word boundaries using a single recognizer instance.
+    ///
+    /// - Parameter scalar: The Unicode scalar immediately following a known
+    ///    word boundary position.
+    public init(after scalar: Unicode.Scalar) {
+      // We assume that the state machine provides stable results even if the
+      // start position was a retroactive candidate.
+      _prevScalar = scalar
+      _prevCategory = Unicode._WordBreakProperty(from: scalar)
+      _baseCategory = _prevCategory
+      _state = .ordinary
     }
-
-    return String.Index(_encodedOffset: start)
   }
+}
 
-  @inline(never)
-  @_effects(releasenone)
-  internal func nextWordIndex(startingAt i: Int) -> Int {
-    if _slowPath(isForeign) {
-      return _foreignNextWordIndex(startingAt: i)
-    }
-
-    return unsafe withFastUTF8 { utf8 in
-      nextWordBoundary(startingAt: i) {
-        _internalInvariant($0 >= 0)
-
-        guard $0 < utf8.count else {
-          return nil
-        }
-
-        let (scalar, len) = unsafe _decodeScalar(utf8, startingAt: $0)
-        return (scalar, $0 &+ len)
+@available(StdlibDeploymentTarget 6.3, *)
+extension Unicode._WordRecognizer {
+  /// The parts of the word recognizer state that are in addition to saved
+  /// information about previous scalars.
+  ///
+  /// This is used to implement stateful lookahead when a break at a particular
+  /// candidate position may need to be suppressed or activated based on a
+  /// subsequent scalar value. It is also used to keep track of the number of
+  /// contiguous regional indicator scalars we have seen so far.
+  internal enum _State: Int, Sendable {
+    case ordinary
+    case afterWB6 // AHLetter × (MidLetter | MidNumLetQ) AHLetter
+    case afterWB7b // Hebrew_Letter × Double_Quote Hebrew_Letter
+    case afterWB12 // Numeric × (MidNum | MidNumLetQ) Numeric
+    case afterMidFlag // [^RI] (RI RI)* RI × RI
+
+    var hasPendingCandidate: Bool {
+      switch self {
+      case .afterWB6, .afterWB7b, .afterWB12: return true
+      default: return false
       }
     }
   }
 
-  internal func _foreignNextWordIndex(startingAt i: Int) -> Int {
-#if _runtime(_ObjC)
-    return nextWordBoundary(startingAt: i) {
-      _internalInvariant($0 >= 0)
-
-      guard $0 < count else {
-        return nil
-      }
-
-      let scalars = String.UnicodeScalarView(self)
-      let idx = String.Index(_encodedOffset: $0)
-
-      let scalar = scalars[idx]
-      let nextIndex = scalars.index(after: idx)
-
-      return (scalar, nextIndex._encodedOffset)
-    }
-#else
-    fatalError("No foreign strings on this platform in this version of Swift.")
-#endif
+  /// Reject a break at the current position, triggering a break at the current
+  /// candidate (if any).
+  internal mutating func _reject(
+  ) -> (setCandidate: Bool, breakAtCandidate: Bool, breakHere: Bool) {
+    let breakAtCandidate = _state.hasPendingCandidate
+    _state = .ordinary
+    return (
+      setCandidate: false,
+      breakAtCandidate: breakAtCandidate,
+      breakHere: false)
   }
 
-  internal func previousWordIndex(endingAt i: Int) -> Int {
-    if _slowPath(isForeign) {
-      return _foreignPreviousWordIndex(endingAt: i)
-    }
-
-    return unsafe withFastUTF8 { utf8 in
-      previousWordBoundary(endingAt: i) {
-        _internalInvariant($0 <= count)
-
-        guard $0 > 0 else {
-          return nil
-        }
-
-        let (scalar, len) = unsafe _decodeScalar(utf8, endingAt: $0)
-        return (scalar, $0 &- len)
-      }
-    }
+  /// Skip this position, ignoring the current Unicode scalar.
+  internal mutating func _ignore(
+  ) -> (setCandidate: Bool, breakAtCandidate: Bool, breakHere: Bool) {
+    // Note: not updating _baseCategory
+    return (setCandidate: false, breakAtCandidate: false, breakHere: false)
   }
 
-  @inline(never)
-  internal func _foreignPreviousWordIndex(endingAt i: Int) -> Int {
-#if _runtime(_ObjC)
-    return previousWordBoundary(endingAt: i) {
-      _internalInvariant($0 <= count)
-
-      guard $0 > 0 else {
-        return nil
-      }
-
-      let scalars = String.UnicodeScalarView(self)
-      let idx = String.Index(_encodedOffset: $0)
-
-      let previousIndex = scalars.index(before: idx)
-      let scalar = scalars[previousIndex]
-
-      return (scalar, previousIndex._encodedOffset)
-    }
-#else
-    fatalError("No foreign strings on this platform in this version of Swift.")
-#endif
+  /// Signal a break at the current position, also triggering a break at the
+  /// current candidate (if any).
+  internal mutating func _accept(
+  ) -> (setCandidate: Bool, breakAtCandidate: Bool, breakHere: Bool) {
+    // If we have a pending candidate, put a break at it
+    let breakAtCandidate = _state.hasPendingCandidate
+    _state = .ordinary
+    return (
+      setCandidate: false,
+      breakAtCandidate: breakAtCandidate,
+      breakHere: true)
   }
-}
-
-internal enum _WordQuestion {
-  case checkingRegionalIndicator(count: Int, previousRIIndex: Int)
-  case requireAHLetter
-  case requireNumeric
-  case requireHebrewLetter
-}
-
-extension _WordQuestion: Equatable {}
-
-internal struct _WordBreakingState {
-  var constraint: (question: _WordQuestion, index: Int)? = nil
-
-  var index: Int
-
-  var previousIndex: Int? = nil
-  var previousProperty: Unicode._WordBreakProperty? = nil
-
-  // When walking forward in a string, we need to not break on emoji flag
-  // sequences. Emoji flag sequences are composed of 2 regional indicators, so
-  // when we see our first (.regionalIndicator, .regionalIndicator) decision,
-  // we need to know to return false in this case. However, if the next scalar
-  // is another regional indicator, we reach the same decision rule, but in this
-  // case we actually need to break there's a boundary between emoji flag
-  // sequences.
-  var shouldBreakRI = false
-}
-
-extension _StringGuts {
-  // Returns the stride of the next word at the previous boundary offset.
-  internal func nextWordBoundary(
-    startingAt index: Int,
-    nextScalar: (Int) -> (scalar: Unicode.Scalar, end: Int)?
-  ) -> Int {
-    _precondition(index < endIndex._encodedOffset)
-
-    var (scalar, index) = nextScalar(index)!
-    var state = _WordBreakingState(index: index)
-
-    while let (scalar2, nextIndex) = nextScalar(state.index) {
-      if shouldBreak(between: scalar, and: scalar2, with: &state) {
-        break
-      }
-
-      scalar = scalar2
-      state.index = nextIndex
-    }
-
-    // If we have a leftover constraint, return the index
-    if let constraint = state.constraint {
-      return constraint.index
-    }
 
-    return state.index
+  /// Set the current position as the active break candidate, and transition into the
+  /// specified state.
+  internal mutating func _transition(
+    into state: _State
+  ) -> (setCandidate: Bool, breakAtCandidate: Bool, breakHere: Bool) {
+    _internalInvariant(_state == .ordinary)
+    _state = state
+    return (setCandidate: true, breakAtCandidate: false, breakHere: false)
   }
 
-  // Returns the stride of the previous word at the current boundary offset.
-  internal func previousWordBoundary(
-    endingAt index: Int,
-    previousScalar: (Int) -> (scalar: Unicode.Scalar, start: Int)?
-  ) -> Int {
-    var (scalar2, index) = previousScalar(index)!
-    var state = _WordBreakingState(index: index)
-
-    while let (scalar, previousIndex) = previousScalar(state.index) {
-      if shouldBreakBackward(between: scalar, and: scalar2, with: &state) {
-        break
-      }
-
-      scalar2 = scalar
-      state.index = previousIndex
-    }
-
-    if let previousIndex = state.previousIndex {
-      return previousIndex
-    }
-
-    if let constraint = state.constraint {
-      if let riIndex = handleRIConstraint(constraint, with: state) {
-        return riIndex
-      }
-
-      return constraint.index
-    }
-
-    return state.index
+  /// If the current state matches the given expectation, suppress a break at
+  /// this position and discard the active candidate; otherwise, report a break
+  /// at both positions.
+  internal mutating func _expect(
+    _ expectedState: _State
+  ) -> (setCandidate: Bool, breakAtCandidate: Bool, breakHere: Bool) {
+    let breakHere = (_state != expectedState)
+    let breakAtCandidate = breakHere && _state.hasPendingCandidate
+    _state = .ordinary
+    return (
+      setCandidate: false,
+      breakAtCandidate: breakAtCandidate,
+      breakHere: breakHere)
   }
-}
 
-extension _StringGuts {
-  // The "algorithm" that determines whether or not we should break between
-  // certain word break properties.
-  //
-  // This is based off of the Unicode Annex #29 for [Word Boundary
-  // Rules](https://unicode.org/reports/tr29/#Word_Boundary_Rules).
-  internal func shouldBreak(
-    between scalar1: Unicode.Scalar,
-    and scalar2: Unicode.Scalar,
-    with state: inout _WordBreakingState
-  ) -> Bool {
-    // WB3
-    if scalar1.value == 0xD, scalar2.value == 0xA {
-      return false
+  /// Feeds the next scalar to the state machine, reporting if there is a word
+  /// boundary at the current position or a previously reported candidate.
+  ///
+  /// To decide whether there is a word break at the current position, the
+  /// segmentation algorithm sometimes needs to look ahead by visiting
+  /// additional scalars following the break, up to the next word boundary. To
+  /// allow this, the state machine can report that the current position is a
+  /// provisional break "candidate". Clients are expected to remember the
+  /// position of the last reported candidate, so that it can be retroactively
+  /// promoted to a full break as needed.
+  ///
+  /// - Parameter nextScalar: The scalar at the current position in the text.
+  /// - Returns: A triple of Boolean values `setCandidate`, `breakAtCandidate`,
+  ///     `breakHere`. If `setCandidate` is true, then the caller is expected to
+  ///     save the current text position as a potential word boundary. If
+  ///     `breakAtCandidate` is true, then there is a word boundary at the last
+  ///     candidate position. If `breakHere` is true, then there is a word
+  ///     boundary at the current position. The caller is expected to process
+  ///     these three components in this specific order.
+  public mutating func hasBreak(
+    before nextScalar: Unicode.Scalar
+  ) -> (setCandidate: Bool, breakAtCandidate: Bool, breakHere: Bool) {
+    let nextCategory = Unicode._WordBreakProperty(from: nextScalar)
+    var nextBase = nextCategory
+
+    // FIXME: Implement a proper state machine here, dispatching on
+    // (state, nextProperty), ideally through a static look-up table
+
+    defer {
+      _prevScalar = nextScalar
+      _prevCategory = nextCategory
+      _baseCategory = nextBase
     }
 
-    let x = Unicode._WordBreakProperty(from: scalar1)
-    
-    // WB3a, handled here since we don't need to look up `y` for this
-    if x == .newlineCRLF {
-      return true
-    }
-    
-    let y = Unicode._WordBreakProperty(from: scalar2)
-
-    switch (x, y) {
-
-    // Fast path: If we know our scalars have no properties the decision is
-    //            trivial and we don't need to crawl to the default statement.
-    case (.any, .any):
-      return true
-
-    // WB3b
-    case (_, .newlineCRLF):
-      return true
-
-    // WB3c
-    case (.zwj, .extendedPictographic):
-      return false
+    switch (_prevCategory, nextCategory) {
+    case (.any, .any): // WB999
+      // Fast path: If we know our scalars have no properties then the decision
+      // is trivial and we don't need to crawl to the default statement.
+      return _accept()
+
+    case (.newlineCRLF, _), // WB3a
+         (_, .newlineCRLF): // WB3b
+      if _prevScalar.value == 0xD, nextScalar.value == 0xA { // WB3
+        _internalInvariant(_prevCategory == .newlineCRLF)
+        return _reject()
+      }
+      return _accept()
 
-    // WB3d
-    case (.wSegSpace, .wSegSpace):
-      return false
+    case (.zwj, .extendedPictographic), // WB3c
+         (.wSegSpace, .wSegSpace): // WB3d
+      return _reject()
 
-    // WB4
-    case (_, .format),
+    case (_, .format), // WB4
          (_, .extend),
          (_, .zwj):
-      if x != .format && x != .extend && x != .zwj {
-        state.previousProperty = x
-      }
-
-      return false
+      nextBase = _baseCategory // Cancel _baseCategory update
+      return _ignore()
 
     default:
-      let newX = state.previousProperty ?? x
-
-      return decidePostFormat(between: newX, and: y, with: &state)
+      break
     }
-  }
-
-  internal func decidePostFormat(
-    between x: Unicode._WordBreakProperty,
-    and y: Unicode._WordBreakProperty,
-    with state: inout _WordBreakingState
-  ) -> Bool {
-    state.previousProperty = nil
 
-    switch (x, y) {
-    // WB5
-    case (.aLetter, .aLetter),
+    switch (_baseCategory, nextCategory) {
+    case (.aLetter, .aLetter), // WB5
          (.aLetter, .hebrewLetter),
          (.hebrewLetter, .aLetter),
          (.hebrewLetter, .hebrewLetter):
-      return false
+      return _reject()
 
-    // WB6
-    case (.aLetter, .midLetter),
+    case (.aLetter, .midLetter), // WB6
          (.hebrewLetter, .midLetter),
          (.aLetter, .midNumLet),
          (.hebrewLetter, .midNumLet),
          (.aLetter, .singleQuote):
-      state.constraint = (question: .requireAHLetter, index: state.index)
+      return _transition(into: .afterWB6)
 
-      return false
-
-    // WB7
-    case (.midLetter, .aLetter),
+    case (.midLetter, .aLetter), // WB7
          (.midLetter, .hebrewLetter),
          (.midNumLet, .aLetter),
          (.midNumLet, .hebrewLetter),
          (.singleQuote, .aLetter),
          (.singleQuote, .hebrewLetter):
-      if let constraint = state.constraint {
-        if constraint.question == .requireAHLetter {
-          state.constraint = nil
-          return false
-        }
-
-        state.index = constraint.index
-        return true
-      }
+      return _expect(.afterWB6)
 
-      return true
+    case (.hebrewLetter, .singleQuote): // WB7a
+      return _reject()
 
-    // WB7a
-    case (.hebrewLetter, .singleQuote):
-      return false
+    case (.hebrewLetter, .doubleQuote): // WB7b
+      return _transition(into: .afterWB7b)
 
-    // WB7b
-    case (.hebrewLetter, .doubleQuote):
-      state.constraint = (question: .requireHebrewLetter, index: state.index)
+    case (.doubleQuote, .hebrewLetter): // WB7c
+      return _expect(.afterWB7b)
 
-      return false
+    case (.numeric, .numeric), // WB8
+         (.aLetter, .numeric), // WB9
+         (.hebrewLetter, .numeric), // WB9
+         (.numeric, .aLetter), // WB10
+         (.numeric, .hebrewLetter): // WB10
+      return _reject()
 
-    // WB7c
-    case (.doubleQuote, .hebrewLetter):
-      if let constraint = state.constraint {
-        if constraint.question == .requireHebrewLetter {
-          state.constraint = nil
-          return false
-        }
-
-        state.index = constraint.index
-        return true
-      }
-
-      return true
-
-    // WB8
-    case (.numeric, .numeric):
-      return false
-
-    // WB9
-    case (.aLetter, .numeric),
-         (.hebrewLetter, .numeric):
-      return false
-
-    // WB10
-    case (.numeric, .aLetter),
-         (.numeric, .hebrewLetter):
-      return false
-
-    // WB11
-    case (.midNum, .numeric),
+    case (.midNum, .numeric), // WB11
          (.midNumLet, .numeric),
          (.singleQuote, .numeric):
-      if let constraint = state.constraint {
-        if constraint.question == .requireNumeric {
-          state.constraint = nil
-          return false
-        }
-
-        state.index = constraint.index
-        return true
-      }
-
-      return true
+      return _expect(.afterWB12)
 
-    // WB12
-    case (.numeric, .midNum),
+    case (.numeric, .midNum), // WB12
          (.numeric, .midNumLet),
          (.numeric, .singleQuote):
-      state.constraint = (question: .requireNumeric, index: state.index)
+      return _transition(into: .afterWB12)
 
-      return false
-
-    // WB13
-    case (.katakana, .katakana):
-      return false
-
-    // WB13a
-    case (.aLetter, .extendNumLet),
+    case (.katakana, .katakana), // WB13
+         (.aLetter, .extendNumLet), // WB13a
          (.hebrewLetter, .extendNumLet),
          (.numeric, .extendNumLet),
          (.katakana, .extendNumLet),
-         (.extendNumLet, .extendNumLet):
-      return false
-
-    // WB13b
-    case (.extendNumLet, .aLetter),
+         (.extendNumLet, .extendNumLet),
+         (.extendNumLet, .aLetter), // WB13b
          (.extendNumLet, .hebrewLetter),
          (.extendNumLet, .numeric),
          (.extendNumLet, .katakana):
-      return false
+      return _reject()
 
-    // WB15
-    case (.regionalIndicator, .regionalIndicator):
-      defer {
-        state.shouldBreakRI.toggle()
+    case (.regionalIndicator, .regionalIndicator): // WB15/WB16
+      let breakHere: Bool
+      if _state == .afterMidFlag {
+        _state = .ordinary
+        breakHere = true
+      } else {
+        _state = .afterMidFlag
+        breakHere = false
       }
+      return (setCandidate: false, breakAtCandidate: false, breakHere: breakHere)
 
-      return state.shouldBreakRI
-
-    default:
-      return true
+    default: // WB999
+      return _accept()
     }
   }
+
+  /// Returns true if the previously reported word boundary candidate must be
+  /// promoted to a full break when there are no more scalars in the input text.
+  ///
+  /// There is always an (implicit) word boundary at the end-of-text
+  /// position, following the last scalar; however, the end may also trigger a
+  /// pending unreported break at the last candidate previously set by
+  /// `hasBreak`. This method returns true in that case, allowing clients to
+  /// reliably detect such boundaries.
+  public func hasCandidateBreakAtEnd() -> Bool {
+    _state.hasPendingCandidate
+  }
 }
 
-extension _StringGuts {
-  // The "algorithm" that determines whether or not we should break between
-  // certain word break properties.
-  //
-  // This is based off of the Unicode Annex #29 for [Word Boundary
-  // Rules](https://unicode.org/reports/tr29/#Word_Boundary_Rules).
-  internal func shouldBreakBackward(
-    between scalar1: Unicode.Scalar,
-    and scalar2: Unicode.Scalar,
-    with state: inout _WordBreakingState
-  ) -> Bool {
-    // WB3
-    if scalar1.value == 0xD, scalar2.value == 0xA {
-      return false
+extension Unicode {
+  /// A state machine for recognizing safe word boundaries in a backward
+  /// sequence of Unicode scalars, based on the specification in [Unicode Annex
+  /// #29](https://unicode.org/reports/tr29/#Word_Boundary_Rules).
+  ///
+  /// The text segmentation algorithm is not stable, and it allows implementers
+  /// to tailor it to their needs. Accordingly, reported word boundaries may
+  /// vary in arbitrary ways between Unicode implementations and system
+  /// configurations, including between versions of the Swift Standard Library.
+  ///
+  /// This is intended to help implement searching for word boundaries near an
+  /// arbitrary position within a larger text, as described in [section
+  /// 6.4 of Annex #29](https://unicode.org/reports/tr29/#Random_Access). The
+  /// start position may be at an arbitrary scalar anywhere in the input text
+  /// -- there is no expectation that the first scalar addresses a known word
+  /// boundary. The state machine scans backwards from that position until it
+  /// detects a reliable word boundary.
+  ///
+  /// To detect a word break near a particular position in a series of Unicode
+  /// scalars, start iterating scalars backward from the start position, feeding
+  /// each of them to the `hasGuaranteedBreak(after:)` method. The method
+  /// indicates if there is a word break following the given scalar, or at a
+  /// previously reported candidate position. There is always a word break at
+  /// the start of the text; so if we run out of scalars, that position is
+  /// going to be a suitable safe word boundary.
+  ///
+  /// Note that this construct may skip over an arbitrary number of word
+  /// boundaries while it is searching for a safe break position. Once a safe
+  /// boundary is found, callers are usually expected to use it as the start
+  /// position to iterate forward using the standard segmentation algorithm
+  /// (implemented by `Unicode._WordRecognizer`), for example until they find
+  /// the break nearest to their original start position. In such cases, it is
+  /// usually a good idea to incrementally memoize word boundaries as they are
+  /// detected, to avoid repeating this process on the same positions later.
+  @available(StdlibDeploymentTarget 6.3, *)
+  public // Core primitive
+  struct _RandomAccessWordRecognizer: Sendable {
+    // FIXME: We also need proper public API for this
+
+    /// The last scalar that was fed to `hasGuaranteedBreak`; i.e., the scalar
+    /// immediately following the current one in the text.
+    var _nextScalar: Unicode.Scalar
+    /// The cached word break property of `_nextScalar`.
+    var _nextCategory: _WordBreakProperty
+    /// The word break property of the most recently seen scalar that wasn't
+    /// ignored by rule WB4.
+    var _baseCategory: _WordBreakProperty
+    /// Additional recognizer state.
+    var _state: _State
+    var _hasPendingCandidate: Bool
+
+    /// Initialize a new word recognizer at an arbitrary text position preceding
+    /// the given scalar.
+    public init(before scalar: Unicode.Scalar) {
+      _nextScalar = scalar
+      _nextCategory = Unicode._WordBreakProperty(from: scalar)
+      _baseCategory = _nextCategory
+      _state = .initial
+      _hasPendingCandidate = false
     }
+  }
+}
 
-    let x = Unicode._WordBreakProperty(from: scalar1)
-    let y = Unicode._WordBreakProperty(from: scalar2)
-
-    switch (x, y) {
 
-    // Fast path: If we know our scalars have no properties the decision is
-    //            trivial and we don't need to crawl to the default statement.
-    case (.any, .any):
-      return true
+@available(StdlibDeploymentTarget 6.3, *)
+extension Unicode._RandomAccessWordRecognizer {
+  internal enum _State: Int, Sendable {
+    case initial
+    case ordinary
+    case beforeWB7
+    case beforeWB7c
+    case beforeWB11
 
-    // WB3a and WB3b
-    case (.newlineCRLF, _),
-         (_, .newlineCRLF):
-      return true
+    var hasPendingCandidate: Bool {
+      switch self {
+      case .beforeWB7, .beforeWB7c, .beforeWB11: return true
+      default: return false
+      }
+    }
+  }
 
-    // WB3c
-    case (.zwj, .extendedPictographic):
-      return false
+  internal mutating func _reject(
+  ) -> (setCandidate: Bool, breakAtCandidate: Bool, breakHere: Bool) {
+    _hasPendingCandidate = false
+    return (setCandidate: false, breakAtCandidate: false, breakHere: false)
+  }
 
-    // WB3d
-    case (.wSegSpace, .wSegSpace):
-      return false
+  internal mutating func _ignore(
+  ) -> (setCandidate: Bool, breakAtCandidate: Bool, breakHere: Bool) {
+    return (setCandidate: false, breakAtCandidate: false, breakHere: false)
+  }
 
-    // WB4
-    case (.format, _),
-         (.extend, _),
-         (.zwj, _):
-      if y != .format && y != .extend && y != .zwj {
-        state.previousProperty = y
-
-        // If we already have a constraint in flight, then use that as our base
-        // previous index. Otherwise, use where we're at right now.
-        if let constraint = state.constraint {
-          state.previousIndex = constraint.index
-        } else {
-          state.previousIndex = state.index
-        }
-      }
+  internal mutating func _accept(
+  ) -> (setCandidate: Bool, breakAtCandidate: Bool, breakHere: Bool) {
+    if _hasPendingCandidate {
+      _hasPendingCandidate = false
+      return (setCandidate: false, breakAtCandidate: true, breakHere: false)
+    }
+    return (setCandidate: false, breakAtCandidate: false, breakHere: true)
+  }
 
-      return false
+  internal mutating func _placeCandidate(
+  ) -> (setCandidate: Bool, breakAtCandidate: Bool, breakHere: Bool) {
+    if _state == .initial {
+      return _reject()
+    }
+    _hasPendingCandidate = true
+    return (setCandidate: true, breakAtCandidate: false, breakHere: false)
+  }
 
-    // WB4
-    case (_, .format),
-         (_, .extend),
-         (_, .zwj):
-      if state.previousProperty != nil {
-        fallthrough
-      }
+  internal mutating func _placeSoftCandidate(
+  ) -> (setCandidate: Bool, breakAtCandidate: Bool, breakHere: Bool) {
+    if _hasPendingCandidate || _state == .initial {
+      return (false, false, false)
+    }
+    return _placeCandidate()
+  }
 
-      return false
+  public mutating func hasGuaranteedBreak(
+    after previousScalar: Unicode.Scalar
+  ) -> (setCandidate: Bool, breakAtCandidate: Bool, breakHere: Bool) {
+    let prevCategory = Unicode._WordBreakProperty(from: previousScalar)
+    var newState: _State = .ordinary
+    var newBase = prevCategory
+    defer {
+      _nextCategory = prevCategory
+      _nextScalar = previousScalar
+      _baseCategory = newBase
+      _state = newState
+    }
 
-    default:
-      var newY = y
+    switch (prevCategory, _nextCategory) {
+    case (.any, .any): // WB999 shortcut
+      return _accept()
 
-      if let previousProperty = state.previousProperty {
-        newY = previousProperty
+    case (.newlineCRLF, _), // WB3a
+         (_, .newlineCRLF): // WB3b
+      if previousScalar.value == 0xD, _nextScalar.value == 0xA { // WB3
+        return _reject()
       }
+      return _accept()
 
-      return decidePostFormatBackward(between: x, and: newY, with: &state)
-    }
-  }
+    case (.zwj, .extendedPictographic), // WB3c
+         (.wSegSpace, .wSegSpace): // WB3d
+      newBase = _baseCategory
+      newState = _state
+      return _reject()
 
-  internal func decidePostFormatBackward(
-    between x: Unicode._WordBreakProperty,
-    and y: Unicode._WordBreakProperty,
-    with state: inout _WordBreakingState
-  ) -> Bool {
-    state.previousProperty = nil
+    case (.format, _), // WB4
+         (.extend, _),
+         (.zwj, _):
+      newBase = _baseCategory
+      newState = _state
+      if _state == .initial || _nextCategory == .format || _nextCategory == .extend || _nextCategory == .zwj {
+        return _ignore()
+      }
+      return _placeSoftCandidate()
 
-    switch (x, y) {
-    case (.any, .any):
-      return true
+    default:
+      break
+    }
 
-    // WB5
-    case (.aLetter, .aLetter),
+    switch (prevCategory, _baseCategory) {
+    case (.aLetter, .aLetter), // WB5
          (.aLetter, .hebrewLetter),
          (.hebrewLetter, .aLetter),
          (.hebrewLetter, .hebrewLetter):
-      state.previousIndex = nil
-      return false
-
-    // WB6
-    case (.aLetter, .midLetter),
-         (.hebrewLetter, .midLetter),
-         (.aLetter, .midNumLet),
-         (.hebrewLetter, .midNumLet),
-         (.aLetter, .singleQuote):
-      if let constraint = state.constraint {
-        if constraint.question == .requireAHLetter {
-          state.constraint = nil
-          state.previousIndex = nil
-          return false
-        }
-
-        state.index = constraint.index
-        return true
-      }
+      return _reject()
 
-      return true
-
-    // WB7
-    case (.midLetter, .aLetter),
+    case (.midLetter, .aLetter), // WB7
          (.midLetter, .hebrewLetter),
          (.midNumLet, .aLetter),
          (.midNumLet, .hebrewLetter),
          (.singleQuote, .aLetter),
          (.singleQuote, .hebrewLetter):
-      state.constraint = (question: .requireAHLetter, index: state.index)
-
-      return false
-
-    // WB7a
-    case (.hebrewLetter, .singleQuote):
-      state.previousIndex = nil
-      return false
-
-    // WB7b
-    case (.hebrewLetter, .doubleQuote):
-      if let constraint = state.constraint {
-        if constraint.question == .requireHebrewLetter {
-          state.constraint = nil
-          state.previousIndex = nil
-          return false
-        }
-
-        state.index = constraint.index
-        return true
-      }
-
-      return true
+      newState = .beforeWB7
+      return _placeSoftCandidate()
 
-    // WB7c
-    case (.doubleQuote, .hebrewLetter):
-      state.constraint = (question: .requireHebrewLetter, index: state.index)
+    case (.aLetter, .midLetter), // WB6
+         (.hebrewLetter, .midLetter),
+         (.aLetter, .midNumLet),
+         (.hebrewLetter, .midNumLet),
+         (.aLetter, .singleQuote):
+      if _state == .beforeWB7 || _state == .initial {
+        return _reject()
+      }
+      return _accept()
 
-      return false
+    case (.hebrewLetter, .singleQuote): // WB7a
+      return _reject()
 
-    // WB8
-    case (.numeric, .numeric):
-      state.previousIndex = nil
-      return false
+    case (.doubleQuote, .hebrewLetter): // WB7c
+      newState = .beforeWB7c
+      return _placeSoftCandidate()
 
-    // WB9
-    case (.aLetter, .numeric),
-         (.hebrewLetter, .numeric):
-      state.previousIndex = nil
-      return false
+    case (.hebrewLetter, .doubleQuote): // WB7b
+      if _state == .beforeWB7c || _state == .initial {
+        return _reject()
+      }
+      return _accept()
 
-    // WB10
-    case (.numeric, .aLetter),
+    case (.numeric, .numeric), // WB8
+         (.aLetter, .numeric), // WB9
+         (.hebrewLetter, .numeric),
+         (.numeric, .aLetter), // WB10
          (.numeric, .hebrewLetter):
-      state.previousIndex = nil
-      return false
+      return _reject()
 
-    // WB11
-    case (.midNum, .numeric),
+    case (.midNum, .numeric), // WB11
          (.midNumLet, .numeric),
          (.singleQuote, .numeric):
-      state.constraint = (question: .requireNumeric, index: state.index)
-
-      return false
+      newState = .beforeWB11
+      return _placeSoftCandidate()
 
-    // WB12
-    case (.numeric, .midNum),
+    case (.numeric, .midNum), // WB12
          (.numeric, .midNumLet),
          (.numeric, .singleQuote):
-      if let constraint = state.constraint {
-        if constraint.question == .requireNumeric {
-          state.constraint = nil
-          state.previousIndex = nil
-          return false
-        }
-
-        state.index = constraint.index
-        return true
+      if _state == .beforeWB11 || _state == .initial {
+        return _reject()
       }
+      return _accept()
 
-      return true
-
-    // WB13
-    case (.katakana, .katakana):
-      state.previousIndex = nil
-      return false
-
-    // WB13a
-    case (.aLetter, .extendNumLet),
+    case (.katakana, .katakana), // WB13
+         (.aLetter, .extendNumLet), // WB13a
          (.hebrewLetter, .extendNumLet),
          (.numeric, .extendNumLet),
          (.katakana, .extendNumLet),
-         (.extendNumLet, .extendNumLet):
-      state.previousIndex = nil
-      return false
-
-    // WB13b
-    case (.extendNumLet, .aLetter),
+         (.extendNumLet, .extendNumLet),
+         (.extendNumLet, .aLetter), // WB13b
          (.extendNumLet, .hebrewLetter),
          (.extendNumLet, .numeric),
          (.extendNumLet, .katakana):
-      state.previousIndex = nil
-      return false
-
-    // WB15
-    case (.regionalIndicator, .regionalIndicator):
-      var riCount = 0
-      var previousRIIndex = state.index
-      var constraintIndex = state.index
-
-      if let constraint = state.constraint {
-        if case let .checkingRegionalIndicator(count, riIndex) =
-            constraint.question {
-          riCount = count + 1
-          previousRIIndex = count == 0 ? state.index : riIndex
-          constraintIndex = constraint.index
-        }
-      } else {
-        if let previousIndex = state.previousIndex {
-          constraintIndex = previousIndex
-        }
-      }
-
-      state.constraint = (
-        question: .checkingRegionalIndicator(
-          count: riCount,
-          previousRIIndex: previousRIIndex
-        ),
-        index: constraintIndex
-      )
+      return _reject()
 
-      state.previousIndex = nil
+    case (.regionalIndicator, .regionalIndicator): // WB15/WB16
+      return _reject()
 
-      return false
+    case (_, .format),
+         (_, .extend),
+         (_, .zwj):
+      _internalInvariant(!_hasPendingCandidate)
+      newState = .initial
+      return _reject()
 
     default:
-      return true
+      if
+        !_hasPendingCandidate,
+        _nextCategory == .format || _nextCategory == .extend || _nextCategory == .zwj
+      {
+        return _ignore()
+      }
+      return _accept()
     }
   }
+}
 
-  internal func handleRIConstraint(
-    _ constraint: (question: _WordQuestion, index: Int),
-    with state: _WordBreakingState
-  ) -> Int? {
-    if case let .checkingRegionalIndicator(count, previousRIIndex) =
-        constraint.question {
-      // If our count is 0, then we were unable to update previousRIIndex.
-      // However, that index is now equal to state.index.
-      if count == 0 {
-        return state.index
-      }
+extension String {
+  /// Find and return a word boundary position at or before an arbitrary index
+  /// within this string. The result may not be the closest word break to the
+  /// start position.
+  ///
+  /// This implements the core algorithm for finding a "safe" starting point for
+  /// random access to word breaks, following [section 6.4 of Unicode Annex
+  /// #29](https://unicode.org/reports/tr29/#Random_Access). Unicode defines
+  /// word boundaries using a forward-only state machine; this algorithm does
+  /// its best to run the state machine backwards until it finds a guaranteed
+  /// break position.
+  ///
+  /// This process is inherently an approximation: the algorithm may need to
+  /// skip over an arbitrary number of actual word boundaries before finding one
+  /// that it can judge with confidence. This makes it relatively expensive to
+  /// iterate over word boundaries backwards; in the worst case, a naive
+  /// implementation may have quadratic complexity. The recommended way to
+  /// mitigate this is to maintain a cache of word breaks already traversed,
+  /// only calling this method to extend the range of known breaks backwards as
+  /// needed.
+  ///
+  /// - Parameter i: An arbitrary index within the string, not necessarily
+  ///     addressing a word boundary.
+  /// - Returns: A valid index less than or equal to the input that is
+  ///     guaranteed to identify some word boundary at or before `i`.
+  @available(SwiftStdlib 6.3, *)
+  public func _wordIndex(somewhereAtOrBefore i: Index) -> Index {
+    var j = _guts.validateInclusiveScalarIndex(i)
+    if j == endIndex {
+      return j
+    }
+    var recognizer = Unicode._RandomAccessWordRecognizer(
+      before: self.unicodeScalars[j])
+    var candidate = j
+    while j > self.startIndex {
+      let p = self.unicodeScalars.index(before: j)
+      let b = recognizer.hasGuaranteedBreak(after: self.unicodeScalars[p])
+      if b.setCandidate { candidate = j }
+      if b.breakAtCandidate { return candidate }
+      if b.breakHere { return j }
+      j = p
+    }
+    return j
+  }
 
-      // We were able to update previousRIIndex!
-      if count.isMultiple(of: 2) {
-        return previousRIIndex
+  /// Return the word boundary position following a known word boundary within
+  /// this string.
+  ///
+  /// This implements the word boundary specification of [Unicode Annex
+  /// #29](https://unicode.org/reports/tr29/#Default_Word_Boundaries). The
+  /// algorithm is not stable, and it allows implementers to tailor it to their
+  /// needs; accordingly, the result of this operation may vary between Unicode
+  /// implementations and system configurations, including versions of the Swift
+  /// Standard Library.
+  ///
+  /// - Note: The input index must be on a known word boundary; otherwise the
+  /// result of this operation is unspecified. The start and end indices are
+  /// always known word boundaries, in every string.
+  ///
+  /// - Parameter i: A valid index addressing a word boundary within this
+  ///     string.
+  /// - Returns: The first word break strictly following `i` in the string.
+  @available(StdlibDeploymentTarget 5.7, *)
+  public func _wordIndex(after i: String.Index) -> String.Index {
+    guard #available(StdlibDeploymentTarget 6.3, *) else {
+      fatalError("Unreachable")
+    }
+    let i = _guts.validateScalarIndex(i)
+    if _slowPath(_guts.isForeign) {
+      return _guts._nextForeignWordIndex(after: i)
+    }
+    return _guts._nextUTF8WordIndex(after: i)
+  }
+}
+
+extension _StringGuts {
+  @available(StdlibDeploymentTarget 6.3, *)
+  @inline(never)
+  @_effects(releasenone)
+  internal func _nextUTF8WordIndex(after index: Index) -> Index {
+    _internalInvariant(self.isFastUTF8)
+    let result = unsafe self.withFastUTF8 { utf8 in
+      var offset = index._encodedOffset
+      let first = unsafe _decodeScalar(utf8, startingAt: offset)
+      offset &+= first.scalarLength
+      var recognizer = Unicode._WordRecognizer(after: first.0)
+      var candidate = offset
+      while offset < utf8.count {
+        let (scalar, len) = unsafe _decodeScalar(utf8, startingAt: offset)
+        let r = recognizer.hasBreak(before: scalar)
+        if r.setCandidate { candidate = offset }
+        if r.breakAtCandidate { return candidate }
+        if r.breakHere { return offset }
+        offset &+= len
+      }
+      if recognizer.hasCandidateBreakAtEnd() {
+        return candidate
       }
+      return offset
     }
+    // Note: We only signal that the result is scalar aligned, not
+    // character-aligned. Unicode does attempt to ensure that word breaks are
+    // always character-aligned, but this is not a strict guarantee, especially
+    // not if either segmentation algorithm has a tailored implementation. (As
+    // of 6.3, we do not tailor our implementations, but we used to do so and we
+    // may choose to do it again in the future.)
+    return Index(_encodedOffset: result)._scalarAligned._knownUTF8
+  }
 
-    return nil
+  @available(StdlibDeploymentTarget 6.3, *)
+  @inline(never)
+  internal func _nextForeignWordIndex(after index: Index) -> Index {
+    #if _runtime(_ObjC)
+    _internalInvariant(self.isForeign)
+    let scalars = String.UnicodeScalarView(self)
+    var recognizer = Unicode._WordRecognizer(after: scalars[index])
+    var i = scalars.index(after: index)
+    var candidate = i
+    while i < scalars.endIndex {
+      let r = recognizer.hasBreak(before: scalars[i])
+      if r.setCandidate { candidate = i }
+      if r.breakAtCandidate { return candidate }
+      if r.breakHere { return i }
+      scalars.formIndex(after: &i)
+    }
+    if recognizer.hasCandidateBreakAtEnd() {
+      return candidate
+    }
+    return i
+    #else
+    fatalError("Foreign strings are unsupported on this platform")
+    #endif
   }
 }
-
diff --git a/stdlib/public/core/UnicodeBreakProperty.swift b/stdlib/public/core/UnicodeBreakProperty.swift
index c0f97694d6c51..d4e197e87f316 100644
--- a/stdlib/public/core/UnicodeBreakProperty.swift
+++ b/stdlib/public/core/UnicodeBreakProperty.swift
@@ -13,7 +13,7 @@
 import SwiftShims
 
 extension Unicode {
-  internal enum _GraphemeBreakProperty {
+  internal enum _GraphemeBreakProperty: Sendable {
     case any
     case control
     case extend
@@ -86,7 +86,7 @@ extension Unicode {
 }
 
 extension Unicode {
-  internal enum _WordBreakProperty {
+  internal enum _WordBreakProperty: UInt8, Sendable {
     case aLetter
     case any
     case doubleQuote
@@ -105,8 +105,8 @@ extension Unicode {
     case singleQuote
     case wSegSpace
     case zwj
-    
-    init(from scalar: Unicode.Scalar) {
+
+    internal init(from scalar: Unicode.Scalar) {
       switch scalar.value {
       case 0xA ... 0xD,
            0x85,
@@ -122,7 +122,7 @@ extension Unicode {
         self = .regionalIndicator
       default:
         let rawValue = _swift_stdlib_getWordBreakProperty(scalar.value)
-        
+
         switch rawValue {
         case 0:
           self = .extend
diff --git a/stdlib/public/core/UnicodeSPI.swift b/stdlib/public/core/UnicodeSPI.swift
index 0b4b2f40e29f9..7939f878fd337 100644
--- a/stdlib/public/core/UnicodeSPI.swift
+++ b/stdlib/public/core/UnicodeSPI.swift
@@ -207,35 +207,3 @@ extension Unicode.Scalar.Properties {
     return result
   }
 }
-
-//===----------------------------------------------------------------------===//
-// String Word Breaking
-//===----------------------------------------------------------------------===//
-
-extension String {
-  @_spi(_Unicode)
-  @available(SwiftStdlib 5.7, *)
-  public func _wordIndex(after i: String.Index) -> String.Index {
-    let i = _guts.validateWordIndex(i)
-
-    let next = _guts.nextWordIndex(startingAt: i._encodedOffset)
-    return String.Index(_encodedOffset: next)
-  }
-
-  @_spi(_Unicode)
-  @available(SwiftStdlib 5.7, *)
-  public func _wordIndex(before i: String.Index) -> String.Index {
-    let i = _guts.validateInclusiveWordIndex(i)
-
-    _precondition(i > startIndex, "String index is out of bounds")
-
-    let previous = _guts.previousWordIndex(endingAt: i._encodedOffset)
-    return String.Index(_encodedOffset: previous)
-  }
-
-  @_spi(_Unicode)
-  @available(SwiftStdlib 5.7, *)
-  public func _nearestWordIndex(atOrBelow i: String.Index) -> String.Index {
-    _guts.validateInclusiveWordIndex(i)
-  }
-}
diff --git a/test/abi/macOS/arm64/stdlib.swift b/test/abi/macOS/arm64/stdlib.swift
index a1776ce3fcc34..f88a7efc3fa70 100644
--- a/test/abi/macOS/arm64/stdlib.swift
+++ b/test/abi/macOS/arm64/stdlib.swift
@@ -1121,3 +1121,21 @@ Added: _$ss11InlineArrayVsRi__rlE17_protectedAddressSPyq_GvpMV
 
 // lengthOfBytes(using:)
 Added: __swift_stdlib_NSStringLengthOfBytesInEncodingTrampoline
+
+// Word breaking symbols exposed in 6.3
+Added: _$sSS10_wordIndex19somewhereAtOrBeforeSS0B0VAD_tF
+Added: _$ss7UnicodeO15_WordRecognizerV22hasCandidateBreakAtEndSbyF
+Added: _$ss7UnicodeO15_WordRecognizerV5afterAdB6ScalarV_tcfC
+Added: _$ss7UnicodeO15_WordRecognizerV8hasBreak6beforeSb12setCandidate_Sb07breakAtH0Sb0I4HeretAB6ScalarV_tF
+Added: _$ss7UnicodeO15_WordRecognizerVADycfC
+Added: _$ss7UnicodeO15_WordRecognizerVMa
+Added: _$ss7UnicodeO15_WordRecognizerVMn
+Added: _$ss7UnicodeO15_WordRecognizerVN
+Added: _$ss7UnicodeO27_RandomAccessWordRecognizerV18hasGuaranteedBreak5afterSb12setCandidate_Sb07breakAtJ0Sb0K4HeretAB6ScalarV_tF
+Added: _$ss7UnicodeO27_RandomAccessWordRecognizerV6beforeAdB6ScalarV_tcfC
+Added: _$ss7UnicodeO27_RandomAccessWordRecognizerVMa
+Added: _$ss7UnicodeO27_RandomAccessWordRecognizerVMn
+Added: _$ss7UnicodeO27_RandomAccessWordRecognizerVN
+// Obsolete/broken SPIs removed in 6.3
+Removed: _$sSS17_nearestWordIndex9atOrBelowSS0C0VAD_tF
+Removed: _$sSS10_wordIndex6beforeSS0B0VAD_tF
diff --git a/test/abi/macOS/x86_64/stdlib.swift b/test/abi/macOS/x86_64/stdlib.swift
index c23a5913c6eeb..fc0656ba4f460 100644
--- a/test/abi/macOS/x86_64/stdlib.swift
+++ b/test/abi/macOS/x86_64/stdlib.swift
@@ -1121,3 +1121,21 @@ Added: _$ss11InlineArrayVsRi__rlE17_protectedAddressSPyq_GvpMV
 
 // lengthOfBytes(using:)
 Added: __swift_stdlib_NSStringLengthOfBytesInEncodingTrampoline
+
+// Word breaking symbols exposed in 6.3
+Added: _$sSS10_wordIndex19somewhereAtOrBeforeSS0B0VAD_tF
+Added: _$ss7UnicodeO15_WordRecognizerV22hasCandidateBreakAtEndSbyF
+Added: _$ss7UnicodeO15_WordRecognizerV5afterAdB6ScalarV_tcfC
+Added: _$ss7UnicodeO15_WordRecognizerV8hasBreak6beforeSb12setCandidate_Sb07breakAtH0Sb0I4HeretAB6ScalarV_tF
+Added: _$ss7UnicodeO15_WordRecognizerVADycfC
+Added: _$ss7UnicodeO15_WordRecognizerVMa
+Added: _$ss7UnicodeO15_WordRecognizerVMn
+Added: _$ss7UnicodeO15_WordRecognizerVN
+Added: _$ss7UnicodeO27_RandomAccessWordRecognizerV18hasGuaranteedBreak5afterSb12setCandidate_Sb07breakAtK0Sb0L4HeretAB6ScalarV_tF
+Added: _$ss7UnicodeO27_RandomAccessWordRecognizerV6beforeAdB6ScalarV_tcfC
+Added: _$ss7UnicodeO27_RandomAccessWordRecognizerVMa
+Added: _$ss7UnicodeO27_RandomAccessWordRecognizerVMn
+Added: _$ss7UnicodeO27_RandomAccessWordRecognizerVN
+// Obsolete/broken SPIs removed in 6.3
+Removed: _$sSS17_nearestWordIndex9atOrBelowSS0C0VAD_tF
+Removed: _$sSS10_wordIndex6beforeSS0B0VAD_tF
diff --git a/validation-test/stdlib/StringWordBreaking.swift b/validation-test/stdlib/StringWordBreaking.swift
index 6597c38898763..d3a0ca0a4268a 100644
--- a/validation-test/stdlib/StringWordBreaking.swift
+++ b/validation-test/stdlib/StringWordBreaking.swift
@@ -2,35 +2,119 @@
 // RUN: %target-run-stdlib-swift %S/Inputs/
 
 // REQUIRES: executable_test
-// REQUIRES: objc_interop
 // REQUIRES: optimized_stdlib
+// REQUIRES: objc_interop
 
-@_spi(_Unicode)
-import Swift
+// FIXME: Text segmentation test cases are only available when we have Foundation
 
 import StdlibUnittest
 import StdlibUnicodeUnittest
 import Foundation
 
 let StringWordBreaking = TestSuite("StringWordBreaking")
+defer { runAllTests() }
 
-// FIXME: Reenable once we figure out what to do with WordView
-// @available(SwiftStdlib 5.7, *)
-// extension String._WordView {
-//   var backwardsCount: Int {
-//     var c = 0
-//     var index = endIndex
-//     while index != startIndex {
-//       c += 1
-//       formIndex(before: &index)
-//     }
-//     return c
-//   }
-// }
+extension String {
+  /// Returns all word boundaries within the string, using a single word
+  /// recognizer instance. This is the most efficient way to find word
+  /// boundaries, as it processes each scalar exactly once.
+  @available(StdlibDeploymentTarget 6.3, *)
+  func fastWordBreaks() -> [String.Index] {
+    var result: [String.Index] = []
+    var i = self.startIndex
+    var recognizer = Unicode._WordRecognizer()
+    var candidate = i
+    while i < self.endIndex {
+      let (setCandidate, breakAtCandidate, breakHere) =
+        recognizer.hasBreak(before: self.unicodeScalars[i])
+      if setCandidate {
+        candidate = i
+      }
+      if breakAtCandidate {
+        result.append(candidate)
+      }
+      if breakHere {
+        result.append(i)
+      }
+      self.unicodeScalars.formIndex(after: &i)
+    }
+    if recognizer.hasCandidateBreakAtEnd() {
+      result.append(candidate)
+    }
+    result.append(i)
+    return result
+  }
+
+  /// Return the word boundary position preceding a known boundary within this
+  /// string.
+  ///
+  /// This implements the word boundary specification of [Unicode Annex
+  /// #29](https://unicode.org/reports/tr29/#Default_Word_Boundaries). The
+  /// algorithm is not stable, and it allows implementers to tailor it to their
+  /// needs; accordingly, the result of this operation may vary between Unicode
+  /// implementations and system configurations, including versions of the Swift
+  /// Standard Library.
+  ///
+  /// - Note: If the input index is not on a word boundary, then it is first
+  /// rounded down to the nearest boundary before starting this operation.
+  ///
+  /// - Warning: Using this method to iterate over the word breaks in a string
+  ///    backward has worst-case complexity that is proportional to the _square_
+  ///    of the length of the string. It is usually a better idea to keep a
+  ///    cache of known word boundaries, calculated by iterating _forwards_ from
+  ///    the start index, or a position returned by
+  ///    `_wordIndex(somewhereAtOrBefore:)`.
+  ///
+  /// - Parameter i: A valid index addressing a word boundary within this
+  ///    string.
+  /// - Returns: The last word break strictly preceding `i` in the string.
+  @available(StdlibDeploymentTarget 6.3, *)
+  public func _wordIndex(before i: String.Index) -> String.Index {
+    let i = self.unicodeScalars._index(roundingDown: i)
+    var j = _wordIndex(somewhereAtOrBefore: unicodeScalars.index(before: i))
+
+    // We know there is a stable break at `j`, however, the backward search may
+    // have skipped over some conditional breaks that it could not fully
+    // evaluate. Find the closest actual break that precedes `i` by iterating
+    // forward until we reach or jump over it.
+    precondition(j < i)
+    var recognizer = Unicode._WordRecognizer()
+    var bestBreak = j
+    var candidate = j
+    while j < self.endIndex {
+      let r = recognizer.hasBreak(before: self.unicodeScalars[j])
+      if r.setCandidate { candidate = j }
+      if r.breakAtCandidate {
+        guard candidate < i else { break }
+        bestBreak = candidate
+      }
+      if r.breakHere {
+        guard j < i else { break }
+        bestBreak = j
+      }
+      self.unicodeScalars.formIndex(after: &j)
+    }
+    if j == self.endIndex, candidate < i, recognizer.hasCandidateBreakAtEnd() {
+      bestBreak = candidate
+    }
+    precondition(bestBreak < i)
+    return bestBreak
+  }
+}
 
 extension String {
+  @available(SwiftStdlib 6.3, *)
+  var statefulWords: [String] {
+    let breaks = fastWordBreaks()
+    var prev = breaks[0]
+    return breaks.dropFirst().map { next in
+      defer { prev = next }
+      return String(self[prev ..< next])
+    }
+  }
+
   @available(SwiftStdlib 5.9, *)
-  var _words: [String] {
+  var statelessWords: [String] {
     var result: [String] = []
 
     var i = startIndex
@@ -48,8 +132,8 @@ extension String {
     return result
   }
 
-  @available(SwiftStdlib 5.9, *)
-  var _wordsBackwards: [String] {
+  @available(SwiftStdlib 6.3, *)
+  var backwardWords: [String] {
     var result: [String] = []
 
     var i = endIndex
@@ -68,54 +152,22 @@ extension String {
   }
 }
 
-if #available(SwiftStdlib 6.1, *) {
-  StringWordBreaking.test("word breaking") {
-    for wordBreakTest in wordBreakTests {
-      expectEqual(
-        wordBreakTest.1,
-        wordBreakTest.0._words,
-        "string: \(String(reflecting: wordBreakTest.0))")
-      expectEqual(
-        wordBreakTest.1.reversed(),
-        wordBreakTest.0._wordsBackwards,
-        "string: \(String(reflecting: wordBreakTest.0))")
-    }
+extension Unicode.Scalar {
+  var unicodeNotation: String {
+      let v = String(self.value, radix: 16, uppercase: true)
+      return "U+\(String(repeating: "0", count: max(0, 4 - v.count)))\(v)"
   }
 }
 
-// rdar://116652595
-//
-// We were accidentally hanging when rounding word indices for some concoctions of
-// strings. In particular, where we had a pair of scalars create a constraint
-// for the preceding pair, but the preceding extend rules were not taking the
-// constraint into consideration.
-if #available(SwiftStdlib 5.10, *) {
-  StringWordBreaking.test("word breaking backward extend constraints") {
-    let strs = ["日\u{FE0F}:X ", "👨‍👨‍👧‍👦\u{FE0F}:X ", "⛔️:X ", "⛔️·X ", "⛔️：X "]
-    let strWords = [
-      ["日\u{FE0F}", ":", "X", " "],
-      ["👨‍👨‍👧‍👦\u{FE0F}", ":", "X", " "],
-      ["⛔️", ":", "X", " "],
-      ["⛔️", "·", "X", " "],
-      ["⛔️", "：", "X", " "]
-    ]
-
-    for (str, words) in zip(strs, strWords) {
-      expectEqual(
-        words,
-        str._words,
-        "string: \(String(reflecting: str))"
-      )
-
-      expectEqual(
-        words.reversed(),
-        str._wordsBackwards,
-        "string: \(String(reflecting: str))"
-      )
-    }
+extension String {
+  var scalarDescriptions: String {
+    return self.unicodeScalars
+      .lazy.map { $0.unicodeNotation }
+      .joined(separator: " ")
   }
 }
 
+#if _runtime(_ObjC)
 // The most simple subclass of NSString that CoreFoundation does not know
 // about.
 class NonContiguousNSString : NSString {
@@ -123,16 +175,17 @@ class NonContiguousNSString : NSString {
     fatalError("don't call this initializer")
   }
   required init(itemProviderData data: Data, typeIdentifier: String) throws {
-    fatalError("don't call this initializer")    
+    fatalError("don't call this initializer")
   }
 
-  override init() { 
+  override init() {
     _value = []
-    super.init() 
+    super.init()
   }
 
-  init(_ value: [UInt16]) {
-    _value = value
+  @inline(never)
+  init(_ value: some Sequence<UInt16>) {
+    _value = Array(value)
     super.init()
   }
 
@@ -157,36 +210,77 @@ extension _StringGuts {
   @_silgen_name("$ss11_StringGutsV9isForeignSbvg")
   func _isForeign() -> Bool
 }
-
-func getUTF16Array(from string: String) -> [UInt16] {
-  var result: [UInt16] = []
-
-  for cp in string.utf16 {
-    result.append(cp)
+#endif
+
+func testCases() -> [(String, [String])] {
+  var tests = StdlibUnicodeUnittest.wordBreakTests
+  if #available(SwiftStdlib 5.10, *) {
+    // rdar://116652595
+    //
+    // We were accidentally hanging when rounding word indices for some
+    // concoctions of strings. In particular, where we had a pair of scalars
+    // create a constraint for the preceding pair, but the preceding extend
+    // rules were not taking the constraint into consideration.
+    tests += [
+      ("日\u{FE0F}:X ", ["日\u{FE0F}", ":", "X", " "]),
+      ("👨‍👨‍👧‍👦\u{FE0F}:X ", ["👨‍👨‍👧‍👦\u{FE0F}", ":", "X", " "]),
+      ("⛔️:X ", ["⛔️", ":", "X", " "]),
+      ("⛔️·X ", ["⛔️", "·", "X", " "]),
+      ("⛔️：X ", ["⛔️", "：", "X", " "]),
+    ]
   }
+  if #available(SwiftStdlib 6.3, *) {
+    tests += [
+      // https://github.com/swiftlang/swift-experimental-string-processing/issues/818
+      // rdar://154902007
+      ("\u{2060}\u{2018}\u{2060}\u{2060}example.com\u{2060}\u{2060}\u{2019}",
+       ["\u{2060}", "\u{2018}\u{2060}\u{2060}", "example.com\u{2060}\u{2060}", "\u{2019}"]),
+    ]
+  }
+  return tests
+}
 
-  return result
+if #available(SwiftStdlib 6.1, *) {
+  StringWordBreaking.test("word breaking") {
+    for (input, expectedWords) in testCases() {
+      expectEqual(
+        input.statelessWords,
+        expectedWords,
+        "input: \(input.debugDescription) \(input.scalarDescriptions)")
+      if #available(SwiftStdlib 6.3, *) {
+        expectEqual(
+          input.statefulWords,
+          expectedWords,
+          "input: \(input.debugDescription) \(input.scalarDescriptions)")
+        expectEqual(
+          input.backwardWords,
+          expectedWords.reversed(),
+          "input: \(input.debugDescription) \(input.scalarDescriptions)")
+      }
+    }
+  }
 }
 
 if #available(SwiftStdlib 6.1, *) {
   StringWordBreaking.test("word breaking foreign") {
-    for wordBreakTest in wordBreakTests {
-      let foreignTest = NonContiguousNSString(
-        getUTF16Array(from: wordBreakTest.0)
-      )
-      let test = foreignTest as String
+    for (nativeString, expectedWords) in testCases() {
+      let input = NonContiguousNSString(nativeString.utf16) as String
 
-      expectTrue(test._guts._isForeign())
+      expectTrue(input._guts._isForeign())
       expectEqual(
-        wordBreakTest.1,
-        test._words,
-        "string: \(String(reflecting: wordBreakTest.0))")
-      expectEqual(
-        wordBreakTest.1.reversed(),
-        test._wordsBackwards,
-        "string: \(String(reflecting: wordBreakTest.0))")
+        input.statelessWords,
+        expectedWords,
+        "input: \(nativeString.debugDescription) \(nativeString.scalarDescriptions)")
+      if #available(SwiftStdlib 6.3, *) {
+        expectEqual(
+          input.statefulWords,
+          expectedWords,
+          "input: \(nativeString.debugDescription) \(nativeString.scalarDescriptions)")
+        expectEqual(
+          input.backwardWords,
+          expectedWords.reversed(),
+          "input: \(nativeString.debugDescription) \(nativeString.scalarDescriptions)")
+      }
     }
   }
 }
-
-runAllTests()
diff --git a/validation-test/stdlib/UnicodeWordRecognizer.swift b/validation-test/stdlib/UnicodeWordRecognizer.swift
new file mode 100644
index 0000000000000..352199139786e
--- /dev/null
+++ b/validation-test/stdlib/UnicodeWordRecognizer.swift
@@ -0,0 +1,311 @@
+// RUN: %empty-directory(%t)
+// RUN: %target-run-stdlib-swift
+
+// REQUIRES: executable_test
+// REQUIRES: objc_interop
+// REQUIRES: optimized_stdlib
+
+// Validate that the various forms of word breaking all lead to consistent
+// results by exhaustively enumerating all possible state machine inputs up to
+// an adequately high length.
+//
+// The word breaking algorithm only cares about word break properties, not
+// specific scalar values. This lets us only use a single representative sample
+// in each class, drastically cutting down the input space to iterate through.
+// This keeps exhaustive enumeration feasible up to input lengths that are long
+// enough to give meaningful coverage.
+
+import StdlibUnittest
+
+// The suite that every test in this file registers itself with.
+let suite = TestSuite("UnicodeWordRecognizer")
+// Deferred so that the tests run only after the top-level code below has
+// finished registering them.
+defer { runAllTests() }
+
+/// One representative scalar for each character class that is relevant to word
+/// breaking. The trailing comment on each entry names the class and the scalar.
+///
+/// `string(for:)` maps integer digits to scalars by indexing into this array,
+/// and `check(length:)` enumerates digits in `0 ..< samples.count`.
+let samples: [Unicode.Scalar] = [
+  "\u{000D}", // CR
+  "\u{000A}", // LF
+  "\u{2028}", // Newline (LINE SEPARATOR)
+  "\u{0041}", // ALetter (LATIN CAPITAL LETTER A)
+  "\u{0022}", // Double_Quote (QUOTATION MARK)
+  "\u{0027}", // Single_Quote (APOSTROPHE)
+  "\u{200D}", // ZWJ (ZERO WIDTH JOINER)
+  "\u{1F1E6}", // RI (REGIONAL INDICATOR SYMBOL LETTER A)
+  "\u{05D0}", // Hebrew_Letter (HEBREW LETTER ALEF)
+  "\u{0300}", // Extend (COMBINING GRAVE ACCENT)
+  "\u{00AD}", // Format (SOFT HYPHEN)
+  "\u{3031}", // Katakana (VERTICAL KANA REPEAT MARK)
+  "\u{003A}", // MidLetter (COLON)
+  "\u{002C}", // MidNum (COMMA)
+  "\u{002E}", // MidNumLet (FULL STOP)
+  "\u{0030}", // Numeric (DIGIT ZERO)
+  "\u{005F}", // ExtendNumLet (LOW LINE)
+  "\u{0020}", // WSegSpace (SPACE)
+  "\u{00A9}", // \p{Extended_Pictographic} (COPYRIGHT)
+  "\u{0021}", // Any (EXCLAMATION MARK)
+]
+
+/// Call `body` with every array of the specified count consisting of
+/// integer elements in the given range. This is calculating the
+/// Cartesian `n`-ary power of the `range` argument.
+///
+/// Arrays are produced with the first position varying fastest:
+///
+///     withEveryArray(of: 1 ..< 3, count: 3) { print($0) }
+///     // [1, 1, 1]
+///     // [2, 1, 1]
+///     // [1, 2, 1]
+///     // [2, 2, 1]
+///     // [1, 1, 2]
+///     // [2, 1, 2]
+///     // [1, 2, 2]
+///     // [2, 2, 2]
+///
+/// When `n` is zero, `body` is called exactly once, with an empty array. When
+/// `n` is positive and `range` is empty, there is nothing to enumerate and
+/// `body` is never called.
+///
+/// - Parameters:
+///   - range: The values that each array element may take.
+///   - n: The number of elements in each generated array. Must not be
+///     negative.
+///   - body: A closure that is invoked once for each generated array.
+/// - Throws: The first error thrown by `body`; enumeration stops at that point.
+func withEveryArray<E: Error>(
+  of range: Range<Int>,
+  count n: Int,
+  _ body: ([Int]) throws(E) -> Void
+) throws(E) {
+  var vector: [Int] = .init(repeating: range.lowerBound, count: n)
+  guard n > 0 else {
+    // The zeroth power has exactly one member: the empty array.
+    try body(vector)
+    return
+  }
+  // A positive power of an empty range has no members. Without this check,
+  // `body` would be handed an array of `range.lowerBound` values, which are
+  // not elements of the (empty) range.
+  guard !range.isEmpty else { return }
+  while true {
+    try body(vector)
+    // Advance `vector` like an odometer whose least significant digit is at
+    // index 0: bump the first digit, carrying into later ones on overflow.
+    var i = 0
+    while true {
+      vector[i] += 1
+      if vector[i] < range.upperBound {
+        break
+      }
+      vector[i] = range.lowerBound
+      i += 1
+      if i == n {
+        return // done
+      }
+    }
+  }
+}
+
+/// Builds a string whose Unicode scalars are the entries of `samples` selected,
+/// in order, by the indices in `vector`.
+func string(for vector: [Int]) -> String {
+  var scalars = String.UnicodeScalarView()
+  scalars.append(contentsOf: vector.lazy.map { samples[$0] })
+  return String(scalars)
+}
+
+extension Unicode.Scalar {
+  /// The scalar's value in `U+XXXX` notation: uppercase hexadecimal digits,
+  /// zero-padded on the left to at least four digits.
+  var unicodeNotation: String {
+    let hex = String(value, radix: 16, uppercase: true)
+    let padding = String(repeating: "0", count: max(0, 4 - hex.count))
+    return "U+" + padding + hex
+  }
+}
+
+extension String {
+  /// The `unicodeNotation` of every Unicode scalar in the string, in order,
+  /// separated by single spaces.
+  var scalarDescriptions: String {
+    var notations: [String] = []
+    for scalar in unicodeScalars {
+      notations.append(scalar.unicodeNotation)
+    }
+    return notations.joined(separator: " ")
+  }
+}
+
+extension Collection {
+  /// Returns every valid index of the collection in ascending order, followed
+  /// by `endIndex`.
+  func allIndices() -> [Index] {
+    var positions = [Index]()
+    positions.reserveCapacity(count + 1)
+    var cursor = startIndex
+    while cursor != endIndex {
+      positions.append(cursor)
+      formIndex(after: &cursor)
+    }
+    positions.append(cursor)
+    return positions
+  }
+}
+
+extension Sequence where Element: Equatable {
+  /// Reports whether the elements of `self` appear, in order, within `other`.
+  /// Both inputs are expected to be monotonic. Every element of `other` can be
+  /// matched at most once, so `self` must not repeat an item more often than
+  /// `other` does.
+  func isMonotonicSubsequence(of other: some Sequence<Element>) -> Bool {
+    var needles = makeIterator()
+    var haystack = other.makeIterator()
+    nextNeedle: while let needle = needles.next() {
+      // Consume `other` up to and including the first element that matches.
+      while let candidate = haystack.next() {
+        if candidate == needle { continue nextNeedle }
+      }
+      // `other` ran out before a match was found.
+      return false
+    }
+    return true
+  }
+
+  /// Reports whether the elements of `self` appear, in order, within `other`.
+  /// Both inputs are expected to be monotonic. A matched element of `other` is
+  /// not consumed, so `self` may contain runs of duplicate elements.
+  func isMonotonicRepeatingSubsequence(of other: some Sequence<Element>) -> Bool {
+    var needles = makeIterator()
+    var haystack = other.makeIterator()
+    var current = haystack.next()
+    while let needle = needles.next() {
+      // Skip forward until `current` matches, leaving it in place on a match
+      // so that a repeated needle can match it again.
+      while let candidate = current, candidate != needle {
+        current = haystack.next()
+      }
+      if current == nil { return false }
+    }
+    return true
+  }
+}
+
+extension String {
+  /// Collects the word boundaries of the string by feeding its Unicode scalars,
+  /// front to back, to one `Unicode._WordRecognizer` instance. Each scalar is
+  /// visited exactly once, which makes this the most efficient of the variants
+  /// in this file. The end index is always the last element of the result.
+  @available(StdlibDeploymentTarget 6.3, *)
+  func fastWordBreaks() -> [String.Index] {
+    var breaks: [String.Index] = []
+    var position = startIndex
+    var recognizer = Unicode._WordRecognizer()
+    // The most recent position the recognizer asked us to remember; it is
+    // reported later if the recognizer confirms a break there.
+    var pending = position
+    while position < endIndex {
+      let (setCandidate, breakAtCandidate, breakHere) =
+        recognizer.hasBreak(before: unicodeScalars[position])
+      if setCandidate { pending = position }
+      if breakAtCandidate { breaks.append(pending) }
+      if breakHere { breaks.append(position) }
+      unicodeScalars.formIndex(after: &position)
+    }
+    if recognizer.hasCandidateBreakAtEnd() { breaks.append(pending) }
+    breaks.append(position)
+    return breaks
+  }
+
+  /// Collects the word boundaries of the string by repeatedly calling
+  /// `_wordIndex(after:)`, without keeping any state between calls. Lookahead
+  /// information is forgotten at each boundary, so some scalars are processed
+  /// twice and this is slower than `fastWordBreaks()`. Both variants are
+  /// supposed to produce the same result.
+  @available(StdlibDeploymentTarget 5.7, *)
+  func slowWordBreaks() -> [String.Index] {
+    var breaks: [String.Index] = []
+    var position = startIndex
+    while position < endIndex {
+      breaks.append(position)
+      position = _wordIndex(after: position)
+    }
+    breaks.append(position)
+    return breaks
+  }
+
+  /// Collects the "safe" word boundaries of the string by feeding its Unicode
+  /// scalars, back to front, to the backwards word recognizer state machine.
+  /// The result is in ascending order; an empty string yields an empty array.
+  ///
+  /// The backwards recognizer is expected to sometimes miss boundaries that
+  /// forward iteration finds, but it must never report a boundary that forward
+  /// iteration does not also report.
+  @available(StdlibDeploymentTarget 6.3, *)
+  func safeWordBreaks() -> [String.Index] {
+    guard !isEmpty else { return [] }
+    let scalars = unicodeScalars
+    // There is always an implicit word break at the end.
+    var breaks: [String.Index] = [endIndex]
+    var upper = scalars.index(before: endIndex)
+    var recognizer =
+      Unicode._RandomAccessWordRecognizer(before: scalars[upper])
+    var pending = upper
+    while upper > startIndex {
+      let lower = scalars.index(before: upper)
+      let verdict = recognizer.hasGuaranteedBreak(after: scalars[lower])
+      if verdict.setCandidate { pending = upper }
+      if verdict.breakAtCandidate { breaks.append(pending) }
+      if verdict.breakHere { breaks.append(upper) }
+      upper = lower
+    }
+    // Boundaries were gathered while walking backwards; flip them into
+    // ascending order.
+    breaks.reverse()
+    return breaks
+  }
+
+  /// Calls `_wordIndex(somewhereAtOrBefore:)` for every scalar position in the
+  /// string, including its end index, and returns the results in order.
+  ///
+  /// The result is expected to be a monotonically increasing subsequence of
+  /// the boundaries found by forward iteration, possibly with repeated items.
+  @available(StdlibDeploymentTarget 6.3, *)
+  func randomAccessWordBreaks() -> [String.Index] {
+    var breaks: [String.Index] = []
+    for position in unicodeScalars.allIndices() {
+      breaks.append(_wordIndex(somewhereAtOrBefore: position))
+    }
+    return breaks
+  }
+}
+
+/// Runs the word breaking consistency checks on every string made of `length`
+/// scalars drawn from `samples`.
+///
+/// For each string, this verifies that:
+/// - `fastWordBreaks()` and `slowWordBreaks()` return the same boundaries;
+/// - `safeWordBreaks()` is a subsequence of the forward boundaries;
+/// - `randomAccessWordBreaks()` is a subsequence of the forward boundaries,
+///   allowing repeated items.
+///
+/// - Parameter length: The number of Unicode scalars in each generated string.
+@available(StdlibDeploymentTarget 6.3, *)
+func check(length: Int) {
+  withEveryArray(of: 0 ..< samples.count, count: length) { vector in
+    let str = string(for: vector)
+
+    let fastBreaks = str.fastWordBreaks()
+    let slowBreaks = str.slowWordBreaks()
+    expectEqual(
+      fastBreaks, slowBreaks,
+      """
+      Inconsistent word boundaries in stateful vs stateless iteration:
+        input: \(str.debugDescription) (\(str.scalarDescriptions))
+      """)
+
+    // Unlike `expectEqual`, `expectTrue` only sees a Boolean, so the messages
+    // below spell out both break lists to make a failure diagnosable.
+    let safeBreaks = str.safeWordBreaks()
+    expectTrue(
+      safeBreaks.isMonotonicSubsequence(of: fastBreaks),
+      """
+      Inconsistent safe word boundaries:
+        input: \(str.debugDescription) (\(str.scalarDescriptions))
+        breaks:      \(fastBreaks)
+        safe breaks: \(safeBreaks)
+      """)
+
+    let randomAccessBreaks = str.randomAccessWordBreaks()
+    expectTrue(
+      randomAccessBreaks.isMonotonicRepeatingSubsequence(of: fastBreaks),
+      """
+      Inconsistent random-access word boundaries:
+        input: \(str.debugDescription) (\(str.scalarDescriptions))
+        breaks:               \(fastBreaks)
+        random-access breaks: \(randomAccessBreaks)
+      """)
+  }
+}
+
+// Register one exhaustive consistency test for each input length from 1
+// through 5 scalars.
+if #available(StdlibDeploymentTarget 6.3, *) {
+  for length in 1 ... 5 {
+    suite.test("Exhaustive consistency checks, length \(length)") {
+      check(length: length)
+    }
+  }
+}