diff --git a/Sources/FoundationEssentials/String/String+IO.swift b/Sources/FoundationEssentials/String/String+IO.swift index 4adc11ba4..3513cac7e 100644 --- a/Sources/FoundationEssentials/String/String+IO.swift +++ b/Sources/FoundationEssentials/String/String+IO.swift @@ -18,33 +18,6 @@ internal import _FoundationCShims fileprivate let stringEncodingAttributeName = "com.apple.TextEncoding" -private struct ExtendingToUTF16Sequence<Base: Sequence<UInt8>> : Sequence { - typealias Element = UInt16 - - struct Iterator : IteratorProtocol { - private var base: Base.Iterator - - init(_ base: Base.Iterator) { - self.base = base - } - - mutating func next() -> Element? { - guard let value = base.next() else { return nil } - return UInt16(value) - } - } - - private let base: Base - - init(_ base: Base) { - self.base = base - } - - func makeIterator() -> Iterator { - Iterator(base.makeIterator()) - } -} - @available(macOS 10.10, iOS 8.0, watchOS 2.0, tvOS 9.0, *) extension String { @@ -181,12 +154,9 @@ extension String { } #if !FOUNDATION_FRAMEWORK case .isoLatin1: - guard bytes.allSatisfy(\.isValidISOLatin1) else { - return nil - } - // isoLatin1 is an 8-bit encoding that represents a subset of UTF-16 - // Map to 16-bit values and decode as UTF-16 - self.init(_validating: ExtendingToUTF16Sequence(bytes), as: UTF16.self) + // ISO Latin 1 bytes are always valid since it's an 8-bit encoding that maps scalars 0x0 through 0xFF + // Simply extend each byte to 16 bits and decode as UTF-16 + self.init(decoding: bytes.lazy.map { UInt16($0) }, as: UTF16.self) case .macOSRoman: func buildString(_ bytes: UnsafeBufferPointer<UInt8>) -> String { String(unsafeUninitializedCapacity: bytes.count * 3) { buffer in diff --git a/Sources/FoundationEssentials/String/StringProtocol+Essentials.swift b/Sources/FoundationEssentials/String/StringProtocol+Essentials.swift index 003fc486a..4da60c1a6 100644 --- a/Sources/FoundationEssentials/String/StringProtocol+Essentials.swift +++ 
b/Sources/FoundationEssentials/String/StringProtocol+Essentials.swift @@ -21,12 +21,6 @@ import Darwin internal import _FoundationCShims -extension BinaryInteger { - var isValidISOLatin1: Bool { - (0x20 <= self && self <= 0x7E) || (0xA0 <= self && self <= 0xFF) - } -} - extension UInt8 { private typealias UTF8Representation = (UInt8, UInt8, UInt8) private static func withMacRomanMap<R>(_ body: (UnsafeBufferPointer<UInt8>) -> R) -> R { @@ -228,12 +222,14 @@ extension String { return data + swapped #if !FOUNDATION_FRAMEWORK case .isoLatin1: - return try? Data(capacity: self.utf16.count) { buffer in - for scalar in self.utf16 { - guard scalar.isValidISOLatin1 else { + // ISO Latin 1 encodes code points 0x0 through 0xFF (a maximum of 2 UTF-8 code units per ISO Latin 1 scalar) + // The UTF-8 count is a cheap, reasonable starting capacity as it is precise for the all-ASCII case and it will only overestimate by 1 byte per non-ASCII character + return try? Data(capacity: self.utf8.count) { buffer in + for scalar in self.unicodeScalars { + guard let valid = UInt8(exactly: scalar.value) else { throw CocoaError(.fileWriteInapplicableStringEncoding) } - buffer.appendElement(UInt8(scalar & 0xFF)) + buffer.appendElement(valid) } } case .macOSRoman: diff --git a/Tests/FoundationEssentialsTests/StringTests.swift b/Tests/FoundationEssentialsTests/StringTests.swift index 1daa98b92..81cfa1954 100644 --- a/Tests/FoundationEssentialsTests/StringTests.swift +++ b/Tests/FoundationEssentialsTests/StringTests.swift @@ -1336,7 +1336,9 @@ final class StringTests : XCTestCase { "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "0123456789", "!\"#$%&'()*+,-./", - "¡¶ÅÖæöÿ\u{00A0}~" + "¡¶ÅÖæöÿ\u{0080}\u{00A0}~", + "Hello\nworld", + "Hello\r\nworld" ], invalid: [ "🎺", "מ",