@@ -312,13 +312,14 @@ public class WhitespaceLinter {
312312 formattedRun: ArraySlice < UTF8 . CodeUnit >
313313 ) {
314314 guard userRun != formattedRun else { return }
315-
315+ let userString = String ( decoding: userRun, as: UTF8 . self)
316+ let formattedString = String ( decoding: formattedRun, as: UTF8 . self)
316317 // This assumes tabs will always be forbidden for inter-token spacing (but not for leading
317318 // indentation).
318319 if userRun. contains ( utf8Tab) {
319320 diagnose ( . spacingCharError, category: . spacingCharacter, utf8Offset: userIndex)
320- } else if formattedRun . count != userRun . count {
321- let delta = formattedRun . count - userRun . count
321+ } else if formattedString . count != userString . count {
322+ let delta = formattedString . count - userString . count
322323 diagnose ( . spacingError( delta) , category: . spacing, utf8Offset: userIndex)
323324 }
324325 }
@@ -339,20 +340,26 @@ public class WhitespaceLinter {
339340 startingAt offset: Int ,
340341 in data: [ UTF8 . CodeUnit ]
341342 ) -> ArraySlice < UTF8 . CodeUnit > {
342- func isWhitespace( _ char: UTF8 . CodeUnit ) -> Bool {
343- switch char {
344- case UInt8 ( ascii: " " ) , UInt8 ( ascii: " \n " ) , UInt8 ( ascii: " \t " ) , UInt8 ( ascii: " \r " ) , /*VT*/ 0x0B , /*FF*/ 0x0C :
345- return true
343+ var currentIndex = offset
344+ while currentIndex < data. count {
345+ if let unicodeException = UnicodeWhitespace . allCases. first ( where: { exception in
346+ let bytes = exception. utf8Bytes
347+ return currentIndex + bytes. count <= data. count
348+ && data [ currentIndex..< currentIndex + bytes. count] . elementsEqual ( bytes)
349+ } ) {
350+ currentIndex += unicodeException. utf8Bytes. count
351+ continue
352+ }
353+
354+ switch data [ currentIndex] {
355+ case UInt8 ( ascii: " " ) , UInt8 ( ascii: " \n " ) , UInt8 ( ascii: " \t " ) , UInt8 ( ascii: " \r " ) ,
356+ /*VT*/ 0x0B , /*FF*/ 0x0C :
357+ currentIndex += 1
346358 default :
347- return false
359+ return data [ offset ..< currentIndex ]
348360 }
349361 }
350- guard
351- let whitespaceEnd = data [ offset... ] . firstIndex ( where: { !isWhitespace( $0) } )
352- else {
353- return data [ offset..< data. endIndex]
354- }
355- return data [ offset..< whitespaceEnd]
362+ return data [ offset..< currentIndex]
356363 }
357364
358365 /// Returns the code unit at the given index, or nil if the index is the end of the data.
@@ -412,6 +419,22 @@ public class WhitespaceLinter {
412419 }
413420}
414421
/// Whitespace code points outside the ASCII range, each of which occupies more than one
/// UTF-8 code unit and therefore has to be matched as a byte sequence rather than as a
/// single byte.
private enum UnicodeWhitespace: CaseIterable {
  /// U+2028 LINE SEPARATOR.
  case u2028
  /// U+2029 PARAGRAPH SEPARATOR.
  case u2029

  /// The UTF-8 encoding of this code point.
  ///
  /// Both code points encode to three bytes that share the lead bytes `E2 80` and differ
  /// only in the final continuation byte.
  var utf8Bytes: [UTF8.CodeUnit] {
    let trailingByte: UTF8.CodeUnit
    switch self {
    case .u2028: trailingByte = 0xA8
    case .u2029: trailingByte = 0xA9
    }
    return [0xE2, 0x80, trailingByte]
  }
}
437+
415438/// Describes the composition of the whitespace that creates an indentation for a line of code.
416439public enum WhitespaceIndentation : Equatable {
417440 /// The line has no preceding whitespace, meaning there's no indentation.
0 commit comments