astral-sh · ntBre · Sep 18, 2025 · Aug 23, 2025 · Sep 18, 2025
diff --git a/crates/ruff_linter/resources/test/fixtures/flake8_simplify/SIM905.py b/crates/ruff_linter/resources/test/fixtures/flake8_simplify/SIM905.py
@@ -170,3 +170,4 @@
 # leading/trailing whitespace should not count towards maxsplit
 " a b c d ".split(maxsplit=2)  # ["a", "b", "c d "]
 " a b c d ".rsplit(maxsplit=2)  # [" a b", "c", "d"]
+"a  b".split(maxsplit=1)  # ["a", "b"]
diff --git a/crates/ruff_linter/src/rules/flake8_simplify/rules/split_static_string.rs b/crates/ruff_linter/src/rules/flake8_simplify/rules/split_static_string.rs
@@ -58,8 +58,8 @@ enum Method {
 }
 
 impl Method {
-    fn is_split(self) -> bool {
-        matches!(self, Method::Split)
+    fn is_rsplit(self) -> bool {
+        matches!(self, Method::RSplit)
     }
 }
 
@@ -201,65 +201,19 @@ fn split_default(
     method: Method,
     settings: &LinterSettings,
 ) -> Option<Expr> {
-    // From the Python documentation:
-    // > If sep is not specified or is None, a different splitting algorithm is applied: runs of
-    // > consecutive whitespace are regarded as a single separator, and the result will contain
-    // > no empty strings at the start or end if the string has leading or trailing whitespace.
-    // > Consequently, splitting an empty string or a string consisting of just whitespace with
-    // > a None separator returns [].
-    // https://docs.python.org/3/library/stdtypes.html#str.split
     let string_val = str_value.to_str();
     match max_split.cmp(&0) {
-        Ordering::Greater => {
-            if !is_maxsplit_without_separator_fix_enabled(settings) {
-                return None;
-            }
+        Ordering::Greater if !is_maxsplit_without_separator_fix_enabled(settings) => None,
+        Ordering::Greater | Ordering::Equal => {
             let Ok(max_split) = usize::try_from(max_split) else {
                 return None;
             };
-            let list_items: Vec<&str> = if method.is_split() {
-                string_val
-                    .trim_start_matches(py_unicode_is_whitespace)
-                    .splitn(max_split + 1, py_unicode_is_whitespace)
-                    .filter(|s| !s.is_empty())
-                    .collect()
-            } else {
-                let mut items: Vec<&str> = string_val
-                    .trim_end_matches(py_unicode_is_whitespace)
-                    .rsplitn(max_split + 1, py_unicode_is_whitespace)
-                    .filter(|s| !s.is_empty())
-                    .collect();
-                items.reverse();
-                items
-            };
+            let list_items = split_whitespace_with_maxsplit(string_val, max_split, method);
             Some(construct_replacement(
                 &list_items,
                 str_value.first_literal_flags(),
             ))
         }
-        Ordering::Equal => {
-            // Behavior for maxsplit = 0 when sep is None:
-            // - If the string is empty or all whitespace, result is [].
-            // - Otherwise:
-            //   - " x ".split(maxsplit=0)  -> ['x ']
-            //   - " x ".rsplit(maxsplit=0) -> [' x']
-            //   - "".split(maxsplit=0) -> []
-            //   - " ".split(maxsplit=0) -> []
-            let processed_str = if method.is_split() {
-                string_val.trim_start_matches(py_unicode_is_whitespace)
-            } else {
-                string_val.trim_end_matches(py_unicode_is_whitespace)
-            };
-            let list_items: &[_] = if processed_str.is_empty() {
-                &[]
-            } else {
-                &[processed_str]
-            };
-            Some(construct_replacement(
-                list_items,
-                str_value.first_literal_flags(),
-            ))
-        }
         Ordering::Less => {
             let list_items: Vec<&str> = string_val
                 .split(py_unicode_is_whitespace)
@@ -367,3 +321,107 @@ const fn py_unicode_is_whitespace(ch: char) -> bool {
         | '\u{3000}'
     )
 }
+
+struct WhitespaceMaxSplitIterator<'a> {
+    remaining: &'a str,
+    max_split: usize,
+    splits: usize,
+    method: Method,
+}
+
+impl<'a> WhitespaceMaxSplitIterator<'a> {
+    fn new(s: &'a str, max_split: usize, method: Method) -> Self {
+        let remaining = match method {
+            Method::Split => s.trim_start_matches(py_unicode_is_whitespace),
+            Method::RSplit => s.trim_end_matches(py_unicode_is_whitespace),
+        };
+
+        Self {
+            remaining,
+            max_split,
+            splits: 0,
+            method,
+        }
+    }
+}
+
+impl<'a> Iterator for WhitespaceMaxSplitIterator<'a> {
+    type Item = &'a str;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.remaining.is_empty() {
+            return None;
+        }
+
+        if self.splits >= self.max_split {
+            let result = self.remaining;
+            self.remaining = "";
+            return Some(result);
+        }
+
+        self.splits += 1;
+        match self.method {
+            Method::Split => match self.remaining.split_once(py_unicode_is_whitespace) {
+                Some((s, remaining)) => {
+                    self.remaining = remaining.trim_start_matches(py_unicode_is_whitespace);
+                    Some(s)
+                }
+                None => Some(std::mem::take(&mut self.remaining)),
+            },
+            Method::RSplit => match self.remaining.rsplit_once(py_unicode_is_whitespace) {
+                Some((remaining, s)) => {
+                    self.remaining = remaining.trim_end_matches(py_unicode_is_whitespace);
+                    Some(s)
+                }
+                None => Some(std::mem::take(&mut self.remaining)),
+            },
+        }
+    }
+}
+
+// From the Python documentation:
+// > If sep is not specified or is None, a different splitting algorithm is applied: runs of
+// > consecutive whitespace are regarded as a single separator, and the result will contain
+// > no empty strings at the start or end if the string has leading or trailing whitespace.
+// > Consequently, splitting an empty string or a string consisting of just whitespace with
+// > a None separator returns [].
+// https://docs.python.org/3/library/stdtypes.html#str.split
+fn split_whitespace_with_maxsplit(s: &str, max_split: usize, method: Method) -> Vec<&str> {
+    let mut result: Vec<_> = WhitespaceMaxSplitIterator::new(s, max_split, method).collect();
+    if method.is_rsplit() {
+        result.reverse();
+    }
+    result
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{Method, split_whitespace_with_maxsplit};
+    use test_case::test_case;
+
+    #[test_case("  ", 1, &[])]
+    #[test_case("a  b", 1, &["a", "b"])]
+    #[test_case("a  b", 2, &["a", "b"])]
+    #[test_case(" a b c d ", 2, &["a", "b", "c d "])]
+    #[test_case("  a  b  c  ", 1, &["a", "b  c  "])]
+    #[test_case(" x ", 0, &["x "])]
+    #[test_case(" ", 0, &[])]
+    #[test_case("a\u{3000}b", 1, &["a", "b"])]
+    fn test_split_whitespace_with_maxsplit(s: &str, max_split: usize, expected: &[&str]) {
+        let parts = split_whitespace_with_maxsplit(s, max_split, Method::Split);
+        assert_eq!(parts, expected);
+    }
+
+    #[test_case("  ", 1, &[])]
+    #[test_case("a  b", 1, &["a", "b"])]
+    #[test_case("a  b", 2, &["a", "b"])]
+    #[test_case(" a b c d ", 2, &[" a b", "c", "d"])]
+    #[test_case("  a  b  c  ", 1, &["  a  b", "c"])]
+    #[test_case(" x ", 0, &[" x"])]
+    #[test_case(" ", 0, &[])]
+    #[test_case("a\u{3000}b", 1, &["a", "b"])]
+    fn test_rsplit_whitespace_with_maxsplit(s: &str, max_split: usize, expected: &[&str]) {
+        let parts = split_whitespace_with_maxsplit(s, max_split, Method::RSplit);
+        assert_eq!(parts, expected);
+    }
+}
diff --git a/...ake8_simplify/snapshots/ruff_linter__rules__flake8_simplify__tests__SIM905_SIM905.py.snap b/...ake8_simplify/snapshots/ruff_linter__rules__flake8_simplify__tests__SIM905_SIM905.py.snap
@@ -1372,6 +1372,7 @@ SIM905 Consider using a list literal instead of `str.split`
 171 | " a b c d ".split(maxsplit=2)  # ["a", "b", "c d "]
     | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 172 | " a b c d ".rsplit(maxsplit=2)  # [" a b", "c", "d"]
+173 | "a  b".split(maxsplit=1)  # ["a", "b"]
     |
 help: Replace with list literal
 
@@ -1382,5 +1383,16 @@ SIM905 Consider using a list literal instead of `str.rsplit`
 171 | " a b c d ".split(maxsplit=2)  # ["a", "b", "c d "]
 172 | " a b c d ".rsplit(maxsplit=2)  # [" a b", "c", "d"]
     | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+173 | "a  b".split(maxsplit=1)  # ["a", "b"]
+    |
+help: Replace with list literal
+
+SIM905 Consider using a list literal instead of `str.split`
+   --> SIM905.py:173:1
+    |
+171 | " a b c d ".split(maxsplit=2)  # ["a", "b", "c d "]
+172 | " a b c d ".rsplit(maxsplit=2)  # [" a b", "c", "d"]
+173 | "a  b".split(maxsplit=1)  # ["a", "b"]
+    | ^^^^^^^^^^^^^^^^^^^^^^^^
     |
 help: Replace with list literal
diff --git a/...lify/snapshots/ruff_linter__rules__flake8_simplify__tests__preview__SIM905_SIM905.py.snap b/...lify/snapshots/ruff_linter__rules__flake8_simplify__tests__preview__SIM905_SIM905.py.snap
@@ -1420,6 +1420,7 @@ SIM905 [*] Consider using a list literal instead of `str.split`
 171 | " a b c d ".split(maxsplit=2)  # ["a", "b", "c d "]
     | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 172 | " a b c d ".rsplit(maxsplit=2)  # [" a b", "c", "d"]
+173 | "a  b".split(maxsplit=1)  # ["a", "b"]
     |
 help: Replace with list literal
 168 | print("<\x1c\x1d\x1e\x1f".rsplit(maxsplit=0))
@@ -1428,6 +1429,7 @@ help: Replace with list literal
     - " a b c d ".split(maxsplit=2)  # ["a", "b", "c d "]
 171 + ["a", "b", "c d "]  # ["a", "b", "c d "]
 172 | " a b c d ".rsplit(maxsplit=2)  # [" a b", "c", "d"]
+173 | "a  b".split(maxsplit=1)  # ["a", "b"]
 
 SIM905 [*] Consider using a list literal instead of `str.rsplit`
    --> SIM905.py:172:1
@@ -1436,10 +1438,27 @@ SIM905 [*] Consider using a list literal instead of `str.rsplit`
 171 | " a b c d ".split(maxsplit=2)  # ["a", "b", "c d "]
 172 | " a b c d ".rsplit(maxsplit=2)  # [" a b", "c", "d"]
     | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+173 | "a  b".split(maxsplit=1)  # ["a", "b"]
     |
 help: Replace with list literal
 169 | 
 170 | # leading/trailing whitespace should not count towards maxsplit
 171 | " a b c d ".split(maxsplit=2)  # ["a", "b", "c d "]
     - " a b c d ".rsplit(maxsplit=2)  # [" a b", "c", "d"]
 172 + [" a b", "c", "d"]  # [" a b", "c", "d"]
+173 | "a  b".split(maxsplit=1)  # ["a", "b"]
+
+SIM905 [*] Consider using a list literal instead of `str.split`
+   --> SIM905.py:173:1
+    |
+171 | " a b c d ".split(maxsplit=2)  # ["a", "b", "c d "]
+172 | " a b c d ".rsplit(maxsplit=2)  # [" a b", "c", "d"]
+173 | "a  b".split(maxsplit=1)  # ["a", "b"]
+    | ^^^^^^^^^^^^^^^^^^^^^^^^
+    |
+help: Replace with list literal
+170 | # leading/trailing whitespace should not count towards maxsplit
+171 | " a b c d ".split(maxsplit=2)  # ["a", "b", "c d "]
+172 | " a b c d ".rsplit(maxsplit=2)  # [" a b", "c", "d"]
+    - "a  b".split(maxsplit=1)  # ["a", "b"]
+173 + ["a", "b"]  # ["a", "b"]