From 536f879c6115a3c29b05393acee02f4fc6ea1c61 Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Fri, 7 Nov 2025 16:04:33 +0000 Subject: [PATCH 1/4] drop `unindent` and `indoc` dependencies --- Cargo.toml | 6 +- src/impl_.rs | 1 + src/impl_/unindent.rs | 228 ++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 7 -- src/macros.rs | 15 +-- tests/test_coroutine.rs | 8 +- 6 files changed, 245 insertions(+), 20 deletions(-) create mode 100644 src/impl_/unindent.rs diff --git a/Cargo.toml b/Cargo.toml index c5628a94d4e..65f04a47b9c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,8 +33,6 @@ pyo3-ffi = { path = "pyo3-ffi", version = "=0.27.1" } # support crates for macros feature pyo3-macros = { path = "pyo3-macros", version = "=0.27.1", optional = true } -indoc = { version = "2.0.1", optional = true } -unindent = { version = "0.2.1", optional = true } # support crate for multiple-pymethods feature inventory = { version = "0.3.5", optional = true } @@ -71,6 +69,8 @@ portable-atomic = "1.0" assert_approx_eq = "1.1.0" chrono = "0.4.25" chrono-tz = ">= 0.10, < 0.11" +# FIXME: should be able to remove this +indoc = { version = "2.0.1" } # Required for "and $N others" normalization trybuild = ">=1.0.70" proptest = { version = "1.0", default-features = false, features = ["std"] } @@ -98,7 +98,7 @@ experimental-async = ["macros", "pyo3-macros/experimental-async"] experimental-inspect = ["pyo3-macros/experimental-inspect"] # Enables macros: #[pyclass], #[pymodule], #[pyfunction] etc. 
-macros = ["pyo3-macros", "indoc", "unindent"] +macros = ["pyo3-macros"] # Enables multiple #[pymethods] per #[pyclass] multiple-pymethods = ["inventory", "pyo3-macros/multiple-pymethods"] diff --git a/src/impl_.rs b/src/impl_.rs index 17c1f08c36b..364f43ca4f8 100644 --- a/src/impl_.rs +++ b/src/impl_.rs @@ -26,4 +26,5 @@ pub mod pymethods; pub mod pymodule; #[doc(hidden)] pub mod trampoline; +pub mod unindent; pub mod wrap; diff --git a/src/impl_/unindent.rs b/src/impl_/unindent.rs new file mode 100644 index 00000000000..6e4cb6900a2 --- /dev/null +++ b/src/impl_/unindent.rs @@ -0,0 +1,228 @@ +use core::panic; + +/// This is a reimplementation of the `indoc` crate's unindent functionality: +/// +/// 1. Count the leading spaces of each line, ignoring the first line and any lines that are empty or contain spaces only. +/// 2. Take the minimum. +/// 3. If the first line is empty i.e. the string begins with a newline, remove the first line. +/// 4. Remove the computed number of spaces from the beginning of each line. 
+const fn unindent_bytes(bytes: &mut [u8]) -> usize { + if bytes.is_empty() { + // nothing to do + return bytes.len(); + } + + // scan for leading spaces (ignoring first line and empty lines) + let mut i = 0; + + // skip first line + i = advance_to_next_line(bytes, i); + + let mut to_unindent = usize::MAX; + + // for remaining lines, count leading spaces + 'lines: while i < bytes.len() { + let line_leading_spaces = count_spaces(bytes, i); + i += line_leading_spaces; + + // line only had spaces, ignore for the count + if let Some(eol) = consume_eol(bytes, i) { + i = eol; + continue 'lines; + } + + // this line has content, consider its leading spaces + if line_leading_spaces < to_unindent { + to_unindent = line_leading_spaces; + } + + i = advance_to_next_line(bytes, i); + } + + if to_unindent == usize::MAX { + // all lines were empty, nothing to unindent + return bytes.len(); + } + + // now copy from the original buffer, bringing values forward as needed + let mut read_idx = 0; + let mut write_idx = 0; + + match consume_eol(bytes, read_idx) { + // skip empty first line + Some(eol) => read_idx = eol, + // copy non-empty first line as-is + None => { + while read_idx < bytes.len() { + let value = bytes[read_idx]; + bytes[write_idx] = value; + read_idx += 1; + write_idx += 1; + if value == b'\n' { + break; + } + } + } + }; + + while read_idx < bytes.len() { + let mut leading_spaces_skipped = 0; + while leading_spaces_skipped < to_unindent + && read_idx < bytes.len() + && bytes[read_idx] == b' ' + { + leading_spaces_skipped += 1; + read_idx += 1; + } + + if leading_spaces_skipped < to_unindent && consume_eol(bytes, read_idx).is_none() { + panic!("removed fewer spaces than expected on non-empty line"); + } + + // copy remainder of line + while read_idx < bytes.len() { + let value = bytes[read_idx]; + bytes[write_idx] = value; + read_idx += 1; + write_idx += 1; + if value == b'\n' { + break; + } + } + } + + write_idx +} + +const fn advance_to_next_line(bytes: &[u8], mut i: 
usize) -> usize { + while i < bytes.len() { + if let Some(eol) = consume_eol(bytes, i) { + return eol; + } + i += 1; + } + i +} + +const fn count_spaces(bytes: &[u8], mut i: usize) -> usize { + let mut count = 0; + while i < bytes.len() && bytes[i] == b' ' { + count += 1; + i += 1; + } + count +} + +const fn consume_eol(bytes: &[u8], i: usize) -> Option { + if bytes.len() == i { + // special case: treat end of buffer as EOL without consuming anything + Some(i) + } else if bytes.len() > i && bytes[i] == b'\n' { + Some(i + 1) + } else if bytes[i] == b'\r' && bytes.len() > i + 1 && bytes[i + 1] == b'\n' { + Some(i + 2) + } else { + None + } +} + +pub const fn unindent_sized(src: &[u8]) -> ([u8; N], usize) { + let mut out: [u8; N] = [0; N]; + out.copy_from_slice(src); + let new_len = unindent_bytes(&mut out); + (out, new_len) +} + +/// Helper for `py_run!` macro which unindents a string at compile time. +#[macro_export] +#[doc(hidden)] +macro_rules! unindent { + ($value:expr) => {{ + const RAW: &str = $value; + const LEN: usize = RAW.len(); + const UNINDENTED: ([u8; LEN], usize) = + $crate::impl_::unindent::unindent_sized::(RAW.as_bytes()); + // SAFETY: this removes only spaces and preserves all other contents + unsafe { ::core::str::from_utf8_unchecked(UNINDENTED.0.split_at(UNINDENTED.1).0) } + }}; +} + +pub use crate::unindent; + +/// Equivalent of the `unindent!` macro, but works at runtime. 
+pub fn unindent(s: &str) -> String { + let mut bytes = s.as_bytes().to_owned(); + let unindented_size = unindent_bytes(&mut bytes); + bytes.resize(unindented_size, 0); + String::from_utf8(bytes).unwrap() +} + +#[cfg(test)] +mod tests { + use super::*; + + const SAMPLE_1_WITH_FIRST_LINE: &str = " first line + line one + + line two + "; + + const UNINDENTED_1: &str = " first line\nline one\n\n line two\n"; + + const SAMPLE_2_EMPTY_FIRST_LINE: &str = " + line one + + line two + "; + const UNINDENTED_2: &str = "line one\n\n line two\n"; + + const SAMPLE_3_NO_INDENT: &str = " +no indent + here"; + + const UNINDENTED_3: &str = "no indent\n here"; + + const ALL_CASES: &[(&str, &str)] = &[ + (SAMPLE_1_WITH_FIRST_LINE, UNINDENTED_1), + (SAMPLE_2_EMPTY_FIRST_LINE, UNINDENTED_2), + (SAMPLE_3_NO_INDENT, UNINDENTED_3), + ]; + + // run const tests for each sample to ensure they work at compile time + + #[test] + fn test_unindent_const() { + const UNINDENTED: &str = unindent!(SAMPLE_1_WITH_FIRST_LINE); + assert_eq!(UNINDENTED, UNINDENTED_1); + } + + #[test] + fn test_unindent_const_removes_empty_first_line() { + const UNINDENTED: &str = unindent!(SAMPLE_2_EMPTY_FIRST_LINE); + assert_eq!(UNINDENTED, UNINDENTED_2); + } + + #[test] + fn test_unindent_const_no_indent() { + const UNINDENTED: &str = unindent!(SAMPLE_3_NO_INDENT); + assert_eq!(UNINDENTED, UNINDENTED_3); + } + + #[test] + fn test_unindent_macro_runtime() { + // this variation on the test ensures full coverage (const eval not included in coverage) + const INDENTED: &str = SAMPLE_1_WITH_FIRST_LINE; + const LEN: usize = INDENTED.len(); + let (unindented, unindented_size) = unindent_sized::(INDENTED.as_bytes()); + let unindented = str::from_utf8(&unindented[..unindented_size]).unwrap(); + assert_eq!(unindented, UNINDENTED_1); + } + + #[test] + fn test_unindent_function() { + for (indented, expected) in ALL_CASES { + let unindented = unindent(indented); + assert_eq!(&unindented, expected); + } + } +} diff --git a/src/lib.rs 
b/src/lib.rs index e62365d4296..35ac1ba1c10 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -398,13 +398,6 @@ pub mod class { } } -#[cfg(feature = "macros")] -#[doc(hidden)] -pub use { - indoc, // Re-exported for py_run - unindent, // Re-exported for py_run -}; - #[cfg(all(feature = "macros", feature = "multiple-pymethods"))] #[doc(hidden)] pub use inventory; // Re-exported for `#[pyclass]` and `#[pymethods]` with `multiple-pymethods`. diff --git a/src/macros.rs b/src/macros.rs index b6d501b9361..311cdf9f434 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -85,17 +85,20 @@ /// ``` #[macro_export] macro_rules! py_run { + // TODO: support c string literals? + // unindent the code at compile time ($py:expr, $($val:ident)+, $code:literal) => {{ - $crate::py_run_impl!($py, $($val)+, $crate::indoc::indoc!($code)) - }}; - ($py:expr, $($val:ident)+, $code:expr) => {{ - $crate::py_run_impl!($py, $($val)+, $crate::unindent::unindent($code)) + $crate::py_run_impl!($py, $($val)+, $crate::impl_::unindent::unindent!($code)) }}; ($py:expr, *$dict:expr, $code:literal) => {{ - $crate::py_run_impl!($py, *$dict, $crate::indoc::indoc!($code)) + $crate::py_run_impl!($py, *$dict, $crate::impl_::unindent::unindent!($code)) + }}; + // unindent the code at runtime, TODO: support C strings somehow? 
+ ($py:expr, $($val:ident)+, $code:expr) => {{ + $crate::py_run_impl!($py, $($val)+, $crate::impl_::unindent::unindent($code)) }}; ($py:expr, *$dict:expr, $code:expr) => {{ - $crate::py_run_impl!($py, *$dict, $crate::unindent::unindent($code)) + $crate::py_run_impl!($py, *$dict, $crate::impl_::unindent::unindent($code)) }}; } diff --git a/tests/test_coroutine.rs b/tests/test_coroutine.rs index 24e007ab5a3..d0e89a716ec 100644 --- a/tests/test_coroutine.rs +++ b/tests/test_coroutine.rs @@ -22,7 +22,7 @@ fn handle_windows(test: &str) -> String { if sys.platform == "win32": asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) "#; - pyo3::unindent::unindent(set_event_loop_policy) + &pyo3::unindent::unindent(test) + pyo3::impl_::unindent::unindent(set_event_loop_policy) + &pyo3::impl_::unindent::unindent(test) } #[test] @@ -149,7 +149,7 @@ fn cancelled_coroutine() { globals.set_item("sleep", sleep).unwrap(); let err = py .run( - &CString::new(pyo3::unindent::unindent(&handle_windows(test))).unwrap(), + &CString::new(pyo3::impl_::unindent::unindent(&handle_windows(test))).unwrap(), Some(&globals), None, ) @@ -189,7 +189,7 @@ fn coroutine_cancel_handle() { .set_item("cancellable_sleep", cancellable_sleep) .unwrap(); py.run( - &CString::new(pyo3::unindent::unindent(&handle_windows(test))).unwrap(), + &CString::new(pyo3::impl_::unindent::unindent(&handle_windows(test))).unwrap(), Some(&globals), None, ) @@ -219,7 +219,7 @@ fn coroutine_is_cancelled() { let globals = PyDict::new(py); globals.set_item("sleep_loop", sleep_loop).unwrap(); py.run( - &CString::new(pyo3::unindent::unindent(&handle_windows(test))).unwrap(), + &CString::new(pyo3::impl_::unindent::unindent(&handle_windows(test))).unwrap(), Some(&globals), None, ) From 985aa04164148c5e7df0a047d88602669499109f Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Sat, 8 Nov 2025 17:53:00 +0000 Subject: [PATCH 2/4] clippy, msrv, remove indoc fully, newsfragment --- Cargo.toml | 2 -- 
newsfragments/5608.packaging.md | 1 + src/conversions/num_bigint.rs | 17 ++++++++--------- src/impl_/concat.rs | 2 +- src/impl_/unindent.rs | 11 ++++++----- src/macros.rs | 8 ++++++-- tests/test_class_new.rs | 4 ++-- tests/test_proto_methods.rs | 4 ++-- 8 files changed, 26 insertions(+), 23 deletions(-) create mode 100644 newsfragments/5608.packaging.md diff --git a/Cargo.toml b/Cargo.toml index 65f04a47b9c..1a7588b5f95 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -69,8 +69,6 @@ portable-atomic = "1.0" assert_approx_eq = "1.1.0" chrono = "0.4.25" chrono-tz = ">= 0.10, < 0.11" -# FIXME: should be able to remove this -indoc = { version = "2.0.1" } # Required for "and $N others" normalization trybuild = ">=1.0.70" proptest = { version = "1.0", default-features = false, features = ["std"] } diff --git a/newsfragments/5608.packaging.md b/newsfragments/5608.packaging.md new file mode 100644 index 00000000000..e18750a764e --- /dev/null +++ b/newsfragments/5608.packaging.md @@ -0,0 +1 @@ +Drop `indoc` and `unindent` dependencies. 
diff --git a/src/conversions/num_bigint.rs b/src/conversions/num_bigint.rs index 3b8ed209529..38945cd869b 100644 --- a/src/conversions/num_bigint.rs +++ b/src/conversions/num_bigint.rs @@ -328,7 +328,6 @@ mod tests { use crate::exceptions::PyTypeError; use crate::test_utils::generate_unique_module_name; use crate::types::{PyAnyMethods as _, PyDict, PyModule}; - use indoc::indoc; use pyo3_ffi::c_str; fn rust_fib() -> impl Iterator @@ -390,15 +389,15 @@ mod tests { } fn python_index_class(py: Python<'_>) -> Bound<'_, PyModule> { - let index_code = c_str!(indoc!( + let index_code = c_str!( r#" - class C: - def __init__(self, x): - self.x = x - def __index__(self): - return self.x - "# - )); +class C: + def __init__(self, x): + self.x = x + def __index__(self): + return self.x +"# + ); PyModule::from_code( py, index_code, diff --git a/src/impl_/concat.rs b/src/impl_/concat.rs index 9f2c0e33c17..fbd244f9bbc 100644 --- a/src/impl_/concat.rs +++ b/src/impl_/concat.rs @@ -37,7 +37,7 @@ pub const fn combine_to_array(pieces: &[&[u8]]) -> [u8; LEN] { } /// Replacement for `slice::copy_from_slice`, which is const from 1.87 -const fn slice_copy_from_slice(out: &mut [u8], src: &[u8]) { +pub(crate) const fn slice_copy_from_slice(out: &mut [u8], src: &[u8]) { let mut i = 0; while i < src.len() { out[i] = src[i]; diff --git a/src/impl_/unindent.rs b/src/impl_/unindent.rs index 6e4cb6900a2..25766ddebb4 100644 --- a/src/impl_/unindent.rs +++ b/src/impl_/unindent.rs @@ -1,4 +1,4 @@ -use core::panic; +use crate::impl_::concat::slice_copy_from_slice; /// This is a reimplementation of the `indoc` crate's unindent functionality: /// @@ -75,9 +75,10 @@ const fn unindent_bytes(bytes: &mut [u8]) -> usize { read_idx += 1; } - if leading_spaces_skipped < to_unindent && consume_eol(bytes, read_idx).is_none() { - panic!("removed fewer spaces than expected on non-empty line"); - } + assert!( + leading_spaces_skipped == to_unindent || consume_eol(bytes, read_idx).is_some(), + "removed fewer spaces 
than expected on non-empty line" + ); // copy remainder of line while read_idx < bytes.len() { @@ -128,7 +129,7 @@ const fn consume_eol(bytes: &[u8], i: usize) -> Option { pub const fn unindent_sized(src: &[u8]) -> ([u8; N], usize) { let mut out: [u8; N] = [0; N]; - out.copy_from_slice(src); + slice_copy_from_slice(&mut out, src); let new_len = unindent_bytes(&mut out); (out, new_len) } diff --git a/src/macros.rs b/src/macros.rs index 311cdf9f434..1f21fcd6718 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -85,7 +85,6 @@ /// ``` #[macro_export] macro_rules! py_run { - // TODO: support c string literals? // unindent the code at compile time ($py:expr, $($val:ident)+, $code:literal) => {{ $crate::py_run_impl!($py, $($val)+, $crate::impl_::unindent::unindent!($code)) @@ -93,7 +92,7 @@ macro_rules! py_run { ($py:expr, *$dict:expr, $code:literal) => {{ $crate::py_run_impl!($py, *$dict, $crate::impl_::unindent::unindent!($code)) }}; - // unindent the code at runtime, TODO: support C strings somehow? + // unindent the code at runtime ($py:expr, $($val:ident)+, $code:expr) => {{ $crate::py_run_impl!($py, $($val)+, $crate::impl_::unindent::unindent($code)) }}; @@ -102,6 +101,11 @@ macro_rules! py_run { }}; } +/// Internal implementation of the `py_run!` macro. +/// +/// FIXME: this currently unconditionally allocates a `CString`. We should consider making this not so: +/// - Maybe require users to pass `&CStr` / `CString`? +/// - Maybe adjust the `unindent` code to produce `&CStr` / `CString`? #[macro_export] #[doc(hidden)] macro_rules! 
py_run_impl { diff --git a/tests/test_class_new.rs b/tests/test_class_new.rs index 6a35e681500..945df02b6a6 100644 --- a/tests/test_class_new.rs +++ b/tests/test_class_new.rs @@ -156,7 +156,7 @@ impl SuperClass { fn subclass_new() { Python::attach(|py| { let super_cls = py.get_type::(); - let source = pyo3_ffi::c_str!(pyo3::indoc::indoc!( + let source = pyo3_ffi::c_str!( r#" class Class(SuperClass): def __new__(cls): @@ -168,7 +168,7 @@ class Class(SuperClass): c = Class() assert c.from_rust is False "# - )); + ); let globals = PyModule::import(py, "__main__").unwrap().dict(); globals.set_item("SuperClass", super_cls).unwrap(); py.run(source, Some(&globals), None) diff --git a/tests/test_proto_methods.rs b/tests/test_proto_methods.rs index c3112c21d91..a44025cb45e 100644 --- a/tests/test_proto_methods.rs +++ b/tests/test_proto_methods.rs @@ -790,7 +790,7 @@ impl DescrCounter { fn descr_getset() { Python::attach(|py| { let counter = py.get_type::(); - let source = pyo3_ffi::c_str!(indoc::indoc!( + let source = pyo3_ffi::c_str!( r#" class Class: counter = Counter() @@ -814,7 +814,7 @@ assert c.counter.count == 4 del c.counter assert c.counter.count == 1 "# - )); + ); let globals = PyModule::import(py, "__main__").unwrap().dict(); globals.set_item("Counter", counter).unwrap(); py.run(source, Some(&globals), None) From 80f8d06b6118dbd9c290edb1d7b423f13b18f561 Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Sat, 8 Nov 2025 18:23:23 +0000 Subject: [PATCH 3/4] msrv --- src/impl_/unindent.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/impl_/unindent.rs b/src/impl_/unindent.rs index 25766ddebb4..2be83021300 100644 --- a/src/impl_/unindent.rs +++ b/src/impl_/unindent.rs @@ -215,7 +215,7 @@ no indent const INDENTED: &str = SAMPLE_1_WITH_FIRST_LINE; const LEN: usize = INDENTED.len(); let (unindented, unindented_size) = unindent_sized::(INDENTED.as_bytes()); - let unindented = str::from_utf8(&unindented[..unindented_size]).unwrap(); + let unindented 
= std::str::from_utf8(&unindented[..unindented_size]).unwrap(); assert_eq!(unindented, UNINDENTED_1); } From 6f86c1ea9f28eaaaacf7818dc83303c4b14e59e3 Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Tue, 18 Nov 2025 11:53:26 +0000 Subject: [PATCH 4/4] refactor `unindent_bytes` function --- src/impl_/unindent.rs | 143 ++++++++++++++++++++++++------------------ 1 file changed, 82 insertions(+), 61 deletions(-) diff --git a/src/impl_/unindent.rs b/src/impl_/unindent.rs index 2be83021300..ed35cf3ba6f 100644 --- a/src/impl_/unindent.rs +++ b/src/impl_/unindent.rs @@ -7,92 +7,86 @@ use crate::impl_::concat::slice_copy_from_slice; /// 3. If the first line is empty i.e. the string begins with a newline, remove the first line. /// 4. Remove the computed number of spaces from the beginning of each line. const fn unindent_bytes(bytes: &mut [u8]) -> usize { - if bytes.is_empty() { - // nothing to do + // (1) + (2) - count leading spaces, take the minimum + let Some(to_unindent) = get_minimum_leading_spaces(bytes) else { + // all lines were empty, nothing to unindent return bytes.len(); + }; + + // now copy from the original buffer, bringing values forward as needed + let mut read_idx = 0; + let mut write_idx = 0; + + // (3) - remove first line if it is empty + match consume_eol(bytes, read_idx) { + // skip empty first line + Some(eol) => read_idx = eol, + // copy non-empty first line as-is + None => { + (read_idx, write_idx) = copy_forward_until_eol(bytes, read_idx, write_idx); + } + }; + + // (4) - unindent remaining lines + while read_idx < bytes.len() { + let leading_spaces = count_spaces(bytes, read_idx); + + if leading_spaces < to_unindent { + read_idx += leading_spaces; + assert!( + consume_eol(bytes, read_idx).is_some(), + "removed fewer spaces than expected on non-empty line" + ); + } else { + // leading_spaces may be equal to or larger than to_unindent, only need to unindent + // the required amount, additional indentation is meaningful + read_idx += to_unindent; + } 
+ + // copy remainder of line + (read_idx, write_idx) = copy_forward_until_eol(bytes, read_idx, write_idx); } + write_idx +} + +/// Counts the minimum leading spaces of all non-empty lines except the first line. +/// +/// Returns `None` if there are no non-empty lines except the first line. +const fn get_minimum_leading_spaces(bytes: &[u8]) -> Option { // scan for leading spaces (ignoring first line and empty lines) let mut i = 0; // skip first line i = advance_to_next_line(bytes, i); - let mut to_unindent = usize::MAX; + let mut to_unindent = None; // for remaining lines, count leading spaces - 'lines: while i < bytes.len() { + while i < bytes.len() { let line_leading_spaces = count_spaces(bytes, i); i += line_leading_spaces; // line only had spaces, ignore for the count if let Some(eol) = consume_eol(bytes, i) { i = eol; - continue 'lines; + continue; } // this line has content, consider its leading spaces - if line_leading_spaces < to_unindent { - to_unindent = line_leading_spaces; - } - - i = advance_to_next_line(bytes, i); - } - - if to_unindent == usize::MAX { - // all lines were empty, nothing to unindent - return bytes.len(); - } - - // now copy from the original buffer, bringing values forward as needed - let mut read_idx = 0; - let mut write_idx = 0; - - match consume_eol(bytes, read_idx) { - // skip empty first line - Some(eol) => read_idx = eol, - // copy non-empty first line as-is - None => { - while read_idx < bytes.len() { - let value = bytes[read_idx]; - bytes[write_idx] = value; - read_idx += 1; - write_idx += 1; - if value == b'\n' { - break; - } + if let Some(current) = to_unindent { + // .unwrap_or(usize::MAX) not available in const fn + if line_leading_spaces < current { + to_unindent = Some(line_leading_spaces); } + } else { + to_unindent = Some(line_leading_spaces); } - }; - - while read_idx < bytes.len() { - let mut leading_spaces_skipped = 0; - while leading_spaces_skipped < to_unindent - && read_idx < bytes.len() - && bytes[read_idx] == b' 
' - { - leading_spaces_skipped += 1; - read_idx += 1; - } - - assert!( - leading_spaces_skipped == to_unindent || consume_eol(bytes, read_idx).is_some(), - "removed fewer spaces than expected on non-empty line" - ); - // copy remainder of line - while read_idx < bytes.len() { - let value = bytes[read_idx]; - bytes[write_idx] = value; - read_idx += 1; - write_idx += 1; - if value == b'\n' { - break; - } - } + i = advance_to_next_line(bytes, i); } - write_idx + to_unindent } const fn advance_to_next_line(bytes: &[u8], mut i: usize) -> usize { @@ -105,6 +99,27 @@ const fn advance_to_next_line(bytes: &[u8], mut i: usize) -> usize { i } +/// Brings elements in `bytes` forward until `\n` (inclusive) or end of `bytes`. +/// +/// `read_idx` must be greater than or equal to `write_idx`. +const fn copy_forward_until_eol( + bytes: &mut [u8], + mut read_idx: usize, + mut write_idx: usize, +) -> (usize, usize) { + assert!(read_idx >= write_idx); + while read_idx < bytes.len() { + let value = bytes[read_idx]; + bytes[write_idx] = value; + read_idx += 1; + write_idx += 1; + if value == b'\n' { + break; + } + } + (read_idx, write_idx) +} + const fn count_spaces(bytes: &[u8], mut i: usize) -> usize { let mut count = 0; while i < bytes.len() && bytes[i] == b' ' { @@ -183,10 +198,16 @@ no indent const UNINDENTED_3: &str = "no indent\n here"; + const SAMPLE_4_NOOP: &str = "no indent\nhere\n but here"; + + const SAMPLE_5_EMPTY: &str = " \n \n"; + const ALL_CASES: &[(&str, &str)] = &[ (SAMPLE_1_WITH_FIRST_LINE, UNINDENTED_1), (SAMPLE_2_EMPTY_FIRST_LINE, UNINDENTED_2), (SAMPLE_3_NO_INDENT, UNINDENTED_3), + (SAMPLE_4_NOOP, SAMPLE_4_NOOP), + (SAMPLE_5_EMPTY, SAMPLE_5_EMPTY), ]; // run const tests for each sample to ensure they work at compile time