diff --git a/Cargo.toml b/Cargo.toml index c5628a94d4e..1a7588b5f95 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,8 +33,6 @@ pyo3-ffi = { path = "pyo3-ffi", version = "=0.27.1" } # support crates for macros feature pyo3-macros = { path = "pyo3-macros", version = "=0.27.1", optional = true } -indoc = { version = "2.0.1", optional = true } -unindent = { version = "0.2.1", optional = true } # support crate for multiple-pymethods feature inventory = { version = "0.3.5", optional = true } @@ -98,7 +96,7 @@ experimental-async = ["macros", "pyo3-macros/experimental-async"] experimental-inspect = ["pyo3-macros/experimental-inspect"] # Enables macros: #[pyclass], #[pymodule], #[pyfunction] etc. -macros = ["pyo3-macros", "indoc", "unindent"] +macros = ["pyo3-macros"] # Enables multiple #[pymethods] per #[pyclass] multiple-pymethods = ["inventory", "pyo3-macros/multiple-pymethods"] diff --git a/newsfragments/5608.packaging.md b/newsfragments/5608.packaging.md new file mode 100644 index 00000000000..e18750a764e --- /dev/null +++ b/newsfragments/5608.packaging.md @@ -0,0 +1 @@ +Drop `indoc` and `unindent` dependencies. diff --git a/src/conversions/num_bigint.rs b/src/conversions/num_bigint.rs index 3b8ed209529..38945cd869b 100644 --- a/src/conversions/num_bigint.rs +++ b/src/conversions/num_bigint.rs @@ -328,7 +328,6 @@ mod tests { use crate::exceptions::PyTypeError; use crate::test_utils::generate_unique_module_name; use crate::types::{PyAnyMethods as _, PyDict, PyModule}; - use indoc::indoc; use pyo3_ffi::c_str; fn rust_fib() -> impl Iterator @@ -390,15 +389,15 @@ mod tests { } fn python_index_class(py: Python<'_>) -> Bound<'_, PyModule> { - let index_code = c_str!(indoc!( + let index_code = c_str!( r#" - class C: - def __init__(self, x): - self.x = x - def __index__(self): - return self.x - "# - )); +class C: + def __init__(self, x): + self.x = x + def __index__(self): + return self.x +"# + ); PyModule::from_code( py, index_code, diff --git a/src/impl_.rs b/src/impl_.rs index 17c1f08c36b..364f43ca4f8 100644 --- a/src/impl_.rs +++ b/src/impl_.rs @@ -26,4 +26,5 @@ pub mod pymethods; pub mod pymodule; #[doc(hidden)] pub mod trampoline; +pub mod unindent; pub mod wrap; diff --git a/src/impl_/concat.rs b/src/impl_/concat.rs index 9f2c0e33c17..fbd244f9bbc 100644 --- a/src/impl_/concat.rs +++ b/src/impl_/concat.rs @@ -37,7 +37,7 @@ pub const fn combine_to_array(pieces: &[&[u8]]) -> [u8; LEN] { } /// Replacement for `slice::copy_from_slice`, which is const from 1.87 -const fn slice_copy_from_slice(out: &mut [u8], src: &[u8]) { +pub(crate) const fn slice_copy_from_slice(out: &mut [u8], src: &[u8]) { let mut i = 0; while i < src.len() { out[i] = src[i]; diff --git a/src/impl_/unindent.rs b/src/impl_/unindent.rs new file mode 100644 index 00000000000..ed35cf3ba6f --- /dev/null +++ b/src/impl_/unindent.rs @@ -0,0 +1,250 @@ +use crate::impl_::concat::slice_copy_from_slice; + +/// This is a reimplementation of the `indoc` crate's unindent functionality: +/// +/// 1. Count the leading spaces of each line, ignoring the first line and any lines that are empty or contain spaces only. +/// 2. Take the minimum. +/// 3. If the first line is empty i.e. the string begins with a newline, remove the first line. +/// 4. Remove the computed number of spaces from the beginning of each line. +const fn unindent_bytes(bytes: &mut [u8]) -> usize { + // (1) + (2) - count leading spaces, take the minimum + let Some(to_unindent) = get_minimum_leading_spaces(bytes) else { + // all lines were empty, nothing to unindent + return bytes.len(); + }; + + // now copy from the original buffer, bringing values forward as needed + let mut read_idx = 0; + let mut write_idx = 0; + + // (3) - remove first line if it is empty + match consume_eol(bytes, read_idx) { + // skip empty first line + Some(eol) => read_idx = eol, + // copy non-empty first line as-is + None => { + (read_idx, write_idx) = copy_forward_until_eol(bytes, read_idx, write_idx); + } + }; + + // (4) - unindent remaining lines + while read_idx < bytes.len() { + let leading_spaces = count_spaces(bytes, read_idx); + + if leading_spaces < to_unindent { + read_idx += leading_spaces; + assert!( + consume_eol(bytes, read_idx).is_some(), + "removed fewer spaces than expected on non-empty line" + ); + } else { + // leading_spaces may be equal to or larger than to_unindent, only need to unindent + // the required amount, additional indentation is meaningful + read_idx += to_unindent; + } + + // copy remainder of line + (read_idx, write_idx) = copy_forward_until_eol(bytes, read_idx, write_idx); + } + + write_idx +} + +/// Counts the minimum leading spaces of all non-empty lines except the first line. +/// +/// Returns `None` if there are no non-empty lines except the first line. +const fn get_minimum_leading_spaces(bytes: &[u8]) -> Option { + // scan for leading spaces (ignoring first line and empty lines) + let mut i = 0; + + // skip first line + i = advance_to_next_line(bytes, i); + + let mut to_unindent = None; + + // for remaining lines, count leading spaces + while i < bytes.len() { + let line_leading_spaces = count_spaces(bytes, i); + i += line_leading_spaces; + + // line only had spaces, ignore for the count + if let Some(eol) = consume_eol(bytes, i) { + i = eol; + continue; + } + + // this line has content, consider its leading spaces + if let Some(current) = to_unindent { + // .unwrap_or(usize::MAX) not available in const fn + if line_leading_spaces < current { + to_unindent = Some(line_leading_spaces); + } + } else { + to_unindent = Some(line_leading_spaces); + } + + i = advance_to_next_line(bytes, i); + } + + to_unindent +} + +const fn advance_to_next_line(bytes: &[u8], mut i: usize) -> usize { + while i < bytes.len() { + if let Some(eol) = consume_eol(bytes, i) { + return eol; + } + i += 1; + } + i +} + +/// Brings elements in `bytes` forward until `\n` (inclusive) or end of `source`. +/// +/// `read_idx` must be greater than or equal to `write_idx`. +const fn copy_forward_until_eol( + bytes: &mut [u8], + mut read_idx: usize, + mut write_idx: usize, +) -> (usize, usize) { + assert!(read_idx >= write_idx); + while read_idx < bytes.len() { + let value = bytes[read_idx]; + bytes[write_idx] = value; + read_idx += 1; + write_idx += 1; + if value == b'\n' { + break; + } + } + (read_idx, write_idx) +} + +const fn count_spaces(bytes: &[u8], mut i: usize) -> usize { + let mut count = 0; + while i < bytes.len() && bytes[i] == b' ' { + count += 1; + i += 1; + } + count +} + +const fn consume_eol(bytes: &[u8], i: usize) -> Option { + if bytes.len() == i { + // special case: treat end of buffer as EOL without consuming anything + Some(i) + } else if bytes.len() > i && bytes[i] == b'\n' { + Some(i + 1) + } else if bytes[i] == b'\r' && bytes.len() > i + 1 && bytes[i + 1] == b'\n' { + Some(i + 2) + } else { + None + } +} + +pub const fn unindent_sized(src: &[u8]) -> ([u8; N], usize) { + let mut out: [u8; N] = [0; N]; + slice_copy_from_slice(&mut out, src); + let new_len = unindent_bytes(&mut out); + (out, new_len) +} + +/// Helper for `py_run!` macro which unindents a string at compile time. +#[macro_export] +#[doc(hidden)] +macro_rules! unindent { + ($value:expr) => {{ + const RAW: &str = $value; + const LEN: usize = RAW.len(); + const UNINDENTED: ([u8; LEN], usize) = + $crate::impl_::unindent::unindent_sized::(RAW.as_bytes()); + // SAFETY: this removes only spaces and preserves all other contents + unsafe { ::core::str::from_utf8_unchecked(UNINDENTED.0.split_at(UNINDENTED.1).0) } + }}; +} + +pub use crate::unindent; + +/// Equivalent of the `unindent!` macro, but works at runtime. +pub fn unindent(s: &str) -> String { + let mut bytes = s.as_bytes().to_owned(); + let unindented_size = unindent_bytes(&mut bytes); + bytes.resize(unindented_size, 0); + String::from_utf8(bytes).unwrap() +} + +#[cfg(test)] +mod tests { + use super::*; + + const SAMPLE_1_WITH_FIRST_LINE: &str = " first line + line one + + line two + "; + + const UNINDENTED_1: &str = " first line\nline one\n\n line two\n"; + + const SAMPLE_2_EMPTY_FIRST_LINE: &str = " + line one + + line two + "; + const UNINDENTED_2: &str = "line one\n\n line two\n"; + + const SAMPLE_3_NO_INDENT: &str = " +no indent + here"; + + const UNINDENTED_3: &str = "no indent\n here"; + + const SAMPLE_4_NOOP: &str = "no indent\nhere\n but here"; + + const SAMPLE_5_EMPTY: &str = " \n \n"; + + const ALL_CASES: &[(&str, &str)] = &[ + (SAMPLE_1_WITH_FIRST_LINE, UNINDENTED_1), + (SAMPLE_2_EMPTY_FIRST_LINE, UNINDENTED_2), + (SAMPLE_3_NO_INDENT, UNINDENTED_3), + (SAMPLE_4_NOOP, SAMPLE_4_NOOP), + (SAMPLE_5_EMPTY, SAMPLE_5_EMPTY), + ]; + + // run const tests for each sample to ensure they work at compile time + + #[test] + fn test_unindent_const() { + const UNINDENTED: &str = unindent!(SAMPLE_1_WITH_FIRST_LINE); + assert_eq!(UNINDENTED, UNINDENTED_1); + } + + #[test] + fn test_unindent_const_removes_empty_first_line() { + const UNINDENTED: &str = unindent!(SAMPLE_2_EMPTY_FIRST_LINE); + assert_eq!(UNINDENTED, UNINDENTED_2); + } + + #[test] + fn test_unindent_const_no_indent() { + const UNINDENTED: &str = unindent!(SAMPLE_3_NO_INDENT); + assert_eq!(UNINDENTED, UNINDENTED_3); + } + + #[test] + fn test_unindent_macro_runtime() { + // this variation on the test ensures full coverage (const eval not included in coverage) + const INDENTED: &str = SAMPLE_1_WITH_FIRST_LINE; + const LEN: usize = INDENTED.len(); + let (unindented, unindented_size) = unindent_sized::(INDENTED.as_bytes()); + let unindented = std::str::from_utf8(&unindented[..unindented_size]).unwrap(); + assert_eq!(unindented, UNINDENTED_1); + } + + #[test] + fn test_unindent_function() { + for (indented, expected) in ALL_CASES { + let unindented = unindent(indented); + assert_eq!(&unindented, expected); + } + } +} diff --git a/src/lib.rs b/src/lib.rs index e62365d4296..35ac1ba1c10 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -398,13 +398,6 @@ pub mod class { } } -#[cfg(feature = "macros")] -#[doc(hidden)] -pub use { - indoc, // Re-exported for py_run - unindent, // Re-exported for py_run -}; - #[cfg(all(feature = "macros", feature = "multiple-pymethods"))] #[doc(hidden)] pub use inventory; // Re-exported for `#[pyclass]` and `#[pymethods]` with `multiple-pymethods`. diff --git a/src/macros.rs b/src/macros.rs index b6d501b9361..1f21fcd6718 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -85,20 +85,27 @@ /// ``` #[macro_export] macro_rules! py_run { + // unindent the code at compile time ($py:expr, $($val:ident)+, $code:literal) => {{ - $crate::py_run_impl!($py, $($val)+, $crate::indoc::indoc!($code)) - }}; - ($py:expr, $($val:ident)+, $code:expr) => {{ - $crate::py_run_impl!($py, $($val)+, $crate::unindent::unindent($code)) + $crate::py_run_impl!($py, $($val)+, $crate::impl_::unindent::unindent!($code)) }}; ($py:expr, *$dict:expr, $code:literal) => {{ - $crate::py_run_impl!($py, *$dict, $crate::indoc::indoc!($code)) + $crate::py_run_impl!($py, *$dict, $crate::impl_::unindent::unindent!($code)) + }}; + // unindent the code at runtime + ($py:expr, $($val:ident)+, $code:expr) => {{ + $crate::py_run_impl!($py, $($val)+, $crate::impl_::unindent::unindent($code)) }}; ($py:expr, *$dict:expr, $code:expr) => {{ - $crate::py_run_impl!($py, *$dict, $crate::unindent::unindent($code)) + $crate::py_run_impl!($py, *$dict, $crate::impl_::unindent::unindent($code)) }}; } +/// Internal implementation of the `py_run!` macro. +/// +/// FIXME: this currently unconditionally allocates a `CString`. We should consider making this not so: +/// - Maybe require users to pass `&CStr` / `CString`? +/// - Maybe adjust the `unindent` code to produce `&Cstr` / `Cstring`? #[macro_export] #[doc(hidden)] macro_rules! py_run_impl { diff --git a/tests/test_class_new.rs b/tests/test_class_new.rs index 6a35e681500..945df02b6a6 100644 --- a/tests/test_class_new.rs +++ b/tests/test_class_new.rs @@ -156,7 +156,7 @@ impl SuperClass { fn subclass_new() { Python::attach(|py| { let super_cls = py.get_type::(); - let source = pyo3_ffi::c_str!(pyo3::indoc::indoc!( + let source = pyo3_ffi::c_str!( r#" class Class(SuperClass): def __new__(cls): @@ -168,7 +168,7 @@ class Class(SuperClass): c = Class() assert c.from_rust is False "# - )); + ); let globals = PyModule::import(py, "__main__").unwrap().dict(); globals.set_item("SuperClass", super_cls).unwrap(); py.run(source, Some(&globals), None) diff --git a/tests/test_coroutine.rs b/tests/test_coroutine.rs index 24e007ab5a3..d0e89a716ec 100644 --- a/tests/test_coroutine.rs +++ b/tests/test_coroutine.rs @@ -22,7 +22,7 @@ fn handle_windows(test: &str) -> String { if sys.platform == "win32": asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) "#; - pyo3::unindent::unindent(set_event_loop_policy) + &pyo3::unindent::unindent(test) + pyo3::impl_::unindent::unindent(set_event_loop_policy) + &pyo3::impl_::unindent::unindent(test) } #[test] @@ -149,7 +149,7 @@ fn cancelled_coroutine() { globals.set_item("sleep", sleep).unwrap(); let err = py .run( - &CString::new(pyo3::unindent::unindent(&handle_windows(test))).unwrap(), + &CString::new(pyo3::impl_::unindent::unindent(&handle_windows(test))).unwrap(), Some(&globals), None, ) @@ -189,7 +189,7 @@ fn coroutine_cancel_handle() { .set_item("cancellable_sleep", cancellable_sleep) .unwrap(); py.run( - &CString::new(pyo3::unindent::unindent(&handle_windows(test))).unwrap(), + &CString::new(pyo3::impl_::unindent::unindent(&handle_windows(test))).unwrap(), Some(&globals), None, ) @@ -219,7 +219,7 @@ fn coroutine_is_cancelled() { let globals = PyDict::new(py); globals.set_item("sleep_loop", sleep_loop).unwrap(); py.run( - &CString::new(pyo3::unindent::unindent(&handle_windows(test))).unwrap(), + &CString::new(pyo3::impl_::unindent::unindent(&handle_windows(test))).unwrap(), Some(&globals), None, ) diff --git a/tests/test_proto_methods.rs b/tests/test_proto_methods.rs index c3112c21d91..a44025cb45e 100644 --- a/tests/test_proto_methods.rs +++ b/tests/test_proto_methods.rs @@ -790,7 +790,7 @@ impl DescrCounter { fn descr_getset() { Python::attach(|py| { let counter = py.get_type::(); - let source = pyo3_ffi::c_str!(indoc::indoc!( + let source = pyo3_ffi::c_str!( r#" class Class: counter = Counter() @@ -814,7 +814,7 @@ assert c.counter.count == 4 del c.counter assert c.counter.count == 1 "# - )); + ); let globals = PyModule::import(py, "__main__").unwrap().dict(); globals.set_item("Counter", counter).unwrap(); py.run(source, Some(&globals), None)