Skip to content

Commit 272e08b

Browse files
Fix OsStr conversion for non-UTF-8 strings on Windows
1 parent e0fe042 commit 272e08b

File tree

2 files changed

+36
-1
lines changed

2 files changed

+36
-1
lines changed

newsfragments/5444.fixed.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix `OsStr` conversion for non-UTF-8 strings on Windows

src/conversions/std/osstr.rs

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,12 @@ impl FromPyObject<'_, '_> for OsString {
110110
unsafe { ffi::PyUnicode_AsWideChar(pystring.as_ptr(), std::ptr::null_mut(), 0) };
111111
crate::err::error_on_minusone(ob.py(), size)?;
112112

113+
debug_assert!(
114+
size > 0,
115+
"PyUnicode_AsWideChar should return at least 1 for null terminator"
116+
);
117+
let size = size - 1; // exclude null terminator
118+
113119
let mut buffer = vec![0; size as usize];
114120
let bytes_read =
115121
unsafe { ffi::PyUnicode_AsWideChar(pystring.as_ptr(), buffer.as_mut_ptr(), size) };
@@ -169,7 +175,7 @@ impl<'py> IntoPyObject<'py> for &OsString {
169175

170176
#[cfg(test)]
171177
mod tests {
172-
use crate::types::{PyString, PyStringMethods};
178+
use crate::types::{PyAnyMethods, PyString, PyStringMethods};
173179
use crate::{BoundObject, IntoPyObject, Python};
174180
use std::fmt::Debug;
175181
use std::{
@@ -219,4 +225,32 @@ mod tests {
219225
test_roundtrip::<OsString>(py, os_str.to_os_string());
220226
});
221227
}
228+
229+
#[test]
230+
#[cfg(windows)]
231+
fn test_windows_non_utf8_osstring_roundtrip() {
232+
use std::os::windows::ffi::{OsStrExt, OsStringExt};
233+
234+
Python::attach(|py| {
235+
// An unpaired surrogate (0xD800) cannot be encoded as valid UTF-8, but it is valid in a Windows `OsString`.
236+
let wide: &[u16] = &['A' as u16, 0xD800, 'B' as u16]; // 'A', unpaired surrogate, 'B'
237+
let os_str = OsString::from_wide(wide);
238+
239+
assert_eq!(os_str.to_string_lossy(), "A�B"); // the lossy conversion substitutes U+FFFD for the surrogate
240+
241+
// The contents are not valid Unicode, so `.to_str()` returns `None`.
242+
assert!(os_str.to_str().is_none());
243+
244+
// Convert to a Python object, then extract it back into an `OsString`.
245+
let py_str = os_str.as_os_str().into_pyobject(py).unwrap();
246+
let os_str_2 = py_str.extract::<OsString>().unwrap();
247+
248+
// The round trip must preserve the original wide data.
249+
assert_eq!(os_str, os_str_2);
250+
251+
// Sanity check: `encode_wide` yields the original UTF-16 code units, including the unpaired surrogate.
252+
let encoded: Vec<u16> = os_str.encode_wide().collect();
253+
assert_eq!(encoded, wide);
254+
});
255+
}
222256
}

0 commit comments

Comments
 (0)