Skip to content

Commit 5e10ea1

Browse files
fix OsStr conversion for non-utf8 strings on windows (#5444)
1 parent 4e0174c commit 5e10ea1

File tree

2 files changed

+36
-2
lines changed

2 files changed

+36
-2
lines changed

newsfragments/5444.fixed.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix `OsStr` conversion for non-UTF-8 strings on Windows.

src/conversions/std/osstr.rs

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,12 @@ impl FromPyObject<'_, '_> for OsString {
110110
unsafe { ffi::PyUnicode_AsWideChar(pystring.as_ptr(), std::ptr::null_mut(), 0) };
111111
crate::err::error_on_minusone(ob.py(), size)?;
112112

113+
debug_assert!(
114+
size > 0,
115+
"PyUnicode_AsWideChar should return at least 1 for null terminator"
116+
);
117+
let size = size - 1; // exclude null terminator
118+
113119
let mut buffer = vec![0; size as usize];
114120
let bytes_read =
115121
unsafe { ffi::PyUnicode_AsWideChar(pystring.as_ptr(), buffer.as_mut_ptr(), size) };
@@ -169,7 +175,7 @@ impl<'py> IntoPyObject<'py> for &OsString {
169175

170176
#[cfg(test)]
171177
mod tests {
172-
use crate::types::{PyString, PyStringMethods};
178+
use crate::types::{PyAnyMethods, PyString, PyStringMethods};
173179
use crate::{BoundObject, IntoPyObject, Python};
174180
use std::fmt::Debug;
175181
use std::{
@@ -181,7 +187,6 @@ mod tests {
181187
#[cfg(not(windows))]
182188
fn test_non_utf8_conversion() {
183189
Python::attach(|py| {
184-
use crate::types::PyAnyMethods;
185190
#[cfg(not(target_os = "wasi"))]
186191
use std::os::unix::ffi::OsStrExt;
187192
#[cfg(target_os = "wasi")]
@@ -219,4 +224,32 @@ mod tests {
219224
test_roundtrip::<OsString>(py, os_str.to_os_string());
220225
});
221226
}
227+
228+
#[test]
229+
#[cfg(windows)]
230+
fn test_windows_non_utf8_osstring_roundtrip() {
231+
use std::os::windows::ffi::{OsStrExt, OsStringExt};
232+
233+
Python::attach(|py| {
234+
// Example: an unpaired surrogate (0xD800) cannot be encoded as valid UTF-8, but it is valid in a Windows `OsString`
235+
let wide: &[u16] = &['A' as u16, 0xD800, 'B' as u16]; // 'A', unpaired surrogate, 'B'
236+
let os_str = OsString::from_wide(wide);
237+
238+
assert_eq!(os_str.to_string_lossy(), "A�B");
239+
240+
// This cannot be represented as UTF-8, so .to_str() would return None
241+
assert!(os_str.to_str().is_none());
242+
243+
// Convert to Python and back
244+
let py_str = os_str.as_os_str().into_pyobject(py).unwrap();
245+
let os_str_2 = py_str.extract::<OsString>().unwrap();
246+
247+
// The roundtrip should preserve the original wide data
248+
assert_eq!(os_str, os_str_2);
249+
250+
// Check that `encode_wide` yields the original UTF-16 code units, unpaired surrogate included; a UTF-8 conversion would lose that information
251+
let encoded: Vec<u16> = os_str.encode_wide().collect();
252+
assert_eq!(encoded, wide);
253+
});
254+
}
222255
}

0 commit comments

Comments
 (0)