PyO3 · davidhewitt · Sep 19, 2025 · Sep 17, 2025 · davidhewitt · Sep 18, 2025
diff --git a/newsfragments/5444.fixed.md b/newsfragments/5444.fixed.md
@@ -0,0 +1 @@
+fix `OsStr` conversion for non-utf8 strings on windows
diff --git a/src/conversions/std/osstr.rs b/src/conversions/std/osstr.rs
@@ -110,6 +110,12 @@ impl FromPyObject<'_, '_> for OsString {
                 unsafe { ffi::PyUnicode_AsWideChar(pystring.as_ptr(), std::ptr::null_mut(), 0) };
             crate::err::error_on_minusone(ob.py(), size)?;
 
+            debug_assert!(
+                size > 0,
+                "PyUnicode_AsWideChar should return at least 1 for null terminator"
+            );
+            let size = size - 1; // exclude null terminator
+
             let mut buffer = vec![0; size as usize];
             let bytes_read =
                 unsafe { ffi::PyUnicode_AsWideChar(pystring.as_ptr(), buffer.as_mut_ptr(), size) };
@@ -169,7 +175,7 @@ impl<'py> IntoPyObject<'py> for &OsString {
 
 #[cfg(test)]
 mod tests {
-    use crate::types::{PyString, PyStringMethods};
+    use crate::types::{PyAnyMethods, PyString, PyStringMethods};
     use crate::{BoundObject, IntoPyObject, Python};
     use std::fmt::Debug;
     use std::{
@@ -181,7 +187,6 @@ mod tests {
     #[cfg(not(windows))]
     fn test_non_utf8_conversion() {
         Python::attach(|py| {
-            use crate::types::PyAnyMethods;
             #[cfg(not(target_os = "wasi"))]
             use std::os::unix::ffi::OsStrExt;
             #[cfg(target_os = "wasi")]
@@ -219,4 +224,32 @@ mod tests {
             test_roundtrip::<OsString>(py, os_str.to_os_string());
         });
     }
+
+    #[test]
+    #[cfg(windows)]
+    fn test_windows_non_utf8_osstring_roundtrip() {
+        use std::os::windows::ffi::{OsStrExt, OsStringExt};
+
+        Python::attach(|py| {
+            // Example: Unpaired surrogate (0xD800) is not valid UTF-8, but valid in Windows OsString
+            let wide: &[u16] = &['A' as u16, 0xD800, 'B' as u16]; // 'A', unpaired surrogate, 'B'
+            let os_str = OsString::from_wide(wide);
+
+            assert_eq!(os_str.to_string_lossy(), "A�B");
+
+            // This cannot be represented as UTF-8, so .to_str() would return None
+            assert!(os_str.to_str().is_none());
+
+            // Convert to Python and back
+            let py_str = os_str.as_os_str().into_pyobject(py).unwrap();
+            let os_str_2 = py_str.extract::<OsString>().unwrap();
+
+            // The roundtrip should preserve the original wide data
+            assert_eq!(os_str, os_str_2);
+
+            // Show that encode_wide is necessary: direct UTF-8 conversion would lose information
+            let encoded: Vec<u16> = os_str.encode_wide().collect();
+            assert_eq!(encoded, wide);
+        });
+    }
 }
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		fix `OsStr` conversion for non-utf8 strings on windows