@@ -110,6 +110,12 @@ impl FromPyObject<'_, '_> for OsString {
110110 unsafe { ffi:: PyUnicode_AsWideChar ( pystring. as_ptr ( ) , std:: ptr:: null_mut ( ) , 0 ) } ;
111111 crate :: err:: error_on_minusone ( ob. py ( ) , size) ?;
112112
113+ debug_assert ! (
114+ size > 0 ,
115+ "PyUnicode_AsWideChar should return at least 1 for null terminator"
116+ ) ;
117+ let size = size - 1 ; // exclude null terminator
118+
113119 let mut buffer = vec ! [ 0 ; size as usize ] ;
114120 let bytes_read =
115121 unsafe { ffi:: PyUnicode_AsWideChar ( pystring. as_ptr ( ) , buffer. as_mut_ptr ( ) , size) } ;
@@ -169,7 +175,7 @@ impl<'py> IntoPyObject<'py> for &OsString {
169175
170176#[ cfg( test) ]
171177mod tests {
172- use crate :: types:: { PyString , PyStringMethods } ;
178+ use crate :: types:: { PyAnyMethods , PyString , PyStringMethods } ;
173179 use crate :: { BoundObject , IntoPyObject , Python } ;
174180 use std:: fmt:: Debug ;
175181 use std:: {
@@ -181,7 +187,6 @@ mod tests {
181187 #[ cfg( not( windows) ) ]
182188 fn test_non_utf8_conversion ( ) {
183189 Python :: attach ( |py| {
184- use crate :: types:: PyAnyMethods ;
185190 #[ cfg( not( target_os = "wasi" ) ) ]
186191 use std:: os:: unix:: ffi:: OsStrExt ;
187192 #[ cfg( target_os = "wasi" ) ]
@@ -219,4 +224,32 @@ mod tests {
219224 test_roundtrip :: < OsString > ( py, os_str. to_os_string ( ) ) ;
220225 } ) ;
221226 }
227+
/// Windows-only check that an `OsString` holding ill-formed UTF-16 survives a
/// trip through a Python string and back without losing any code units.
#[test]
#[cfg(windows)]
fn test_windows_non_utf8_osstring_roundtrip() {
    use std::os::windows::ffi::{OsStrExt, OsStringExt};

    // 'A', a lone high surrogate, 'B'. The unpaired surrogate (0xD800) has no
    // UTF-8 encoding, yet it is a perfectly legal Windows `OsString` payload.
    let units: [u16; 3] = [u16::from(b'A'), 0xD800, u16::from(b'B')];

    Python::attach(|py| {
        let original = OsString::from_wide(&units);

        // A lossy view substitutes the replacement character for the surrogate...
        assert_eq!(original.to_string_lossy(), "A�B");

        // ...and a strict UTF-8 view is not available at all.
        assert!(original.to_str().is_none());

        // Hand the value to Python, then pull it back out as an `OsString`.
        let py_object = original.as_os_str().into_pyobject(py).unwrap();
        let restored: OsString = py_object.extract().unwrap();

        // Nothing may be lost or rewritten on the way through.
        assert_eq!(original, restored);

        // The wide encoding still yields exactly the code units we started
        // with, which a conversion via UTF-8 could not have preserved.
        let reencoded = original.encode_wide().collect::<Vec<u16>>();
        assert_eq!(reencoded, units);
    });
}
222255}
0 commit comments