@@ -110,6 +110,12 @@ impl FromPyObject<'_, '_> for OsString {
110110 unsafe { ffi:: PyUnicode_AsWideChar ( pystring. as_ptr ( ) , std:: ptr:: null_mut ( ) , 0 ) } ;
111111 crate :: err:: error_on_minusone ( ob. py ( ) , size) ?;
112112
113+ debug_assert ! (
114+ size > 0 ,
115+ "PyUnicode_AsWideChar should return at least 1 for null terminator"
116+ ) ;
117+ let size = size - 1 ; // exclude null terminator
118+
113119 let mut buffer = vec ! [ 0 ; size as usize ] ;
114120 let bytes_read =
115121 unsafe { ffi:: PyUnicode_AsWideChar ( pystring. as_ptr ( ) , buffer. as_mut_ptr ( ) , size) } ;
@@ -169,7 +175,7 @@ impl<'py> IntoPyObject<'py> for &OsString {
169175
170176#[ cfg( test) ]
171177mod tests {
172- use crate :: types:: { PyString , PyStringMethods } ;
178+ use crate :: types:: { PyAnyMethods , PyString , PyStringMethods } ;
173179 use crate :: { BoundObject , IntoPyObject , Python } ;
174180 use std:: fmt:: Debug ;
175181 use std:: {
@@ -219,4 +225,32 @@ mod tests {
219225 test_roundtrip :: < OsString > ( py, os_str. to_os_string ( ) ) ;
220226 } ) ;
221227 }
228+
/// Windows-only regression test: an `OsString` holding an unpaired surrogate
/// (which has no UTF-8 representation) must survive a trip through a Python
/// string and back without losing or altering any of its UTF-16 code units.
#[test]
#[cfg(windows)]
fn test_windows_non_utf8_osstring_roundtrip() {
    use std::os::windows::ffi::{OsStrExt, OsStringExt};

    Python::attach(|py| {
        // 'A', a lone high surrogate (0xD800), 'B': acceptable as Windows wide
        // data, but not expressible as UTF-8.
        let units: &[u16] = &['A' as u16, 0xD800, 'B' as u16];
        let original = OsString::from_wide(units);

        // The lossy view substitutes a replacement character for the surrogate...
        assert_eq!(original.to_string_lossy(), "A�B");

        // ...while the strict UTF-8 view is unavailable altogether.
        assert!(original.to_str().is_none());

        // Hand the value to Python, then pull it back out as an `OsString`.
        let py_str = original.as_os_str().into_pyobject(py).unwrap();
        let recovered: OsString = py_str.extract().unwrap();

        // Nothing may be lost or rewritten by the roundtrip.
        assert_eq!(original, recovered);

        // Re-encoding to wide units yields exactly the input, surrogate included,
        // which a detour through UTF-8 could not guarantee.
        let reencoded = original.encode_wide().collect::<Vec<u16>>();
        assert_eq!(reencoded, units);
    });
}
222256}
0 commit comments