@@ -26,18 +26,21 @@ pub(super) const fn utf8_is_cont_byte(byte: u8) -> bool {
2626} 
2727
2828/// Reads the next code point out of a byte iterator (assuming a 
29- /// UTF-8-like encoding). 
29+ /// UTF-8-like encoding) and returns it along with its width in bytes.
3030/// 
3131/// # Safety 
3232/// 
3333/// `bytes` must produce a valid UTF-8-like (UTF-8 or WTF-8) string 
3434#[ unstable( feature = "str_internals" ,  issue = "none" ) ]  
3535#[ inline]  
36- pub  unsafe  fn  next_code_point < ' a ,  I :  Iterator < Item  = & ' a  u8 > > ( bytes :  & mut  I )  -> Option < u32 >  { 
36+ #[ allow( dead_code) ]  
37+ pub  unsafe  fn  next_code_point_with_width < ' a ,  I :  Iterator < Item  = & ' a  u8 > > ( 
38+     bytes :  & mut  I , 
39+ )  -> Option < ( u32 ,  usize ) >  { 
3740    // Decode UTF-8 
3841    let  x = * bytes. next ( ) ?; 
3942    if  x < 128  { 
40-         return  Some ( x as  u32 ) ; 
43+         return  Some ( ( x as  u32 ,   1 ) ) ; 
4144    } 
4245
4346    // Multibyte case follows 
@@ -47,13 +50,15 @@ pub unsafe fn next_code_point<'a, I: Iterator<Item = &'a u8>>(bytes: &mut I) ->
4750    // SAFETY: `bytes` produces an UTF-8-like string, 
4851    // so the iterator must produce a value here. 
4952    let  y = unsafe  {  * bytes. next ( ) . unwrap_unchecked ( )  } ; 
53+     let  mut  width = 2 ; 
5054    let  mut  ch = utf8_acc_cont_byte ( init,  y) ; 
5155    if  x >= 0xE0  { 
5256        // [[x y z] w] case 
5357        // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid 
5458        // SAFETY: `bytes` produces an UTF-8-like string, 
5559        // so the iterator must produce a value here. 
5660        let  z = unsafe  {  * bytes. next ( ) . unwrap_unchecked ( )  } ; 
61+         width = 3 ; 
5762        let  y_z = utf8_acc_cont_byte ( ( y &  CONT_MASK )  as  u32 ,  z) ; 
5863        ch = init << 12  | y_z; 
5964        if  x >= 0xF0  { 
@@ -62,11 +67,25 @@ pub unsafe fn next_code_point<'a, I: Iterator<Item = &'a u8>>(bytes: &mut I) ->
6267            // SAFETY: `bytes` produces an UTF-8-like string, 
6368            // so the iterator must produce a value here. 
6469            let  w = unsafe  {  * bytes. next ( ) . unwrap_unchecked ( )  } ; 
70+             width = 4 ; 
6571            ch = ( init &  7 )  << 18  | utf8_acc_cont_byte ( y_z,  w) ; 
6672        } 
6773    } 
6874
69-     Some ( ch) 
75+     Some ( ( ch,  width) ) 
76+ } 
77+ 
78+ /// Reads the next code point out of a byte iterator (assuming a 
79+ /// UTF-8-like encoding). 
80+ /// 
81+ /// # Safety 
82+ /// 
83+ /// `bytes` must produce a valid UTF-8-like (UTF-8 or WTF-8) string 
84+ #[ unstable( feature = "str_internals" ,  issue = "none" ) ]  
85+ #[ inline]  
86+ pub  unsafe  fn  next_code_point < ' a ,  I :  Iterator < Item  = & ' a  u8 > > ( bytes :  & mut  I )  -> Option < u32 >  { 
87+     // SAFETY: same call condition 
88+     Some ( unsafe  {  next_code_point_with_width ( bytes)  } ?. 0 ) 
7089} 
7190
7291/// Reads the last code point out of a byte iterator (assuming a 
0 commit comments