@@ -672,32 +672,28 @@ impl [u8] {
672672 #[ unstable( issue = "none" , feature = "std_internals" ) ]
673673 #[ allow( dead_code) ]
674674 /// Safety:
675- /// - Must be UTF-8
675+ /// - Must be valid UTF-8
676676 pub unsafe fn make_utf8_uppercase ( & mut self ) -> Result < usize , VecDeque < u8 > > {
677677 let mut queue = VecDeque :: new ( ) ;
678678
679679 let mut read_offset = 0 ;
680680 let mut write_offset = 0 ;
681681
682- let mut buffer = [ 0 ; 4 ] ;
683682 while let Some ( ( codepoint, width) ) =
684683 unsafe { core:: str:: next_code_point_with_width ( & mut self [ read_offset..] . iter ( ) ) }
685684 {
686685 read_offset += width;
686+ // Queue must be flushed before encode_to_slice_or_else_to_queue is
687+ // called to ensure proper order of bytes
688+ dump_queue ( & mut queue, & mut self [ ..read_offset] , & mut write_offset) ;
687689 let lowercase_char = unsafe { char:: from_u32_unchecked ( codepoint) } ;
688690 for c in lowercase_char. to_uppercase ( ) {
689- let l = c. len_utf8 ( ) ;
690- c. encode_utf8 ( & mut buffer) ;
691- queue. extend ( & buffer[ ..l] ) ;
692- }
693- while write_offset < read_offset {
694- match queue. pop_front ( ) {
695- Some ( b) => {
696- self [ write_offset] = b;
697- write_offset += 1 ;
698- }
699- None => break ,
700- }
691+ encode_to_slice_or_else_to_queue (
692+ c,
693+ & mut queue,
694+ & mut self [ ..read_offset] ,
695+ & mut write_offset,
696+ ) ;
701697 }
702698 }
703699 assert_eq ! ( read_offset, self . len( ) ) ;
@@ -708,19 +704,21 @@ impl [u8] {
708704 #[ unstable( issue = "none" , feature = "std_internals" ) ]
709705 #[ allow( dead_code) ]
710706 /// Safety:
711- /// - Must be UTF-8
707+ /// - Must be valid UTF-8
712708 pub unsafe fn make_utf8_lowercase ( & mut self ) -> Result < usize , VecDeque < u8 > > {
713709 let mut queue = VecDeque :: new ( ) ;
714710
715711 let mut read_offset = 0 ;
716712 let mut write_offset = 0 ;
717713
718- let mut buffer = [ 0 ; 4 ] ;
719714 let mut final_sigma_automata = FinalSigmaAutomata :: new ( ) ;
720715 while let Some ( ( codepoint, width) ) =
721716 unsafe { core:: str:: next_code_point_with_width ( & mut self [ read_offset..] . iter ( ) ) }
722717 {
723718 read_offset += width;
719+ // Queue must be flushed before encode_to_slice_or_else_to_queue is
720+ // called to ensure proper order of bytes
721+ dump_queue ( & mut queue, & mut self [ ..read_offset] , & mut write_offset) ;
724722 let uppercase_char = unsafe { char:: from_u32_unchecked ( codepoint) } ;
725723 if uppercase_char == 'Σ' {
726724 // Σ maps to σ, except at the end of a word where it maps to ς.
@@ -729,26 +727,23 @@ impl [u8] {
729727 let is_word_final =
730728 final_sigma_automata. is_accepting ( ) && !case_ignorable_then_cased ( rest. chars ( ) ) ;
731729 let sigma_lowercase = if is_word_final { 'ς' } else { 'σ' } ;
732- let l = sigma_lowercase. len_utf8 ( ) ;
733- sigma_lowercase. encode_utf8 ( & mut buffer) ;
734- queue. extend ( & buffer[ ..l] ) ;
730+ encode_to_slice_or_else_to_queue (
731+ sigma_lowercase,
732+ & mut queue,
733+ & mut self [ ..read_offset] ,
734+ & mut write_offset,
735+ ) ;
735736 } else {
736737 for c in uppercase_char. to_lowercase ( ) {
737- let l = c. len_utf8 ( ) ;
738- c. encode_utf8 ( & mut buffer) ;
739- queue. extend ( & buffer[ ..l] ) ;
738+ encode_to_slice_or_else_to_queue (
739+ c,
740+ & mut queue,
741+ & mut self [ ..read_offset] ,
742+ & mut write_offset,
743+ ) ;
740744 }
741745 }
742746 final_sigma_automata. step ( uppercase_char) ;
743- while write_offset < read_offset {
744- match queue. pop_front ( ) {
745- Some ( b) => {
746- self [ write_offset] = b;
747- write_offset += 1 ;
748- }
749- None => break ,
750- }
751- }
752747 }
753748 assert_eq ! ( read_offset, self . len( ) ) ;
754749 return if write_offset < read_offset { Ok ( write_offset) } else { Err ( queue) } ;
@@ -764,6 +759,33 @@ impl [u8] {
764759 }
765760}
766761
/// Appends the UTF-8 encoding of `c` to the output stream made of
/// `slice[*write_offset..]` followed by `queue`.
///
/// Bytes go directly into the free tail of `slice` while room remains; any
/// bytes that do not fit are pushed onto the back of `queue`. `*write_offset`
/// is advanced by the number of bytes stored in `slice`.
///
/// Bytes already waiting in `queue` are older than `c`, so they are moved into
/// `slice` first. This keeps the byte order correct even when the caller has
/// not flushed the queue beforehand; when the queue was already flushed (it is
/// empty, or `slice` is full) this step does nothing.
///
/// After the call, `queue` is non-empty only if `slice` is completely filled.
///
/// # Panics
///
/// Panics if `*write_offset > slice.len()`.
fn encode_to_slice_or_else_to_queue(
    c: char,
    queue: &mut VecDeque<u8>,
    slice: &mut [u8],
    write_offset: &mut usize,
) {
    let mut buffer = [0; 4];
    let len = c.encode_utf8(&mut buffer).len();
    let encoded = &buffer[..len];

    let writable_slice = &mut slice[*write_offset..];

    // Older, still-pending bytes must land in the slice before the new ones.
    let pending = queue.len().min(writable_slice.len());
    for (dst, byte) in writable_slice.iter_mut().zip(queue.drain(..pending)) {
        *dst = byte;
    }
    let writable_slice = &mut writable_slice[pending..];

    // Write as much of `c` as fits; the remainder waits in the queue.
    let direct_copy_length = encoded.len().min(writable_slice.len());
    let (direct, overflow) = encoded.split_at(direct_copy_length);
    writable_slice[..direct_copy_length].copy_from_slice(direct);
    *write_offset += pending + direct_copy_length;
    queue.extend(overflow);
}
776+
/// Moves bytes from the front of `queue` into `slice`, starting at
/// `*write_offset`, until either the slice is full or the queue is empty.
///
/// `*write_offset` is advanced by the number of bytes moved. Nothing happens
/// when `*write_offset` is already at or past the end of `slice`.
fn dump_queue(queue: &mut VecDeque<u8>, slice: &mut [u8], write_offset: &mut usize) {
    // Room left in the slice; zero when the offset is at or beyond its end.
    let room = slice.len().saturating_sub(*write_offset);
    let count = room.min(queue.len());
    if count == 0 {
        return;
    }

    let start = *write_offset;
    let target = &mut slice[start..start + count];
    for (dst, byte) in target.iter_mut().zip(queue.drain(..count)) {
        *dst = byte;
    }
    *write_offset = start + count;
}
788+
767789#[ derive( Clone ) ]
768790enum FinalSigmaAutomata {
769791 Init ,
0 commit comments