Skip to content

Commit

Permalink
Auto merge of rust-lang#130511 - bjoernager:const-char-encode-utf8, r=dtolnay
Browse files Browse the repository at this point in the history

Support `char::encode_utf8` in const scenarios.

This PR implements [`rust-lang/rfcs#3696`](rust-lang/rfcs#3696).

This assumes the [`const_slice_from_raw_parts_mut`](rust-lang#67456) feature.
  • Loading branch information
bors committed Sep 19, 2024
2 parents 1509944 + e94c080 commit 6210ecb
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 18 deletions.
32 changes: 14 additions & 18 deletions core/src/char/methods.rs
Original file line number Diff line number Diff line change
Expand Up @@ -672,8 +672,9 @@ impl char {
/// 'ß'.encode_utf8(&mut b);
/// ```
#[stable(feature = "unicode_encode_char", since = "1.15.0")]
// The function itself stays stable; only calling it in const contexts is gated
// behind the unstable `const_char_encode_utf8` feature.
#[rustc_const_unstable(feature = "const_char_encode_utf8", issue = "130512")]
#[inline]
pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
// Diff view: the non-`const` signature above is the pre-change line; the
// `const fn` signature below is its replacement.
pub const fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
// The bytes are written into `dst` by `encode_utf8_raw`, and the returned
// sub-slice is reinterpreted as `&mut str` without a validation pass.
// SAFETY: `char` is not a surrogate, so this is valid UTF-8.
unsafe { from_utf8_unchecked_mut(encode_utf8_raw(self as u32, dst)) }
}
Expand Down Expand Up @@ -1735,14 +1736,11 @@ impl EscapeDebugExtArgs {

/// Returns the number of bytes (1 to 4) needed to encode the code point
/// `code` as UTF-8, selected by comparing `code` against the `MAX_ONE_B`,
/// `MAX_TWO_B` and `MAX_THREE_B` thresholds in ascending order.
#[inline]
const fn len_utf8(code: u32) -> usize {
// Diff view: the `if`/`else if` chain below is the pre-change body.
if code < MAX_ONE_B {
1
} else if code < MAX_TWO_B {
2
} else if code < MAX_THREE_B {
3
} else {
4
// Diff view: the `match` below is the post-change body. Each `..MAX_*` arm is
// a half-open range pattern (upper bound excluded), so it mirrors the
// corresponding `code < MAX_*` test; arms are tried top to bottom.
match code {
..MAX_ONE_B => 1,
..MAX_TWO_B => 2,
..MAX_THREE_B => 3,
// Everything at or above `MAX_THREE_B` takes four bytes.
_ => 4,
}
}

Expand All @@ -1760,11 +1758,12 @@ const fn len_utf8(code: u32) -> usize {
/// Panics if the buffer is not large enough.
/// A buffer of length four is large enough to encode any `char`.
#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
#[rustc_const_unstable(feature = "const_char_encode_utf8", issue = "130512")]
#[doc(hidden)]
#[inline]
pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
pub const fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
let len = len_utf8(code);
match (len, &mut dst[..]) {
match (len, &mut *dst) {
(1, [a, ..]) => {
*a = code as u8;
}
Expand All @@ -1783,14 +1782,11 @@ pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
*c = (code >> 6 & 0x3F) as u8 | TAG_CONT;
*d = (code & 0x3F) as u8 | TAG_CONT;
}
_ => panic!(
"encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
len,
code,
dst.len(),
),
// Note that we cannot format in constant expressions.
_ => panic!("encode_utf8: buffer does not have enough bytes to encode code point"),
};
&mut dst[..len]
// SAFETY: `<&mut [u8]>::as_mut_ptr` is guaranteed to return a valid pointer and `len` has been tested to be within bounds.
unsafe { slice::from_raw_parts_mut(dst.as_mut_ptr(), len) }
}

/// Encodes a raw u32 value as UTF-16 into the provided `u16` buffer,
Expand Down
1 change: 1 addition & 0 deletions core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@
#![feature(const_bigint_helper_methods)]
#![feature(const_black_box)]
#![feature(const_cell_into_inner)]
#![feature(const_char_encode_utf8)]
#![feature(const_eval_select)]
#![feature(const_exact_div)]
#![feature(const_float_classify)]
Expand Down

0 comments on commit 6210ecb

Please sign in to comment.