Skip to content

Commit

Permalink
Auto merge of #119791 - Mark-Simulacrum:cut-leb128, r=cjgillot
Browse files Browse the repository at this point in the history
Remove a large amount of leb128-coded integers

This removes ~41%[^1] of the leb128-encoded integers serialized during libcore compilation by changing enum tags to opportunistically use `u8` where feasible instead of the leb128 coding via `usize`.

This should have effectively zero impact on metadata file sizes, since all or nearly all enum tags fit into the 7 bits available in a single-byte leb128 encoding. Perf results indicate this is basically neutral across the board, except for an improvement in bootstrap time.

[^1]: More than half of the remaining integers are still <= 128, so the leb128 coding still makes sense. 32% are zero, and 46% are <= 4.
  • Loading branch information
bors committed Jan 11, 2024
2 parents d73bd3f + 1d2005b commit 68acb39
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 42 deletions.
30 changes: 24 additions & 6 deletions compiler/rustc_macros/src/serialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,17 @@ fn decodable_body(
ty_name,
variants.len()
);
let tag = if variants.len() < u8::MAX as usize {
quote! {
::rustc_serialize::Decoder::read_u8(__decoder) as usize
}
} else {
quote! {
::rustc_serialize::Decoder::read_usize(__decoder)
}
};
quote! {
match ::rustc_serialize::Decoder::read_usize(__decoder) {
match #tag {
#match_inner
n => panic!(#message, n),
}
Expand Down Expand Up @@ -206,11 +215,20 @@ fn encodable_body(
variant_idx += 1;
result
});
quote! {
let disc = match *self {
#encode_inner
};
::rustc_serialize::Encoder::emit_usize(__encoder, disc);
if variant_idx < u8::MAX as usize {
quote! {
let disc = match *self {
#encode_inner
};
::rustc_serialize::Encoder::emit_u8(__encoder, disc as u8);
}
} else {
quote! {
let disc = match *self {
#encode_inner
};
::rustc_serialize::Encoder::emit_usize(__encoder, disc);
}
}
};

Expand Down
33 changes: 15 additions & 18 deletions compiler/rustc_serialize/src/serialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,14 +70,6 @@ pub trait Encoder {
}

fn emit_raw_bytes(&mut self, s: &[u8]);

fn emit_enum_variant<F>(&mut self, v_id: usize, f: F)
where
F: FnOnce(&mut Self),
{
self.emit_usize(v_id);
f(self);
}
}

// Note: all the methods in this trait are infallible, which may be surprising.
Expand Down Expand Up @@ -132,10 +124,6 @@ pub trait Decoder {

fn read_raw_bytes(&mut self, len: usize) -> &[u8];

// Although there is an `emit_enum_variant` method in `Encoder`, the code
// patterns in decoding are different enough to encoding that there is no
// need for a corresponding `read_enum_variant` method here.

fn peek_byte(&self) -> u8;
fn position(&self) -> usize;
}
Expand Down Expand Up @@ -372,15 +360,18 @@ impl<'a, D: Decoder> Decodable<D> for Cow<'a, str> {
impl<S: Encoder, T: Encodable<S>> Encodable<S> for Option<T> {
fn encode(&self, s: &mut S) {
match *self {
None => s.emit_enum_variant(0, |_| {}),
Some(ref v) => s.emit_enum_variant(1, |s| v.encode(s)),
None => s.emit_u8(0),
Some(ref v) => {
s.emit_u8(1);
v.encode(s);
}
}
}
}

impl<D: Decoder, T: Decodable<D>> Decodable<D> for Option<T> {
fn decode(d: &mut D) -> Option<T> {
match d.read_usize() {
match d.read_u8() {
0 => None,
1 => Some(Decodable::decode(d)),
_ => panic!("Encountered invalid discriminant while decoding `Option`."),
Expand All @@ -391,15 +382,21 @@ impl<D: Decoder, T: Decodable<D>> Decodable<D> for Option<T> {
impl<S: Encoder, T1: Encodable<S>, T2: Encodable<S>> Encodable<S> for Result<T1, T2> {
fn encode(&self, s: &mut S) {
match *self {
Ok(ref v) => s.emit_enum_variant(0, |s| v.encode(s)),
Err(ref v) => s.emit_enum_variant(1, |s| v.encode(s)),
Ok(ref v) => {
s.emit_u8(0);
v.encode(s);
}
Err(ref v) => {
s.emit_u8(1);
v.encode(s);
}
}
}
}

impl<D: Decoder, T1: Decodable<D>, T2: Decodable<D>> Decodable<D> for Result<T1, T2> {
fn decode(d: &mut D) -> Result<T1, T2> {
match d.read_usize() {
match d.read_u8() {
0 => Ok(T1::decode(d)),
1 => Err(T2::decode(d)),
_ => panic!("Encountered invalid discriminant while decoding `Result`."),
Expand Down
23 changes: 12 additions & 11 deletions compiler/rustc_span/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,18 +203,19 @@ impl Hash for RealFileName {
impl<S: Encoder> Encodable<S> for RealFileName {
fn encode(&self, encoder: &mut S) {
match *self {
RealFileName::LocalPath(ref local_path) => encoder.emit_enum_variant(0, |encoder| {
RealFileName::LocalPath(ref local_path) => {
encoder.emit_u8(0);
local_path.encode(encoder);
}),

RealFileName::Remapped { ref local_path, ref virtual_name } => encoder
.emit_enum_variant(1, |encoder| {
// For privacy and build reproducibility, we must not embed host-dependant path
// in artifacts if they have been remapped by --remap-path-prefix
assert!(local_path.is_none());
local_path.encode(encoder);
virtual_name.encode(encoder);
}),
}

RealFileName::Remapped { ref local_path, ref virtual_name } => {
encoder.emit_u8(1);
// For privacy and build reproducibility, we must not embed host-dependant path
// in artifacts if they have been remapped by --remap-path-prefix
assert!(local_path.is_none());
local_path.encode(encoder);
virtual_name.encode(encoder);
}
}
}
}
Expand Down
17 changes: 10 additions & 7 deletions compiler/rustc_target/src/spec/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3396,19 +3396,22 @@ impl Hash for TargetTriple {
impl<S: Encoder> Encodable<S> for TargetTriple {
fn encode(&self, s: &mut S) {
match self {
TargetTriple::TargetTriple(triple) => s.emit_enum_variant(0, |s| s.emit_str(triple)),
TargetTriple::TargetJson { path_for_rustdoc: _, triple, contents } => s
.emit_enum_variant(1, |s| {
s.emit_str(triple);
s.emit_str(contents)
}),
TargetTriple::TargetTriple(triple) => {
s.emit_u8(0);
s.emit_str(triple);
}
TargetTriple::TargetJson { path_for_rustdoc: _, triple, contents } => {
s.emit_u8(1);
s.emit_str(triple);
s.emit_str(contents);
}
}
}
}

impl<D: Decoder> Decodable<D> for TargetTriple {
fn decode(d: &mut D) -> Self {
match d.read_usize() {
match d.read_u8() {
0 => TargetTriple::TargetTriple(d.read_str().to_owned()),
1 => TargetTriple::TargetJson {
path_for_rustdoc: PathBuf::new(),
Expand Down

0 comments on commit 68acb39

Please sign in to comment.