Recompress required artifacts for manifest generation
This allows build-manifest to run successfully; the upstream patch
rust-lang/rust#110436, which will make this fully unnecessary, is
pending review and release across all channels.
Mark-Simulacrum committed May 7, 2023
1 parent af00fd2 commit 9700e9c
Showing 2 changed files with 129 additions and 108 deletions.
src/main.rs: 13 additions & 0 deletions
@@ -25,6 +25,7 @@ use chrono::Utc;
use curl::easy::Easy;
use fs2::FileExt;
use github::{CreateTag, Github};
use rayon::prelude::*;

use crate::config::{Channel, Config};

@@ -172,6 +173,18 @@ impl Context

self.assert_all_components_present()?;

// Quickly produce gzip compressed artifacts that are needed for successful manifest
// building.
let recompress = [
self.dl_dir()
.join("rust-nightly-x86_64-unknown-linux-gnu.tar.xz"),
self.dl_dir()
.join("cargo-nightly-x86_64-unknown-linux-gnu.tar.xz"),
];
recompress.par_iter().try_for_each(|tarball| {
recompress::recompress_file(tarball, false, flate2::Compression::fast(), false)
})?;

// Ok we've now determined that a release needs to be done.

let mut signer = Signer::new(&self.config)?;
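For context on the new call site above: rayon's `par_iter().try_for_each(..)` fans the closure out across the global thread pool and short-circuits on the first `Err`, so one failed recompression aborts the promotion early. A minimal standalone sketch of the same pattern, with a hypothetical `process` function standing in for `recompress::recompress_file` (not from this repository):

use rayon::prelude::*;
use std::path::PathBuf;

// Hypothetical stand-in for recompress::recompress_file.
fn process(path: &PathBuf) -> anyhow::Result<()> {
    println!("processing {}...", path.display());
    Ok(())
}

fn main() -> anyhow::Result<()> {
    let tarballs = [
        PathBuf::from("rust-nightly-x86_64-unknown-linux-gnu.tar.xz"),
        PathBuf::from("cargo-nightly-x86_64-unknown-linux-gnu.tar.xz"),
    ];
    // Each element is handled on rayon's thread pool; the first Err(..)
    // stops remaining work and propagates to the caller.
    tarballs.par_iter().try_for_each(process)
}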
src/recompress.rs: 116 additions & 108 deletions
@@ -18,6 +18,121 @@ use std::path::Path;
use std::time::{Duration, Instant};
use xz2::read::XzDecoder;

pub(crate) fn recompress_file(
xz_path: &Path,
recompress_gz: bool,
gz_compression_level: flate2::Compression,
recompress_xz: bool,
) -> anyhow::Result<()> {
println!("recompressing {}...", xz_path.display());
let file_start = Instant::now();
let gz_path = xz_path.with_extension("gz");

let mut destinations: Vec<(&str, Box<dyn io::Write>)> = Vec::new();

// Produce gzip if explicitly enabled or the destination file doesn't exist.
if recompress_gz || !gz_path.is_file() {
let gz = File::create(gz_path)?;
destinations.push((
"gz",
Box::new(flate2::write::GzEncoder::new(gz, gz_compression_level)),
));
}

// xz recompression with more aggressive settings than we want to take the time
// for in rust-lang/rust CI. This cuts 5-15% off of the produced tarballs.
//
// Note that this is using a single-threaded compressor as we're parallelizing
// via rayon already. In rust-lang/rust we were trying to use parallel
// compression, but the default block size for that is 3*dict_size so we
// weren't actually using more than one core in most of the builders with
// <192MB uncompressed tarballs. In promote-release since we're recompressing
// 100s of tarballs there's no need for each individual compression to be
// parallel.
let xz_recompressed = xz_path.with_extension("xz_recompressed");
if recompress_xz {
let mut filters = xz2::stream::Filters::new();
let mut lzma_ops = xz2::stream::LzmaOptions::new_preset(9).unwrap();
// This sets the overall dictionary size, which is also how much memory (baseline)
// is needed for decompression.
lzma_ops.dict_size(64 * 1024 * 1024);
// Use the best match finder for compression ratio.
lzma_ops.match_finder(xz2::stream::MatchFinder::BinaryTree4);
lzma_ops.mode(xz2::stream::Mode::Normal);
// Set nice len to the maximum for best compression ratio
lzma_ops.nice_len(273);
// Set depth to a reasonable value, 0 means auto, 1000 is somewhat high but gives
// good results.
lzma_ops.depth(1000);
// 2 is the default and does well for most files
lzma_ops.position_bits(2);
// 0 is the default and does well for most files
lzma_ops.literal_position_bits(0);
// 3 is the default and does well for most files
lzma_ops.literal_context_bits(3);

filters.lzma2(&lzma_ops);

// FIXME: Do we want a checksum as part of compression?
let stream =
xz2::stream::Stream::new_stream_encoder(&filters, xz2::stream::Check::None).unwrap();
let xz_out = File::create(&xz_recompressed)?;
destinations.push((
"xz",
Box::new(xz2::write::XzEncoder::new_stream(
std::io::BufWriter::new(xz_out),
stream,
)),
));
}

// We only decompress once and then write into each of the compressors before
// moving on.
//
// This code assumes that compression with `write_all` will never fail (i.e., we
// can take arbitrary amounts of data as input). That seems like a reasonable
// assumption though.
let mut decompressor = XzDecoder::new(File::open(xz_path)?);
let mut buffer = vec![0u8; 4 * 1024 * 1024];
let mut decompress_time = Duration::ZERO;
let mut time_by_dest = vec![Duration::ZERO; destinations.len()];
loop {
let start = Instant::now();
let length = decompressor.read(&mut buffer)?;
decompress_time += start.elapsed();
if length == 0 {
break;
}
for (idx, (_, destination)) in destinations.iter_mut().enumerate() {
let start = std::time::Instant::now();
destination.write_all(&buffer[..length])?;
time_by_dest[idx] += start.elapsed();
}
}

let mut compression_times = String::new();
for (idx, (name, _)) in destinations.iter().enumerate() {
write!(
compression_times,
", {:.2?} {} compression",
time_by_dest[idx], name
)?;
}
println!(
"recompressed {}: {:.2?} total, {:.2?} decompression{}",
xz_path.display(),
file_start.elapsed(),
decompress_time,
compression_times
);

if recompress_xz {
fs::rename(&xz_recompressed, xz_path)?;
}

Ok(())
}

impl Context {
pub fn recompress(&self, directory: &Path) -> anyhow::Result<()> {
let mut to_recompress = Vec::new();
@@ -77,114 +192,7 @@ impl Context {
let path = to_recompress.lock().unwrap().pop();
path
} {
println!("recompressing {}...", xz_path.display());
let file_start = Instant::now();
let gz_path = xz_path.with_extension("gz");

let mut destinations: Vec<(&str, Box<dyn io::Write>)> = Vec::new();

// Produce gzip if explicitly enabled or the destination file doesn't exist.
if recompress_gz || !gz_path.is_file() {
let gz = File::create(gz_path)?;
destinations.push((
"gz",
Box::new(flate2::write::GzEncoder::new(gz, compression_level)),
));
}

// xz recompression with more aggressive settings than we want to take the time
// for in rust-lang/rust CI. This cuts 5-15% off of the produced tarballs.
//
// Note that this is using a single-threaded compressor as we're parallelizing
// via rayon already. In rust-lang/rust we were trying to use parallel
// compression, but the default block size for that is 3*dict_size so we
// weren't actually using more than one core in most of the builders with
// <192MB uncompressed tarballs. In promote-release since we're recompressing
// 100s of tarballs there's no need for each individual compression to be
// parallel.
let xz_recompressed = xz_path.with_extension("xz_recompressed");
if recompress_xz {
let mut filters = xz2::stream::Filters::new();
let mut lzma_ops = xz2::stream::LzmaOptions::new_preset(9).unwrap();
// This sets the overall dictionary size, which is also how much memory (baseline)
// is needed for decompression.
lzma_ops.dict_size(64 * 1024 * 1024);
// Use the best match finder for compression ratio.
lzma_ops.match_finder(xz2::stream::MatchFinder::BinaryTree4);
lzma_ops.mode(xz2::stream::Mode::Normal);
// Set nice len to the maximum for best compression ratio
lzma_ops.nice_len(273);
// Set depth to a reasonable value, 0 means auto, 1000 is somwhat high but gives
// good results.
lzma_ops.depth(1000);
// 2 is the default and does well for most files
lzma_ops.position_bits(2);
// 0 is the default and does well for most files
lzma_ops.literal_position_bits(0);
// 3 is the default and does well for most files
lzma_ops.literal_context_bits(3);

filters.lzma2(&lzma_ops);

// FIXME: Do we want a checksum as part of compression?
let stream = xz2::stream::Stream::new_stream_encoder(
&filters,
xz2::stream::Check::None,
)
.unwrap();
let xz_out = File::create(&xz_recompressed)?;
destinations.push((
"xz",
Box::new(xz2::write::XzEncoder::new_stream(
std::io::BufWriter::new(xz_out),
stream,
)),
));
}

// We only decompress once and then write into each of the compressors before
// moving on.
//
// This code assumes that compression with `write_all` will never fail (i.e., we
// can take arbitrary amounts of data as input). That seems like a reasonable
// assumption though.
let mut decompressor = XzDecoder::new(File::open(&xz_path)?);
let mut buffer = vec![0u8; 4 * 1024 * 1024];
let mut decompress_time = Duration::ZERO;
let mut time_by_dest = vec![Duration::ZERO; destinations.len()];
loop {
let start = Instant::now();
let length = decompressor.read(&mut buffer)?;
decompress_time += start.elapsed();
if length == 0 {
break;
}
for (idx, (_, destination)) in destinations.iter_mut().enumerate() {
let start = std::time::Instant::now();
destination.write_all(&buffer[..length])?;
time_by_dest[idx] += start.elapsed();
}
}

let mut compression_times = String::new();
for (idx, (name, _)) in destinations.iter().enumerate() {
write!(
compression_times,
", {:.2?} {} compression",
time_by_dest[idx], name
)?;
}
println!(
"recompressed {}: {:.2?} total, {:.2?} decompression{}",
xz_path.display(),
file_start.elapsed(),
decompress_time,
compression_times
);

if recompress_xz {
fs::rename(&xz_recompressed, xz_path)?;
}
recompress_file(&xz_path, recompress_gz, compression_level, recompress_xz)?;
}

Ok::<_, anyhow::Error>(())
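For reference, the custom encoder configuration in the hunk above can be exercised on its own. A minimal sketch, assuming the crate dependencies this repository already uses (xz2, anyhow) and hypothetical in.tar / out.tar.xz paths; only the distinctive LzmaOptions calls are repeated, with the rest left at their preset-9 defaults:

use std::fs::File;
use std::io::{self, BufWriter, Write};

fn main() -> anyhow::Result<()> {
    // Start from preset 9 and override the settings used above.
    let mut lzma_ops = xz2::stream::LzmaOptions::new_preset(9)?;
    // 64 MiB dictionary; also the baseline memory needed to decompress.
    lzma_ops.dict_size(64 * 1024 * 1024);
    lzma_ops.match_finder(xz2::stream::MatchFinder::BinaryTree4);
    lzma_ops.nice_len(273); // the maximum nice_len
    lzma_ops.depth(1000);

    let mut filters = xz2::stream::Filters::new();
    filters.lzma2(&lzma_ops);
    // Check::None mirrors the FIXME above: no integrity checksum is embedded.
    let stream = xz2::stream::Stream::new_stream_encoder(&filters, xz2::stream::Check::None)?;

    let mut encoder =
        xz2::write::XzEncoder::new_stream(BufWriter::new(File::create("out.tar.xz")?), stream);
    io::copy(&mut File::open("in.tar")?, &mut encoder)?;
    encoder.finish()?.flush()?;
    Ok(())
}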
