From 1bcaea30239d044404f7ee99090cd1d305c55830 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 9 Aug 2025 17:28:22 +0200 Subject: [PATCH] tr: refactor and introduce simd to get important perf win --- Cargo.lock | 1 + fuzz/Cargo.lock | 1 + src/uu/tr/Cargo.toml | 1 + src/uu/tr/src/operation.rs | 82 ++++++++++++++++++++++------ src/uu/tr/src/simd.rs | 108 +++++++++++++++++++++++++++++++++++++ src/uu/tr/src/tr.rs | 25 +++------ 6 files changed, 185 insertions(+), 33 deletions(-) create mode 100644 src/uu/tr/src/simd.rs diff --git a/Cargo.lock b/Cargo.lock index 17c25822981..859d09b3a48 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3908,6 +3908,7 @@ dependencies = [ name = "uu_tr" version = "0.1.0" dependencies = [ + "bytecount", "clap", "fluent", "nom 8.0.0", diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index 72fd643dd34..bc3481f7560 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -1571,6 +1571,7 @@ dependencies = [ name = "uu_tr" version = "0.1.0" dependencies = [ + "bytecount", "clap", "fluent", "nom", diff --git a/src/uu/tr/Cargo.toml b/src/uu/tr/Cargo.toml index 76fe403bbec..c20e102d11e 100644 --- a/src/uu/tr/Cargo.toml +++ b/src/uu/tr/Cargo.toml @@ -22,6 +22,7 @@ nom = { workspace = true } clap = { workspace = true } uucore = { workspace = true, features = ["fs"] } fluent = { workspace = true } +bytecount = { workspace = true, features = ["runtime-dispatch-simd"] } [[bin]] name = "tr" diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs index 792aec7a46e..1984f18b94d 100644 --- a/src/uu/tr/src/operation.rs +++ b/src/uu/tr/src/operation.rs @@ -26,6 +26,11 @@ use uucore::translate; use uucore::show_warning; +/// Common trait for operations that can process chunks of data +pub trait ChunkProcessor { + fn process_chunk(&self, input: &[u8], output: &mut Vec); +} + #[derive(Debug, Clone)] pub enum BadSequence { MissingCharClassName, @@ -592,7 +597,7 @@ fn set_to_bitmap(set: &[u8]) -> [bool; 256] { #[derive(Debug)] pub struct DeleteOperation { - delete_table: [bool; 256], + pub(crate) delete_table: [bool; 256], } impl DeleteOperation { @@ -610,9 +615,30 @@ impl SymbolTranslator for DeleteOperation { } } +impl ChunkProcessor for DeleteOperation { + fn process_chunk(&self, input: &[u8], output: &mut Vec) { + use crate::simd::{find_single_change, process_single_delete}; + + // Check if this is single character deletion + if let Some((delete_char, _)) = + find_single_change(&self.delete_table, |_, &should_delete| should_delete) + { + process_single_delete(input, output, delete_char); + } else { + // Standard deletion + output.extend( + input + .iter() + .filter(|&&b| !self.delete_table[b as usize]) + .copied(), + ); + } + } +} + #[derive(Debug)] pub struct TranslateOperation { - translation_table: [u8; 256], + pub(crate) translation_table: [u8; 256], } impl TranslateOperation { @@ -645,6 +671,23 @@ impl SymbolTranslator for TranslateOperation { } } +impl ChunkProcessor for TranslateOperation { + fn process_chunk(&self, input: &[u8], output: &mut Vec) { + use crate::simd::{find_single_change, process_single_char_replace}; + + // Check if this is a simple single-character translation + if let Some((source, target)) = + find_single_change(&self.translation_table, |i, &val| val != i as u8) + { + // Use SIMD-optimized single character replacement + process_single_char_replace(input, output, source, target); + } else { + // Standard translation using table lookup + output.extend(input.iter().map(|&b| self.translation_table[b as usize])); + } + } +} + #[derive(Debug, Clone)] pub struct SqueezeOperation { squeeze_table: [bool; 256], @@ -683,7 +726,7 @@ where { const BUFFER_SIZE: usize = 32768; // Large buffer for better throughput let mut buf = [0; BUFFER_SIZE]; - let mut output_buf = Vec::with_capacity(buf.len()); + let mut output_buf = Vec::with_capacity(BUFFER_SIZE); loop { let length = match input.read(&mut buf[..]) { @@ -701,21 +744,28 @@ where } } - #[cfg(not(target_os = "windows"))] - output - .write_all(&output_buf) - .map_err_context(|| translate!("tr-error-write-error"))?; - - // SIGPIPE is not available on Windows. - #[cfg(target_os = "windows")] - if let Err(err) = output.write_all(&output_buf) { - if err.kind() == std::io::ErrorKind::BrokenPipe { - std::process::exit(13); - } else { - return Err(err.map_err_context(|| translate!("tr-error-write-error"))); - } + if !output_buf.is_empty() { + crate::simd::write_output(output, &output_buf)?; } } Ok(()) } + +/// Platform-specific flush operation +#[inline] +pub fn flush_output(output: &mut W) -> UResult<()> { + #[cfg(not(target_os = "windows"))] + return output + .flush() + .map_err_context(|| translate!("tr-error-write-error")); + + #[cfg(target_os = "windows")] + match output.flush() { + Ok(()) => Ok(()), + Err(err) if err.kind() == std::io::ErrorKind::BrokenPipe => { + std::process::exit(13); + } + Err(err) => Err(err.map_err_context(|| translate!("tr-error-write-error"))), + } +} diff --git a/src/uu/tr/src/simd.rs b/src/uu/tr/src/simd.rs new file mode 100644 index 00000000000..7bf2427daf6 --- /dev/null +++ b/src/uu/tr/src/simd.rs @@ -0,0 +1,108 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +//! I/O processing infrastructure for tr operations with SIMD optimizations + +use crate::operation::ChunkProcessor; +use std::io::{BufRead, Write}; +use uucore::error::{FromIo, UResult}; +use uucore::translate; + +/// Helper to detect single-character operations for optimization +pub fn find_single_change(table: &[T; 256], check: F) -> Option<(u8, T)> +where + F: Fn(usize, &T) -> bool, + T: Copy, +{ + let matches: Vec<_> = table + .iter() + .enumerate() + .filter_map(|(i, val)| check(i, val).then_some((i as u8, *val))) + .take(2) + .collect(); + + (matches.len() == 1).then(|| matches[0]) +} + +/// SIMD-optimized single character replacement +#[inline] +pub fn process_single_char_replace( + input: &[u8], + output: &mut Vec, + source_char: u8, + target_char: u8, +) { + let count = bytecount::count(input, source_char); + if count == 0 { + output.extend_from_slice(input); + } else if count == input.len() { + output.resize(output.len() + input.len(), target_char); + } else { + output.extend( + input + .iter() + .map(|&b| if b == source_char { target_char } else { b }), + ); + } +} + +/// SIMD-optimized delete operation for single character +pub fn process_single_delete(input: &[u8], output: &mut Vec, delete_char: u8) { + let count = bytecount::count(input, delete_char); + if count == 0 { + output.extend_from_slice(input); + } else if count < input.len() { + output.extend(input.iter().filter(|&&b| b != delete_char).copied()); + } + // If count == input.len(), all deleted, output nothing +} + +/// Unified I/O processing for all operations +pub fn process_input(input: &mut R, output: &mut W, processor: &P) -> UResult<()> +where + R: BufRead, + W: Write, + P: ChunkProcessor + ?Sized, +{ + const BUFFER_SIZE: usize = 32768; + let mut buf = [0; BUFFER_SIZE]; + let mut output_buf = Vec::with_capacity(BUFFER_SIZE); + + loop { + let length = match input.read(&mut buf[..]) { + Ok(0) => break, + Ok(len) => len, + Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue, + Err(e) => return Err(e.map_err_context(|| translate!("tr-error-read-error"))), + }; + + output_buf.clear(); + processor.process_chunk(&buf[..length], &mut output_buf); + + if !output_buf.is_empty() { + write_output(output, &output_buf)?; + } + } + + Ok(()) +} + +/// Helper function to handle platform-specific write operations +#[inline] +pub fn write_output(output: &mut W, buf: &[u8]) -> UResult<()> { + #[cfg(not(target_os = "windows"))] + return output + .write_all(buf) + .map_err_context(|| translate!("tr-error-write-error")); + + #[cfg(target_os = "windows")] + match output.write_all(buf) { + Ok(()) => Ok(()), + Err(err) if err.kind() == std::io::ErrorKind::BrokenPipe => { + std::process::exit(13); + } + Err(err) => Err(err.map_err_context(|| translate!("tr-error-write-error"))), + } +} diff --git a/src/uu/tr/src/tr.rs b/src/uu/tr/src/tr.rs index 709cf227b6a..0a374cd0ebd 100644 --- a/src/uu/tr/src/tr.rs +++ b/src/uu/tr/src/tr.rs @@ -4,18 +4,20 @@ // file that was distributed with this source code. mod operation; +mod simd; mod unicode_table; use clap::{Arg, ArgAction, Command, value_parser}; use operation::{ DeleteOperation, Sequence, SqueezeOperation, SymbolTranslator, TranslateOperation, - translate_input, + flush_output, translate_input, }; +use simd::process_input; use std::ffi::OsString; -use std::io::{Write, stdin, stdout}; +use std::io::{stdin, stdout}; use uucore::LocalizedCommand; use uucore::display::Quotable; -use uucore::error::{FromIo, UResult, USimpleError, UUsageError}; +use uucore::error::{UResult, USimpleError, UUsageError}; use uucore::fs::is_stdin_directory; #[cfg(not(target_os = "windows"))] use uucore::libc; @@ -135,7 +137,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { translate_input(&mut locked_stdin, &mut locked_stdout, op)?; } else { let op = DeleteOperation::new(set1); - translate_input(&mut locked_stdin, &mut locked_stdout, op)?; + process_input(&mut locked_stdin, &mut locked_stdout, &op)?; } } else if squeeze_flag { if sets_len == 1 { @@ -149,21 +151,10 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { } } else { let op = TranslateOperation::new(set1, set2)?; - translate_input(&mut locked_stdin, &mut locked_stdout, op)?; + process_input(&mut locked_stdin, &mut locked_stdout, &op)?; } - #[cfg(not(target_os = "windows"))] - locked_stdout - .flush() - .map_err_context(|| translate!("tr-error-write-error"))?; - - // SIGPIPE is not available on Windows. - #[cfg(target_os = "windows")] - match locked_stdout.flush() { - Ok(()) => {} - Err(err) if err.kind() == std::io::ErrorKind::BrokenPipe => std::process::exit(13), - Err(err) => return Err(err.map_err_context(|| translate!("tr-error-write-error"))), - } + flush_output(&mut locked_stdout)?; Ok(()) }