Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions fuzz/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions src/uu/tr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ nom = { workspace = true }
clap = { workspace = true }
uucore = { workspace = true, features = ["fs"] }
fluent = { workspace = true }
bytecount = { workspace = true, features = ["runtime-dispatch-simd"] }

[[bin]]
name = "tr"
Expand Down
82 changes: 66 additions & 16 deletions src/uu/tr/src/operation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ use uucore::translate;

use uucore::show_warning;

/// Common trait for operations that can process chunks of data
pub trait ChunkProcessor {
    /// Processes the bytes of `input` and stores the result in `output`.
    ///
    /// `input` is one chunk of the data being processed; it is only read.
    /// `output` is a caller-owned buffer that receives the processed bytes,
    /// so the caller decides when the buffer is cleared and written out.
    fn process_chunk(&self, input: &[u8], output: &mut Vec<u8>);
}

#[derive(Debug, Clone)]
pub enum BadSequence {
MissingCharClassName,
Expand Down Expand Up @@ -592,7 +597,7 @@ fn set_to_bitmap(set: &[u8]) -> [bool; 256] {

#[derive(Debug)]
pub struct DeleteOperation {
delete_table: [bool; 256],
pub(crate) delete_table: [bool; 256],
}

impl DeleteOperation {
Expand All @@ -610,9 +615,30 @@ impl SymbolTranslator for DeleteOperation {
}
}

impl ChunkProcessor for DeleteOperation {
    /// Appends every byte of `input` that is not marked in `delete_table`
    /// to `output`.
    ///
    /// When exactly one byte value is marked for deletion, the work is
    /// delegated to the dedicated single-byte routine; otherwise each byte
    /// is checked against the table.
    fn process_chunk(&self, input: &[u8], output: &mut Vec<u8>) {
        use crate::simd::{find_single_change, process_single_delete};

        // NOTE(review): the table is rescanned for every chunk; the result
        // depends only on `delete_table`, so it could be computed once.
        match find_single_change(&self.delete_table, |_, &marked| marked) {
            // Exactly one byte value is deleted: use the specialised path.
            Some((only_byte, _)) => process_single_delete(input, output, only_byte),
            // General case: keep the bytes whose table entry is `false`.
            None => output.extend(
                input
                    .iter()
                    .copied()
                    .filter(|&byte| !self.delete_table[usize::from(byte)]),
            ),
        }
    }
}

#[derive(Debug)]
pub struct TranslateOperation {
translation_table: [u8; 256],
pub(crate) translation_table: [u8; 256],
}

impl TranslateOperation {
Expand Down Expand Up @@ -645,6 +671,23 @@ impl SymbolTranslator for TranslateOperation {
}
}

impl ChunkProcessor for TranslateOperation {
    /// Appends the translation of every byte of `input` to `output`.
    ///
    /// When the translation table differs from the identity mapping in
    /// exactly one entry, the work is delegated to the dedicated
    /// single-byte replacement routine; otherwise each byte is looked up in
    /// the table.
    fn process_chunk(&self, input: &[u8], output: &mut Vec<u8>) {
        use crate::simd::{find_single_change, process_single_char_replace};

        // NOTE(review): the table is rescanned for every chunk; the result
        // depends only on `translation_table`, so it could be computed once.
        let single_mapping =
            find_single_change(&self.translation_table, |index, &mapped| {
                mapped != index as u8
            });

        match single_mapping {
            // Only one byte value is remapped: use the specialised path.
            Some((from, to)) => process_single_char_replace(input, output, from, to),
            // General case: plain table lookup for every input byte.
            None => output.extend(
                input
                    .iter()
                    .map(|&byte| self.translation_table[usize::from(byte)]),
            ),
        }
    }
}

#[derive(Debug, Clone)]
pub struct SqueezeOperation {
squeeze_table: [bool; 256],
Expand Down Expand Up @@ -683,7 +726,7 @@ where
{
const BUFFER_SIZE: usize = 32768; // Large buffer for better throughput
let mut buf = [0; BUFFER_SIZE];
let mut output_buf = Vec::with_capacity(buf.len());
let mut output_buf = Vec::with_capacity(BUFFER_SIZE);

loop {
let length = match input.read(&mut buf[..]) {
Expand All @@ -701,21 +744,28 @@ where
}
}

#[cfg(not(target_os = "windows"))]
output
.write_all(&output_buf)
.map_err_context(|| translate!("tr-error-write-error"))?;

// SIGPIPE is not available on Windows.
#[cfg(target_os = "windows")]
if let Err(err) = output.write_all(&output_buf) {
if err.kind() == std::io::ErrorKind::BrokenPipe {
std::process::exit(13);
} else {
return Err(err.map_err_context(|| translate!("tr-error-write-error")));
}
if !output_buf.is_empty() {
crate::simd::write_output(output, &output_buf)?;
}
}

Ok(())
}

/// Platform-specific flush operation
#[inline]
pub fn flush_output<W: Write>(output: &mut W) -> UResult<()> {
#[cfg(not(target_os = "windows"))]
return output
.flush()
.map_err_context(|| translate!("tr-error-write-error"));

#[cfg(target_os = "windows")]
match output.flush() {
Ok(()) => Ok(()),
Err(err) if err.kind() == std::io::ErrorKind::BrokenPipe => {
std::process::exit(13);
}
Err(err) => Err(err.map_err_context(|| translate!("tr-error-write-error"))),
}
}
108 changes: 108 additions & 0 deletions src/uu/tr/src/simd.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.

//! I/O processing infrastructure for tr operations with SIMD optimizations

use crate::operation::ChunkProcessor;
use std::io::{BufRead, Write};
use uucore::error::{FromIo, UResult};
use uucore::translate;

/// Helper to detect single-character operations for optimization.
///
/// Scans `table` in index order and calls `check(index, &entry)` for each
/// entry. Scanning stops as soon as a second matching entry is found.
///
/// # Returns
///
/// * `Some((index, entry))` when exactly one entry satisfies `check`; the
///   index is returned as a `u8`, which is lossless because the table has
///   256 entries.
/// * `None` when no entry or more than one entry satisfies `check`.
pub fn find_single_change<T, F>(table: &[T; 256], check: F) -> Option<(u8, T)>
where
    F: Fn(usize, &T) -> bool,
    T: Copy,
{
    // Lazy iterator over the matching entries; nothing is allocated.
    let mut hits = table
        .iter()
        .enumerate()
        .filter(|&(index, entry)| check(index, entry))
        .map(|(index, entry)| (index as u8, *entry));

    // No match at all -> None.
    let first = hits.next()?;

    // A second match means this is not a single-character operation.
    hits.next().is_none().then_some(first)
}

/// Single character replacement with a fast path driven by a byte count.
///
/// Appends `input` to `output` with every occurrence of `source_char`
/// replaced by `target_char`. The occurrences are first counted with
/// `bytecount::count`, which lets the two uniform cases (no occurrence, or
/// nothing but occurrences) be handled with a bulk copy or a bulk fill.
#[inline]
pub fn process_single_char_replace(
    input: &[u8],
    output: &mut Vec<u8>,
    source_char: u8,
    target_char: u8,
) {
    match bytecount::count(input, source_char) {
        // Nothing to replace: copy the chunk unchanged.
        0 => output.extend_from_slice(input),
        // The whole chunk consists of `source_char`: append a run of the
        // target byte of the same length.
        hits if hits == input.len() => output.resize(output.len() + hits, target_char),
        // Mixed content: replace byte by byte.
        _ => output.extend(input.iter().map(|&byte| {
            if byte == source_char {
                target_char
            } else {
                byte
            }
        })),
    }
}

/// Delete operation for a single character, with a fast path driven by a
/// byte count.
///
/// Appends to `output` every byte of `input` that differs from
/// `delete_char`. The occurrences are first counted with
/// `bytecount::count` so that a chunk without any occurrence is copied in
/// bulk and a chunk made only of `delete_char` appends nothing.
pub fn process_single_delete(input: &[u8], output: &mut Vec<u8>, delete_char: u8) {
    match bytecount::count(input, delete_char) {
        // No occurrence: the chunk passes through untouched.
        0 => output.extend_from_slice(input),
        // Every byte is deleted: nothing to append.
        hits if hits == input.len() => {}
        // Mixed content: keep only the bytes that are not deleted.
        _ => output.extend(input.iter().copied().filter(|&byte| byte != delete_char)),
    }
}

/// Unified I/O processing for all operations
pub fn process_input<R, W, P>(input: &mut R, output: &mut W, processor: &P) -> UResult<()>
where
R: BufRead,
W: Write,
P: ChunkProcessor + ?Sized,
{
const BUFFER_SIZE: usize = 32768;
let mut buf = [0; BUFFER_SIZE];
let mut output_buf = Vec::with_capacity(BUFFER_SIZE);

loop {
let length = match input.read(&mut buf[..]) {
Ok(0) => break,
Ok(len) => len,
Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
Err(e) => return Err(e.map_err_context(|| translate!("tr-error-read-error"))),
};

output_buf.clear();
processor.process_chunk(&buf[..length], &mut output_buf);

if !output_buf.is_empty() {
write_output(output, &output_buf)?;
}
}

Ok(())
}

/// Helper function to handle platform-specific write operations
#[inline]
pub fn write_output<W: Write>(output: &mut W, buf: &[u8]) -> UResult<()> {
#[cfg(not(target_os = "windows"))]
return output
.write_all(buf)
.map_err_context(|| translate!("tr-error-write-error"));

#[cfg(target_os = "windows")]
match output.write_all(buf) {
Ok(()) => Ok(()),
Err(err) if err.kind() == std::io::ErrorKind::BrokenPipe => {
std::process::exit(13);
}
Err(err) => Err(err.map_err_context(|| translate!("tr-error-write-error"))),
}
}
25 changes: 8 additions & 17 deletions src/uu/tr/src/tr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,20 @@
// file that was distributed with this source code.

mod operation;
mod simd;
mod unicode_table;

use clap::{Arg, ArgAction, Command, value_parser};
use operation::{
DeleteOperation, Sequence, SqueezeOperation, SymbolTranslator, TranslateOperation,
translate_input,
flush_output, translate_input,
};
use simd::process_input;
use std::ffi::OsString;
use std::io::{Write, stdin, stdout};
use std::io::{stdin, stdout};
use uucore::LocalizedCommand;
use uucore::display::Quotable;
use uucore::error::{FromIo, UResult, USimpleError, UUsageError};
use uucore::error::{UResult, USimpleError, UUsageError};
use uucore::fs::is_stdin_directory;
#[cfg(not(target_os = "windows"))]
use uucore::libc;
Expand Down Expand Up @@ -135,7 +137,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
translate_input(&mut locked_stdin, &mut locked_stdout, op)?;
} else {
let op = DeleteOperation::new(set1);
translate_input(&mut locked_stdin, &mut locked_stdout, op)?;
process_input(&mut locked_stdin, &mut locked_stdout, &op)?;
}
} else if squeeze_flag {
if sets_len == 1 {
Expand All @@ -149,21 +151,10 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
}
} else {
let op = TranslateOperation::new(set1, set2)?;
translate_input(&mut locked_stdin, &mut locked_stdout, op)?;
process_input(&mut locked_stdin, &mut locked_stdout, &op)?;
}

#[cfg(not(target_os = "windows"))]
locked_stdout
.flush()
.map_err_context(|| translate!("tr-error-write-error"))?;

// SIGPIPE is not available on Windows.
#[cfg(target_os = "windows")]
match locked_stdout.flush() {
Ok(()) => {}
Err(err) if err.kind() == std::io::ErrorKind::BrokenPipe => std::process::exit(13),
Err(err) => return Err(err.map_err_context(|| translate!("tr-error-write-error"))),
}
flush_output(&mut locked_stdout)?;

Ok(())
}
Expand Down
Loading