Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions src/uu/sort/BENCHMARKING.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,19 @@ Run `cargo build --release` before benchmarking after you make a change!

## Sorting numbers

- Generate a list of numbers: `seq 0 100000 | sort -R > shuffled_numbers.txt`.
- Benchmark numeric sorting with hyperfine: `hyperfine "target/release/coreutils sort shuffled_numbers.txt -n -o output.txt"`.
- Generate a list of numbers:
```
shuf -i 1-1000000 -n 1000000 > shuffled_numbers.txt
# or
seq 1 1000000 | sort -R > shuffled_numbers.txt
```
- Benchmark numeric sorting with hyperfine:
```
hyperfine --warmup 3 \
'/tmp/gnu-sort -n /tmp/shuffled_numbers.txt' \
'/tmp/uu_before sort -n /tmp/shuffled_numbers.txt' \
'/tmp/uu_after sort -n /tmp/shuffled_numbers.txt'
```

## Sorting numbers with -g

Expand Down
9 changes: 9 additions & 0 deletions src/uu/sort/src/chunks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ pub struct LineData<'a> {
pub selections: Vec<&'a str>,
pub num_infos: Vec<NumInfo>,
pub parsed_floats: Vec<GeneralF64ParseResult>,
pub line_num_floats: Vec<Option<f64>>,
}

impl Chunk {
Expand All @@ -52,6 +53,7 @@ impl Chunk {
contents.line_data.selections.clear();
contents.line_data.num_infos.clear();
contents.line_data.parsed_floats.clear();
contents.line_data.line_num_floats.clear();
let lines = unsafe {
// SAFETY: It is safe to (temporarily) transmute to a vector of lines with a longer lifetime,
// because the vector is empty.
Expand All @@ -73,13 +75,15 @@ impl Chunk {
selections,
std::mem::take(&mut contents.line_data.num_infos),
std::mem::take(&mut contents.line_data.parsed_floats),
std::mem::take(&mut contents.line_data.line_num_floats),
)
});
RecycledChunk {
lines: recycled_contents.0,
selections: recycled_contents.1,
num_infos: recycled_contents.2,
parsed_floats: recycled_contents.3,
line_num_floats: recycled_contents.4,
buffer: self.into_owner(),
}
}
Expand All @@ -97,6 +101,7 @@ pub struct RecycledChunk {
selections: Vec<&'static str>,
num_infos: Vec<NumInfo>,
parsed_floats: Vec<GeneralF64ParseResult>,
line_num_floats: Vec<Option<f64>>,
buffer: Vec<u8>,
}

Expand All @@ -107,6 +112,7 @@ impl RecycledChunk {
selections: Vec::new(),
num_infos: Vec::new(),
parsed_floats: Vec::new(),
line_num_floats: Vec::new(),
buffer: vec![0; capacity],
}
}
Expand Down Expand Up @@ -149,6 +155,7 @@ pub fn read<T: Read>(
selections,
num_infos,
parsed_floats,
line_num_floats,
mut buffer,
} = recycled_chunk;
if buffer.len() < carry_over.len() {
Expand Down Expand Up @@ -184,6 +191,7 @@ pub fn read<T: Read>(
selections,
num_infos,
parsed_floats,
line_num_floats,
};
parse_lines(read, &mut lines, &mut line_data, separator, settings);
Ok(ChunkContents { lines, line_data })
Expand All @@ -207,6 +215,7 @@ fn parse_lines<'a>(
assert!(line_data.selections.is_empty());
assert!(line_data.num_infos.is_empty());
assert!(line_data.parsed_floats.is_empty());
assert!(line_data.line_num_floats.is_empty());
let mut token_buffer = vec![];
lines.extend(
read.split(separator as char)
Expand Down
25 changes: 25 additions & 0 deletions src/uu/sort/src/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,13 @@ impl<'a> Line<'a> {
if settings.precomputed.needs_tokens {
tokenize(line, settings.separator, token_buffer);
}
if settings.mode == SortMode::Numeric {
// exclude inf, nan, scientific notation
let line_num_float = (!line.contains(char::is_alphabetic))
.then(|| line.parse::<f64>().ok())
.flatten();
line_data.line_num_floats.push(line_num_float);
}
for (selector, selection) in settings
.selectors
.iter()
Expand Down Expand Up @@ -1563,6 +1570,24 @@ fn compare_by<'a>(
let mut selection_index = 0;
let mut num_info_index = 0;
let mut parsed_float_index = 0;

if let (Some(Some(a_f64)), Some(Some(b_f64))) = (
a_line_data.line_num_floats.get(a.index),
b_line_data.line_num_floats.get(b.index),
) {
// we don't use total_cmp() because it always sorts -0 before 0
if let Some(cmp) = a_f64.partial_cmp(b_f64) {
// don't trust `Ordering::Equal` if lines are not fully equal
if cmp != Ordering::Equal || a.line == b.line {
return if global_settings.reverse {
cmp.reverse()
} else {
cmp
};
}
}
}

for selector in &global_settings.selectors {
let (a_str, b_str) = if selector.needs_selection {
let selections = (
Expand Down
Loading