Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,9 @@ glob = "0.3.1"
half = "2.4.1"
hostname = "0.4"
icu_collator = "2.0.0"
icu_decimal = "2.0.0"
icu_locale = "2.0.0"
icu_provider = "2.0.0"
indicatif = "0.18.0"
itertools = "0.14.0"
jiff = { version = "0.2.10", default-features = false, features = [
Expand Down
2 changes: 1 addition & 1 deletion src/uu/expr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ clap = { workspace = true }
num-bigint = { workspace = true }
num-traits = { workspace = true }
onig = { workspace = true }
uucore = { workspace = true }
uucore = { workspace = true, features = ["i18n-collator"] }
thiserror = { workspace = true }

[[bin]]
Expand Down
1 change: 1 addition & 0 deletions src/uu/expr/locales/en-US.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,4 @@ expr-error-unmatched-opening-brace = Unmatched {"\\{"}
expr-error-invalid-bracket-content = Invalid content of {"\\{\\}"}
expr-error-trailing-backslash = Trailing backslash
expr-error-too-big-range-quantifier-index = Regular expression too big
expr-error-match-utf8 = match does not support invalid UTF-8 encoding in { $arg }
1 change: 1 addition & 0 deletions src/uu/expr/locales/fr-FR.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,4 @@ expr-error-unmatched-opening-brace = Accolade ouvrante {"\\{"} non appariée
expr-error-invalid-bracket-content = Contenu invalide de {"\\{\\}"}
expr-error-trailing-backslash = Barre oblique inverse en fin
expr-error-too-big-range-quantifier-index = Expression régulière trop grande
expr-error-match-utf8 = match ne supporte pas l'encodage UTF-8 invalide dans { $arg }
23 changes: 15 additions & 8 deletions src/uu/expr/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,18 @@

use clap::{Arg, ArgAction, Command};
use std::collections::HashMap;
use std::io::Write;
use syntax_tree::{AstNode, is_truthy};
use thiserror::Error;
use uucore::locale::{get_message, get_message_with_args};
use uucore::os_string_to_vec;
use uucore::{
display::Quotable,
error::{UError, UResult},
format_usage,
};

mod locale_aware;
mod syntax_tree;

mod options {
Expand Down Expand Up @@ -54,6 +57,8 @@ pub enum ExprError {
TrailingBackslash,
#[error("{}", get_message("expr-error-too-big-range-quantifier-index"))]
TooBigRangeQuantifierIndex,
#[error("{}", get_message_with_args("expr-error-match-utf8", HashMap::from([("arg".to_string(), _0.quote().to_string())])))]
UnsupportedNonUtf8Match(String),
}

impl UError for ExprError {
Expand Down Expand Up @@ -98,25 +103,27 @@ pub fn uu_app() -> Command {
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
// For expr utility we do not want getopts.
// The following usage should work without escaping hyphens: `expr -15 = 1 + 2 \* \( 3 - -4 \)`
let args: Vec<String> = args
let args = args
.skip(1) // Skip binary name
.map(|a| a.to_string_lossy().to_string())
.collect();
.map(os_string_to_vec)
.collect::<Result<Vec<_>, _>>()?;

if args.len() == 1 && args[0] == "--help" {
if args.len() == 1 && args[0] == b"--help" {
let _ = uu_app().print_help();
} else if args.len() == 1 && args[0] == "--version" {
} else if args.len() == 1 && args[0] == b"--version" {
println!("{} {}", uucore::util_name(), uucore::crate_version!());
} else {
// The first argument may be "--" and should be ignored.
let args = if !args.is_empty() && args[0] == "--" {
let args = if !args.is_empty() && args[0] == b"--" {
&args[1..]
} else {
&args
};

let res: String = AstNode::parse(args)?.eval()?.eval_as_string();
println!("{res}");
let res = AstNode::parse(args)?.eval()?.eval_as_string();
let _ = std::io::stdout().write_all(&res);
let _ = std::io::stdout().write_all(b"\n");

if !is_truthy(&res.into()) {
return Err(1.into());
}
Expand Down
111 changes: 111 additions & 0 deletions src/uu/expr/src/locale_aware.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.

use std::cmp::Ordering;

use uucore::{
CharByte, IntoCharByteIterator,
i18n::{
UEncoding,
collator::{AlternateHandling, CollatorOptions, locale_cmp, try_init_collator},
get_locale_encoding,
},
};

use crate::syntax_tree::{MaybeNonUtf8Str, MaybeNonUtf8String};

/// Compare two possibly-non-UTF-8 strings using the current locale's
/// collator and return the resulting [`Ordering`].
///
/// On every call, a collator initialization is attempted with
/// `AlternateHandling::Shifted`; the outcome of that attempt is discarded and
/// the comparison itself is delegated to `locale_cmp`.
pub(crate) fn locale_comparison(a: &MaybeNonUtf8Str, b: &MaybeNonUtf8Str) -> Ordering {
// Build the collator options and try to install the collator.
let mut opts = CollatorOptions::default();
// NOTE(review): `Shifted` was picked empirically until the regression test
// for #5378 passed. Presumably it makes "variable" characters (whitespace,
// punctuation) ignorable at the primary levels, as in ICU -- confirm
// against the icu_collator documentation.
opts.alternate_handling = Some(AlternateHandling::Shifted);
// The result is deliberately ignored. Presumably the attempt fails harmlessly
// when a collator has already been installed -- TODO confirm.
let _ = try_init_collator(opts);
Comment on lines +23 to +25
Copy link
Collaborator Author

@RenjiSann RenjiSann Jul 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here, I don't exactly understand how the CollatorOptions affect the collating. I've just toyed with it until #5378 passed. That's why I said it's black magic.


locale_cmp(a, b)
}

/// Find the first element of `left` that also occurs anywhere in `right`,
/// using the element granularity dictated by `encoding`.
///
/// Returns the 1-based position of that element within `left`, or `0` when
/// no element of `left` occurs in `right`.
fn index_with_locale(
    left: &MaybeNonUtf8Str,
    right: &MaybeNonUtf8Str,
    encoding: UEncoding,
) -> usize {
    let first_hit = match encoding {
        // UTF-8 locale: both inputs are walked as "char-bytes", i.e. decoded
        // on the fly. UTF-8 characters are compared as long as the stream is
        // valid, and single bytes are compared where the sequence is invalid.
        UEncoding::Utf8 => left
            .iter_char_bytes()
            .position(|ch_h| right.iter_char_bytes().any(|ch_n| ch_n == ch_h)),
        // Default case: plain byte-wise comparison on the two arrays.
        UEncoding::Ascii => left
            .iter()
            .position(|ch_h| right.iter().any(|ch_n| ch_n == ch_h)),
    };

    // Positions are reported 1-based; 0 is reserved for "not found".
    match first_hit {
        Some(zero_based) => zero_based + 1,
        None => 0,
    }
}

/// Locale-aware index search: resolve the encoding of the current locale and
/// delegate to `index_with_locale`.
///
/// Returns the 1-based position in `left` of the first element that occurs in
/// `right`, or `0` when there is none.
pub(crate) fn locale_aware_index(left: &MaybeNonUtf8Str, right: &MaybeNonUtf8Str) -> usize {
    let encoding = get_locale_encoding();
    index_with_locale(left, right, encoding)
}

/// Compute the length of `input` according to the current locale.
///
/// * Non-UTF-8 locale: the byte length of `input`.
/// * UTF-8 locale: the number of `char`s when `input` is entirely valid
///   UTF-8; otherwise the byte length of the whole input.
///
/// NOTE(review): the fallback is all-or-nothing (one invalid byte switches the
/// whole input to byte counting), whereas the index/substr helpers in this
/// file work element by element -- confirm this difference is intended.
pub(crate) fn locale_aware_length(input: &MaybeNonUtf8Str) -> usize {
    match get_locale_encoding() {
        UEncoding::Ascii => input.len(),
        UEncoding::Utf8 => match std::str::from_utf8(input) {
            Ok(text) => text.chars().count(),
            Err(_) => input.len(),
        },
    }
}

/// Extract a substring of `s` according to `encoding`.
///
/// `pos` is the number of leading elements to skip and `len` the maximum
/// number of elements to keep. In the UTF-8 case an element is a "char-byte"
/// (a decoded character, or a raw byte where the input is not valid UTF-8);
/// otherwise an element is a single byte.
///
/// Out-of-range values are not an error: skipping past the end yields an
/// empty result, and an oversized `len` simply keeps everything up to the end
/// of the input.
fn substr_with_locale(
    s: MaybeNonUtf8String,
    pos: usize,
    len: usize,
    encoding: UEncoding,
) -> MaybeNonUtf8String {
    match encoding {
        UEncoding::Utf8 => {
            // Preallocate with the heuristic that all the kept chars are
            // ASCII (1 byte each). `len` comes straight from the caller and
            // may be arbitrarily large, so it is clamped to the number of
            // bytes that can possibly remain after skipping `pos` elements
            // (every element is at least one byte long). Without the clamp, a
            // huge `len` makes the allocation itself panic or abort.
            let capacity = len.min(s.len().saturating_sub(pos));
            let mut substring = MaybeNonUtf8String::with_capacity(capacity);
            let mut utf8_buf = [0; 4];

            // Iterate on char-bytes, skip the first `pos` of them, and push
            // each character (or raw byte) of the requested range.
            for unit in s.iter_char_bytes().skip(pos).take(len) {
                match unit {
                    CharByte::Char(c) => {
                        let encoded_len = c.encode_utf8(&mut utf8_buf).len();
                        substring.extend(&utf8_buf[..encoded_len]);
                    }
                    CharByte::Byte(b) => substring.push(b),
                }
            }
            substring
        }
        // Byte-oriented locale: a plain byte slice of the input.
        UEncoding::Ascii => s.into_iter().skip(pos).take(len).collect(),
    }
}

/// Locale-aware substring: resolve the encoding of the current locale and
/// delegate to `substr_with_locale`.
///
/// `pos` is the number of leading elements to skip and `len` the maximum
/// number of elements to keep; the selected part of `s` is returned as a new
/// byte sequence.
pub(crate) fn locale_aware_substr(
    s: MaybeNonUtf8String,
    pos: usize,
    len: usize,
) -> MaybeNonUtf8String {
    let encoding = get_locale_encoding();
    substr_with_locale(s, pos, len, encoding)
}
Loading
Loading