diff --git a/Cargo.lock b/Cargo.lock
index d3a59a95d00..5a9af63f9cf 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -990,6 +990,17 @@ dependencies = [
  "windows-sys 0.59.0",
 ]
 
+[[package]]
+name = "fixed_decimal"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "35943d22b2f19c0cb198ecf915910a8158e94541c89dcc63300d7799d46c2c5e"
+dependencies = [
+ "displaydoc",
+ "smallvec",
+ "writeable",
+]
+
 [[package]]
 name = "flate2"
 version = "1.1.2"
@@ -1290,6 +1301,29 @@ dependencies = [
  "zerovec",
 ]
 
+[[package]]
+name = "icu_decimal"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fec61c43fdc4e368a9f450272833123a8ef0d7083a44597660ce94d791b8a2e2"
+dependencies = [
+ "displaydoc",
+ "fixed_decimal",
+ "icu_decimal_data",
+ "icu_locale",
+ "icu_locale_core",
+ "icu_provider",
+ "tinystr",
+ "writeable",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_decimal_data"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b70963bc35f9bdf1bc66a5c1f458f4991c1dc71760e00fa06016b2c76b2738d5"
+
 [[package]]
 name = "icu_locale"
 version = "2.0.0"
@@ -3946,7 +3980,9 @@ dependencies = [
  "glob",
  "hex",
  "icu_collator",
+ "icu_decimal",
  "icu_locale",
+ "icu_provider",
  "itertools 0.14.0",
  "libc",
  "md-5",
diff --git a/Cargo.toml b/Cargo.toml
index 5d9479bc8f4..0ec32fb39b9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -314,7 +314,9 @@ glob = "0.3.1"
 half = "2.4.1"
 hostname = "0.4"
 icu_collator = "2.0.0"
+icu_decimal = "2.0.0"
 icu_locale = "2.0.0"
+icu_provider = "2.0.0"
 indicatif = "0.18.0"
 itertools = "0.14.0"
 jiff = { version = "0.2.10", default-features = false, features = [
diff --git a/src/uu/expr/Cargo.toml b/src/uu/expr/Cargo.toml
index 00e3e3cab03..54b831bc487 100644
--- a/src/uu/expr/Cargo.toml
+++ b/src/uu/expr/Cargo.toml
@@ -22,7 +22,7 @@ clap = { workspace = true }
 num-bigint = { workspace = true }
 num-traits = { workspace = true }
 onig = { workspace = true }
-uucore = { workspace = true }
+uucore = { workspace = true, features = ["i18n-collator"] }
 thiserror = { workspace = true }
 
 [[bin]]
diff --git a/src/uu/expr/locales/en-US.ftl b/src/uu/expr/locales/en-US.ftl
index 8d26566e9a6..2c09eee7840 100644
--- a/src/uu/expr/locales/en-US.ftl
+++ b/src/uu/expr/locales/en-US.ftl
@@ -63,3 +63,4 @@ expr-error-unmatched-opening-brace = Unmatched {"\\{"}
 expr-error-invalid-bracket-content = Invalid content of {"\\{\\}"}
 expr-error-trailing-backslash = Trailing backslash
 expr-error-too-big-range-quantifier-index = Regular expression too big
+expr-error-match-utf8 = match does not support invalid UTF-8 encoding in { $arg }
diff --git a/src/uu/expr/locales/fr-FR.ftl b/src/uu/expr/locales/fr-FR.ftl
index f496b270ce7..b529db9d523 100644
--- a/src/uu/expr/locales/fr-FR.ftl
+++ b/src/uu/expr/locales/fr-FR.ftl
@@ -63,3 +63,4 @@ expr-error-unmatched-opening-brace = Accolade ouvrante {"\\{"} non appariée
 expr-error-invalid-bracket-content = Contenu invalide de {"\\{\\}"}
 expr-error-trailing-backslash = Barre oblique inverse en fin
 expr-error-too-big-range-quantifier-index = Expression régulière trop grande
+expr-error-match-utf8 = match ne supporte pas l'encodage UTF-8 invalide dans { $arg }
diff --git a/src/uu/expr/src/expr.rs b/src/uu/expr/src/expr.rs
index 7225f986cf9..4db419efaf3 100644
--- a/src/uu/expr/src/expr.rs
+++ b/src/uu/expr/src/expr.rs
@@ -5,15 +5,18 @@
 
 use clap::{Arg, ArgAction, Command};
 use std::collections::HashMap;
+use std::io::Write;
 use syntax_tree::{AstNode, is_truthy};
 use thiserror::Error;
 use uucore::locale::{get_message, get_message_with_args};
+use uucore::os_string_to_vec;
 use uucore::{
     display::Quotable,
     error::{UError, UResult},
     format_usage,
 };
 
+mod locale_aware;
 mod syntax_tree;
 
 mod options {
@@ -54,6 +57,8 @@ pub enum ExprError {
     TrailingBackslash,
     #[error("{}", get_message("expr-error-too-big-range-quantifier-index"))]
     TooBigRangeQuantifierIndex,
+    #[error("{}", get_message_with_args("expr-error-match-utf8", HashMap::from([("arg".to_string(), _0.quote().to_string())])))]
+    UnsupportedNonUtf8Match(String),
 }
 
 impl UError for ExprError {
@@ -98,25 +103,27 @@ pub fn uu_app() -> Command {
 pub fn uumain(args: impl uucore::Args) -> UResult<()> {
     // For expr utility we do not want getopts.
     // The following usage should work without escaping hyphens: `expr -15 = 1 + 2 \* \( 3 - -4 \)`
-    let args: Vec<String> = args
+    let args = args
         .skip(1) // Skip binary name
-        .map(|a| a.to_string_lossy().to_string())
-        .collect();
+        .map(os_string_to_vec)
+        .collect::<Result<Vec<_>, _>>()?;
 
-    if args.len() == 1 && args[0] == "--help" {
+    if args.len() == 1 && args[0] == b"--help" {
         let _ = uu_app().print_help();
-    } else if args.len() == 1 && args[0] == "--version" {
+    } else if args.len() == 1 && args[0] == b"--version" {
         println!("{} {}", uucore::util_name(), uucore::crate_version!());
     } else {
         // The first argument may be "--" and should be be ignored.
-        let args = if !args.is_empty() && args[0] == "--" {
+        let args = if !args.is_empty() && args[0] == b"--" {
             &args[1..]
         } else {
             &args
         };
 
-        let res: String = AstNode::parse(args)?.eval()?.eval_as_string();
-        println!("{res}");
+        let res = AstNode::parse(args)?.eval()?.eval_as_string();
+        let _ = std::io::stdout().write_all(&res);
+        let _ = std::io::stdout().write_all(b"\n");
+
         if !is_truthy(&res.into()) {
             return Err(1.into());
         }
diff --git a/src/uu/expr/src/locale_aware.rs b/src/uu/expr/src/locale_aware.rs
new file mode 100644
index 00000000000..c8a4e73e33e
--- /dev/null
+++ b/src/uu/expr/src/locale_aware.rs
@@ -0,0 +1,111 @@
+// This file is part of the uutils coreutils package.
+//
+// For the full copyright and license information, please view the LICENSE
+// file that was distributed with this source code.
+
+use std::cmp::Ordering;
+
+use uucore::{
+    CharByte, IntoCharByteIterator,
+    i18n::{
+        UEncoding,
+        collator::{AlternateHandling, CollatorOptions, locale_cmp, try_init_collator},
+        get_locale_encoding,
+    },
+};
+
+use crate::syntax_tree::{MaybeNonUtf8Str, MaybeNonUtf8String};
+
+/// Perform a locale-aware comparison of two byte strings using the current
+/// locale's collator, making sure that collator is initialized beforehand.
+pub(crate) fn locale_comparison(a: &MaybeNonUtf8Str, b: &MaybeNonUtf8Str) -> Ordering {
+    // Initialize the collator; the result only tells whether this call did it.
+    let mut opts = CollatorOptions::default();
+    opts.alternate_handling = Some(AlternateHandling::Shifted); // This is black magic
+    let _ = try_init_collator(opts);
+
+    locale_cmp(a, b)
+}
+
+/// Return the 1-based position of the first element of `left` that also
+/// occurs in `right`, or 0 when there is none. What counts as an element
+/// depends on the given locale encoding.
+fn index_with_locale(
+    left: &MaybeNonUtf8Str,
+    right: &MaybeNonUtf8Str,
+    encoding: UEncoding,
+) -> usize {
+    let found = match encoding {
+        // In the UTF-8 case, both operands are decoded on the fly: valid
+        // sequences are compared as chars, and the bytes of an invalid
+        // sequence are compared as raw bytes.
+        UEncoding::Utf8 => left
+            .iter_char_bytes()
+            .position(|hay| right.iter_char_bytes().any(|needle| needle == hay)),
+        // In the default case, the arrays are simply compared byte-wise.
+        UEncoding::Ascii => left
+            .iter()
+            .position(|hay| right.iter().any(|needle| needle == hay)),
+    };
+
+    // `position` is 0-based while the reported index is 1-based; 0 is
+    // reserved for "no element of `left` occurs in `right`".
+    found.map_or(0, |idx| idx + 1)
+}
+
+/// Perform an index search (see `index_with_locale`) using the encoding
+/// reported for the current locale by `get_locale_encoding`.
+pub(crate) fn locale_aware_index(left: &MaybeNonUtf8Str, right: &MaybeNonUtf8Str) -> usize {
+    index_with_locale(left, right, get_locale_encoding())
+}
+
+/// Perform a string length calculation depending on the current locale. In
+/// UTF-8 locale, each valid UTF-8 char and each invalid byte counts as one
+/// unit, like `index` and `substr` do. Otherwise, return the byte length.
+pub(crate) fn locale_aware_length(input: &MaybeNonUtf8Str) -> usize {
+    match get_locale_encoding() {
+        UEncoding::Utf8 => input.iter_char_bytes().count(),
+        UEncoding::Ascii => input.len(),
+    }
+}
+
+/// Return up to `len` characters of `s`, starting after the first `pos` ones.
+fn substr_with_locale(
+    s: MaybeNonUtf8String,
+    pos: usize,
+    len: usize,
+    encoding: UEncoding,
+) -> MaybeNonUtf8String {
+    match encoding {
+        UEncoding::Utf8 => {
+            // Assume 1-byte (ASCII) chars for the initial capacity, capped by
+            // the input size: `len` is user-controlled and may be huge.
+            let mut string = MaybeNonUtf8String::with_capacity(len.min(s.len()));
+            let mut buf = [0; 4];
+
+            // Walk the char-bytes in the requested range, re-encoding valid
+            // chars and copying invalid bytes through unchanged.
+            for cb in s.iter_char_bytes().skip(pos).take(len) {
+                match cb {
+                    CharByte::Char(c) => {
+                        let n = c.encode_utf8(&mut buf).len();
+                        string.extend(&buf[..n]);
+                    }
+                    CharByte::Byte(b) => string.push(b),
+                }
+            }
+            string
+        }
+        UEncoding::Ascii => s.into_iter().skip(pos).take(len).collect(),
+    }
+}
+
+/// Given a byte sequence, a 0-based start position and a length, return the
+/// corresponding substring depending on the current locale's encoding.
+pub(crate) fn locale_aware_substr(
+    s: MaybeNonUtf8String,
+    pos: usize,
+    len: usize,
+) -> MaybeNonUtf8String {
+    substr_with_locale(s, pos, len, get_locale_encoding())
+}
diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs
index b0ae0142f92..f2e56717368 100644
--- a/src/uu/expr/src/syntax_tree.rs
+++ b/src/uu/expr/src/syntax_tree.rs
@@ -7,11 +7,19 @@
 
 use std::{cell::Cell, collections::BTreeMap};
 
-use num_bigint::{BigInt, ParseBigIntError};
+use num_bigint::BigInt;
 use num_traits::ToPrimitive;
 use onig::{Regex, RegexOptions, Syntax};
 
-use crate::{ExprError, ExprResult};
+use crate::{
+    ExprError, ExprResult,
+    locale_aware::{
+        locale_aware_index, locale_aware_length, locale_aware_substr, locale_comparison,
+    },
+};
+
+pub(crate) type MaybeNonUtf8String = Vec<u8>;
+pub(crate) type MaybeNonUtf8Str = [u8];
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum BinOp {
@@ -63,29 +71,27 @@ impl BinOp {
 
 impl RelationOp {
     fn eval(&self, a: ExprResult<NumOrStr>, b: ExprResult<NumOrStr>) -> ExprResult<NumOrStr> {
+        // Make sure that the given comparison validates the relational operator.
+        let check_cmp = |cmp| {
+            use RelationOp::{Eq, Geq, Gt, Leq, Lt, Neq};
+            use std::cmp::Ordering::{Equal, Greater, Less};
+            matches!(
+                (self, cmp),
+                (Lt | Leq | Neq, Less) | (Leq | Eq | Geq, Equal) | (Gt | Geq | Neq, Greater)
+            )
+        };
+
         let a = a?;
         let b = b?;
-        let b = if let (Ok(a), Ok(b)) = (&a.to_bigint(), &b.to_bigint()) {
-            match self {
-                Self::Lt => a < b,
-                Self::Leq => a <= b,
-                Self::Eq => a == b,
-                Self::Neq => a != b,
-                Self::Gt => a > b,
-                Self::Geq => a >= b,
-            }
+        let b = if let (Some(a), Some(b)) = (&a.to_bigint(), &b.to_bigint()) {
+            check_cmp(a.cmp(b))
         } else {
             // These comparisons should be using locale settings
+
             let a = a.eval_as_string();
             let b = b.eval_as_string();
-            match self {
-                Self::Lt => a < b,
-                Self::Leq => a <= b,
-                Self::Eq => a == b,
-                Self::Neq => a != b,
-                Self::Gt => a > b,
-                Self::Geq => a >= b,
-            }
+
+            check_cmp(locale_comparison(&a, &b))
         };
         if b { Ok(1.into()) } else { Ok(0.into()) }
     }
@@ -147,8 +153,17 @@ impl StringOp {
                 Ok(left)
             }
             Self::Match => {
-                let left = left?.eval_as_string();
-                let right = right?.eval_as_string();
+                let left = String::from_utf8(left?.eval_as_string()).map_err(|u| {
+                    ExprError::UnsupportedNonUtf8Match(
+                        String::from_utf8_lossy(u.as_bytes()).into_owned(),
+                    )
+                })?;
+                let right = String::from_utf8(right?.eval_as_string()).map_err(|u| {
+                    ExprError::UnsupportedNonUtf8Match(
+                        String::from_utf8_lossy(u.as_bytes()).into_owned(),
+                    )
+                })?;
+
                 check_posix_regex_errors(&right)?;
 
                 // Transpile the input pattern from BRE syntax to `onig` crate's `Syntax::grep`
@@ -237,14 +252,8 @@ impl StringOp {
             Self::Index => {
                 let left = left?.eval_as_string();
                 let right = right?.eval_as_string();
-                for (current_idx, ch_h) in left.chars().enumerate() {
-                    for ch_n in right.to_string().chars() {
-                        if ch_n == ch_h {
-                            return Ok((current_idx + 1).into());
-                        }
-                    }
-                }
-                Ok(0.into())
+
+                Ok(locale_aware_index(&left, &right).into())
             }
         }
     }
@@ -361,33 +370,33 @@ fn check_posix_regex_errors(pattern: &str) -> ExprResult<()> {
 }
 
 /// Precedence for infix binary operators
-const PRECEDENCE: &[&[(&str, BinOp)]] = &[
-    &[("|", BinOp::String(StringOp::Or))],
-    &[("&", BinOp::String(StringOp::And))],
+const PRECEDENCE: &[&[(&MaybeNonUtf8Str, BinOp)]] = &[
+    &[(b"|", BinOp::String(StringOp::Or))],
+    &[(b"&", BinOp::String(StringOp::And))],
     &[
-        ("<", BinOp::Relation(RelationOp::Lt)),
-        ("<=", BinOp::Relation(RelationOp::Leq)),
-        ("=", BinOp::Relation(RelationOp::Eq)),
-        ("!=", BinOp::Relation(RelationOp::Neq)),
-        (">=", BinOp::Relation(RelationOp::Geq)),
-        (">", BinOp::Relation(RelationOp::Gt)),
+        (b"<", BinOp::Relation(RelationOp::Lt)),
+        (b"<=", BinOp::Relation(RelationOp::Leq)),
+        (b"=", BinOp::Relation(RelationOp::Eq)),
+        (b"!=", BinOp::Relation(RelationOp::Neq)),
+        (b">=", BinOp::Relation(RelationOp::Geq)),
+        (b">", BinOp::Relation(RelationOp::Gt)),
     ],
     &[
-        ("+", BinOp::Numeric(NumericOp::Add)),
-        ("-", BinOp::Numeric(NumericOp::Sub)),
+        (b"+", BinOp::Numeric(NumericOp::Add)),
+        (b"-", BinOp::Numeric(NumericOp::Sub)),
     ],
     &[
-        ("*", BinOp::Numeric(NumericOp::Mul)),
-        ("/", BinOp::Numeric(NumericOp::Div)),
-        ("%", BinOp::Numeric(NumericOp::Mod)),
+        (b"*", BinOp::Numeric(NumericOp::Mul)),
+        (b"/", BinOp::Numeric(NumericOp::Div)),
+        (b"%", BinOp::Numeric(NumericOp::Mod)),
     ],
-    &[(":", BinOp::String(StringOp::Match))],
+    &[(b":", BinOp::String(StringOp::Match))],
 ];
 
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum NumOrStr {
     Num(BigInt),
-    Str(String),
+    Str(MaybeNonUtf8String),
 }
 
 impl From<usize> for NumOrStr {
@@ -404,30 +413,37 @@ impl From<BigInt> for NumOrStr {
 
 impl From<String> for NumOrStr {
     fn from(str: String) -> Self {
+        Self::Str(str.into())
+    }
+}
+
+impl From<MaybeNonUtf8String> for NumOrStr {
+    fn from(str: MaybeNonUtf8String) -> Self {
         Self::Str(str)
     }
 }
 
 impl NumOrStr {
-    pub fn to_bigint(&self) -> Result<BigInt, ParseBigIntError> {
+    pub fn to_bigint(&self) -> Option<BigInt> {
         match self {
-            Self::Num(num) => Ok(num.clone()),
-            Self::Str(str) => str.parse::<BigInt>(),
+            Self::Num(num) => Some(num.clone()),
+            Self::Str(str) => std::str::from_utf8(str).ok()?.parse::<BigInt>().ok(),
         }
     }
 
     pub fn eval_as_bigint(self) -> ExprResult<BigInt> {
         match self {
             Self::Num(num) => Ok(num),
-            Self::Str(str) => str
+            Self::Str(str) => String::from_utf8(str)
+                .map_err(|_| ExprError::NonIntegerArgument)?
                 .parse::<BigInt>()
                 .map_err(|_| ExprError::NonIntegerArgument),
         }
     }
 
-    pub fn eval_as_string(self) -> String {
+    pub fn eval_as_string(self) -> MaybeNonUtf8String {
         match self {
-            Self::Num(num) => num.to_string(),
+            Self::Num(num) => num.to_string().into(),
             Self::Str(str) => str,
         }
     }
@@ -447,7 +463,7 @@ pub enum AstNodeInner {
         value: NumOrStr,
     },
     Leaf {
-        value: String,
+        value: MaybeNonUtf8String,
     },
     BinOp {
         op_type: BinOp,
@@ -465,7 +481,7 @@ pub enum AstNodeInner {
 }
 
 impl AstNode {
-    pub fn parse(input: &[impl AsRef<str>]) -> ExprResult<Self> {
+    pub fn parse(input: &[impl AsRef<MaybeNonUtf8Str>]) -> ExprResult<Self> {
         Parser::new(input).parse()
     }
 
@@ -492,7 +508,7 @@ impl AstNode {
                     result_stack.insert(node.id, Ok(value.clone()));
                 }
                 AstNodeInner::Leaf { value, .. } => {
-                    result_stack.insert(node.id, Ok(value.to_string().into()));
+                    result_stack.insert(node.id, Ok(value.to_owned().into()));
                 }
                 AstNodeInner::BinOp {
                     op_type,
@@ -529,7 +545,7 @@ impl AstNode {
                         continue;
                     };
 
-                    let string: String = string?.eval_as_string();
+                    let string: MaybeNonUtf8String = string?.eval_as_string();
 
                     // The GNU docs say:
                     //
@@ -550,7 +566,7 @@ impl AstNode {
                         .unwrap_or(0);
 
                     if let (Some(pos), Some(_)) = (pos.checked_sub(1), length.checked_sub(1)) {
-                        let result = string.chars().skip(pos).take(length).collect::<String>();
+                        let result = locale_aware_substr(string, pos, length);
                         result_stack.insert(node.id, Ok(result.into()));
                     } else {
                         result_stack.insert(node.id, Ok(String::new().into()));
@@ -565,7 +581,7 @@ impl AstNode {
                         continue;
                     };
 
-                    let length = string?.eval_as_string().chars().count();
+                    let length = locale_aware_length(&string?.eval_as_string());
                     result_stack.insert(node.id, Ok(length.into()));
                 }
             }
@@ -591,17 +607,17 @@ fn get_next_id() -> u32 {
     })
 }
 
-struct Parser<'a, S: AsRef<str>> {
+struct Parser<'a, S: AsRef<MaybeNonUtf8Str>> {
     input: &'a [S],
     index: usize,
 }
 
-impl<'a, S: AsRef<str>> Parser<'a, S> {
+impl<'a, S: AsRef<MaybeNonUtf8Str>> Parser<'a, S> {
     fn new(input: &'a [S]) -> Self {
         Self { input, index: 0 }
     }
 
-    fn next(&mut self) -> ExprResult<&'a str> {
+    fn next(&mut self) -> ExprResult<&'a MaybeNonUtf8Str> {
         let next = self.input.get(self.index);
         if let Some(next) = next {
             self.index += 1;
@@ -610,12 +626,12 @@ impl<'a, S: AsRef<str>> Parser<'a, S> {
             // The indexing won't panic, because we know that the input size
             // is greater than zero.
             Err(ExprError::MissingArgument(
-                self.input[self.index - 1].as_ref().into(),
+                String::from_utf8_lossy(self.input[self.index - 1].as_ref()).into_owned(),
             ))
         }
     }
 
-    fn accept<T>(&mut self, f: impl Fn(&str) -> Option<T>) -> Option<T> {
+    fn accept<T>(&mut self, f: impl Fn(&MaybeNonUtf8Str) -> Option<T>) -> Option<T> {
         let next = self.input.get(self.index)?;
         let tok = f(next.as_ref());
         if let Some(tok) = tok {
@@ -632,7 +648,9 @@ impl<'a, S: AsRef<str>> Parser<'a, S> {
         }
         let res = self.parse_expression()?;
         if let Some(arg) = self.input.get(self.index) {
-            return Err(ExprError::UnexpectedArgument(arg.as_ref().into()));
+            return Err(ExprError::UnexpectedArgument(
+                String::from_utf8_lossy(arg.as_ref()).into_owned(),
+            ));
         }
         Ok(res)
     }
@@ -675,7 +693,7 @@ impl<'a, S: AsRef<str>> Parser<'a, S> {
     fn parse_simple_expression(&mut self) -> ExprResult<AstNode> {
         let first = self.next()?;
         let inner = match first {
-            "match" => {
+            b"match" => {
                 let left = self.parse_simple_expression()?;
                 let right = self.parse_simple_expression()?;
                 AstNodeInner::BinOp {
@@ -684,7 +702,7 @@ impl<'a, S: AsRef<str>> Parser<'a, S> {
                     right: Box::new(right),
                 }
             }
-            "substr" => {
+            b"substr" => {
                 let string = self.parse_simple_expression()?;
                 let pos = self.parse_simple_expression()?;
                 let length = self.parse_simple_expression()?;
@@ -694,7 +712,7 @@ impl<'a, S: AsRef<str>> Parser<'a, S> {
                     length: Box::new(length),
                 }
             }
-            "index" => {
+            b"index" => {
                 let left = self.parse_simple_expression()?;
                 let right = self.parse_simple_expression()?;
                 AstNodeInner::BinOp {
@@ -703,32 +721,32 @@ impl<'a, S: AsRef<str>> Parser<'a, S> {
                     right: Box::new(right),
                 }
             }
-            "length" => {
+            b"length" => {
                 let string = self.parse_simple_expression()?;
                 AstNodeInner::Length {
                     string: Box::new(string),
                 }
             }
-            "+" => AstNodeInner::Leaf {
+            b"+" => AstNodeInner::Leaf {
                 value: self.next()?.into(),
             },
-            "(" => {
+            b"(" => {
                 // Evaluate the node just after parsing to we detect arithmetic
                 // errors before checking for the closing parenthesis.
                 let s = self.parse_expression()?.evaluated()?;
 
                 match self.next() {
-                    Ok(")") => {}
+                    Ok(b")") => {}
                     // Since we have parsed at least a '(', there will be a token
                     // at `self.index - 1`. So this indexing won't panic.
                     Ok(_) => {
                         return Err(ExprError::ExpectedClosingBraceInsteadOf(
-                            self.input[self.index - 1].as_ref().into(),
+                            String::from_utf8_lossy(self.input[self.index - 1].as_ref()).into(),
                         ));
                     }
                     Err(ExprError::MissingArgument(_)) => {
                         return Err(ExprError::ExpectedClosingBraceAfter(
-                            self.input[self.index - 1].as_ref().into(),
+                            String::from_utf8_lossy(self.input[self.index - 1].as_ref()).into(),
                         ));
                     }
                     Err(e) => return Err(e),
@@ -752,11 +770,11 @@ pub fn is_truthy(s: &NumOrStr) -> bool {
         NumOrStr::Num(num) => num != &BigInt::from(0),
         NumOrStr::Str(str) => {
             // Edge case: `-` followed by nothing is truthy
-            if str == "-" {
+            if str == b"-" {
                 return true;
             }
 
-            let mut bytes = str.bytes();
+            let mut bytes = str.iter().copied();
 
             // Empty string is falsy
             let Some(first) = bytes.next() else {
@@ -922,7 +940,7 @@ mod test {
             .unwrap()
             .eval()
             .unwrap();
-        assert_eq!(result.eval_as_string(), "");
+        assert_eq!(result.eval_as_string(), b"");
     }
 
     #[test]
@@ -931,13 +949,13 @@ mod test {
             .unwrap()
             .eval()
             .unwrap();
-        assert_eq!(result.eval_as_string(), "0");
+        assert_eq!(result.eval_as_string(), b"0");
 
         let result = AstNode::parse(&["*cats", ":", r"*cats"])
             .unwrap()
             .eval()
             .unwrap();
-        assert_eq!(result.eval_as_string(), "5");
+        assert_eq!(result.eval_as_string(), b"5");
     }
 
     #[test]
@@ -946,7 +964,7 @@ mod test {
             .unwrap()
             .eval()
             .unwrap();
-        assert_eq!(result.eval_as_string(), "0");
+        assert_eq!(result.eval_as_string(), b"0");
     }
 
     #[test]
diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml
index 6ff74992c48..9a37f22faf8 100644
--- a/src/uucore/Cargo.toml
+++ b/src/uucore/Cargo.toml
@@ -27,10 +27,6 @@ dns-lookup = { workspace = true, optional = true }
 dunce = { version = "1.0.4", optional = true }
 wild = "2.2.1"
 glob = { workspace = true, optional = true }
-icu_collator = { workspace = true, optional = true, features = [
-  "compiled_data",
-] }
-icu_locale = { workspace = true, optional = true, features = ["compiled_data"] }
 itertools = { workspace = true, optional = true }
 time = { workspace = true, optional = true, features = [
   "formatting",
@@ -59,6 +55,16 @@ bigdecimal = { workspace = true, optional = true }
 num-traits = { workspace = true, optional = true }
 selinux = { workspace = true, optional = true }
 
+# ICU4X crates backing the i18n-* features
+icu_collator = { workspace = true, optional = true, features = [
+  "compiled_data",
+] }
+icu_decimal = { workspace = true, optional = true, features = [
+  "compiled_data",
+] }
+icu_locale = { workspace = true, optional = true, features = ["compiled_data"] }
+icu_provider = { workspace = true, optional = true }
+
 # Fluent dependencies
 fluent = { workspace = true }
 fluent-syntax = { workspace = true }
@@ -108,7 +114,10 @@ format = [
   "num-traits",
   "quoting-style",
 ]
-i18n = ["icu_collator", "icu_locale"]
+i18n-all = ["i18n-collator", "i18n-decimal"]
+i18n-common = ["icu_locale"]
+i18n-collator = ["i18n-common", "icu_collator"]
+i18n-decimal = ["i18n-common", "icu_decimal", "icu_provider"]
 mode = ["libc"]
 perms = ["entries", "libc", "walkdir"]
 buf-copy = []
@@ -116,7 +125,7 @@ parser = ["extendedbigdecimal", "glob", "num-traits"]
 pipes = []
 process = ["libc"]
 proc-info = ["tty", "walkdir"]
-quoting-style = ["i18n"]
+quoting-style = ["i18n-common"]
 ranges = []
 ringbuffer = []
 selinux = ["dep:selinux"]
diff --git a/src/uucore/src/lib/features.rs b/src/uucore/src/lib/features.rs
index fcc97b0f00a..3a622cd6857 100644
--- a/src/uucore/src/lib/features.rs
+++ b/src/uucore/src/lib/features.rs
@@ -26,7 +26,7 @@ pub mod format;
 pub mod fs;
 #[cfg(feature = "fsext")]
 pub mod fsext;
-#[cfg(feature = "i18n")]
+#[cfg(feature = "i18n-common")]
 pub mod i18n;
 #[cfg(feature = "lines")]
 pub mod lines;
diff --git a/src/uucore/src/lib/features/i18n/collator.rs b/src/uucore/src/lib/features/i18n/collator.rs
new file mode 100644
index 00000000000..fda8cd6e093
--- /dev/null
+++ b/src/uucore/src/lib/features/i18n/collator.rs
@@ -0,0 +1,44 @@
+// This file is part of the uutils coreutils package.
+//
+// For the full copyright and license information, please view the LICENSE
+// file that was distributed with this source code.
+
+use std::{cmp::Ordering, sync::OnceLock};
+
+use icu_collator::{self, CollatorBorrowed};
+
+use crate::i18n::{DEFAULT_LOCALE, get_collating_locale};
+
+pub use icu_collator::options::{
+    AlternateHandling, CaseLevel, CollatorOptions, MaxVariable, Strength,
+};
+
+static COLLATOR: OnceLock<CollatorBorrowed> = OnceLock::new();
+
+/// Initialize the collator unless one is already set.
+/// Returns `true` if this call performed the initialization.
+pub fn try_init_collator(opts: CollatorOptions) -> bool {
+    // Check first, so no collator is built just to be dropped when one is set.
+    let build = || CollatorBorrowed::try_new(get_collating_locale().0.clone().into(), opts);
+    COLLATOR.get().is_none() && COLLATOR.set(build().unwrap()).is_ok()
+}
+
+/// Initialize the collator; panics if it is already initialized.
+pub fn init_collator(opts: CollatorOptions) {
+    // Building the collator for the collating locale panics on failure too.
+    let collator = CollatorBorrowed::try_new(get_collating_locale().0.clone().into(), opts);
+    COLLATOR.set(collator.unwrap()).expect("Collator already initialized");
+}
+
+/// Compare both byte strings with regard to the current collating locale.
+/// Panics if a collator is needed but none was initialized beforehand.
+pub fn locale_cmp(left: &[u8], right: &[u8]) -> Ordering {
+    // If the detected locale is 'C', byte-wise comparison is all we need.
+    if get_collating_locale().0 == DEFAULT_LOCALE {
+        return left.cmp(right);
+    }
+
+    // Any other locale goes through the shared collator.
+    let collator = COLLATOR.get().expect("Collator was not initialized");
+    collator.compare_utf8(left, right)
+}
diff --git a/src/uucore/src/lib/features/i18n/decimal.rs b/src/uucore/src/lib/features/i18n/decimal.rs
new file mode 100644
index 00000000000..9fa2d8d7bc7
--- /dev/null
+++ b/src/uucore/src/lib/features/i18n/decimal.rs
@@ -0,0 +1,51 @@
+// This file is part of the uutils coreutils package.
+//
+// For the full copyright and license information, please view the LICENSE
+// file that was distributed with this source code.
+
+use std::sync::OnceLock;
+
+use icu_decimal::provider::DecimalSymbolsV1;
+use icu_locale::Locale;
+use icu_provider::prelude::*;
+
+use crate::i18n::get_numeric_locale;
+
+/// Return the decimal separator for the given locale.
+/// Panics if the baked provider cannot load decimal symbols for `loc`.
+fn get_decimal_separator(loc: Locale) -> String {
+    let data_locale = DataLocale::from(loc);
+    let id = DataIdentifierBorrowed::for_locale(&data_locale);
+    let metadata = DataRequestMetadata::default();
+
+    // Look the symbols up in the provider shipped with `icu_decimal`.
+    let symbols: DataResponse<DecimalSymbolsV1> = icu_decimal::provider::Baked
+        .load(DataRequest { id, metadata })
+        .unwrap();
+
+    symbols.payload.get().decimal_separator().to_string()
+}
+
+/// Return the decimal separator of the numeric locale we're working with.
+/// It is looked up on the first call and cached for all later calls.
+/// Example: say we need to format 1000.5
+///     en_US: 1,000.5 -> decimal separator is '.'
+///     fr_FR: 1 000,5 -> decimal separator is ','
+pub fn locale_decimal_separator() -> &'static str {
+    static DECIMAL_SEP: OnceLock<String> = OnceLock::new();
+
+    DECIMAL_SEP.get_or_init(|| get_decimal_separator(get_numeric_locale().0.clone()))
+}
+
+#[cfg(test)]
+mod tests {
+    use icu_locale::locale;
+
+    use super::get_decimal_separator;
+    // Spot-check one locale expected to use '.' and one expected to use ','.
+    #[test]
+    fn test_simple_separator() {
+        assert_eq!(get_decimal_separator(locale!("en")), ".");
+        assert_eq!(get_decimal_separator(locale!("fr")), ",");
+    }
+}
diff --git a/src/uucore/src/lib/features/i18n/mod.rs b/src/uucore/src/lib/features/i18n/mod.rs
index 5a7cf8ea3f6..c42d41c7ea1 100644
--- a/src/uucore/src/lib/features/i18n/mod.rs
+++ b/src/uucore/src/lib/features/i18n/mod.rs
@@ -1,7 +1,17 @@
+// This file is part of the uutils coreutils package.
+//
+// For the full copyright and license information, please view the LICENSE
+// file that was distributed with this source code.
+
 use std::sync::OnceLock;
 
 use icu_locale::{Locale, locale};
 
+#[cfg(feature = "i18n-collator")]
+pub mod collator;
+#[cfg(feature = "i18n-decimal")]
+pub mod decimal;
+
 /// The encoding specified by the locale, if specified
 /// Currently only supports ASCII and UTF-8 for the sake of simplicity.
 #[derive(Debug, PartialEq, Eq, Clone, Copy)]
@@ -12,48 +22,59 @@ pub enum UEncoding {
 
 const DEFAULT_LOCALE: Locale = locale!("en-US-posix");
 
-/// Deduce the locale from the current environment
-fn get_collating_locale() -> &'static (Locale, UEncoding) {
-    static COLLATING_LOCALE: OnceLock<(Locale, UEncoding)> = OnceLock::new();
+/// Look at 3 environment variables in the following order, skipping empty ones
+///
+/// 1. LC_ALL
+/// 2. `locale_name`
+/// 3. LANG
+///
+/// Or fallback on Posix locale, with ASCII encoding.
+fn get_locale_from_env(locale_name: &str) -> (Locale, UEncoding) {
+    let locale_var = ["LC_ALL", locale_name, "LANG"]
+        .iter()
+        .find_map(|&key| std::env::var(key).ok().filter(|v| !v.is_empty()));
 
-    COLLATING_LOCALE.get_or_init(|| {
-        // Look at 3 environment variables in the following order
-        //
-        // 1. LC_ALL
-        // 2. LC_COLLATE
-        // 3. LANG
-        //
-        // Or fallback on Posix locale, with ASCII encoding.
-
-        let locale_var = std::env::var("LC_ALL")
-            .or_else(|_| std::env::var("LC_COLLATE"))
-            .or_else(|_| std::env::var("LANG"));
-
-        if let Ok(locale_var_str) = locale_var {
-            let mut split = locale_var_str.split(&['.', '@']);
-
-            if let Some(simple) = split.next() {
-                let bcp47 = simple.replace("_", "-");
-                let locale = Locale::try_from_str(&bcp47).unwrap_or(DEFAULT_LOCALE);
-
-                // If locale parsing failed, parse the encoding part of the
-                // locale. Treat the special case of the given locale being "C"
-                // which becomes the default locale.
-                let encoding = if (locale != DEFAULT_LOCALE || bcp47 == "C")
-                    && split.next() == Some("UTF-8")
-                {
-                    UEncoding::Utf8
-                } else {
-                    UEncoding::Ascii
-                };
-                return (locale, encoding);
+    if let Some(locale_var_str) = locale_var {
+        let mut split = locale_var_str.split(&['.', '@']);
+
+        if let Some(simple) = split.next() {
+            // Naively convert the locale name to BCP47 tag format.
+            //
+            // See https://en.wikipedia.org/wiki/IETF_language_tag
+            let bcp47 = simple.replace("_", "-");
+            let locale = Locale::try_from_str(&bcp47).unwrap_or(DEFAULT_LOCALE);
+
+            // Unless locale parsing failed, parse the encoding part of the
+            // locale. Treat the special case of the given locale being "C"
+            // which becomes the default locale.
+            let encoding = if (locale != DEFAULT_LOCALE || bcp47 == "C")
+                && split
+                    .next()
+                    .is_some_and(|enc| enc.eq_ignore_ascii_case("utf-8"))
+            {
+                UEncoding::Utf8
             } else {
-                return (DEFAULT_LOCALE, UEncoding::Ascii);
+                UEncoding::Ascii
             };
+            return (locale, encoding);
         }
-        // Default POSIX locale representing LC_ALL=C
-        (DEFAULT_LOCALE, UEncoding::Ascii)
-    })
+    }
+    // Default POSIX locale representing LC_ALL=C
+    (DEFAULT_LOCALE, UEncoding::Ascii)
+}
+
+/// Get the collating locale from the environment
+fn get_collating_locale() -> &'static (Locale, UEncoding) {
+    static COLLATING_LOCALE: OnceLock<(Locale, UEncoding)> = OnceLock::new();
+
+    COLLATING_LOCALE.get_or_init(|| get_locale_from_env("LC_COLLATE"))
+}
+
+/// Get the numeric locale from the environment
+pub fn get_numeric_locale() -> &'static (Locale, UEncoding) {
+    static NUMERIC_LOCALE: OnceLock<(Locale, UEncoding)> = OnceLock::new();
+
+    NUMERIC_LOCALE.get_or_init(|| get_locale_from_env("LC_NUMERIC"))
 }
 
 /// Return the encoding deduced from the locale environment variable.
diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs
index 6a137b78728..8059dac9355 100644
--- a/src/uucore/src/lib/lib.rs
+++ b/src/uucore/src/lib/lib.rs
@@ -51,7 +51,7 @@ pub use crate::features::fast_inc;
 pub use crate::features::format;
 #[cfg(feature = "fs")]
 pub use crate::features::fs;
-#[cfg(feature = "i18n")]
+#[cfg(feature = "i18n-common")]
 pub use crate::features::i18n;
 #[cfg(feature = "lines")]
 pub use crate::features::lines;
@@ -124,6 +124,7 @@ use std::iter;
 #[cfg(unix)]
 use std::os::unix::ffi::{OsStrExt, OsStringExt};
 use std::str;
+use std::str::Utf8Chunk;
 use std::sync::{LazyLock, atomic::Ordering};
 
 /// Disables the custom signal handlers installed by Rust for stack-overflow handling. With those custom signal handlers processes ignore the first SIGBUS and SIGSEGV signal they receive.
@@ -377,6 +378,24 @@ pub fn os_string_from_vec(vec: Vec<u8>) -> mods::error::UResult<OsString> {
     Ok(s)
 }
 
+/// Converts an `OsString` into a `Vec<u8>`, parsing as UTF-8 on non-unix platforms.
+///
+/// This always succeeds on unix platforms,
+/// and fails on other platforms if the bytes can't be parsed as UTF-8.
+pub fn os_string_to_vec(s: OsString) -> mods::error::UResult<Vec<u8>> {
+    #[cfg(unix)]
+    let v = s.into_vec();
+    #[cfg(not(unix))]
+    let v = s
+        .into_string()
+        .map_err(|_| {
+            mods::error::UUsageError::new(1, "invalid UTF-8 was detected in one or more arguments")
+        })?
+        .into();
+
+    Ok(v)
+}
+
 /// Equivalent to `std::BufRead::lines` which outputs each line as a `Vec<u8>`,
 /// which avoids panicking on non UTF-8 input.
 pub fn read_byte_lines<R: std::io::Read>(
@@ -443,6 +462,91 @@ macro_rules! prompt_yes(
     })
 );
 
+/// Represent either a character or a byte.
+/// Used to iterate on partially valid UTF-8 data
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum CharByte {
+    Char(char),
+    Byte(u8),
+}
+
+impl From<char> for CharByte {
+    fn from(value: char) -> Self {
+        CharByte::Char(value)
+    }
+}
+
+impl From<u8> for CharByte {
+    fn from(value: u8) -> Self {
+        CharByte::Byte(value)
+    }
+}
+
+impl From<&u8> for CharByte {
+    fn from(value: &u8) -> Self {
+        CharByte::Byte(*value)
+    }
+}
+
+struct Utf8ChunkIterator<'a> {
+    iter: Box<dyn Iterator<Item = CharByte> + 'a>,
+}
+
+impl Iterator for Utf8ChunkIterator<'_> {
+    type Item = CharByte;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.iter.next()
+    }
+}
+
+impl<'a> From<Utf8Chunk<'a>> for Utf8ChunkIterator<'a> {
+    fn from(chk: Utf8Chunk<'a>) -> Utf8ChunkIterator<'a> {
+        Self {
+            iter: Box::new(
+                chk.valid()
+                    .chars()
+                    .map(CharByte::from)
+                    .chain(chk.invalid().iter().map(CharByte::from)),
+            ),
+        }
+    }
+}
+
+/// Iterates on the valid and invalid parts of a byte sequence with regard to
+/// the UTF-8 encoding.
+pub struct CharByteIterator<'a> {
+    iter: Box<dyn Iterator<Item = CharByte> + 'a>,
+}
+
+impl<'a> CharByteIterator<'a> {
+    /// Make a [`CharByteIterator`] from a byte slice. Valid UTF-8 is yielded as
+    /// [`CharByte::Char`]; each byte of an invalid sequence as [`CharByte::Byte`].
+    pub fn new(input: &'a [u8]) -> CharByteIterator<'a> {
+        Self {
+            iter: Box::new(input.utf8_chunks().flat_map(Utf8ChunkIterator::from)),
+        }
+    }
+}
+
+impl Iterator for CharByteIterator<'_> {
+    type Item = CharByte;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.iter.next()
+    }
+}
+
+pub trait IntoCharByteIterator<'a> {
+    fn iter_char_bytes(self) -> CharByteIterator<'a>;
+}
+
+impl<'a> IntoCharByteIterator<'a> for &'a [u8] {
+    fn iter_char_bytes(self) -> CharByteIterator<'a> {
+        CharByteIterator::new(self)
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/tests/by-util/test_expr.rs b/tests/by-util/test_expr.rs
index 55294dedaef..729b9129019 100644
--- a/tests/by-util/test_expr.rs
+++ b/tests/by-util/test_expr.rs
@@ -5,7 +5,7 @@
 // spell-checker:ignore αbcdef ; (people) kkos
 // spell-checker:ignore aabcccd aabcd aabd abbb abbbd abbcabc abbcac abbcbbbd abbcbd
 // spell-checker:ignore abbccd abcabc abcac acabc andand bigcmp bignum emptysub
-// spell-checker:ignore orempty oror
+// spell-checker:ignore orempty oror bcdef fedcb
 
 use uutests::new_ucmd;
 
@@ -207,43 +207,6 @@ fn test_and() {
     new_ucmd!().args(&["", "&", ""]).fails().stdout_only("0\n");
 }
 
-#[test]
-fn test_index() {
-    new_ucmd!()
-        .args(&["index", "αbcdef", "x"])
-        .fails_with_code(1)
-        .stdout_only("0\n");
-    new_ucmd!()
-        .args(&["index", "αbcdef", "α"])
-        .succeeds()
-        .stdout_only("1\n");
-    new_ucmd!()
-        .args(&["index", "αbc_δef", "δ"])
-        .succeeds()
-        .stdout_only("5\n");
-    new_ucmd!()
-        .args(&["index", "αbc_δef", "δf"])
-        .succeeds()
-        .stdout_only("5\n");
-    new_ucmd!()
-        .args(&["index", "αbcdef", "fb"])
-        .succeeds()
-        .stdout_only("2\n");
-    new_ucmd!()
-        .args(&["index", "αbcdef", "f"])
-        .succeeds()
-        .stdout_only("6\n");
-    new_ucmd!()
-        .args(&["index", "αbcdef_f", "f"])
-        .succeeds()
-        .stdout_only("6\n");
-
-    new_ucmd!()
-        .args(&["αbcdef", "index", "α"])
-        .fails_with_code(2)
-        .stderr_only("expr: syntax error: unexpected argument 'index'\n");
-}
-
 #[test]
 fn test_length_fail() {
     new_ucmd!().args(&["length", "αbcdef", "1"]).fails();
@@ -262,14 +225,6 @@ fn test_length() {
         .stderr_only("expr: syntax error: unexpected argument 'length'\n");
 }
 
-#[test]
-fn test_length_mb() {
-    new_ucmd!()
-        .args(&["length", "αbcdef"])
-        .succeeds()
-        .stdout_only("6\n");
-}
-
 #[test]
 fn test_regex_empty() {
     new_ucmd!().args(&["", ":", ""]).fails().stdout_only("0\n");
@@ -1504,3 +1459,471 @@ mod gnu_expr {
             .stderr_contains("syntax error: expecting ')' instead of 'a'");
     }
 }
+
+/// Test that `expr` correctly detects and handles locales
+mod locale_aware {
+    use uutests::new_ucmd;
+
+    #[test]
+    fn test_expr_collating() {
+        for (loc, code, output) in [
+            ("C", 0, "1\n"),
+            ("fr_FR.UTF-8", 1, "0\n"),
+            ("fr_FR.utf-8", 1, "0\n"),
+            ("en_US", 1, "0\n"),
+        ] {
+            new_ucmd!()
+                .args(&["50n", ">", "-51"])
+                .env("LC_ALL", loc)
+                .run()
+                .code_is(code)
+                .stdout_only(output);
+        }
+    }
+}
+
+/// This module reimplements the expr-multibyte.pl test
+#[cfg(target_os = "linux")]
+mod gnu_expr_multibyte {
+    use uutests::new_ucmd;
+
+    use uucore::os_str_from_bytes;
+
+    trait AsByteSlice<'a> {
+        fn into_bytes(self) -> &'a [u8];
+    }
+
+    impl<'a> AsByteSlice<'a> for &'a str {
+        fn into_bytes(self) -> &'a [u8] {
+            self.as_bytes()
+        }
+    }
+
+    impl<'a> AsByteSlice<'a> for &'a [u8] {
+        fn into_bytes(self) -> &'a [u8] {
+            self
+        }
+    }
+
+    impl<'a, const N: usize> AsByteSlice<'a> for &'a [u8; N] {
+        fn into_bytes(self) -> &'a [u8] {
+            self
+        }
+    }
+
+    const EXPRESSION: &[u8] =
+        "\u{1F14}\u{03BA}\u{03C6}\u{03C1}\u{03B1}\u{03C3}\u{03B9}\u{03C2}".as_bytes();
+
+    #[derive(Debug, Default, Clone, Copy)]
+    struct TestCase {
+        pub locale: &'static str,
+        pub out: Option<&'static [u8]>,
+        pub code: i32,
+    }
+
+    impl TestCase {
+        const FR: Self = Self::new("fr_FR.UTF-8");
+        const C: Self = Self::new("C");
+
+        const fn new(locale: &'static str) -> Self {
+            Self {
+                locale,
+                out: None,
+                code: 0,
+            }
+        }
+
+        fn out(mut self, out: impl AsByteSlice<'static>) -> Self {
+            self.out = Some(out.into_bytes());
+            self
+        }
+
+        fn code(mut self, code: i32) -> Self {
+            self.code = code;
+            self
+        }
+    }
+
+    fn check_test_case(args: &[&[u8]], tc: &TestCase) {
+        let args = args
+            .iter()
+            .map(|arg: &&[u8]| os_str_from_bytes(arg).unwrap())
+            .collect::<Vec<_>>();
+
+        let res = new_ucmd!().env("LC_ALL", tc.locale).args(&args).run();
+
+        res.code_is(tc.code);
+
+        if let Some(out) = tc.out {
+            let mut out = out.to_owned();
+            out.push(b'\n');
+            res.stdout_is_bytes(&out);
+        } else {
+            res.no_stdout();
+        }
+    }
+
+    // LENGTH EXPRESSIONS
+
+    // sanity check
+    #[test]
+    fn test_l1() {
+        let args: &[&[u8]] = &[b"length", b"abcdef"];
+
+        let cases = &[TestCase::FR.out("6"), TestCase::C.out("6")];
+
+        for tc in cases {
+            check_test_case(args, tc);
+        }
+    }
+
+    // A single multibyte character in the beginning of the string \xCE\xB1 is
+    // UTF-8 for "U+03B1 GREEK SMALL LETTER ALPHA"
+    #[test]
+    fn test_l2() {
+        let args: &[&[u8]] = &[b"length", b"\xCE\xB1bcdef"];
+
+        let cases = &[TestCase::FR.out("6"), TestCase::C.out("7")];
+
+        for tc in cases {
+            check_test_case(args, tc);
+        }
+    }
+
+    // A single multibyte character in the middle of the string \xCE\xB4 is
+    // UTF-8 for "U+03B4 GREEK SMALL LETTER DELTA"
+    #[test]
+    fn test_l3() {
+        let args: &[&[u8]] = &[b"length", b"abc\xCE\xB4ef"];
+
+        let cases = &[TestCase::FR.out("6"), TestCase::C.out("7")];
+
+        for tc in cases {
+            check_test_case(args, tc);
+        }
+    }
+
+    // A single multibyte character in the end of the string
+    #[test]
+    fn test_l4() {
+        let args: &[&[u8]] = &[b"length", b"fedcb\xCE\xB1"];
+
+        let cases = &[TestCase::FR.out("6"), TestCase::C.out("7")];
+
+        for tc in cases {
+            check_test_case(args, tc);
+        }
+    }
+
+    // An invalid multibyte sequence
+    #[test]
+    fn test_l5() {
+        let args: &[&[u8]] = &[b"length", b"\xB1aaa"];
+
+        let cases = &[TestCase::FR.out("4"), TestCase::C.out("4")];
+
+        for tc in cases {
+            check_test_case(args, tc);
+        }
+    }
+
+    // An incomplete multibyte sequence at the end of the string
+    #[test]
+    fn test_l6() {
+        let args: &[&[u8]] = &[b"length", b"aaa\xCE"];
+
+        let cases = &[TestCase::FR.out("4"), TestCase::C.out("4")];
+
+        for tc in cases {
+            check_test_case(args, tc);
+        }
+    }
+
+    // A string made only of multibyte characters (8 characters, 17 bytes)
+    #[test]
+    fn test_l7() {
+        let args: &[&[u8]] = &[b"length", EXPRESSION];
+
+        let cases = &[TestCase::FR.out("8"), TestCase::C.out("17")];
+
+        for tc in cases {
+            check_test_case(args, tc);
+        }
+    }
+
+    // INDEX EXPRESSIONS
+
+    // sanity check
+    #[test]
+    fn test_i1() {
+        let args: &[&[u8]] = &[b"index", b"abcdef", b"fb"];
+
+        let cases = &[TestCase::FR.out("2"), TestCase::C.out("2")];
+
+        for tc in cases {
+            check_test_case(args, tc);
+        }
+    }
+
+    // Search for a single-octet
+    #[test]
+    fn test_i2() {
+        let args: &[&[u8]] = &[b"index", b"\xCE\xB1bc\xCE\xB4ef", b"b"];
+
+        let cases = &[TestCase::FR.out("2"), TestCase::C.out("3")];
+
+        for tc in cases {
+            check_test_case(args, tc);
+        }
+    }
+
+    #[test]
+    fn test_i3() {
+        let args: &[&[u8]] = &[b"index", b"\xCE\xB1bc\xCE\xB4ef", b"f"];
+
+        let cases = &[TestCase::FR.out("6"), TestCase::C.out("8")];
+
+        for tc in cases {
+            check_test_case(args, tc);
+        }
+    }
+
+    // Search for multibyte character.
+    // In the C locale, the search string is treated as two octets.
+    // the first of them (\xCE) matches the first octet of the input string.
+    #[test]
+    fn test_i4() {
+        let args: &[&[u8]] = &[b"index", b"\xCE\xB1bc\xCE\xB4ef", b"\xCE\xB4"];
+
+        let cases = &[TestCase::FR.out("4"), TestCase::C.out("1")];
+
+        for tc in cases {
+            check_test_case(args, tc);
+        }
+    }
+
+    // Invalid multibyte sequence in the input string, treated as a single
+    // octet.
+    #[test]
+    fn test_i5() {
+        let args: &[&[u8]] = &[b"index", b"\xCEbc\xCE\xB4ef", b"\xCE\xB4"];
+
+        let cases = &[TestCase::FR.out("4"), TestCase::C.out("1")];
+
+        for tc in cases {
+            check_test_case(args, tc);
+        }
+    }
+
+    // Invalid multibyte sequence in the search string, treated as a single
+    // octet. In multibyte locale, there should be no match, expr returns and
+    // prints zero, and terminates with exit-code 1 (as per POSIX).
+    #[test]
+    fn test_i6() {
+        let args: &[&[u8]] = &[b"index", b"\xCE\xB1bc\xCE\xB4ef", b"\xB4"];
+
+        let cases = &[TestCase::FR.out("0").code(1), TestCase::C.out("6")];
+
+        for tc in cases {
+            check_test_case(args, tc);
+        }
+    }
+
+    // Edge-case: invalid multibyte sequence BOTH in the input string and in
+    // the search string: expr should find a match.
+    #[test]
+    fn test_i7() {
+        let args: &[&[u8]] = &[b"index", b"\xCE\xB1bc\xB4ef", b"\xB4"];
+
+        let cases = &[TestCase::FR.out("4")];
+
+        for tc in cases {
+            check_test_case(args, tc);
+        }
+    }
+
+    // SUBSTR EXPRESSIONS
+
+    // sanity check
+    #[test]
+    fn test_s1() {
+        let args: &[&[u8]] = &[b"substr", b"abcdef", b"2", b"3"];
+
+        let cases = &[TestCase::FR.out("bcd"), TestCase::C.out("bcd")];
+
+        for tc in cases {
+            check_test_case(args, tc);
+        }
+    }
+
+    #[test]
+    fn test_s2() {
+        let args: &[&[u8]] = &[b"substr", b"\xCE\xB1bc\xCE\xB4ef", b"1", b"1"];
+
+        let cases = &[TestCase::FR.out(b"\xCE\xB1"), TestCase::C.out(b"\xCE")];
+
+        for tc in cases {
+            check_test_case(args, tc);
+        }
+    }
+
+    #[test]
+    fn test_s3() {
+        let args: &[&[u8]] = &[b"substr", b"\xCE\xB1bc\xCE\xB4ef", b"3", b"2"];
+
+        let cases = &[TestCase::FR.out(b"c\xCE\xB4"), TestCase::C.out("bc")];
+
+        for tc in cases {
+            check_test_case(args, tc);
+        }
+    }
+
+    #[test]
+    fn test_s4() {
+        let args: &[&[u8]] = &[b"substr", b"\xCE\xB1bc\xCE\xB4ef", b"4", b"1"];
+
+        let cases = &[TestCase::FR.out(b"\xCE\xB4"), TestCase::C.out("c")];
+
+        for tc in cases {
+            check_test_case(args, tc);
+        }
+    }
+
+    #[test]
+    fn test_s5() {
+        let args: &[&[u8]] = &[b"substr", b"\xCE\xB1bc\xCE\xB4ef", b"4", b"2"];
+
+        let cases = &[TestCase::FR.out(b"\xCE\xB4e"), TestCase::C.out(b"c\xCE")];
+
+        for tc in cases {
+            check_test_case(args, tc);
+        }
+    }
+
+    #[test]
+    fn test_s6() {
+        let args: &[&[u8]] = &[b"substr", b"\xCE\xB1bc\xCE\xB4ef", b"6", b"1"];
+
+        let cases = &[TestCase::FR.out(b"f"), TestCase::C.out(b"\xB4")];
+
+        for tc in cases {
+            check_test_case(args, tc);
+        }
+    }
+
+    #[test]
+    fn test_s7() {
+        let args: &[&[u8]] = &[b"substr", b"\xCE\xB1bc\xCE\xB4ef", b"7", b"1"];
+
+        let cases = &[TestCase::FR.out("").code(1), TestCase::C.out(b"e")];
+
+        for tc in cases {
+            check_test_case(args, tc);
+        }
+    }
+
+    #[test]
+    fn test_s8() {
+        let args: &[&[u8]] = &[b"substr", b"\xCE\xB1bc\xB4ef", b"3", b"3"];
+
+        let cases = &[TestCase::FR.out(b"c\xB4e"), TestCase::C.out(b"bc\xB4")];
+
+        for tc in cases {
+            check_test_case(args, tc);
+        }
+    }
+
+    // MATCH EXPRESSIONS
+
+    // sanity check
+    #[test]
+    fn test_m1() {
+        let args: &[&[u8]] = &[b"match", b"abcdef", b"ab"];
+
+        let cases = &[TestCase::FR.out("2"), TestCase::C.out("2")];
+
+        for tc in cases {
+            check_test_case(args, tc);
+        }
+    }
+    #[test]
+    fn test_m2() {
+        let args: &[&[u8]] = &[b"match", b"abcdef", b"\\(ab\\)"];
+
+        let cases = &[TestCase::FR.out("ab"), TestCase::C.out("ab")];
+
+        for tc in cases {
+            check_test_case(args, tc);
+        }
+    }
+
+    // The regex engine should match the '.' to the first multibyte character.
+    #[test]
+    #[ignore = "not implemented"]
+    fn test_m3() {
+        let args: &[&[u8]] = &[b"match", b"\xCE\xB1bc\xCE\xB4ef", b".bc"];
+
+        let cases = &[TestCase::FR.out("3"), TestCase::C.code(1)];
+
+        for tc in cases {
+            check_test_case(args, tc);
+        }
+    }
+
+    // The opposite of the previous test: two dots should only match the two
+    // octets in single-byte locale.
+    #[test]
+    #[ignore = "not implemented"]
+    fn test_m4() {
+        let args: &[&[u8]] = &[b"match", b"\xCE\xB1bc\xCE\xB4ef", b"..bc"];
+
+        let cases = &[TestCase::FR.out("0").code(1), TestCase::C.out("4")];
+
+        for tc in cases {
+            check_test_case(args, tc);
+        }
+    }
+
+    // Match with grouping - a single dot should return the two octets
+    #[test]
+    #[ignore = "not implemented"]
+    fn test_m5() {
+        let args: &[&[u8]] = &[b"match", b"\xCE\xB1bc\xCE\xB4ef", b"\\(.b\\)c"];
+
+        let cases = &[TestCase::FR.out(b"\xCE\xB1b"), TestCase::C.code(1)];
+
+        for tc in cases {
+            check_test_case(args, tc);
+        }
+    }
+
+    // Invalid multibyte sequences - regex should not match in multibyte locale
+    // (POSIX requirement)
+    #[test]
+    #[ignore = "not implemented"]
+    fn test_m6() {
+        let args: &[&[u8]] = &[b"match", b"\xCEbc\xCE\xB4ef", b"\\(.\\)"];
+
+        let cases = &[TestCase::FR.code(1), TestCase::C.out(b"\xCE")];
+
+        for tc in cases {
+            check_test_case(args, tc);
+        }
+    }
+
+    // Character classes: in the multibyte case, the regex engine understands
+    // there is a single multibyte character in the brackets.
+    // In the single byte case, the regex engine sees two octets in the
+    // character class ('\xCE' and '\xB1') - and it matches the first one.
+    #[test]
+    #[ignore = "not implemented"]
+    fn test_m7() {
+        let args: &[&[u8]] = &[b"match", b"\xCE\xB1bc\xCE\xB4ef", b"\\(.\\)"];
+
+        let cases = &[TestCase::FR.out(b"\xCE\xB1"), TestCase::C.out(b"\xCE")];
+
+        for tc in cases {
+            check_test_case(args, tc);
+        }
+    }
+}